3#include <linux/bitops.h>
4#include <linux/slab.h>
5#include <linux/bio.h>
6#include <linux/mm.h>
7#include <linux/pagemap.h>
8#include <linux/page-flags.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
12#include <linux/writeback.h>
13#include <linux/pagevec.h>
14#include <linux/prefetch.h>
15#include <linux/cleancache.h>
16#include "extent_io.h"
17#include "extent_map.h"
18#include "ctree.h"
19#include "btrfs_inode.h"
20#include "volumes.h"
21#include "check-integrity.h"
22#include "locking.h"
23#include "rcu-string.h"
24#include "backref.h"
25#include "disk-io.h"
26
27static struct kmem_cache *extent_state_cache;
28static struct kmem_cache *extent_buffer_cache;
29static struct bio_set btrfs_bioset;
30
31static inline bool extent_state_in_tree(const struct extent_state *state)
32{
33 return !RB_EMPTY_NODE(&state->rb_node);
34}
35
36#ifdef CONFIG_BTRFS_DEBUG
37static LIST_HEAD(buffers);
38static LIST_HEAD(states);
39
40static DEFINE_SPINLOCK(leak_lock);
41
42static inline
43void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
44{
45 unsigned long flags;
46
47 spin_lock_irqsave(&leak_lock, flags);
48 list_add(new, head);
49 spin_unlock_irqrestore(&leak_lock, flags);
50}
51
52static inline
53void btrfs_leak_debug_del(struct list_head *entry)
54{
55 unsigned long flags;
56
57 spin_lock_irqsave(&leak_lock, flags);
58 list_del(entry);
59 spin_unlock_irqrestore(&leak_lock, flags);
60}
61
62static inline
63void btrfs_leak_debug_check(void)
64{
65 struct extent_state *state;
66 struct extent_buffer *eb;
67
68 while (!list_empty(&states)) {
69 state = list_entry(states.next, struct extent_state, leak_list);
70 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
71 state->start, state->end, state->state,
72 extent_state_in_tree(state),
73 refcount_read(&state->refs));
74 list_del(&state->leak_list);
75 kmem_cache_free(extent_state_cache, state);
76 }
77
78 while (!list_empty(&buffers)) {
79 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
80 pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
81 eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
82 list_del(&eb->leak_list);
83 kmem_cache_free(extent_buffer_cache, eb);
84 }
85}
86
87#define btrfs_debug_check_extent_io_range(tree, start, end) \
88 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
89static inline void __btrfs_debug_check_extent_io_range(const char *caller,
90 struct extent_io_tree *tree, u64 start, u64 end)
91{
92 if (tree->ops && tree->ops->check_extent_io_range)
93 tree->ops->check_extent_io_range(tree->private_data, caller,
94 start, end);
95}
96#else
97#define btrfs_leak_debug_add(new, head) do {} while (0)
98#define btrfs_leak_debug_del(entry) do {} while (0)
99#define btrfs_leak_debug_check() do {} while (0)
100#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
101#endif
102
103#define BUFFER_LRU_MAX 64
104
105struct tree_entry {
106 u64 start;
107 u64 end;
108 struct rb_node rb_node;
109};
110
111struct extent_page_data {
112 struct bio *bio;
113 struct extent_io_tree *tree;

	/*
	 * Tells writepage not to lock the state bits for this range; it
	 * still does the unlocking.
	 */
117 unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
120 unsigned int sync_io:1;
121};
122
123static int add_extent_changeset(struct extent_state *state, unsigned bits,
124 struct extent_changeset *changeset,
125 int set)
126{
127 int ret;
128
129 if (!changeset)
130 return 0;
131 if (set && (state->state & bits) == bits)
132 return 0;
133 if (!set && (state->state & bits) == 0)
134 return 0;
135 changeset->bytes_changed += state->end - state->start + 1;
136 ret = ulist_add(&changeset->range_changed, state->start, state->end,
137 GFP_ATOMIC);
138 return ret;
139}
140
141static void flush_write_bio(struct extent_page_data *epd);
142
143static inline struct btrfs_fs_info *
144tree_fs_info(struct extent_io_tree *tree)
145{
146 if (tree->ops)
147 return tree->ops->tree_fs_info(tree->private_data);
148 return NULL;
149}
150
151int __init extent_io_init(void)
152{
153 extent_state_cache = kmem_cache_create("btrfs_extent_state",
154 sizeof(struct extent_state), 0,
155 SLAB_MEM_SPREAD, NULL);
156 if (!extent_state_cache)
157 return -ENOMEM;
158
159 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
160 sizeof(struct extent_buffer), 0,
161 SLAB_MEM_SPREAD, NULL);
162 if (!extent_buffer_cache)
163 goto free_state_cache;
164
165 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
166 offsetof(struct btrfs_io_bio, bio),
167 BIOSET_NEED_BVECS))
168 goto free_buffer_cache;
169
170 if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
171 goto free_bioset;
172
173 return 0;
174
175free_bioset:
176 bioset_exit(&btrfs_bioset);
177
178free_buffer_cache:
179 kmem_cache_destroy(extent_buffer_cache);
180 extent_buffer_cache = NULL;
181
182free_state_cache:
183 kmem_cache_destroy(extent_state_cache);
184 extent_state_cache = NULL;
185 return -ENOMEM;
186}
187
188void __cold extent_io_exit(void)
189{
190 btrfs_leak_debug_check();

	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy the caches.
	 */
196 rcu_barrier();
197 kmem_cache_destroy(extent_state_cache);
198 kmem_cache_destroy(extent_buffer_cache);
199 bioset_exit(&btrfs_bioset);
200}
201
202void extent_io_tree_init(struct extent_io_tree *tree,
203 void *private_data)
204{
205 tree->state = RB_ROOT;
206 tree->ops = NULL;
207 tree->dirty_bytes = 0;
208 spin_lock_init(&tree->lock);
209 tree->private_data = private_data;
210}
211
212static struct extent_state *alloc_extent_state(gfp_t mask)
213{
214 struct extent_state *state;

	/*
	 * The given mask might not be appropriate for the slab allocator,
	 * drop the additional memory placement flags.
	 */
220 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
221 state = kmem_cache_alloc(extent_state_cache, mask);
222 if (!state)
223 return state;
224 state->state = 0;
225 state->failrec = NULL;
226 RB_CLEAR_NODE(&state->rb_node);
227 btrfs_leak_debug_add(&state->leak_list, &states);
228 refcount_set(&state->refs, 1);
229 init_waitqueue_head(&state->wq);
230 trace_alloc_extent_state(state, mask, _RET_IP_);
231 return state;
232}
233
234void free_extent_state(struct extent_state *state)
235{
236 if (!state)
237 return;
238 if (refcount_dec_and_test(&state->refs)) {
239 WARN_ON(extent_state_in_tree(state));
240 btrfs_leak_debug_del(&state->leak_list);
241 trace_free_extent_state(state, _RET_IP_);
242 kmem_cache_free(extent_state_cache, state);
243 }
244}
245
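/*
 * Insert @node into the tree keyed by [start, end].  If an existing entry
 * already covers @offset, it is returned and nothing is inserted; otherwise
 * the new node is linked, rebalanced and NULL is returned.  When @p_in and
 * @parent_in are supplied from a previous search, the tree walk is skipped.
 */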
246static struct rb_node *tree_insert(struct rb_root *root,
247 struct rb_node *search_start,
248 u64 offset,
249 struct rb_node *node,
250 struct rb_node ***p_in,
251 struct rb_node **parent_in)
252{
253 struct rb_node **p;
254 struct rb_node *parent = NULL;
255 struct tree_entry *entry;
256
257 if (p_in && parent_in) {
258 p = *p_in;
259 parent = *parent_in;
260 goto do_insert;
261 }
262
263 p = search_start ? &search_start : &root->rb_node;
264 while (*p) {
265 parent = *p;
266 entry = rb_entry(parent, struct tree_entry, rb_node);
267
268 if (offset < entry->start)
269 p = &(*p)->rb_left;
270 else if (offset > entry->end)
271 p = &(*p)->rb_right;
272 else
273 return parent;
274 }
275
276do_insert:
277 rb_link_node(node, parent, p);
278 rb_insert_color(node, root);
279 return NULL;
280}
281
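/*
 * Search the tree for an extent_state that contains @offset.  Returns the
 * matching rb_node, or NULL on a miss.  On a miss the optional out pointers
 * are filled in: @prev_ret and @next_ret point at the entries neighbouring
 * @offset, and @p_ret/@parent_ret record where a new node would be linked.
 */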
282static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
283 struct rb_node **prev_ret,
284 struct rb_node **next_ret,
285 struct rb_node ***p_ret,
286 struct rb_node **parent_ret)
287{
288 struct rb_root *root = &tree->state;
289 struct rb_node **n = &root->rb_node;
290 struct rb_node *prev = NULL;
291 struct rb_node *orig_prev = NULL;
292 struct tree_entry *entry;
293 struct tree_entry *prev_entry = NULL;
294
295 while (*n) {
296 prev = *n;
297 entry = rb_entry(prev, struct tree_entry, rb_node);
298 prev_entry = entry;
299
300 if (offset < entry->start)
301 n = &(*n)->rb_left;
302 else if (offset > entry->end)
303 n = &(*n)->rb_right;
304 else
305 return *n;
306 }
307
308 if (p_ret)
309 *p_ret = n;
310 if (parent_ret)
311 *parent_ret = prev;
312
313 if (prev_ret) {
314 orig_prev = prev;
315 while (prev && offset > prev_entry->end) {
316 prev = rb_next(prev);
317 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
318 }
319 *prev_ret = prev;
320 prev = orig_prev;
321 }
322
323 if (next_ret) {
324 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
325 while (prev && offset < prev_entry->start) {
326 prev = rb_prev(prev);
327 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
328 }
329 *next_ret = prev;
330 }
331 return NULL;
332}
333
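/*
 * Search for the extent_state containing @offset.  If there is no exact
 * match, return the first entry that ends after @offset instead, and report
 * the insertion point through @p_ret and @parent_ret.
 */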
334static inline struct rb_node *
335tree_search_for_insert(struct extent_io_tree *tree,
336 u64 offset,
337 struct rb_node ***p_ret,
338 struct rb_node **parent_ret)
339{
340 struct rb_node *prev = NULL;
341 struct rb_node *ret;
342
343 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
344 if (!ret)
345 return prev;
346 return ret;
347}
348
349static inline struct rb_node *tree_search(struct extent_io_tree *tree,
350 u64 offset)
351{
352 return tree_search_for_insert(tree, offset, NULL, NULL);
353}
354
355static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
356 struct extent_state *other)
357{
358 if (tree->ops && tree->ops->merge_extent_hook)
359 tree->ops->merge_extent_hook(tree->private_data, new, other);
360}
361
/*
 * Utility function to look for merge candidates inside a given range.  Any
 * extents with matching state are merged together into a single extent in
 * the tree.  Extents with EXTENT_IOBITS or EXTENT_BOUNDARY in their state
 * are never merged, because the end_io handlers need to be able to operate
 * on them without taking the tree lock.
 */
371static void merge_state(struct extent_io_tree *tree,
372 struct extent_state *state)
373{
374 struct extent_state *other;
375 struct rb_node *other_node;
376
377 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
378 return;
379
380 other_node = rb_prev(&state->rb_node);
381 if (other_node) {
382 other = rb_entry(other_node, struct extent_state, rb_node);
383 if (other->end == state->start - 1 &&
384 other->state == state->state) {
385 merge_cb(tree, state, other);
386 state->start = other->start;
387 rb_erase(&other->rb_node, &tree->state);
388 RB_CLEAR_NODE(&other->rb_node);
389 free_extent_state(other);
390 }
391 }
392 other_node = rb_next(&state->rb_node);
393 if (other_node) {
394 other = rb_entry(other_node, struct extent_state, rb_node);
395 if (other->start == state->end + 1 &&
396 other->state == state->state) {
397 merge_cb(tree, state, other);
398 state->end = other->end;
399 rb_erase(&other->rb_node, &tree->state);
400 RB_CLEAR_NODE(&other->rb_node);
401 free_extent_state(other);
402 }
403 }
404}
405
406static void set_state_cb(struct extent_io_tree *tree,
407 struct extent_state *state, unsigned *bits)
408{
409 if (tree->ops && tree->ops->set_bit_hook)
410 tree->ops->set_bit_hook(tree->private_data, state, bits);
411}
412
413static void clear_state_cb(struct extent_io_tree *tree,
414 struct extent_state *state, unsigned *bits)
415{
416 if (tree->ops && tree->ops->clear_bit_hook)
417 tree->ops->clear_bit_hook(tree->private_data, state, bits);
418}
419
420static void set_state_bits(struct extent_io_tree *tree,
421 struct extent_state *state, unsigned *bits,
422 struct extent_changeset *changeset);
423
/*
 * Insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
434static int insert_state(struct extent_io_tree *tree,
435 struct extent_state *state, u64 start, u64 end,
436 struct rb_node ***p,
437 struct rb_node **parent,
438 unsigned *bits, struct extent_changeset *changeset)
439{
440 struct rb_node *node;
441
442 if (end < start)
443 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
444 end, start);
445 state->start = start;
446 state->end = end;
447
448 set_state_bits(tree, state, bits, changeset);
449
450 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
451 if (node) {
452 struct extent_state *found;
453 found = rb_entry(node, struct extent_state, rb_node);
454 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
455 found->start, found->end, start, end);
456 return -EEXIST;
457 }
458 merge_state(tree, state);
459 return 0;
460}
461
462static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
463 u64 split)
464{
465 if (tree->ops && tree->ops->split_extent_hook)
466 tree->ops->split_extent_hook(tree->private_data, orig, split);
467}
468
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].  After
 * calling, there are two extent state structs in the tree:
 *	prealloc: [orig->start, split - 1]
 *	orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held by
 * the caller.
 */
483static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
484 struct extent_state *prealloc, u64 split)
485{
486 struct rb_node *node;
487
488 split_cb(tree, orig, split);
489
490 prealloc->start = orig->start;
491 prealloc->end = split - 1;
492 prealloc->state = orig->state;
493 orig->start = split;
494
495 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
496 &prealloc->rb_node, NULL, NULL);
497 if (node) {
498 free_extent_state(prealloc);
499 return -EEXIST;
500 }
501 return 0;
502}
503
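/* Return the extent_state following @state in the tree, or NULL at the end. */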
504static struct extent_state *next_state(struct extent_state *state)
505{
506 struct rb_node *next = rb_next(&state->rb_node);
507 if (next)
508 return rb_entry(next, struct extent_state, rb_node);
509 else
510 return NULL;
511}
512
/*
 * Utility function to clear some bits in an extent state struct.  It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the struct
 * is freed and removed from the tree.
 */
520static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
521 struct extent_state *state,
522 unsigned *bits, int wake,
523 struct extent_changeset *changeset)
524{
525 struct extent_state *next;
526 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
527 int ret;
528
529 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
530 u64 range = state->end - state->start + 1;
531 WARN_ON(range > tree->dirty_bytes);
532 tree->dirty_bytes -= range;
533 }
534 clear_state_cb(tree, state, bits);
535 ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
536 BUG_ON(ret < 0);
537 state->state &= ~bits_to_clear;
538 if (wake)
539 wake_up(&state->wq);
540 if (state->state == 0) {
541 next = next_state(state);
542 if (extent_state_in_tree(state)) {
543 rb_erase(&state->rb_node, &tree->state);
544 RB_CLEAR_NODE(&state->rb_node);
545 free_extent_state(state);
546 } else {
547 WARN_ON(1);
548 }
549 } else {
550 merge_state(tree, state);
551 next = next_state(state);
552 }
553 return next;
554}
555
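/* Fall back to an atomic allocation if no state was preallocated. */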
556static struct extent_state *
557alloc_extent_state_atomic(struct extent_state *prealloc)
558{
559 if (!prealloc)
560 prealloc = alloc_extent_state(GFP_ATOMIC);
561
562 return prealloc;
563}
564
565static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
566{
567 btrfs_panic(tree_fs_info(tree), err,
568 "Locking error: Extent tree was modified by another thread while locked.");
569}
570
/*
 * Clear some bits on a range in the tree.  This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
 * given range from the tree regardless of state (ie. for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
583int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
584 unsigned bits, int wake, int delete,
585 struct extent_state **cached_state,
586 gfp_t mask, struct extent_changeset *changeset)
587{
588 struct extent_state *state;
589 struct extent_state *cached;
590 struct extent_state *prealloc = NULL;
591 struct rb_node *node;
592 u64 last_end;
593 int err;
594 int clear = 0;
595
596 btrfs_debug_check_extent_io_range(tree, start, end);
597
598 if (bits & EXTENT_DELALLOC)
599 bits |= EXTENT_NORESERVE;
600
601 if (delete)
602 bits |= ~EXTENT_CTLBITS;
603 bits |= EXTENT_FIRST_DELALLOC;
604
605 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
606 clear = 1;
607again:
608 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care about allocation failure here; we may end up
		 * not needing the preallocated extent state at all, e.g. if
		 * the existing states already cover exactly our input range.
		 * If we do end up needing one we allocate it later.
		 */
616 prealloc = alloc_extent_state(mask);
617 }
618
619 spin_lock(&tree->lock);
620 if (cached_state) {
621 cached = *cached_state;
622
623 if (clear) {
624 *cached_state = NULL;
625 cached_state = NULL;
626 }
627
628 if (cached && extent_state_in_tree(cached) &&
629 cached->start <= start && cached->end > start) {
630 if (clear)
631 refcount_dec(&cached->refs);
632 state = cached;
633 goto hit_next;
634 }
635 if (clear)
636 free_extent_state(cached);
637 }
638
	/* This search finds the extents that end after our range starts. */
642 node = tree_search(tree, start);
643 if (!node)
644 goto out;
645 state = rb_entry(node, struct extent_state, rb_node);
646hit_next:
647 if (state->start > end)
648 goto out;
649 WARN_ON(state->end < start);
650 last_end = state->end;
651
652
653 if (!(state->state & bits)) {
654 state = next_state(state);
655 goto next;
656 }
657
	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired
	 * bit on it.
	 */
674 if (state->start < start) {
675 prealloc = alloc_extent_state_atomic(prealloc);
676 BUG_ON(!prealloc);
677 err = split_state(tree, state, prealloc, start);
678 if (err)
679 extent_io_tree_panic(tree, err);
680
681 prealloc = NULL;
682 if (err)
683 goto out;
684 if (state->end <= end) {
685 state = clear_state_bit(tree, state, &bits, wake,
686 changeset);
687 goto next;
688 }
689 goto search_again;
690 }
691
	/*
	 * | ---- desired range ---- |
	 *                        |    state    |
	 *
	 * We need to split the extent, and clear the bit on the first half.
	 */
697 if (state->start <= end && state->end > end) {
698 prealloc = alloc_extent_state_atomic(prealloc);
699 BUG_ON(!prealloc);
700 err = split_state(tree, state, prealloc, end + 1);
701 if (err)
702 extent_io_tree_panic(tree, err);
703
704 if (wake)
705 wake_up(&state->wq);
706
707 clear_state_bit(tree, prealloc, &bits, wake, changeset);
708
709 prealloc = NULL;
710 goto out;
711 }
712
713 state = clear_state_bit(tree, state, &bits, wake, changeset);
714next:
715 if (last_end == (u64)-1)
716 goto out;
717 start = last_end + 1;
718 if (start <= end && state && !need_resched())
719 goto hit_next;
720
721search_again:
722 if (start > end)
723 goto out;
724 spin_unlock(&tree->lock);
725 if (gfpflags_allow_blocking(mask))
726 cond_resched();
727 goto again;
728
729out:
730 spin_unlock(&tree->lock);
731 if (prealloc)
732 free_extent_state(prealloc);
733
734 return 0;
735
736}
737
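/* Drop the tree lock and sleep until @state's waitqueue is woken. */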
738static void wait_on_state(struct extent_io_tree *tree,
739 struct extent_state *state)
740 __releases(tree->lock)
741 __acquires(tree->lock)
742{
743 DEFINE_WAIT(wait);
744 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
745 spin_unlock(&tree->lock);
746 schedule();
747 spin_lock(&tree->lock);
748 finish_wait(&state->wq, &wait);
749}
750

/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
756static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
757 unsigned long bits)
758{
759 struct extent_state *state;
760 struct rb_node *node;
761
762 btrfs_debug_check_extent_io_range(tree, start, end);
763
764 spin_lock(&tree->lock);
765again:
766 while (1) {
		/*
		 * This search finds the extents that end after our range
		 * starts.
		 */
771 node = tree_search(tree, start);
772process_node:
773 if (!node)
774 break;
775
776 state = rb_entry(node, struct extent_state, rb_node);
777
778 if (state->start > end)
779 goto out;
780
781 if (state->state & bits) {
782 start = state->start;
783 refcount_inc(&state->refs);
784 wait_on_state(tree, state);
785 free_extent_state(state);
786 goto again;
787 }
788 start = state->end + 1;
789
790 if (start > end)
791 break;
792
793 if (!cond_resched_lock(&tree->lock)) {
794 node = rb_next(node);
795 goto process_node;
796 }
797 }
798out:
799 spin_unlock(&tree->lock);
800}
801
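/* Set bits on @state, updating dirty_bytes accounting and the changeset. */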
802static void set_state_bits(struct extent_io_tree *tree,
803 struct extent_state *state,
804 unsigned *bits, struct extent_changeset *changeset)
805{
806 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
807 int ret;
808
809 set_state_cb(tree, state, bits);
810 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
811 u64 range = state->end - state->start + 1;
812 tree->dirty_bytes += range;
813 }
814 ret = add_extent_changeset(state, bits_to_set, changeset, 1);
815 BUG_ON(ret < 0);
816 state->state |= bits_to_set;
817}
818
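/*
 * Stash a reference to @state in *@cached_ptr if nothing is cached yet and
 * the state carries any of @flags (or @flags is 0).
 */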
819static void cache_state_if_flags(struct extent_state *state,
820 struct extent_state **cached_ptr,
821 unsigned flags)
822{
823 if (cached_ptr && !(*cached_ptr)) {
824 if (!flags || (state->state & flags)) {
825 *cached_ptr = state;
826 refcount_inc(&state->refs);
827 }
828 }
829}
830
831static void cache_state(struct extent_state *state,
832 struct extent_state **cached_ptr)
833{
834 return cache_state_if_flags(state, cached_ptr,
835 EXTENT_IOBITS | EXTENT_BOUNDARY);
836}
837
/*
 * Set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */
849static int __must_check
850__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
851 unsigned bits, unsigned exclusive_bits,
852 u64 *failed_start, struct extent_state **cached_state,
853 gfp_t mask, struct extent_changeset *changeset)
854{
855 struct extent_state *state;
856 struct extent_state *prealloc = NULL;
857 struct rb_node *node;
858 struct rb_node **p;
859 struct rb_node *parent;
860 int err = 0;
861 u64 last_start;
862 u64 last_end;
863
864 btrfs_debug_check_extent_io_range(tree, start, end);
865
866 bits |= EXTENT_FIRST_DELALLOC;
867again:
868 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care about allocation failure here; we may end up
		 * not needing the preallocated extent state at all, e.g. if
		 * the existing states already cover exactly our input range.
		 * If we do end up needing one we allocate it later.
		 */
876 prealloc = alloc_extent_state(mask);
877 }
878
879 spin_lock(&tree->lock);
880 if (cached_state && *cached_state) {
881 state = *cached_state;
882 if (state->start <= start && state->end > start &&
883 extent_state_in_tree(state)) {
884 node = &state->rb_node;
885 goto hit_next;
886 }
887 }
888
	/* This search finds the extents that end after our range starts. */
892 node = tree_search_for_insert(tree, start, &p, &parent);
893 if (!node) {
894 prealloc = alloc_extent_state_atomic(prealloc);
895 BUG_ON(!prealloc);
896 err = insert_state(tree, prealloc, start, end,
897 &p, &parent, &bits, changeset);
898 if (err)
899 extent_io_tree_panic(tree, err);
900
901 cache_state(prealloc, cached_state);
902 prealloc = NULL;
903 goto out;
904 }
905 state = rb_entry(node, struct extent_state, rb_node);
906hit_next:
907 last_start = state->start;
908 last_end = state->end;
909
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
916 if (state->start == start && state->end <= end) {
917 if (state->state & exclusive_bits) {
918 *failed_start = state->start;
919 err = -EEXIST;
920 goto out;
921 }
922
923 set_state_bits(tree, state, &bits, changeset);
924 cache_state(state, cached_state);
925 merge_state(tree, state);
926 if (last_end == (u64)-1)
927 goto out;
928 start = last_end + 1;
929 state = next_state(state);
930 if (start < end && state && state->start == start &&
931 !need_resched())
932 goto hit_next;
933 goto search_again;
934 }
935
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired
	 * bit on it.
	 */
952 if (state->start < start) {
953 if (state->state & exclusive_bits) {
954 *failed_start = start;
955 err = -EEXIST;
956 goto out;
957 }
958
959 prealloc = alloc_extent_state_atomic(prealloc);
960 BUG_ON(!prealloc);
961 err = split_state(tree, state, prealloc, start);
962 if (err)
963 extent_io_tree_panic(tree, err);
964
965 prealloc = NULL;
966 if (err)
967 goto out;
968 if (state->end <= end) {
969 set_state_bits(tree, state, &bits, changeset);
970 cache_state(state, cached_state);
971 merge_state(tree, state);
972 if (last_end == (u64)-1)
973 goto out;
974 start = last_end + 1;
975 state = next_state(state);
976 if (start < end && state && state->start == start &&
977 !need_resched())
978 goto hit_next;
979 }
980 goto search_again;
981 }
982
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
989 if (state->start > start) {
990 u64 this_end;
991 if (end < last_start)
992 this_end = end;
993 else
994 this_end = last_start - 1;
995
996 prealloc = alloc_extent_state_atomic(prealloc);
997 BUG_ON(!prealloc);
		/*
		 * Avoid freeing 'prealloc' if it can be merged with the
		 * later extent.
		 */
1003 err = insert_state(tree, prealloc, start, this_end,
1004 NULL, NULL, &bits, changeset);
1005 if (err)
1006 extent_io_tree_panic(tree, err);
1007
1008 cache_state(prealloc, cached_state);
1009 prealloc = NULL;
1010 start = this_end + 1;
1011 goto search_again;
1012 }
1013
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
1019 if (state->start <= end && state->end > end) {
1020 if (state->state & exclusive_bits) {
1021 *failed_start = start;
1022 err = -EEXIST;
1023 goto out;
1024 }
1025
1026 prealloc = alloc_extent_state_atomic(prealloc);
1027 BUG_ON(!prealloc);
1028 err = split_state(tree, state, prealloc, end + 1);
1029 if (err)
1030 extent_io_tree_panic(tree, err);
1031
1032 set_state_bits(tree, prealloc, &bits, changeset);
1033 cache_state(prealloc, cached_state);
1034 merge_state(tree, prealloc);
1035 prealloc = NULL;
1036 goto out;
1037 }
1038
1039search_again:
1040 if (start > end)
1041 goto out;
1042 spin_unlock(&tree->lock);
1043 if (gfpflags_allow_blocking(mask))
1044 cond_resched();
1045 goto again;
1046
1047out:
1048 spin_unlock(&tree->lock);
1049 if (prealloc)
1050 free_extent_state(prealloc);
1051
1052 return err;
1053
1054}
1055
1056int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1057 unsigned bits, u64 * failed_start,
1058 struct extent_state **cached_state, gfp_t mask)
1059{
1060 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1061 cached_state, mask, NULL);
1062}
1063
/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 *			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range.  If any states
 * exist already in this range they are set with the given bit and cleared
 * of the clear_bits.  This is only meant to be used by things that are
 * mergeable, ie. converting from say DELALLOC to DIRTY.
 *
 * All allocations are done with GFP_NOFS.
 */
1083int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1084 unsigned bits, unsigned clear_bits,
1085 struct extent_state **cached_state)
1086{
1087 struct extent_state *state;
1088 struct extent_state *prealloc = NULL;
1089 struct rb_node *node;
1090 struct rb_node **p;
1091 struct rb_node *parent;
1092 int err = 0;
1093 u64 last_start;
1094 u64 last_end;
1095 bool first_iteration = true;
1096
1097 btrfs_debug_check_extent_io_range(tree, start, end);
1098
1099again:
1100 if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration.  We might have a cached
		 * state that matches exactly the target range, in which case
		 * no extent state allocations are needed.  We'll only know
		 * this after locking the tree.
		 */
1108 prealloc = alloc_extent_state(GFP_NOFS);
1109 if (!prealloc && !first_iteration)
1110 return -ENOMEM;
1111 }
1112
1113 spin_lock(&tree->lock);
1114 if (cached_state && *cached_state) {
1115 state = *cached_state;
1116 if (state->start <= start && state->end > start &&
1117 extent_state_in_tree(state)) {
1118 node = &state->rb_node;
1119 goto hit_next;
1120 }
1121 }
1122
	/* This search finds the extents that end after our range starts. */
1127 node = tree_search_for_insert(tree, start, &p, &parent);
1128 if (!node) {
1129 prealloc = alloc_extent_state_atomic(prealloc);
1130 if (!prealloc) {
1131 err = -ENOMEM;
1132 goto out;
1133 }
1134 err = insert_state(tree, prealloc, start, end,
1135 &p, &parent, &bits, NULL);
1136 if (err)
1137 extent_io_tree_panic(tree, err);
1138 cache_state(prealloc, cached_state);
1139 prealloc = NULL;
1140 goto out;
1141 }
1142 state = rb_entry(node, struct extent_state, rb_node);
1143hit_next:
1144 last_start = state->start;
1145 last_end = state->end;
1146
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
1153 if (state->start == start && state->end <= end) {
1154 set_state_bits(tree, state, &bits, NULL);
1155 cache_state(state, cached_state);
1156 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1157 if (last_end == (u64)-1)
1158 goto out;
1159 start = last_end + 1;
1160 if (start < end && state && state->start == start &&
1161 !need_resched())
1162 goto hit_next;
1163 goto search_again;
1164 }
1165
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired
	 * bit on it.
	 */
1182 if (state->start < start) {
1183 prealloc = alloc_extent_state_atomic(prealloc);
1184 if (!prealloc) {
1185 err = -ENOMEM;
1186 goto out;
1187 }
1188 err = split_state(tree, state, prealloc, start);
1189 if (err)
1190 extent_io_tree_panic(tree, err);
1191 prealloc = NULL;
1192 if (err)
1193 goto out;
1194 if (state->end <= end) {
1195 set_state_bits(tree, state, &bits, NULL);
1196 cache_state(state, cached_state);
1197 state = clear_state_bit(tree, state, &clear_bits, 0,
1198 NULL);
1199 if (last_end == (u64)-1)
1200 goto out;
1201 start = last_end + 1;
1202 if (start < end && state && state->start == start &&
1203 !need_resched())
1204 goto hit_next;
1205 }
1206 goto search_again;
1207 }
1208
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
1215 if (state->start > start) {
1216 u64 this_end;
1217 if (end < last_start)
1218 this_end = end;
1219 else
1220 this_end = last_start - 1;
1221
1222 prealloc = alloc_extent_state_atomic(prealloc);
1223 if (!prealloc) {
1224 err = -ENOMEM;
1225 goto out;
1226 }

		/*
		 * Avoid freeing 'prealloc' if it can be merged with the
		 * later extent.
		 */
1232 err = insert_state(tree, prealloc, start, this_end,
1233 NULL, NULL, &bits, NULL);
1234 if (err)
1235 extent_io_tree_panic(tree, err);
1236 cache_state(prealloc, cached_state);
1237 prealloc = NULL;
1238 start = this_end + 1;
1239 goto search_again;
1240 }
1241
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
1247 if (state->start <= end && state->end > end) {
1248 prealloc = alloc_extent_state_atomic(prealloc);
1249 if (!prealloc) {
1250 err = -ENOMEM;
1251 goto out;
1252 }
1253
1254 err = split_state(tree, state, prealloc, end + 1);
1255 if (err)
1256 extent_io_tree_panic(tree, err);
1257
1258 set_state_bits(tree, prealloc, &bits, NULL);
1259 cache_state(prealloc, cached_state);
1260 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1261 prealloc = NULL;
1262 goto out;
1263 }
1264
1265search_again:
1266 if (start > end)
1267 goto out;
1268 spin_unlock(&tree->lock);
1269 cond_resched();
1270 first_iteration = false;
1271 goto again;
1272
1273out:
1274 spin_unlock(&tree->lock);
1275 if (prealloc)
1276 free_extent_state(prealloc);
1277
1278 return err;
1279}
1280
1281
1282int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1283 unsigned bits, struct extent_changeset *changeset)
1284{
	/*
	 * We don't support EXTENT_LOCKED yet, as the current changeset will
	 * record any bits changed, so for the EXTENT_LOCKED case it would
	 * either fail with -EEXIST or the changeset would record the whole
	 * range.
	 */
1291 BUG_ON(bits & EXTENT_LOCKED);
1292
1293 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1294 changeset);
1295}
1296
1297int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1298 unsigned bits, int wake, int delete,
1299 struct extent_state **cached)
1300{
1301 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1302 cached, GFP_NOFS, NULL);
1303}
1304
1305int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1306 unsigned bits, struct extent_changeset *changeset)
1307{
	/*
	 * Don't support the EXTENT_LOCKED case, for the same reason as in
	 * set_record_extent_bits().
	 */
1312 BUG_ON(bits & EXTENT_LOCKED);
1313
1314 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1315 changeset);
1316}
1317
/*
 * Either insert or lock state structs between start and end, waiting on any
 * conflicting EXTENT_LOCKED range until the whole range can be locked.
 */
1322int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1323 struct extent_state **cached_state)
1324{
1325 int err;
1326 u64 failed_start;
1327
1328 while (1) {
1329 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1330 EXTENT_LOCKED, &failed_start,
1331 cached_state, GFP_NOFS, NULL);
1332 if (err == -EEXIST) {
1333 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1334 start = failed_start;
1335 } else
1336 break;
1337 WARN_ON(start > end);
1338 }
1339 return err;
1340}
1341
1342int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1343{
1344 int err;
1345 u64 failed_start;
1346
1347 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1348 &failed_start, NULL, GFP_NOFS, NULL);
1349 if (err == -EEXIST) {
1350 if (failed_start > start)
1351 clear_extent_bit(tree, start, failed_start - 1,
1352 EXTENT_LOCKED, 1, 0, NULL);
1353 return 0;
1354 }
1355 return 1;
1356}
1357
1358void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1359{
1360 unsigned long index = start >> PAGE_SHIFT;
1361 unsigned long end_index = end >> PAGE_SHIFT;
1362 struct page *page;
1363
1364 while (index <= end_index) {
1365 page = find_get_page(inode->i_mapping, index);
1366 BUG_ON(!page);
1367 clear_page_dirty_for_io(page);
1368 put_page(page);
1369 index++;
1370 }
1371}
1372
1373void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1374{
1375 unsigned long index = start >> PAGE_SHIFT;
1376 unsigned long end_index = end >> PAGE_SHIFT;
1377 struct page *page;
1378
1379 while (index <= end_index) {
1380 page = find_get_page(inode->i_mapping, index);
1381 BUG_ON(!page);
1382 __set_page_dirty_nobuffers(page);
1383 account_page_redirty(page);
1384 put_page(page);
1385 index++;
1386 }
1387}
1388
/*
 * Helper to mark both the pages and the extent range as under writeback.
 */
1392static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1393{
1394 tree->ops->set_range_writeback(tree->private_data, start, end);
1395}
1396
/*
 * Find the first state struct with 'bits' set after 'start' and return it.
 * tree->lock must be held.  NULL is returned if nothing was found after
 * 'start'.
 */
1401static struct extent_state *
1402find_first_extent_bit_state(struct extent_io_tree *tree,
1403 u64 start, unsigned bits)
1404{
1405 struct rb_node *node;
1406 struct extent_state *state;
1407
	/* This search finds the extents that end after our range starts. */
1412 node = tree_search(tree, start);
1413 if (!node)
1414 goto out;
1415
1416 while (1) {
1417 state = rb_entry(node, struct extent_state, rb_node);
1418 if (state->end >= start && (state->state & bits))
1419 return state;
1420
1421 node = rb_next(node);
1422 if (!node)
1423 break;
1424 }
1425out:
1426 return NULL;
1427}
1428
/*
 * Find the first offset in the io tree with 'bits' set.  Zero is returned
 * if we find something, and *start_ret and *end_ret are set to reflect the
 * state struct that was found.
 *
 * If nothing was found, 1 is returned.  If something was found, return 0.
 */
1436int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1437 u64 *start_ret, u64 *end_ret, unsigned bits,
1438 struct extent_state **cached_state)
1439{
1440 struct extent_state *state;
1441 struct rb_node *n;
1442 int ret = 1;
1443
1444 spin_lock(&tree->lock);
1445 if (cached_state && *cached_state) {
1446 state = *cached_state;
1447 if (state->end == start - 1 && extent_state_in_tree(state)) {
1448 n = rb_next(&state->rb_node);
1449 while (n) {
1450 state = rb_entry(n, struct extent_state,
1451 rb_node);
1452 if (state->state & bits)
1453 goto got_it;
1454 n = rb_next(n);
1455 }
1456 free_extent_state(*cached_state);
1457 *cached_state = NULL;
1458 goto out;
1459 }
1460 free_extent_state(*cached_state);
1461 *cached_state = NULL;
1462 }
1463
1464 state = find_first_extent_bit_state(tree, start, bits);
1465got_it:
1466 if (state) {
1467 cache_state_if_flags(state, cached_state, 0);
1468 *start_ret = state->start;
1469 *end_ret = state->end;
1470 ret = 0;
1471 }
1472out:
1473 spin_unlock(&tree->lock);
1474 return ret;
1475}
1476
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'.  start and end are used to return the range.
 *
 * Non-zero is returned if we find something, 0 if nothing was in the tree.
 */
1483static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1484 u64 *start, u64 *end, u64 max_bytes,
1485 struct extent_state **cached_state)
1486{
1487 struct rb_node *node;
1488 struct extent_state *state;
1489 u64 cur_start = *start;
1490 u64 found = 0;
1491 u64 total_bytes = 0;
1492
1493 spin_lock(&tree->lock);
1494
	/* This search finds the extents that end after our range starts. */
1499 node = tree_search(tree, cur_start);
1500 if (!node) {
1501 if (!found)
1502 *end = (u64)-1;
1503 goto out;
1504 }
1505
1506 while (1) {
1507 state = rb_entry(node, struct extent_state, rb_node);
1508 if (found && (state->start != cur_start ||
1509 (state->state & EXTENT_BOUNDARY))) {
1510 goto out;
1511 }
1512 if (!(state->state & EXTENT_DELALLOC)) {
1513 if (!found)
1514 *end = state->end;
1515 goto out;
1516 }
1517 if (!found) {
1518 *start = state->start;
1519 *cached_state = state;
1520 refcount_inc(&state->refs);
1521 }
1522 found++;
1523 *end = state->end;
1524 cur_start = state->end + 1;
1525 node = rb_next(node);
1526 total_bytes += state->end - state->start + 1;
1527 if (total_bytes >= max_bytes)
1528 break;
1529 if (!node)
1530 break;
1531 }
1532out:
1533 spin_unlock(&tree->lock);
1534 return found;
1535}
1536
1537static int __process_pages_contig(struct address_space *mapping,
1538 struct page *locked_page,
1539 pgoff_t start_index, pgoff_t end_index,
1540 unsigned long page_ops, pgoff_t *index_ret);
1541
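/*
 * Unlock all pages in [start, end] except @locked_page, which the caller
 * still owns.
 */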
1542static noinline void __unlock_for_delalloc(struct inode *inode,
1543 struct page *locked_page,
1544 u64 start, u64 end)
1545{
1546 unsigned long index = start >> PAGE_SHIFT;
1547 unsigned long end_index = end >> PAGE_SHIFT;
1548
1549 ASSERT(locked_page);
1550 if (index == locked_page->index && end_index == index)
1551 return;
1552
1553 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1554 PAGE_UNLOCK, NULL);
1555}
1556
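/*
 * Lock all pages covering the delalloc range, skipping @locked_page.  On
 * -EAGAIN the pages locked so far have already been unlocked again and
 * *index_ret reports how far we got.
 */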
1557static noinline int lock_delalloc_pages(struct inode *inode,
1558 struct page *locked_page,
1559 u64 delalloc_start,
1560 u64 delalloc_end)
1561{
1562 unsigned long index = delalloc_start >> PAGE_SHIFT;
1563 unsigned long index_ret = index;
1564 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1565 int ret;
1566
1567 ASSERT(locked_page);
1568 if (index == locked_page->index && index == end_index)
1569 return 0;
1570
1571 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1572 end_index, PAGE_LOCK, &index_ret);
1573 if (ret == -EAGAIN)
1574 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1575 (u64)index_ret << PAGE_SHIFT);
1576 return ret;
1577}
1578
/*
 * Find and lock a contiguous range of bytes in the file marked as delalloc,
 * not more than 'max_bytes'.  start and end are used to return the range.
 *
 * Non-zero is returned if we find something, 0 if nothing was in the tree.
 */
1585STATIC u64 find_lock_delalloc_range(struct inode *inode,
1586 struct extent_io_tree *tree,
1587 struct page *locked_page, u64 *start,
1588 u64 *end, u64 max_bytes)
1589{
1590 u64 delalloc_start;
1591 u64 delalloc_end;
1592 u64 found;
1593 struct extent_state *cached_state = NULL;
1594 int ret;
1595 int loops = 0;
1596
1597again:
	/* step one, find a bunch of delalloc bytes starting at start */
1599 delalloc_start = *start;
1600 delalloc_end = 0;
1601 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1602 max_bytes, &cached_state);
1603 if (!found || delalloc_end <= *start) {
1604 *start = delalloc_start;
1605 *end = delalloc_end;
1606 free_extent_state(cached_state);
1607 return 0;
1608 }

	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page.
	 */
1615 if (delalloc_start < *start)
1616 delalloc_start = *start;

	/* make sure to limit the number of pages we try to lock down */
1621 if (delalloc_end + 1 - delalloc_start > max_bytes)
1622 delalloc_end = delalloc_start + max_bytes - 1;

	/* step two, lock all the pages after the page that has start */
1625 ret = lock_delalloc_pages(inode, locked_page,
1626 delalloc_start, delalloc_end);
1627 if (ret == -EAGAIN) {
		/*
		 * Some of the pages are gone, let's avoid looping by
		 * shortening the size of the delalloc range we're searching.
		 */
1631 free_extent_state(cached_state);
1632 cached_state = NULL;
1633 if (!loops) {
1634 max_bytes = PAGE_SIZE;
1635 loops = 1;
1636 goto again;
1637 } else {
1638 found = 0;
1639 goto out_failed;
1640 }
1641 }
1642 BUG_ON(ret);

	/* step three, lock the state bits for the whole range */
1645 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);

	/* then test to make sure it is all still delalloc */
1648 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1649 EXTENT_DELALLOC, 1, cached_state);
1650 if (!ret) {
1651 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1652 &cached_state);
1653 __unlock_for_delalloc(inode, locked_page,
1654 delalloc_start, delalloc_end);
1655 cond_resched();
1656 goto again;
1657 }
1658 free_extent_state(cached_state);
1659 *start = delalloc_start;
1660 *end = delalloc_end;
1661out_failed:
1662 return found;
1663}
1664
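/*
 * Walk the pages in [start_index, end_index] and apply @page_ops (lock,
 * unlock, clear dirty, set/end writeback, ...) to each one.  With PAGE_LOCK,
 * *index_ret reports how far we got when a page is missing or was reclaimed,
 * and -EAGAIN is returned so the caller can retry with a smaller range.
 */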
1665static int __process_pages_contig(struct address_space *mapping,
1666 struct page *locked_page,
1667 pgoff_t start_index, pgoff_t end_index,
1668 unsigned long page_ops, pgoff_t *index_ret)
1669{
1670 unsigned long nr_pages = end_index - start_index + 1;
1671 unsigned long pages_locked = 0;
1672 pgoff_t index = start_index;
1673 struct page *pages[16];
1674 unsigned ret;
1675 int err = 0;
1676 int i;
1677
1678 if (page_ops & PAGE_LOCK) {
1679 ASSERT(page_ops == PAGE_LOCK);
1680 ASSERT(index_ret && *index_ret == start_index);
1681 }
1682
1683 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1684 mapping_set_error(mapping, -EIO);
1685
1686 while (nr_pages > 0) {
1687 ret = find_get_pages_contig(mapping, index,
1688 min_t(unsigned long,
1689 nr_pages, ARRAY_SIZE(pages)), pages);
1690 if (ret == 0) {
			/*
			 * Only if we're going to lock these pages can we
			 * find nothing at @index.
			 */
1695 ASSERT(page_ops & PAGE_LOCK);
1696 err = -EAGAIN;
1697 goto out;
1698 }
1699
1700 for (i = 0; i < ret; i++) {
1701 if (page_ops & PAGE_SET_PRIVATE2)
1702 SetPagePrivate2(pages[i]);
1703
1704 if (pages[i] == locked_page) {
1705 put_page(pages[i]);
1706 pages_locked++;
1707 continue;
1708 }
1709 if (page_ops & PAGE_CLEAR_DIRTY)
1710 clear_page_dirty_for_io(pages[i]);
1711 if (page_ops & PAGE_SET_WRITEBACK)
1712 set_page_writeback(pages[i]);
1713 if (page_ops & PAGE_SET_ERROR)
1714 SetPageError(pages[i]);
1715 if (page_ops & PAGE_END_WRITEBACK)
1716 end_page_writeback(pages[i]);
1717 if (page_ops & PAGE_UNLOCK)
1718 unlock_page(pages[i]);
1719 if (page_ops & PAGE_LOCK) {
1720 lock_page(pages[i]);
1721 if (!PageDirty(pages[i]) ||
1722 pages[i]->mapping != mapping) {
1723 unlock_page(pages[i]);
1724 put_page(pages[i]);
1725 err = -EAGAIN;
1726 goto out;
1727 }
1728 }
1729 put_page(pages[i]);
1730 pages_locked++;
1731 }
1732 nr_pages -= ret;
1733 index += ret;
1734 cond_resched();
1735 }
1736out:
1737 if (err && index_ret)
1738 *index_ret = start_index + pages_locked - 1;
1739 return err;
1740}
1741
1742void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1743 u64 delalloc_end, struct page *locked_page,
1744 unsigned clear_bits,
1745 unsigned long page_ops)
1746{
1747 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
1748 NULL);
1749
1750 __process_pages_contig(inode->i_mapping, locked_page,
1751 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
1752 page_ops, NULL);
1753}
1754
/*
 * Count the number of bytes in the tree that have a given bit(s) set.  This
 * can be fairly slow, except for EXTENT_DIRTY which is cached.  The total
 * number found is returned.
 */
1760u64 count_range_bits(struct extent_io_tree *tree,
1761 u64 *start, u64 search_end, u64 max_bytes,
1762 unsigned bits, int contig)
1763{
1764 struct rb_node *node;
1765 struct extent_state *state;
1766 u64 cur_start = *start;
1767 u64 total_bytes = 0;
1768 u64 last = 0;
1769 int found = 0;
1770
1771 if (WARN_ON(search_end <= cur_start))
1772 return 0;
1773
1774 spin_lock(&tree->lock);
1775 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1776 total_bytes = tree->dirty_bytes;
1777 goto out;
1778 }
1779
	/* This search finds the extents that end after our range starts. */
1783 node = tree_search(tree, cur_start);
1784 if (!node)
1785 goto out;
1786
1787 while (1) {
1788 state = rb_entry(node, struct extent_state, rb_node);
1789 if (state->start > search_end)
1790 break;
1791 if (contig && found && state->start > last + 1)
1792 break;
1793 if (state->end >= cur_start && (state->state & bits) == bits) {
1794 total_bytes += min(search_end, state->end) + 1 -
1795 max(cur_start, state->start);
1796 if (total_bytes >= max_bytes)
1797 break;
1798 if (!found) {
1799 *start = max(cur_start, state->start);
1800 found = 1;
1801 }
1802 last = state->end;
1803 } else if (contig && found) {
1804 break;
1805 }
1806 node = rb_next(node);
1807 if (!node)
1808 break;
1809 }
1810out:
1811 spin_unlock(&tree->lock);
1812 return total_bytes;
1813}
1814
/*
 * Set the failrec field for a given byte offset in the tree.  If there isn't
 * an extent_state starting exactly there, -ENOENT is returned.
 */
1819static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1820 struct io_failure_record *failrec)
1821{
1822 struct rb_node *node;
1823 struct extent_state *state;
1824 int ret = 0;
1825
1826 spin_lock(&tree->lock);
	/* This search finds the extents that end after our range starts. */
1831 node = tree_search(tree, start);
1832 if (!node) {
1833 ret = -ENOENT;
1834 goto out;
1835 }
1836 state = rb_entry(node, struct extent_state, rb_node);
1837 if (state->start != start) {
1838 ret = -ENOENT;
1839 goto out;
1840 }
1841 state->failrec = failrec;
1842out:
1843 spin_unlock(&tree->lock);
1844 return ret;
1845}
1846
1847static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1848 struct io_failure_record **failrec)
1849{
1850 struct rb_node *node;
1851 struct extent_state *state;
1852 int ret = 0;
1853
1854 spin_lock(&tree->lock);
	/* This search finds the extents that end after our range starts. */
1859 node = tree_search(tree, start);
1860 if (!node) {
1861 ret = -ENOENT;
1862 goto out;
1863 }
1864 state = rb_entry(node, struct extent_state, rb_node);
1865 if (state->start != start) {
1866 ret = -ENOENT;
1867 goto out;
1868 }
1869 *failrec = state->failrec;
1870out:
1871 spin_unlock(&tree->lock);
1872 return ret;
1873}
1874
/*
 * Search a range in the state tree for a given mask.  If 'filled' == 1,
 * this returns 1 only if every extent in the range has the bits set.
 * Otherwise, 1 is returned if any bit in the range is found set.
 */
1881int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1882 unsigned bits, int filled, struct extent_state *cached)
1883{
1884 struct extent_state *state = NULL;
1885 struct rb_node *node;
1886 int bitset = 0;
1887
1888 spin_lock(&tree->lock);
1889 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1890 cached->end > start)
1891 node = &cached->rb_node;
1892 else
1893 node = tree_search(tree, start);
1894 while (node && start <= end) {
1895 state = rb_entry(node, struct extent_state, rb_node);
1896
1897 if (filled && state->start > start) {
1898 bitset = 0;
1899 break;
1900 }
1901
1902 if (state->start > end)
1903 break;
1904
1905 if (state->state & bits) {
1906 bitset = 1;
1907 if (!filled)
1908 break;
1909 } else if (filled) {
1910 bitset = 0;
1911 break;
1912 }
1913
1914 if (state->end == (u64)-1)
1915 break;
1916
1917 start = state->end + 1;
1918 if (start > end)
1919 break;
1920 node = rb_next(node);
1921 if (!node) {
1922 if (filled)
1923 bitset = 0;
1924 break;
1925 }
1926 }
1927 spin_unlock(&tree->lock);
1928 return bitset;
1929}
1930
/*
 * Helper function to set a given page up to date if all the extents in the
 * tree for that page are up to date.
 */
1935static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1936{
1937 u64 start = page_offset(page);
1938 u64 end = start + PAGE_SIZE - 1;
1939 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1940 SetPageUptodate(page);
1941}
1942
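/*
 * Drop an io_failure_record: clear its bits from the failure and io trees
 * and free the record itself.
 */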
1943int free_io_failure(struct extent_io_tree *failure_tree,
1944 struct extent_io_tree *io_tree,
1945 struct io_failure_record *rec)
1946{
1947 int ret;
1948 int err = 0;
1949
1950 set_state_failrec(failure_tree, rec->start, NULL);
1951 ret = clear_extent_bits(failure_tree, rec->start,
1952 rec->start + rec->len - 1,
1953 EXTENT_LOCKED | EXTENT_DIRTY);
1954 if (ret)
1955 err = ret;
1956
1957 ret = clear_extent_bits(io_tree, rec->start,
1958 rec->start + rec->len - 1,
1959 EXTENT_DAMAGED);
1960 if (ret && !err)
1961 err = ret;
1962
1963 kfree(rec);
1964 return err;
1965}
1966
/*
 * Repair a read error by writing the known-good data in @page at @pg_offset
 * back to the given logical address on the given mirror.  The write is
 * submitted synchronously and bypasses the normal end_io callbacks.
 *
 * Returns 0 on success, -EIO if the block could not be mapped or written.
 */
1977int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
1978 u64 length, u64 logical, struct page *page,
1979 unsigned int pg_offset, int mirror_num)
1980{
1981 struct bio *bio;
1982 struct btrfs_device *dev;
1983 u64 map_length = 0;
1984 u64 sector;
1985 struct btrfs_bio *bbio = NULL;
1986 int ret;
1987
1988 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
1989 BUG_ON(!mirror_num);
1990
1991 bio = btrfs_io_bio_alloc(1);
1992 bio->bi_iter.bi_size = 0;
1993 map_length = length;
1994
	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are doing
	 * the read repair operation.
	 */
2000 btrfs_bio_counter_inc_blocked(fs_info);
2001 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
		/*
		 * For parity based profiles map the block with
		 * BTRFS_MAP_READ, which hands back the single data stripe we
		 * need to rewrite instead of a full RAID56 write mapping.
		 */
2008 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2009 &map_length, &bbio, 0);
2010 if (ret) {
2011 btrfs_bio_counter_dec(fs_info);
2012 bio_put(bio);
2013 return -EIO;
2014 }
2015 ASSERT(bbio->mirror_num == 1);
2016 } else {
2017 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2018 &map_length, &bbio, mirror_num);
2019 if (ret) {
2020 btrfs_bio_counter_dec(fs_info);
2021 bio_put(bio);
2022 return -EIO;
2023 }
2024 BUG_ON(mirror_num != bbio->mirror_num);
2025 }
2026
2027 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
2028 bio->bi_iter.bi_sector = sector;
2029 dev = bbio->stripes[bbio->mirror_num - 1].dev;
2030 btrfs_put_bbio(bbio);
2031 if (!dev || !dev->bdev ||
2032 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
2033 btrfs_bio_counter_dec(fs_info);
2034 bio_put(bio);
2035 return -EIO;
2036 }
2037 bio_set_dev(bio, dev->bdev);
2038 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2039 bio_add_page(bio, page, length, pg_offset);
2040
2041 if (btrfsic_submit_bio_wait(bio)) {
2042
2043 btrfs_bio_counter_dec(fs_info);
2044 bio_put(bio);
2045 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2046 return -EIO;
2047 }
2048
2049 btrfs_info_rl_in_rcu(fs_info,
2050 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2051 ino, start,
2052 rcu_str_deref(dev->name), sector);
2053 btrfs_bio_counter_dec(fs_info);
2054 bio_put(bio);
2055 return 0;
2056}
2057
2058int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
2059 struct extent_buffer *eb, int mirror_num)
2060{
2061 u64 start = eb->start;
2062 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2063 int ret = 0;
2064
2065 if (sb_rdonly(fs_info->sb))
2066 return -EROFS;
2067
2068 for (i = 0; i < num_pages; i++) {
2069 struct page *p = eb->pages[i];
2070
2071 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2072 start - page_offset(p), mirror_num);
2073 if (ret)
2074 break;
2075 start += PAGE_SIZE;
2076 }
2077
2078 return ret;
2079}
2080
/*
 * Each time an IO finishes, we do a fast check in the IO failure tree to see
 * if we need to process or clean up an io_failure_record.
 */
2085int clean_io_failure(struct btrfs_fs_info *fs_info,
2086 struct extent_io_tree *failure_tree,
2087 struct extent_io_tree *io_tree, u64 start,
2088 struct page *page, u64 ino, unsigned int pg_offset)
2089{
2090 u64 private;
2091 struct io_failure_record *failrec;
2092 struct extent_state *state;
2093 int num_copies;
2094 int ret;
2095
2096 private = 0;
2097 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2098 EXTENT_DIRTY, 0);
2099 if (!ret)
2100 return 0;
2101
2102 ret = get_state_failrec(failure_tree, start, &failrec);
2103 if (ret)
2104 return 0;
2105
2106 BUG_ON(!failrec->this_mirror);
2107
2108 if (failrec->in_validation) {
		/* there was no real error, the record is a validation dummy */
2110 btrfs_debug(fs_info,
2111 "clean_io_failure: freeing dummy error at %llu",
2112 failrec->start);
2113 goto out;
2114 }
2115 if (sb_rdonly(fs_info->sb))
2116 goto out;
2117
2118 spin_lock(&io_tree->lock);
2119 state = find_first_extent_bit_state(io_tree,
2120 failrec->start,
2121 EXTENT_LOCKED);
2122 spin_unlock(&io_tree->lock);
2123
2124 if (state && state->start <= failrec->start &&
2125 state->end >= failrec->start + failrec->len - 1) {
2126 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2127 failrec->len);
2128 if (num_copies > 1) {
2129 repair_io_failure(fs_info, ino, start, failrec->len,
2130 failrec->logical, page, pg_offset,
2131 failrec->failed_mirror);
2132 }
2133 }
2134
2135out:
2136 free_io_failure(failure_tree, io_tree, failrec);
2137
2138 return 0;
2139}
2140
/*
 * Can be called when
 * - the extent lock is held
 * - under an ordered extent
 * - the inode is being freed
 */
2147void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2148{
2149 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2150 struct io_failure_record *failrec;
2151 struct extent_state *state, *next;
2152
2153 if (RB_EMPTY_ROOT(&failure_tree->state))
2154 return;
2155
2156 spin_lock(&failure_tree->lock);
2157 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2158 while (state) {
2159 if (state->start > end)
2160 break;
2161
2162 ASSERT(state->end <= end);
2163
2164 next = next_state(state);
2165
2166 failrec = state->failrec;
2167 free_extent_state(state);
2168 kfree(failrec);
2169
2170 state = next;
2171 }
2172 spin_unlock(&failure_tree->lock);
2173}
2174
2175int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2176 struct io_failure_record **failrec_ret)
2177{
2178 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2179 struct io_failure_record *failrec;
2180 struct extent_map *em;
2181 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2182 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2183 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2184 int ret;
2185 u64 logical;
2186
2187 ret = get_state_failrec(failure_tree, start, &failrec);
2188 if (ret) {
2189 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2190 if (!failrec)
2191 return -ENOMEM;
2192
2193 failrec->start = start;
2194 failrec->len = end - start + 1;
2195 failrec->this_mirror = 0;
2196 failrec->bio_flags = 0;
2197 failrec->in_validation = 0;
2198
2199 read_lock(&em_tree->lock);
2200 em = lookup_extent_mapping(em_tree, start, failrec->len);
2201 if (!em) {
2202 read_unlock(&em_tree->lock);
2203 kfree(failrec);
2204 return -EIO;
2205 }
2206
2207 if (em->start > start || em->start + em->len <= start) {
2208 free_extent_map(em);
2209 em = NULL;
2210 }
2211 read_unlock(&em_tree->lock);
2212 if (!em) {
2213 kfree(failrec);
2214 return -EIO;
2215 }
2216
2217 logical = start - em->start;
2218 logical = em->block_start + logical;
2219 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2220 logical = em->block_start;
2221 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2222 extent_set_compress_type(&failrec->bio_flags,
2223 em->compress_type);
2224 }
2225
2226 btrfs_debug(fs_info,
2227 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2228 logical, start, failrec->len);
2229
2230 failrec->logical = logical;
2231 free_extent_map(em);
2232
		/* set the bits in the private failure tree */
2234 ret = set_extent_bits(failure_tree, start, end,
2235 EXTENT_LOCKED | EXTENT_DIRTY);
2236 if (ret >= 0)
2237 ret = set_state_failrec(failure_tree, start, failrec);
		/* set the bits in the inode's tree */
2239 if (ret >= 0)
2240 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2241 if (ret < 0) {
2242 kfree(failrec);
2243 return ret;
2244 }
2245 } else {
2246 btrfs_debug(fs_info,
2247 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2248 failrec->logical, failrec->start, failrec->len,
2249 failrec->in_validation);
		/*
		 * When data can be on disk more than twice, add to failrec
		 * here (e.g. with a list for failed_mirror) to make
		 * clean_io_failure() clean all those errors at once.
		 */
2255 }
2256
2257 *failrec_ret = failrec;
2258
2259 return 0;
2260}
2261
2262bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
2263 struct io_failure_record *failrec, int failed_mirror)
2264{
2265 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2266 int num_copies;
2267
2268 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2269 if (num_copies == 1) {
		/*
		 * We only have a single copy of the data, so don't bother
		 * with all the retry and error correction code that follows.
		 * No matter what the error is, it is very likely to persist.
		 */
2275 btrfs_debug(fs_info,
2276 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2277 num_copies, failrec->this_mirror, failed_mirror);
2278 return false;
2279 }
2280
	/*
	 * There are two premises:
	 *	a) deliver good data to the caller
	 *	b) correct the bad sectors on disk
	 */
2286 if (failed_bio_pages > 1) {
		/*
		 * To fulfill b), we need to know the exact failing sectors,
		 * as we don't want to rewrite any more than the failed ones.
		 * Thus, we need separate read requests for the failed bio.
		 *
		 * If the following BUG_ON triggers, our validation request
		 * got merged; we need separate requests for our algorithm
		 * to work.
		 */
2295 BUG_ON(failrec->in_validation);
2296 failrec->in_validation = 1;
2297 failrec->this_mirror = failed_mirror;
2298 } else {
		/*
		 * Single page I/O: if a validation round has finished, reset
		 * it, then pick the next mirror to try (skipping the one
		 * that just failed).
		 */
2304 if (failrec->in_validation) {
2305 BUG_ON(failrec->this_mirror != failed_mirror);
2306 failrec->in_validation = 0;
2307 failrec->this_mirror = 0;
2308 }
2309 failrec->failed_mirror = failed_mirror;
2310 failrec->this_mirror++;
2311 if (failrec->this_mirror == failed_mirror)
2312 failrec->this_mirror++;
2313 }
2314
2315 if (failrec->this_mirror > num_copies) {
2316 btrfs_debug(fs_info,
2317 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2318 num_copies, failrec->this_mirror, failed_mirror);
2319 return false;
2320 }
2321
2322 return true;
2323}
2324
2325
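/*
 * Allocate a one-page repair bio aimed at failrec->logical, copying the
 * relevant checksum from the failed bio when one is present.
 */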
2326struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2327 struct io_failure_record *failrec,
2328 struct page *page, int pg_offset, int icsum,
2329 bio_end_io_t *endio_func, void *data)
2330{
2331 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2332 struct bio *bio;
2333 struct btrfs_io_bio *btrfs_failed_bio;
2334 struct btrfs_io_bio *btrfs_bio;
2335
2336 bio = btrfs_io_bio_alloc(1);
2337 bio->bi_end_io = endio_func;
2338 bio->bi_iter.bi_sector = failrec->logical >> 9;
2339 bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
2340 bio->bi_iter.bi_size = 0;
2341 bio->bi_private = data;
2342
2343 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2344 if (btrfs_failed_bio->csum) {
2345 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2346
2347 btrfs_bio = btrfs_io_bio(bio);
2348 btrfs_bio->csum = btrfs_bio->csum_inline;
2349 icsum *= csum_size;
2350 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2351 csum_size);
2352 }
2353
2354 bio_add_page(bio, page, failrec->len, pg_offset);
2355
2356 return bio;
2357}
2358
/*
 * This is a generic handler for readpage errors.  If other copies exist,
 * read those and write back good data to the failed position.  It does not
 * try to remap the failed extent elsewhere, hoping the device will be smart
 * enough to do this as needed.
 */
2367static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2368 struct page *page, u64 start, u64 end,
2369 int failed_mirror)
2370{
2371 struct io_failure_record *failrec;
2372 struct inode *inode = page->mapping->host;
2373 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2374 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2375 struct bio *bio;
2376 int read_mode = 0;
2377 blk_status_t status;
2378 int ret;
2379 unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
2380
2381 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2382
2383 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2384 if (ret)
2385 return ret;
2386
2387 if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
2388 failed_mirror)) {
2389 free_io_failure(failure_tree, tree, failrec);
2390 return -EIO;
2391 }
2392
2393 if (failed_bio_pages > 1)
2394 read_mode |= REQ_FAILFAST_DEV;
2395
2396 phy_offset >>= inode->i_sb->s_blocksize_bits;
2397 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2398 start - page_offset(page),
2399 (int)phy_offset, failed_bio->bi_end_io,
2400 NULL);
2401 bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
2402
2403 btrfs_debug(btrfs_sb(inode->i_sb),
2404 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2405 read_mode, failrec->this_mirror, failrec->in_validation);
2406
2407 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2408 failrec->bio_flags, 0);
2409 if (status) {
2410 free_io_failure(failure_tree, tree, failrec);
2411 bio_put(bio);
2412 ret = blk_status_to_errno(status);
2413 }
2414
2415 return ret;
2416}
2417
2418
2419
2420void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2421{
2422 int uptodate = (err == 0);
2423 struct extent_io_tree *tree;
2424 int ret = 0;
2425
2426 tree = &BTRFS_I(page->mapping->host)->io_tree;
2427
2428 if (tree->ops && tree->ops->writepage_end_io_hook)
2429 tree->ops->writepage_end_io_hook(page, start, end, NULL,
2430 uptodate);
2431
2432 if (!uptodate) {
2433 ClearPageUptodate(page);
2434 SetPageError(page);
2435 ret = err < 0 ? err : -EIO;
2436 mapping_set_error(page->mapping, ret);
2437 }
2438}
2439
/*
 * After a writepage IO is done, we need to:
 * - clear the uptodate bits on error
 * - clear the writeback bits in the extent tree for this IO
 * - end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2449static void end_bio_extent_writepage(struct bio *bio)
2450{
2451 int error = blk_status_to_errno(bio->bi_status);
2452 struct bio_vec *bvec;
2453 u64 start;
2454 u64 end;
2455 int i;
2456
2457 ASSERT(!bio_flagged(bio, BIO_CLONED));
2458 bio_for_each_segment_all(bvec, bio, i) {
2459 struct page *page = bvec->bv_page;
2460 struct inode *inode = page->mapping->host;
2461 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

		/*
		 * We always issue full-page writes, but if some block in a
		 * page fails to write, blk_update_request() will advance
		 * bv_offset and adjust bv_len to compensate.  Print a
		 * warning for nonzero offsets, and an error if they don't
		 * add up to a full page.
		 */
2468 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2469 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2470 btrfs_err(fs_info,
2471 "partial page write in btrfs with offset %u and length %u",
2472 bvec->bv_offset, bvec->bv_len);
2473 else
2474 btrfs_info(fs_info,
2475 "incomplete page write in btrfs with offset %u and length %u",
2476 bvec->bv_offset, bvec->bv_len);
2477 }
2478
2479 start = page_offset(page);
2480 end = start + bvec->bv_offset + bvec->bv_len - 1;
2481
2482 end_extent_writepage(page, error, start, end);
2483 end_page_writeback(page);
2484 }
2485
2486 bio_put(bio);
2487}
2488
2489static void
2490endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2491 int uptodate)
2492{
2493 struct extent_state *cached = NULL;
2494 u64 end = start + len - 1;
2495
2496 if (uptodate && tree->track_uptodate)
2497 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2498 unlock_extent_cached_atomic(tree, start, end, &cached);
2499}
2500
/*
 * After a readpage IO is done, we need to:
 * - clear the uptodate bits on error
 * - set the uptodate bits if things worked
 * - set the page up to date if all extents in the tree are uptodate
 * - clear the lock bit in the extent tree
 * - unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2512static void end_bio_extent_readpage(struct bio *bio)
2513{
2514 struct bio_vec *bvec;
2515 int uptodate = !bio->bi_status;
2516 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2517 struct extent_io_tree *tree, *failure_tree;
2518 u64 offset = 0;
2519 u64 start;
2520 u64 end;
2521 u64 len;
2522 u64 extent_start = 0;
2523 u64 extent_len = 0;
2524 int mirror;
2525 int ret;
2526 int i;
2527
2528 ASSERT(!bio_flagged(bio, BIO_CLONED));
2529 bio_for_each_segment_all(bvec, bio, i) {
2530 struct page *page = bvec->bv_page;
2531 struct inode *inode = page->mapping->host;
2532 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2533
2534 btrfs_debug(fs_info,
2535 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2536 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2537 io_bio->mirror_num);
2538 tree = &BTRFS_I(inode)->io_tree;
2539 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2540
/*
 * We always issue full-page reads, but if some block in a page fails to read,
 * blk_update_request() may have advanced bv_offset and shrunk bv_len.  Log an
 * error if the segment no longer reaches the end of the page, and an info
 * message for any other partial segment.
 */
2546 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2547 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2548 btrfs_err(fs_info,
2549 "partial page read in btrfs with offset %u and length %u",
2550 bvec->bv_offset, bvec->bv_len);
2551 else
2552 btrfs_info(fs_info,
2553 "incomplete page read in btrfs with offset %u and length %u",
2554 bvec->bv_offset, bvec->bv_len);
2555 }
2556
2557 start = page_offset(page);
2558 end = start + bvec->bv_offset + bvec->bv_len - 1;
2559 len = bvec->bv_len;
2560
2561 mirror = io_bio->mirror_num;
2562 if (likely(uptodate && tree->ops)) {
2563 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2564 page, start, end,
2565 mirror);
2566 if (ret)
2567 uptodate = 0;
2568 else
2569 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2570 failure_tree, tree, start,
2571 page,
2572 btrfs_ino(BTRFS_I(inode)), 0);
2573 }
2574
2575 if (likely(uptodate))
2576 goto readpage_ok;
2577
2578 if (tree->ops) {
2579 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2580 if (ret == -EAGAIN) {
/*
 * The data inode's readpage_io_failed_hook() returns -EAGAIN to ask the
 * generic handler to retry: bio_readpage_error() submits a new read of this
 * block against another mirror.  If that submission succeeds, the repair
 * bio's end_io will finish this page, so skip the rest of the processing for
 * this bvec here.
 */
2595 ret = bio_readpage_error(bio, offset, page,
2596 start, end, mirror);
2597 if (ret == 0) {
2598 uptodate = !bio->bi_status;
2599 offset += len;
2600 continue;
2601 }
2602 }

/*
 * Any error other than -EAGAIN from the failure hook is expected to be -EIO;
 * fall through so the page is marked as failed below.
 */
2609 ASSERT(ret == -EIO);
2610 }
2611readpage_ok:
2612 if (likely(uptodate)) {
2613 loff_t i_size = i_size_read(inode);
2614 pgoff_t end_index = i_size >> PAGE_SHIFT;
2615 unsigned off;
2616
2617
2618 off = i_size & (PAGE_SIZE-1);
2619 if (page->index == end_index && off)
2620 zero_user_segment(page, off, PAGE_SIZE);
2621 SetPageUptodate(page);
2622 } else {
2623 ClearPageUptodate(page);
2624 SetPageError(page);
2625 }
2626 unlock_page(page);
2627 offset += len;
2628
2629 if (unlikely(!uptodate)) {
2630 if (extent_len) {
2631 endio_readpage_release_extent(tree,
2632 extent_start,
2633 extent_len, 1);
2634 extent_start = 0;
2635 extent_len = 0;
2636 }
2637 endio_readpage_release_extent(tree, start,
2638 end - start + 1, 0);
2639 } else if (!extent_len) {
2640 extent_start = start;
2641 extent_len = end + 1 - start;
2642 } else if (extent_start + extent_len == start) {
2643 extent_len += end + 1 - start;
2644 } else {
2645 endio_readpage_release_extent(tree, extent_start,
2646 extent_len, uptodate);
2647 extent_start = start;
2648 extent_len = end + 1 - start;
2649 }
2650 }
2651
2652 if (extent_len)
2653 endio_readpage_release_extent(tree, extent_start, extent_len,
2654 uptodate);
2655 if (io_bio->end_io)
2656 io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
2657 bio_put(bio);
2658}
2659
/*
 * Initialize the members up to but not including 'bio'.  Use after allocating
 * a new bio from btrfs_bioset, since bio_alloc_bioset() does not initialize
 * the bytes outside of 'bio' in struct btrfs_io_bio.
 */
2665static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2666{
2667 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2668}
2669
/*
 * The following helpers allocate a bio.  As they are backed by btrfs_bioset,
 * the allocations never fail.
 */
2675struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
2676{
2677 struct bio *bio;
2678
2679 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
2680 bio_set_dev(bio, bdev);
2681 bio->bi_iter.bi_sector = first_byte >> 9;
2682 btrfs_io_bio_init(btrfs_io_bio(bio));
2683 return bio;
2684}
2685
2686struct bio *btrfs_bio_clone(struct bio *bio)
2687{
2688 struct btrfs_io_bio *btrfs_bio;
2689 struct bio *new;
2690
2691
2692 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
2693 btrfs_bio = btrfs_io_bio(new);
2694 btrfs_io_bio_init(btrfs_bio);
2695 btrfs_bio->iter = bio->bi_iter;
2696 return new;
2697}
2698
2699struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2700{
2701 struct bio *bio;
2702
2703
2704 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
2705 btrfs_io_bio_init(btrfs_io_bio(bio));
2706 return bio;
2707}
2708
2709struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2710{
2711 struct bio *bio;
2712 struct btrfs_io_bio *btrfs_bio;
2713
2714
2715 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
2716 ASSERT(bio);
2717
2718 btrfs_bio = btrfs_io_bio(bio);
2719 btrfs_io_bio_init(btrfs_bio);
2720
2721 bio_trim(bio, offset >> 9, size >> 9);
2722 btrfs_bio->iter = bio->bi_iter;
2723 return bio;
2724}
2725
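/*
 * Submit a completed bio: hand it to the tree's submit_bio_hook if one is
 * registered (which routes it through btrfs device mapping), otherwise send
 * it straight to the block layer.  Returns a negative errno on failure.
 */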
2726static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2727 unsigned long bio_flags)
2728{
2729 blk_status_t ret = 0;
2730 struct bio_vec *bvec = bio_last_bvec_all(bio);
2731 struct page *page = bvec->bv_page;
2732 struct extent_io_tree *tree = bio->bi_private;
2733 u64 start;
2734
2735 start = page_offset(page) + bvec->bv_offset;
2736
2737 bio->bi_private = NULL;
2738
2739 if (tree->ops)
2740 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
2741 mirror_num, bio_flags, start);
2742 else
2743 btrfsic_submit_bio(bio);
2744
2745 return blk_status_to_errno(ret);
2746}
2747
/*
 * @opf:	bio REQ_OP_* and REQ_* flags as one value
 * @tree:	tree so we can call our merge_bio hook
 * @wbc:	optional writeback control so we can attribute cgroups
 * @page:	page to add to the bio
 * @offset:	disk byte number to read from or write to
 * @size:	number of bytes from the page to add
 * @pg_offset:	offset of the data within the page
 * @bdev:	attach newly created bios to this bdev
 * @bio_ret:	must be valid pointer, newly allocated bio will be stored there
 * @end_io_func: end_io callback for the new bio
 * @mirror_num:	desired mirror to read/write
 * @prev_bio_flags: flags of the previous bio, to see if we can merge the current one
 * @bio_flags:	flags of the current bio
 * @force_bio_submit: submit the existing bio even if the new range could be merged
 */
2764static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2765 struct writeback_control *wbc,
2766 struct page *page, u64 offset,
2767 size_t size, unsigned long pg_offset,
2768 struct block_device *bdev,
2769 struct bio **bio_ret,
2770 bio_end_io_t end_io_func,
2771 int mirror_num,
2772 unsigned long prev_bio_flags,
2773 unsigned long bio_flags,
2774 bool force_bio_submit)
2775{
2776 int ret = 0;
2777 struct bio *bio;
2778 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2779 sector_t sector = offset >> 9;
2780
2781 ASSERT(bio_ret);
2782
2783 if (*bio_ret) {
2784 bool contig;
2785 bool can_merge = true;
2786
2787 bio = *bio_ret;
2788 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
2789 contig = bio->bi_iter.bi_sector == sector;
2790 else
2791 contig = bio_end_sector(bio) == sector;
2792
2793 if (tree->ops && tree->ops->merge_bio_hook(page, offset,
2794 page_size, bio, bio_flags))
2795 can_merge = false;
2796
2797 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
2798 force_bio_submit ||
2799 bio_add_page(bio, page, page_size, pg_offset) < page_size) {
2800 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2801 if (ret < 0) {
2802 *bio_ret = NULL;
2803 return ret;
2804 }
2805 bio = NULL;
2806 } else {
2807 if (wbc)
2808 wbc_account_cgroup_owner(wbc, page, page_size);
2809 return 0;
2810 }
2811 }
2812
2813 bio = btrfs_bio_alloc(bdev, offset);
2814 bio_add_page(bio, page, page_size, pg_offset);
2815 bio->bi_end_io = end_io_func;
2816 bio->bi_private = tree;
2817 bio->bi_write_hint = page->mapping->host->i_write_hint;
2818 bio->bi_opf = opf;
2819 if (wbc) {
2820 wbc_init_bio(wbc, bio);
2821 wbc_account_cgroup_owner(wbc, page, page_size);
2822 }
2823
2824 *bio_ret = bio;
2825
2826 return ret;
2827}
2828
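/*
 * Attach an extent buffer to a page via page->private, taking an extra page
 * reference the first time.  If the page already has private data it must
 * already point at this extent buffer.
 */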
2829static void attach_extent_buffer_page(struct extent_buffer *eb,
2830 struct page *page)
2831{
2832 if (!PagePrivate(page)) {
2833 SetPagePrivate(page);
2834 get_page(page);
2835 set_page_private(page, (unsigned long)eb);
2836 } else {
2837 WARN_ON(page->private != (unsigned long)eb);
2838 }
2839}
2840
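/* Mark a data page as belonging to the extent io code via page->private. */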
2841void set_page_extent_mapped(struct page *page)
2842{
2843 if (!PagePrivate(page)) {
2844 SetPagePrivate(page);
2845 get_page(page);
2846 set_page_private(page, EXTENT_PAGE_PRIVATE);
2847 }
2848}
2849
2850static struct extent_map *
2851__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2852 u64 start, u64 len, get_extent_t *get_extent,
2853 struct extent_map **em_cached)
2854{
2855 struct extent_map *em;
2856
2857 if (em_cached && *em_cached) {
2858 em = *em_cached;
2859 if (extent_map_in_tree(em) && start >= em->start &&
2860 start < extent_map_end(em)) {
2861 refcount_inc(&em->refs);
2862 return em;
2863 }
2864
2865 free_extent_map(em);
2866 *em_cached = NULL;
2867 }
2868
2869 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
2870 if (em_cached && !IS_ERR_OR_NULL(em)) {
2871 BUG_ON(*em_cached);
2872 refcount_inc(&em->refs);
2873 *em_cached = em;
2874 }
2875 return em;
2876}
2877
/*
 * Basic readpage implementation.  Locked extent state structs are inserted
 * into the tree and removed when the IO completes (by the end_io handlers).
 * Returns 0 on success, otherwise an error.
 */
2884static int __do_readpage(struct extent_io_tree *tree,
2885 struct page *page,
2886 get_extent_t *get_extent,
2887 struct extent_map **em_cached,
2888 struct bio **bio, int mirror_num,
2889 unsigned long *bio_flags, unsigned int read_flags,
2890 u64 *prev_em_start)
2891{
2892 struct inode *inode = page->mapping->host;
2893 u64 start = page_offset(page);
2894 const u64 end = start + PAGE_SIZE - 1;
2895 u64 cur = start;
2896 u64 extent_offset;
2897 u64 last_byte = i_size_read(inode);
2898 u64 block_start;
2899 u64 cur_end;
2900 struct extent_map *em;
2901 struct block_device *bdev;
2902 int ret = 0;
2903 int nr = 0;
2904 size_t pg_offset = 0;
2905 size_t iosize;
2906 size_t disk_io_size;
2907 size_t blocksize = inode->i_sb->s_blocksize;
2908 unsigned long this_bio_flag = 0;
2909
2910 set_page_extent_mapped(page);
2911
2912 if (!PageUptodate(page)) {
2913 if (cleancache_get_page(page) == 0) {
2914 BUG_ON(blocksize != PAGE_SIZE);
2915 unlock_extent(tree, start, end);
2916 goto out;
2917 }
2918 }
2919
2920 if (page->index == last_byte >> PAGE_SHIFT) {
2921 char *userpage;
2922 size_t zero_offset = last_byte & (PAGE_SIZE - 1);
2923
2924 if (zero_offset) {
2925 iosize = PAGE_SIZE - zero_offset;
2926 userpage = kmap_atomic(page);
2927 memset(userpage + zero_offset, 0, iosize);
2928 flush_dcache_page(page);
2929 kunmap_atomic(userpage);
2930 }
2931 }
2932 while (cur <= end) {
2933 bool force_bio_submit = false;
2934 u64 offset;
2935
2936 if (cur >= last_byte) {
2937 char *userpage;
2938 struct extent_state *cached = NULL;
2939
2940 iosize = PAGE_SIZE - pg_offset;
2941 userpage = kmap_atomic(page);
2942 memset(userpage + pg_offset, 0, iosize);
2943 flush_dcache_page(page);
2944 kunmap_atomic(userpage);
2945 set_extent_uptodate(tree, cur, cur + iosize - 1,
2946 &cached, GFP_NOFS);
2947 unlock_extent_cached(tree, cur,
2948 cur + iosize - 1, &cached);
2949 break;
2950 }
2951 em = __get_extent_map(inode, page, pg_offset, cur,
2952 end - cur + 1, get_extent, em_cached);
2953 if (IS_ERR_OR_NULL(em)) {
2954 SetPageError(page);
2955 unlock_extent(tree, cur, end);
2956 break;
2957 }
2958 extent_offset = cur - em->start;
2959 BUG_ON(extent_map_end(em) <= cur);
2960 BUG_ON(end < cur);
2961
2962 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2963 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2964 extent_set_compress_type(&this_bio_flag,
2965 em->compress_type);
2966 }
2967
2968 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2969 cur_end = min(extent_map_end(em) - 1, end);
2970 iosize = ALIGN(iosize, blocksize);
2971 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2972 disk_io_size = em->block_len;
2973 offset = em->block_start;
2974 } else {
2975 offset = em->block_start + extent_offset;
2976 disk_io_size = iosize;
2977 }
2978 bdev = em->bdev;
2979 block_start = em->block_start;
2980 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2981 block_start = EXTENT_MAP_HOLE;

/*
 * If this range points to a compressed extent and it is not backed by the
 * same compressed extent as the previous range we added to the bio (tracked
 * via *prev_em_start, which records em->orig_start), force submission of the
 * current bio.  Compressed read bios are decompressed as a unit, so mixing
 * pages that belong to different compressed extents in a single bio would
 * fill some pages with data from the wrong extent and corrupt the read.
 */
3017 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3018 prev_em_start && *prev_em_start != (u64)-1 &&
3019 *prev_em_start != em->orig_start)
3020 force_bio_submit = true;
3021
3022 if (prev_em_start)
3023 *prev_em_start = em->orig_start;
3024
3025 free_extent_map(em);
3026 em = NULL;
3027
3028
3029 if (block_start == EXTENT_MAP_HOLE) {
3030 char *userpage;
3031 struct extent_state *cached = NULL;
3032
3033 userpage = kmap_atomic(page);
3034 memset(userpage + pg_offset, 0, iosize);
3035 flush_dcache_page(page);
3036 kunmap_atomic(userpage);
3037
3038 set_extent_uptodate(tree, cur, cur + iosize - 1,
3039 &cached, GFP_NOFS);
3040 unlock_extent_cached(tree, cur,
3041 cur + iosize - 1, &cached);
3042 cur = cur + iosize;
3043 pg_offset += iosize;
3044 continue;
3045 }
3046
3047 if (test_range_bit(tree, cur, cur_end,
3048 EXTENT_UPTODATE, 1, NULL)) {
3049 check_page_uptodate(tree, page);
3050 unlock_extent(tree, cur, cur + iosize - 1);
3051 cur = cur + iosize;
3052 pg_offset += iosize;
3053 continue;
3054 }
3055
3056
3057
3058 if (block_start == EXTENT_MAP_INLINE) {
3059 SetPageError(page);
3060 unlock_extent(tree, cur, cur + iosize - 1);
3061 cur = cur + iosize;
3062 pg_offset += iosize;
3063 continue;
3064 }
3065
3066 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
3067 page, offset, disk_io_size,
3068 pg_offset, bdev, bio,
3069 end_bio_extent_readpage, mirror_num,
3070 *bio_flags,
3071 this_bio_flag,
3072 force_bio_submit);
3073 if (!ret) {
3074 nr++;
3075 *bio_flags = this_bio_flag;
3076 } else {
3077 SetPageError(page);
3078 unlock_extent(tree, cur, cur + iosize - 1);
3079 goto out;
3080 }
3081 cur = cur + iosize;
3082 pg_offset += iosize;
3083 }
3084out:
3085 if (!nr) {
3086 if (!PageError(page))
3087 SetPageUptodate(page);
3088 unlock_page(page);
3089 }
3090 return ret;
3091}
3092
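/*
 * Read a batch of pages that are contiguous in the file: lock the extent
 * range, waiting out any ordered extents that overlap it, then issue
 * __do_readpage() for each page.
 */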
3093static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3094 struct page *pages[], int nr_pages,
3095 u64 start, u64 end,
3096 struct extent_map **em_cached,
3097 struct bio **bio,
3098 unsigned long *bio_flags,
3099 u64 *prev_em_start)
3100{
3101 struct inode *inode;
3102 struct btrfs_ordered_extent *ordered;
3103 int index;
3104
3105 inode = pages[0]->mapping->host;
3106 while (1) {
3107 lock_extent(tree, start, end);
3108 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3109 end - start + 1);
3110 if (!ordered)
3111 break;
3112 unlock_extent(tree, start, end);
3113 btrfs_start_ordered_extent(inode, ordered, 1);
3114 btrfs_put_ordered_extent(ordered);
3115 }
3116
3117 for (index = 0; index < nr_pages; index++) {
3118 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
3119 bio, 0, bio_flags, 0, prev_em_start);
3120 put_page(pages[index]);
3121 }
3122}
3123
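/*
 * Split the readahead page array into runs of pages that are contiguous in
 * the file and hand each run to __do_contiguous_readpages().
 */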
3124static void __extent_readpages(struct extent_io_tree *tree,
3125 struct page *pages[],
3126 int nr_pages,
3127 struct extent_map **em_cached,
3128 struct bio **bio, unsigned long *bio_flags,
3129 u64 *prev_em_start)
3130{
3131 u64 start = 0;
3132 u64 end = 0;
3133 u64 page_start;
3134 int index;
3135 int first_index = 0;
3136
3137 for (index = 0; index < nr_pages; index++) {
3138 page_start = page_offset(pages[index]);
3139 if (!end) {
3140 start = page_start;
3141 end = start + PAGE_SIZE - 1;
3142 first_index = index;
3143 } else if (end + 1 == page_start) {
3144 end += PAGE_SIZE;
3145 } else {
3146 __do_contiguous_readpages(tree, &pages[first_index],
3147 index - first_index, start,
3148 end, em_cached,
3149 bio, bio_flags,
3150 prev_em_start);
3151 start = page_start;
3152 end = start + PAGE_SIZE - 1;
3153 first_index = index;
3154 }
3155 }
3156
3157 if (end)
3158 __do_contiguous_readpages(tree, &pages[first_index],
3159 index - first_index, start,
3160 end, em_cached, bio,
3161 bio_flags, prev_em_start);
3162}
3163
3164static int __extent_read_full_page(struct extent_io_tree *tree,
3165 struct page *page,
3166 get_extent_t *get_extent,
3167 struct bio **bio, int mirror_num,
3168 unsigned long *bio_flags,
3169 unsigned int read_flags)
3170{
3171 struct inode *inode = page->mapping->host;
3172 struct btrfs_ordered_extent *ordered;
3173 u64 start = page_offset(page);
3174 u64 end = start + PAGE_SIZE - 1;
3175 int ret;
3176
3177 while (1) {
3178 lock_extent(tree, start, end);
3179 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3180 PAGE_SIZE);
3181 if (!ordered)
3182 break;
3183 unlock_extent(tree, start, end);
3184 btrfs_start_ordered_extent(inode, ordered, 1);
3185 btrfs_put_ordered_extent(ordered);
3186 }
3187
3188 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3189 bio_flags, read_flags, NULL);
3190 return ret;
3191}
3192
3193int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3194 get_extent_t *get_extent, int mirror_num)
3195{
3196 struct bio *bio = NULL;
3197 unsigned long bio_flags = 0;
3198 int ret;
3199
3200 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3201 &bio_flags, 0);
3202 if (bio)
3203 ret = submit_one_bio(bio, mirror_num, bio_flags);
3204 return ret;
3205}
3206
3207static void update_nr_written(struct writeback_control *wbc,
3208 unsigned long nr_written)
3209{
3210 wbc->nr_to_write -= nr_written;
3211}
3212
/*
 * Helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if our fill_delalloc function did all the work required
 * to write the page (copy into an inline extent).  In this case the IO has
 * been started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked).
 * This returns < 0 if there were errors (page still locked).
 */
3223static noinline_for_stack int writepage_delalloc(struct inode *inode,
3224 struct page *page, struct writeback_control *wbc,
3225 struct extent_page_data *epd,
3226 u64 delalloc_start,
3227 unsigned long *nr_written)
3228{
3229 struct extent_io_tree *tree = epd->tree;
3230 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3231 u64 nr_delalloc;
3232 u64 delalloc_to_write = 0;
3233 u64 delalloc_end = 0;
3234 int ret;
3235 int page_started = 0;
3236
3237 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3238 return 0;
3239
3240 while (delalloc_end < page_end) {
3241 nr_delalloc = find_lock_delalloc_range(inode, tree,
3242 page,
3243 &delalloc_start,
3244 &delalloc_end,
3245 BTRFS_MAX_EXTENT_SIZE);
3246 if (nr_delalloc == 0) {
3247 delalloc_start = delalloc_end + 1;
3248 continue;
3249 }
3250 ret = tree->ops->fill_delalloc(inode, page,
3251 delalloc_start,
3252 delalloc_end,
3253 &page_started,
3254 nr_written, wbc);
3255
3256 if (ret) {
3257 SetPageError(page);
/*
 * fill_delalloc should return < 0 on error, but be defensive: a positive
 * return from this function means "IO started", so convert anything
 * non-negative to -EIO here.
 */
3263 ret = ret < 0 ? ret : -EIO;
3264 goto done;
3265 }
3266
3267
3268
3269
3270 delalloc_to_write += (delalloc_end - delalloc_start +
3271 PAGE_SIZE) >> PAGE_SHIFT;
3272 delalloc_start = delalloc_end + 1;
3273 }
3274 if (wbc->nr_to_write < delalloc_to_write) {
3275 int thresh = 8192;
3276
3277 if (delalloc_to_write < thresh * 2)
3278 thresh = delalloc_to_write;
3279 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3280 thresh);
3281 }
3282
3283
3284
3285
3286 if (page_started) {
/*
 * We've unlocked the page, so we can't update the mapping's writeback
 * index; just account the written pages against nr_to_write.
 */
3292 wbc->nr_to_write -= *nr_written;
3293 return 1;
3294 }
3295
3296 ret = 0;
3297
3298done:
3299 return ret;
3300}
3301
/*
 * Helper for __extent_writepage.  This calls the writepage start hooks,
 * and does the loop to map the page into extents and bios.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked),
 * < 0 if there were errors (page still locked).
 */
3310static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3311 struct page *page,
3312 struct writeback_control *wbc,
3313 struct extent_page_data *epd,
3314 loff_t i_size,
3315 unsigned long nr_written,
3316 unsigned int write_flags, int *nr_ret)
3317{
3318 struct extent_io_tree *tree = epd->tree;
3319 u64 start = page_offset(page);
3320 u64 page_end = start + PAGE_SIZE - 1;
3321 u64 end;
3322 u64 cur = start;
3323 u64 extent_offset;
3324 u64 block_start;
3325 u64 iosize;
3326 struct extent_map *em;
3327 struct block_device *bdev;
3328 size_t pg_offset = 0;
3329 size_t blocksize;
3330 int ret = 0;
3331 int nr = 0;
3332 bool compressed;
3333
3334 if (tree->ops && tree->ops->writepage_start_hook) {
3335 ret = tree->ops->writepage_start_hook(page, start,
3336 page_end);
3337 if (ret) {
3338
3339 if (ret == -EBUSY)
3340 wbc->pages_skipped++;
3341 else
3342 redirty_page_for_writepage(wbc, page);
3343
3344 update_nr_written(wbc, nr_written);
3345 unlock_page(page);
3346 return 1;
3347 }
3348 }
3349
3350
3351
3352
3353
3354 update_nr_written(wbc, nr_written + 1);
3355
3356 end = page_end;
3357 if (i_size <= start) {
3358 if (tree->ops && tree->ops->writepage_end_io_hook)
3359 tree->ops->writepage_end_io_hook(page, start,
3360 page_end, NULL, 1);
3361 goto done;
3362 }
3363
3364 blocksize = inode->i_sb->s_blocksize;
3365
3366 while (cur <= end) {
3367 u64 em_end;
3368 u64 offset;
3369
3370 if (cur >= i_size) {
3371 if (tree->ops && tree->ops->writepage_end_io_hook)
3372 tree->ops->writepage_end_io_hook(page, cur,
3373 page_end, NULL, 1);
3374 break;
3375 }
3376 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
3377 end - cur + 1, 1);
3378 if (IS_ERR_OR_NULL(em)) {
3379 SetPageError(page);
3380 ret = PTR_ERR_OR_ZERO(em);
3381 break;
3382 }
3383
3384 extent_offset = cur - em->start;
3385 em_end = extent_map_end(em);
3386 BUG_ON(em_end <= cur);
3387 BUG_ON(end < cur);
3388 iosize = min(em_end - cur, end - cur + 1);
3389 iosize = ALIGN(iosize, blocksize);
3390 offset = em->block_start + extent_offset;
3391 bdev = em->bdev;
3392 block_start = em->block_start;
3393 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3394 free_extent_map(em);
3395 em = NULL;
3396
/*
 * Compressed and inline extents, as well as holes, are written out through
 * other paths in the filesystem, so don't submit bios for them here.
 */
3401 if (compressed || block_start == EXTENT_MAP_HOLE ||
3402 block_start == EXTENT_MAP_INLINE) {
3403
3404
3405
3406
3407 if (!compressed && tree->ops &&
3408 tree->ops->writepage_end_io_hook)
3409 tree->ops->writepage_end_io_hook(page, cur,
3410 cur + iosize - 1,
3411 NULL, 1);
3412 else if (compressed) {
3413
3414
3415
3416
3417 nr++;
3418 }
3419
3420 cur += iosize;
3421 pg_offset += iosize;
3422 continue;
3423 }
3424
3425 set_range_writeback(tree, cur, cur + iosize - 1);
3426 if (!PageWriteback(page)) {
3427 btrfs_err(BTRFS_I(inode)->root->fs_info,
3428 "page %lu not writeback, cur %llu end %llu",
3429 page->index, cur, end);
3430 }
3431
3432 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3433 page, offset, iosize, pg_offset,
3434 bdev, &epd->bio,
3435 end_bio_extent_writepage,
3436 0, 0, 0, false);
3437 if (ret) {
3438 SetPageError(page);
3439 if (PageWriteback(page))
3440 end_page_writeback(page);
3441 }
3442
3443 cur = cur + iosize;
3444 pg_offset += iosize;
3445 nr++;
3446 }
3447done:
3448 *nr_ret = nr;
3449 return ret;
3450}
3451
/*
 * The writepage semantics are similar to regular writepage: extent records
 * are inserted to lock ranges in the tree, and as dirty areas are found they
 * are marked writeback.  The lock bits are removed and the end_io handler
 * clears the writeback ranges.
 */
3458static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3459 struct extent_page_data *epd)
3460{
3461 struct inode *inode = page->mapping->host;
3462 u64 start = page_offset(page);
3463 u64 page_end = start + PAGE_SIZE - 1;
3464 int ret;
3465 int nr = 0;
3466 size_t pg_offset = 0;
3467 loff_t i_size = i_size_read(inode);
3468 unsigned long end_index = i_size >> PAGE_SHIFT;
3469 unsigned int write_flags = 0;
3470 unsigned long nr_written = 0;
3471
3472 write_flags = wbc_to_write_flags(wbc);
3473
3474 trace___extent_writepage(page, inode, wbc);
3475
3476 WARN_ON(!PageLocked(page));
3477
3478 ClearPageError(page);
3479
3480 pg_offset = i_size & (PAGE_SIZE - 1);
3481 if (page->index > end_index ||
3482 (page->index == end_index && !pg_offset)) {
3483 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3484 unlock_page(page);
3485 return 0;
3486 }
3487
3488 if (page->index == end_index) {
3489 char *userpage;
3490
3491 userpage = kmap_atomic(page);
3492 memset(userpage + pg_offset, 0,
3493 PAGE_SIZE - pg_offset);
3494 kunmap_atomic(userpage);
3495 flush_dcache_page(page);
3496 }
3497
3498 pg_offset = 0;
3499
3500 set_page_extent_mapped(page);
3501
3502 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3503 if (ret == 1)
3504 goto done_unlocked;
3505 if (ret)
3506 goto done;
3507
3508 ret = __extent_writepage_io(inode, page, wbc, epd,
3509 i_size, nr_written, write_flags, &nr);
3510 if (ret == 1)
3511 goto done_unlocked;
3512
3513done:
3514 if (nr == 0) {
3515
3516 set_page_writeback(page);
3517 end_page_writeback(page);
3518 }
3519 if (PageError(page)) {
3520 ret = ret < 0 ? ret : -EIO;
3521 end_extent_writepage(page, ret, start, page_end);
3522 }
3523 unlock_page(page);
3524 return ret;
3525
3526done_unlocked:
3527 return 0;
3528}
3529
3530void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3531{
3532 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3533 TASK_UNINTERRUPTIBLE);
3534}
3535
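/*
 * Lock an extent buffer and its pages for writeback.  Waits for any running
 * writeback (flushing our pending bio first) when the caller asked for
 * synchronous IO.  Returns 1 if the buffer was dirty and is now ours to
 * write, 0 if there is nothing to do.
 */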
3536static noinline_for_stack int
3537lock_extent_buffer_for_io(struct extent_buffer *eb,
3538 struct btrfs_fs_info *fs_info,
3539 struct extent_page_data *epd)
3540{
3541 unsigned long i, num_pages;
3542 int flush = 0;
3543 int ret = 0;
3544
3545 if (!btrfs_try_tree_write_lock(eb)) {
3546 flush = 1;
3547 flush_write_bio(epd);
3548 btrfs_tree_lock(eb);
3549 }
3550
3551 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3552 btrfs_tree_unlock(eb);
3553 if (!epd->sync_io)
3554 return 0;
3555 if (!flush) {
3556 flush_write_bio(epd);
3557 flush = 1;
3558 }
3559 while (1) {
3560 wait_on_extent_buffer_writeback(eb);
3561 btrfs_tree_lock(eb);
3562 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3563 break;
3564 btrfs_tree_unlock(eb);
3565 }
3566 }
3567
/*
 * Flip the dirty bit to writeback under eb->refs_lock so that code checking
 * whether the eb is under IO never observes a window with neither bit set.
 */
3573 spin_lock(&eb->refs_lock);
3574 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3575 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3576 spin_unlock(&eb->refs_lock);
3577 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3578 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3579 -eb->len,
3580 fs_info->dirty_metadata_batch);
3581 ret = 1;
3582 } else {
3583 spin_unlock(&eb->refs_lock);
3584 }
3585
3586 btrfs_tree_unlock(eb);
3587
3588 if (!ret)
3589 return ret;
3590
3591 num_pages = num_extent_pages(eb->start, eb->len);
3592 for (i = 0; i < num_pages; i++) {
3593 struct page *p = eb->pages[i];
3594
3595 if (!trylock_page(p)) {
3596 if (!flush) {
3597 flush_write_bio(epd);
3598 flush = 1;
3599 }
3600 lock_page(p);
3601 }
3602 }
3603
3604 return ret;
3605}
3606
3607static void end_extent_buffer_writeback(struct extent_buffer *eb)
3608{
3609 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3610 smp_mb__after_atomic();
3611 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3612}
3613
3614static void set_btree_ioerr(struct page *page)
3615{
3616 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3617
3618 SetPageError(page);
3619 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3620 return;
3621
/*
 * A failed extent buffer write must not go unnoticed.  By the time the
 * transaction commit or log sync code waits on the btree inode's writeback,
 * the pages of this buffer may no longer be dirty, under writeback or even
 * carry their Error bit, so the failure could be missed and we would write a
 * superblock pointing to btree nodes whose on-disk content is stale or
 * garbage.  Record the error in fs_info->flags instead: one bit for buffers
 * written outside of a log tree (log_index == -1) and one per log tree
 * (log_index 0 or 1).  The commit and log sync paths check these bits and
 * fail the operation instead of trusting the write.
 */
3660 switch (eb->log_index) {
3661 case -1:
3662 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3663 break;
3664 case 0:
3665 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3666 break;
3667 case 1:
3668 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3669 break;
3670 default:
3671 BUG();
3672 }
3673}
3674
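/*
 * Completion handler for extent buffer writeback bios: record any IO error on
 * the buffer, end page writeback, and wake waiters once the last page of the
 * buffer has finished.
 */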
3675static void end_bio_extent_buffer_writepage(struct bio *bio)
3676{
3677 struct bio_vec *bvec;
3678 struct extent_buffer *eb;
3679 int i, done;
3680
3681 ASSERT(!bio_flagged(bio, BIO_CLONED));
3682 bio_for_each_segment_all(bvec, bio, i) {
3683 struct page *page = bvec->bv_page;
3684
3685 eb = (struct extent_buffer *)page->private;
3686 BUG_ON(!eb);
3687 done = atomic_dec_and_test(&eb->io_pages);
3688
3689 if (bio->bi_status ||
3690 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3691 ClearPageUptodate(page);
3692 set_btree_ioerr(page);
3693 }
3694
3695 end_page_writeback(page);
3696
3697 if (!done)
3698 continue;
3699
3700 end_extent_buffer_writeback(eb);
3701 }
3702
3703 bio_put(bio);
3704}
3705
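/*
 * Write out a single extent buffer: zero any unused area beyond the last
 * item/key so stale memory never hits disk, then submit each page of the
 * buffer for write.  On submission failure the remaining pages are cleaned
 * up and the buffer is flagged with a write error.
 */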
3706static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3707 struct btrfs_fs_info *fs_info,
3708 struct writeback_control *wbc,
3709 struct extent_page_data *epd)
3710{
3711 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3712 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3713 u64 offset = eb->start;
3714 u32 nritems;
3715 unsigned long i, num_pages;
3716 unsigned long start, end;
3717 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3718 int ret = 0;
3719
3720 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3721 num_pages = num_extent_pages(eb->start, eb->len);
3722 atomic_set(&eb->io_pages, num_pages);
3723
3724
3725 nritems = btrfs_header_nritems(eb);
3726 if (btrfs_header_level(eb) > 0) {
3727 end = btrfs_node_key_ptr_offset(nritems);
3728
3729 memzero_extent_buffer(eb, end, eb->len - end);
3730 } else {
3731
3732
3733
3734
3735 start = btrfs_item_nr_offset(nritems);
3736 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
3737 memzero_extent_buffer(eb, start, end - start);
3738 }
3739
3740 for (i = 0; i < num_pages; i++) {
3741 struct page *p = eb->pages[i];
3742
3743 clear_page_dirty_for_io(p);
3744 set_page_writeback(p);
3745 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3746 p, offset, PAGE_SIZE, 0, bdev,
3747 &epd->bio,
3748 end_bio_extent_buffer_writepage,
3749 0, 0, 0, false);
3750 if (ret) {
3751 set_btree_ioerr(p);
3752 if (PageWriteback(p))
3753 end_page_writeback(p);
3754 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3755 end_extent_buffer_writeback(eb);
3756 ret = -EIO;
3757 break;
3758 }
3759 offset += PAGE_SIZE;
3760 update_nr_written(wbc, 1);
3761 unlock_page(p);
3762 }
3763
3764 if (unlikely(ret)) {
3765 for (; i < num_pages; i++) {
3766 struct page *p = eb->pages[i];
3767 clear_page_dirty_for_io(p);
3768 unlock_page(p);
3769 }
3770 }
3771
3772 return ret;
3773}
3774
3775int btree_write_cache_pages(struct address_space *mapping,
3776 struct writeback_control *wbc)
3777{
3778 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3779 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3780 struct extent_buffer *eb, *prev_eb = NULL;
3781 struct extent_page_data epd = {
3782 .bio = NULL,
3783 .tree = tree,
3784 .extent_locked = 0,
3785 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3786 };
3787 int ret = 0;
3788 int done = 0;
3789 int nr_to_write_done = 0;
3790 struct pagevec pvec;
3791 int nr_pages;
3792 pgoff_t index;
3793 pgoff_t end;
3794 int scanned = 0;
3795 int tag;
3796
3797 pagevec_init(&pvec);
3798 if (wbc->range_cyclic) {
3799 index = mapping->writeback_index;
3800 end = -1;
3801 } else {
3802 index = wbc->range_start >> PAGE_SHIFT;
3803 end = wbc->range_end >> PAGE_SHIFT;
3804 scanned = 1;
3805 }
3806 if (wbc->sync_mode == WB_SYNC_ALL)
3807 tag = PAGECACHE_TAG_TOWRITE;
3808 else
3809 tag = PAGECACHE_TAG_DIRTY;
3810retry:
3811 if (wbc->sync_mode == WB_SYNC_ALL)
3812 tag_pages_for_writeback(mapping, index, end);
3813 while (!done && !nr_to_write_done && (index <= end) &&
3814 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
3815 tag))) {
3816 unsigned i;
3817
3818 scanned = 1;
3819 for (i = 0; i < nr_pages; i++) {
3820 struct page *page = pvec.pages[i];
3821
3822 if (!PagePrivate(page))
3823 continue;
3824
3825 spin_lock(&mapping->private_lock);
3826 if (!PagePrivate(page)) {
3827 spin_unlock(&mapping->private_lock);
3828 continue;
3829 }
3830
3831 eb = (struct extent_buffer *)page->private;

/*
 * This shouldn't happen; it would normally be a BUG_ON(), but there is no
 * point crashing the box for something we can survive.
 */
3838 if (WARN_ON(!eb)) {
3839 spin_unlock(&mapping->private_lock);
3840 continue;
3841 }
3842
3843 if (eb == prev_eb) {
3844 spin_unlock(&mapping->private_lock);
3845 continue;
3846 }
3847
3848 ret = atomic_inc_not_zero(&eb->refs);
3849 spin_unlock(&mapping->private_lock);
3850 if (!ret)
3851 continue;
3852
3853 prev_eb = eb;
3854 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3855 if (!ret) {
3856 free_extent_buffer(eb);
3857 continue;
3858 }
3859
3860 ret = write_one_eb(eb, fs_info, wbc, &epd);
3861 if (ret) {
3862 done = 1;
3863 free_extent_buffer(eb);
3864 break;
3865 }
3866 free_extent_buffer(eb);

/*
 * The filesystem may choose to bump up nr_to_write.  We have to make
 * sure to honor the new nr_to_write at any time.
 */
3873 nr_to_write_done = wbc->nr_to_write <= 0;
3874 }
3875 pagevec_release(&pvec);
3876 cond_resched();
3877 }
3878 if (!scanned && !done) {
3879
3880
3881
3882
3883 scanned = 1;
3884 index = 0;
3885 goto retry;
3886 }
3887 flush_write_bio(&epd);
3888 return ret;
3889}
3890
/**
 * extent_write_cache_pages - walk the list of dirty pages of the given
 * address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @epd: holds the bio and other context for the writeback
 *
 * If a page is already under IO it is skipped, which is fine for
 * memory-cleaning writeback but not for data-integrity callers such as
 * fsync(); when wbc->sync_mode is WB_SYNC_ALL we therefore wait for IO on
 * each page (after flushing our own pending bio) before writing it again.
 */
3905static int extent_write_cache_pages(struct address_space *mapping,
3906 struct writeback_control *wbc,
3907 struct extent_page_data *epd)
3908{
3909 struct inode *inode = mapping->host;
3910 int ret = 0;
3911 int done = 0;
3912 int nr_to_write_done = 0;
3913 struct pagevec pvec;
3914 int nr_pages;
3915 pgoff_t index;
3916 pgoff_t end;
3917 pgoff_t done_index;
3918 int range_whole = 0;
3919 int scanned = 0;
3920 int tag;
3921
/*
 * We have to hold onto the inode so that ordered extents can do their work
 * when the IO finishes.  The alternative would be failing to add an ordered
 * extent if the igrab() fails there, which is a huge pain to deal with, so
 * instead just hold onto the inode throughout the writepages operation.  If
 * igrab() fails here the inode is being freed anyway, and there is no point
 * writing out data that is about to be truncated.
 */
3931 if (!igrab(inode))
3932 return 0;
3933
3934 pagevec_init(&pvec);
3935 if (wbc->range_cyclic) {
3936 index = mapping->writeback_index;
3937 end = -1;
3938 } else {
3939 index = wbc->range_start >> PAGE_SHIFT;
3940 end = wbc->range_end >> PAGE_SHIFT;
3941 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3942 range_whole = 1;
3943 scanned = 1;
3944 }
3945 if (wbc->sync_mode == WB_SYNC_ALL)
3946 tag = PAGECACHE_TAG_TOWRITE;
3947 else
3948 tag = PAGECACHE_TAG_DIRTY;
3949retry:
3950 if (wbc->sync_mode == WB_SYNC_ALL)
3951 tag_pages_for_writeback(mapping, index, end);
3952 done_index = index;
3953 while (!done && !nr_to_write_done && (index <= end) &&
3954 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
3955 &index, end, tag))) {
3956 unsigned i;
3957
3958 scanned = 1;
3959 for (i = 0; i < nr_pages; i++) {
3960 struct page *page = pvec.pages[i];
3961
3962 done_index = page->index;

/*
 * At this point we hold neither the page lock nor the mapping's
 * lock: the page may already have been truncated or invalidated
 * (page->mapping changed to NULL), or even recycled for another file.
 */
3970 if (!trylock_page(page)) {
3971 flush_write_bio(epd);
3972 lock_page(page);
3973 }
3974
3975 if (unlikely(page->mapping != mapping)) {
3976 unlock_page(page);
3977 continue;
3978 }
3979
3980 if (wbc->sync_mode != WB_SYNC_NONE) {
3981 if (PageWriteback(page))
3982 flush_write_bio(epd);
3983 wait_on_page_writeback(page);
3984 }
3985
3986 if (PageWriteback(page) ||
3987 !clear_page_dirty_for_io(page)) {
3988 unlock_page(page);
3989 continue;
3990 }
3991
3992 ret = __extent_writepage(page, wbc, epd);
3993
3994 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
3995 unlock_page(page);
3996 ret = 0;
3997 }
3998 if (ret < 0) {
/*
 * done_index is set past this page, so media errors will not choke
 * background writeout for the entire file.  This has consequences for
 * range_cyclic semantics (the cycle will return to the start of the
 * file for a range_cyclic write).
 */
4008 done_index = page->index + 1;
4009 done = 1;
4010 break;
4011 }
4012
/*
 * The filesystem may choose to bump up nr_to_write.  We have to make
 * sure to honor the new nr_to_write at any time.
 */
4018 nr_to_write_done = wbc->nr_to_write <= 0;
4019 }
4020 pagevec_release(&pvec);
4021 cond_resched();
4022 }
4023 if (!scanned && !done) {
4024
4025
4026
4027
4028 scanned = 1;
4029 index = 0;
4030 goto retry;
4031 }
4032
4033 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4034 mapping->writeback_index = done_index;
4035
4036 btrfs_add_delayed_iput(inode);
4037 return ret;
4038}
4039
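/* Submit any bio built up in the extent_page_data, if there is one. */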
4040static void flush_write_bio(struct extent_page_data *epd)
4041{
4042 if (epd->bio) {
4043 int ret;
4044
4045 ret = submit_one_bio(epd->bio, 0, 0);
4046 BUG_ON(ret < 0);
4047 epd->bio = NULL;
4048 }
4049}
4050
4051int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4052{
4053 int ret;
4054 struct extent_page_data epd = {
4055 .bio = NULL,
4056 .tree = &BTRFS_I(page->mapping->host)->io_tree,
4057 .extent_locked = 0,
4058 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4059 };
4060
4061 ret = __extent_writepage(page, wbc, &epd);
4062
4063 flush_write_bio(&epd);
4064 return ret;
4065}
4066
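/*
 * Write out a file range whose pages are already locked by the caller
 * (epd.extent_locked == 1), page by page.  Pages that are no longer dirty
 * just get their writepage_end_io_hook called and are unlocked.
 */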
4067int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4068 int mode)
4069{
4070 int ret = 0;
4071 struct address_space *mapping = inode->i_mapping;
4072 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
4073 struct page *page;
4074 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4075 PAGE_SHIFT;
4076
4077 struct extent_page_data epd = {
4078 .bio = NULL,
4079 .tree = tree,
4080 .extent_locked = 1,
4081 .sync_io = mode == WB_SYNC_ALL,
4082 };
4083 struct writeback_control wbc_writepages = {
4084 .sync_mode = mode,
4085 .nr_to_write = nr_pages * 2,
4086 .range_start = start,
4087 .range_end = end + 1,
4088 };
4089
4090 while (start <= end) {
4091 page = find_get_page(mapping, start >> PAGE_SHIFT);
4092 if (clear_page_dirty_for_io(page))
4093 ret = __extent_writepage(page, &wbc_writepages, &epd);
4094 else {
4095 if (tree->ops && tree->ops->writepage_end_io_hook)
4096 tree->ops->writepage_end_io_hook(page, start,
4097 start + PAGE_SIZE - 1,
4098 NULL, 1);
4099 unlock_page(page);
4100 }
4101 put_page(page);
4102 start += PAGE_SIZE;
4103 }
4104
4105 flush_write_bio(&epd);
4106 return ret;
4107}
4108
4109int extent_writepages(struct address_space *mapping,
4110 struct writeback_control *wbc)
4111{
4112 int ret = 0;
4113 struct extent_page_data epd = {
4114 .bio = NULL,
4115 .tree = &BTRFS_I(mapping->host)->io_tree,
4116 .extent_locked = 0,
4117 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4118 };
4119
4120 ret = extent_write_cache_pages(mapping, wbc, &epd);
4121 flush_write_bio(&epd);
4122 return ret;
4123}
4124
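/*
 * Readahead entry point: add the pages to the page cache in batches of 16 and
 * feed each batch to __extent_readpages(), submitting the last bio at the end.
 */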
4125int extent_readpages(struct address_space *mapping, struct list_head *pages,
4126 unsigned nr_pages)
4127{
4128 struct bio *bio = NULL;
4129 unsigned page_idx;
4130 unsigned long bio_flags = 0;
4131 struct page *pagepool[16];
4132 struct page *page;
4133 struct extent_map *em_cached = NULL;
4134 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
4135 int nr = 0;
4136 u64 prev_em_start = (u64)-1;
4137
4138 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4139 page = list_entry(pages->prev, struct page, lru);
4140
4141 prefetchw(&page->flags);
4142 list_del(&page->lru);
4143 if (add_to_page_cache_lru(page, mapping,
4144 page->index,
4145 readahead_gfp_mask(mapping))) {
4146 put_page(page);
4147 continue;
4148 }
4149
4150 pagepool[nr++] = page;
4151 if (nr < ARRAY_SIZE(pagepool))
4152 continue;
4153 __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
4154 &bio_flags, &prev_em_start);
4155 nr = 0;
4156 }
4157 if (nr)
4158 __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
4159 &bio_flags, &prev_em_start);
4160
4161 if (em_cached)
4162 free_extent_map(em_cached);
4163
4164 BUG_ON(!list_empty(pages));
4165 if (bio)
4166 return submit_one_bio(bio, 0, bio_flags);
4167 return 0;
4168}
4169
/*
 * Basic invalidatepage code: this waits on any locked or writeback ranges
 * corresponding to the page, and then deletes any extent state records from
 * the tree.
 */
4175int extent_invalidatepage(struct extent_io_tree *tree,
4176 struct page *page, unsigned long offset)
4177{
4178 struct extent_state *cached_state = NULL;
4179 u64 start = page_offset(page);
4180 u64 end = start + PAGE_SIZE - 1;
4181 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4182
4183 start += ALIGN(offset, blocksize);
4184 if (start > end)
4185 return 0;
4186
4187 lock_extent_bits(tree, start, end, &cached_state);
4188 wait_on_page_writeback(page);
4189 clear_extent_bit(tree, start, end,
4190 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4191 EXTENT_DO_ACCOUNTING,
4192 1, 1, &cached_state);
4193 return 0;
4194}
4195
/*
 * A helper for releasepage: this tests for areas of the page that are locked
 * or under IO and drops the related state bits if it is safe to drop the
 * page.
 */
4201static int try_release_extent_state(struct extent_io_tree *tree,
4202 struct page *page, gfp_t mask)
4203{
4204 u64 start = page_offset(page);
4205 u64 end = start + PAGE_SIZE - 1;
4206 int ret = 1;
4207
4208 if (test_range_bit(tree, start, end,
4209 EXTENT_IOBITS, 0, NULL))
4210 ret = 0;
4211 else {
4212
4213
4214
4215
4216 ret = __clear_extent_bit(tree, start, end,
4217 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4218 0, 0, NULL, mask, NULL);
4219
4220
4221
4222
4223 if (ret < 0)
4224 ret = 0;
4225 else
4226 ret = 1;
4227 }
4228 return ret;
4229}
4230
/*
 * A helper for releasepage.  As long as there are no locked extents in the
 * range corresponding to the page, both state records and extent map records
 * are removed.
 */
4236int try_release_extent_mapping(struct page *page, gfp_t mask)
4237{
4238 struct extent_map *em;
4239 u64 start = page_offset(page);
4240 u64 end = start + PAGE_SIZE - 1;
4241 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4242 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4243 struct extent_map_tree *map = &btrfs_inode->extent_tree;
4244
4245 if (gfpflags_allow_blocking(mask) &&
4246 page->mapping->host->i_size > SZ_16M) {
4247 u64 len;
4248 while (start <= end) {
4249 len = end - start + 1;
4250 write_lock(&map->lock);
4251 em = lookup_extent_mapping(map, start, len);
4252 if (!em) {
4253 write_unlock(&map->lock);
4254 break;
4255 }
4256 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4257 em->start != start) {
4258 write_unlock(&map->lock);
4259 free_extent_map(em);
4260 break;
4261 }
4262 if (!test_range_bit(tree, em->start,
4263 extent_map_end(em) - 1,
4264 EXTENT_LOCKED | EXTENT_WRITEBACK,
4265 0, NULL)) {
4266 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4267 &btrfs_inode->runtime_flags);
4268 remove_extent_mapping(map, em);
4269
4270 free_extent_map(em);
4271 }
4272 start = extent_map_end(em);
4273 write_unlock(&map->lock);
4274
4275
4276 free_extent_map(em);
4277 }
4278 }
4279 return try_release_extent_state(tree, page, mask);
4280}
4281
/*
 * Helper function for fiemap, which doesn't want to see any holes.  This maps
 * until we find something past 'last'.
 */
4286static struct extent_map *get_extent_skip_holes(struct inode *inode,
4287 u64 offset, u64 last)
4288{
4289 u64 sectorsize = btrfs_inode_sectorsize(inode);
4290 struct extent_map *em;
4291 u64 len;
4292
4293 if (offset >= last)
4294 return NULL;
4295
4296 while (1) {
4297 len = last - offset;
4298 if (len == 0)
4299 break;
4300 len = ALIGN(len, sectorsize);
4301 em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
4302 len, 0);
4303 if (IS_ERR_OR_NULL(em))
4304 return em;
4305
4306
4307 if (em->block_start != EXTENT_MAP_HOLE)
4308 return em;
4309
4310
4311 offset = extent_map_end(em);
4312 free_extent_map(em);
4313 if (offset >= last)
4314 break;
4315 }
4316 return NULL;
4317}
4318
/*
 * Cache of the previous fiemap extent, used to merge contiguous extents
 * before handing them to fiemap_fill_next_extent().
 */
4324struct fiemap_cache {
4325 u64 offset;
4326 u64 phys;
4327 u64 len;
4328 u32 flags;
4329 bool cached;
4330};
4331
/*
 * Helper to submit a fiemap extent.
 *
 * Try to merge the extent specified by @offset, @phys, @len and @flags with
 * the cached one; only when the merge fails is the cached extent submitted.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
4342static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4343 struct fiemap_cache *cache,
4344 u64 offset, u64 phys, u64 len, u32 flags)
4345{
4346 int ret = 0;
4347
4348 if (!cache->cached)
4349 goto assign;
4350
/*
 * Sanity check: extent_fiemap() should have ensured that the new extent does
 * not overlap the cached one in the logical address space.  Physical
 * addresses may overlap (e.g. due to compression), logical ones must not.
 */
4358 if (cache->offset + cache->len > offset) {
4359 WARN_ON(1);
4360 return -EINVAL;
4361 }
4362
/*
 * Only merge fiemap extents if:
 * 1) their logical addresses are continuous, and
 * 2) their physical addresses are continuous, so truly compressed extents
 *    (physical size smaller than logical size) never get merged, and
 * 3) they share the same flags except FIEMAP_EXTENT_LAST, so a regular
 *    extent never gets merged with a preallocated one.
 */
4374 if (cache->offset + cache->len == offset &&
4375 cache->phys + cache->len == phys &&
4376 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4377 (flags & ~FIEMAP_EXTENT_LAST)) {
4378 cache->len += len;
4379 cache->flags |= flags;
4380 goto try_submit_last;
4381 }
4382
4383
4384 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4385 cache->len, cache->flags);
4386 cache->cached = false;
4387 if (ret)
4388 return ret;
4389assign:
4390 cache->cached = true;
4391 cache->offset = offset;
4392 cache->phys = phys;
4393 cache->len = len;
4394 cache->flags = flags;
4395try_submit_last:
4396 if (cache->flags & FIEMAP_EXTENT_LAST) {
4397 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4398 cache->phys, cache->len, cache->flags);
4399 cache->cached = false;
4400 }
4401 return ret;
4402}
4403
/*
 * Emit the last cached fiemap extent.
 *
 * The cache may still hold an extent when the main loop finishes, for example
 * when the requested fiemap range ends inside the first extent found: that
 * extent gets cached but never submitted, so flush it here before returning
 * from extent_fiemap().
 */
4415static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
4416 struct fiemap_extent_info *fieinfo,
4417 struct fiemap_cache *cache)
4418{
4419 int ret;
4420
4421 if (!cache->cached)
4422 return 0;
4423
4424 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4425 cache->len, cache->flags);
4426 cache->cached = false;
4427 if (ret > 0)
4428 ret = 0;
4429 return ret;
4430}
4431
4432int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4433 __u64 start, __u64 len)
4434{
4435 int ret = 0;
4436 u64 off = start;
4437 u64 max = start + len;
4438 u32 flags = 0;
4439 u32 found_type;
4440 u64 last;
4441 u64 last_for_get_extent = 0;
4442 u64 disko = 0;
4443 u64 isize = i_size_read(inode);
4444 struct btrfs_key found_key;
4445 struct extent_map *em = NULL;
4446 struct extent_state *cached_state = NULL;
4447 struct btrfs_path *path;
4448 struct btrfs_root *root = BTRFS_I(inode)->root;
4449 struct fiemap_cache cache = { 0 };
4450 int end = 0;
4451 u64 em_start = 0;
4452 u64 em_len = 0;
4453 u64 em_end = 0;
4454
4455 if (len == 0)
4456 return -EINVAL;
4457
4458 path = btrfs_alloc_path();
4459 if (!path)
4460 return -ENOMEM;
4461 path->leave_spinning = 1;
4462
4463 start = round_down(start, btrfs_inode_sectorsize(inode));
4464 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4465
4466
4467
4468
4469
4470 ret = btrfs_lookup_file_extent(NULL, root, path,
4471 btrfs_ino(BTRFS_I(inode)), -1, 0);
4472 if (ret < 0) {
4473 btrfs_free_path(path);
4474 return ret;
4475 } else {
4476 WARN_ON(!ret);
4477 if (ret == 1)
4478 ret = 0;
4479 }
4480
4481 path->slots[0]--;
4482 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4483 found_type = found_key.type;
4484
4485
4486 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4487 found_type != BTRFS_EXTENT_DATA_KEY) {
4488
4489 last = (u64)-1;
4490 last_for_get_extent = isize;
4491 } else {
/*
 * Remember the start of the last extent.  Many different factors go
 * into the length of the extent, so it is much less complex to just
 * remember where it started.
 */
4497 last = found_key.offset;
4498 last_for_get_extent = last + 1;
4499 }
4500 btrfs_release_path(path);
4501
/*
 * We might have some extents allocated but more delalloc past those extents,
 * so trust isize unless the start of the last extent is beyond isize.
 */
4507 if (last < isize) {
4508 last = (u64)-1;
4509 last_for_get_extent = isize;
4510 }
4511
4512 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4513 &cached_state);
4514
4515 em = get_extent_skip_holes(inode, start, last_for_get_extent);
4516 if (!em)
4517 goto out;
4518 if (IS_ERR(em)) {
4519 ret = PTR_ERR(em);
4520 goto out;
4521 }
4522
4523 while (!end) {
4524 u64 offset_in_extent = 0;
4525
4526
4527 if (em->start >= max || extent_map_end(em) < off)
4528 break;
4529
/*
 * get_extent may return an extent that starts before our requested range.
 * Make sure the ranges we return to fiemap always move forward and don't
 * overlap, so adjust the offsets here.
 */
4536 em_start = max(em->start, off);
4537
/*
 * Record the offset from the start of the extent for adjusting the disk
 * offset below.  Only do this if the extent isn't compressed, since our
 * in-ram offset may be past what we have actually allocated on disk.
 */
4544 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4545 offset_in_extent = em_start - em->start;
4546 em_end = extent_map_end(em);
4547 em_len = em_end - em_start;
4548 flags = 0;
4549 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4550 disko = em->block_start + offset_in_extent;
4551 else
4552 disko = 0;
4553
4554
4555
4556
4557 off = extent_map_end(em);
4558 if (off >= max)
4559 end = 1;
4560
4561 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4562 end = 1;
4563 flags |= FIEMAP_EXTENT_LAST;
4564 } else if (em->block_start == EXTENT_MAP_INLINE) {
4565 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4566 FIEMAP_EXTENT_NOT_ALIGNED);
4567 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4568 flags |= (FIEMAP_EXTENT_DELALLOC |
4569 FIEMAP_EXTENT_UNKNOWN);
4570 } else if (fieinfo->fi_extents_max) {
4571 u64 bytenr = em->block_start -
4572 (em->start - em->orig_start);

/*
 * As btrfs supports shared space, this information can be exported to
 * user space tools via FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
 * we're only getting a count and can skip the lookup.
 */
4581 ret = btrfs_check_shared(root,
4582 btrfs_ino(BTRFS_I(inode)),
4583 bytenr);
4584 if (ret < 0)
4585 goto out_free;
4586 if (ret)
4587 flags |= FIEMAP_EXTENT_SHARED;
4588 ret = 0;
4589 }
4590 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4591 flags |= FIEMAP_EXTENT_ENCODED;
4592 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4593 flags |= FIEMAP_EXTENT_UNWRITTEN;
4594
4595 free_extent_map(em);
4596 em = NULL;
4597 if ((em_start >= last) || em_len == (u64)-1 ||
4598 (last == (u64)-1 && isize <= em_end)) {
4599 flags |= FIEMAP_EXTENT_LAST;
4600 end = 1;
4601 }
4602
4603
4604 em = get_extent_skip_holes(inode, off, last_for_get_extent);
4605 if (IS_ERR(em)) {
4606 ret = PTR_ERR(em);
4607 goto out;
4608 }
4609 if (!em) {
4610 flags |= FIEMAP_EXTENT_LAST;
4611 end = 1;
4612 }
4613 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4614 em_len, flags);
4615 if (ret) {
4616 if (ret == 1)
4617 ret = 0;
4618 goto out_free;
4619 }
4620 }
4621out_free:
4622 if (!ret)
4623 ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
4624 free_extent_map(em);
4625out:
4626 btrfs_free_path(path);
4627 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4628 &cached_state);
4629 return ret;
4630}
4631
4632static void __free_extent_buffer(struct extent_buffer *eb)
4633{
4634 btrfs_leak_debug_del(&eb->leak_list);
4635 kmem_cache_free(extent_buffer_cache, eb);
4636}
4637
4638int extent_buffer_under_io(struct extent_buffer *eb)
4639{
4640 return (atomic_read(&eb->io_pages) ||
4641 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4642 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4643}
4644
4645
4646
4647
4648static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4649{
4650 unsigned long index;
4651 struct page *page;
4652 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4653
4654 BUG_ON(extent_buffer_under_io(eb));
4655
4656 index = num_extent_pages(eb->start, eb->len);
4657 if (index == 0)
4658 return;
4659
4660 do {
4661 index--;
4662 page = eb->pages[index];
4663 if (!page)
4664 continue;
4665 if (mapped)
4666 spin_lock(&page->mapping->private_lock);

/*
 * We remove the pages only after removing the eb from the radix tree, so a
 * new eb could already have been attached to this page.  Only clear
 * page->private if it still points at this eb.
 */
4674 if (PagePrivate(page) &&
4675 page->private == (unsigned long)eb) {
4676 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4677 BUG_ON(PageDirty(page));
4678 BUG_ON(PageWriteback(page));
4679
4680
4681
4682
4683 ClearPagePrivate(page);
4684 set_page_private(page, 0);
4685
4686 put_page(page);
4687 }
4688
4689 if (mapped)
4690 spin_unlock(&page->mapping->private_lock);
4691
4692
4693 put_page(page);
4694 } while (index != 0);
4695}
4696
4697
4698
4699
4700static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4701{
4702 btrfs_release_extent_buffer_page(eb);
4703 __free_extent_buffer(eb);
4704}
4705
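/*
 * Allocate and initialize a bare extent_buffer (no pages attached yet) for
 * the given byte range, with a single reference held by the caller.
 */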
4706static struct extent_buffer *
4707__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4708 unsigned long len)
4709{
4710 struct extent_buffer *eb = NULL;
4711
4712 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4713 eb->start = start;
4714 eb->len = len;
4715 eb->fs_info = fs_info;
4716 eb->bflags = 0;
4717 rwlock_init(&eb->lock);
4718 atomic_set(&eb->write_locks, 0);
4719 atomic_set(&eb->read_locks, 0);
4720 atomic_set(&eb->blocking_readers, 0);
4721 atomic_set(&eb->blocking_writers, 0);
4722 atomic_set(&eb->spinning_readers, 0);
4723 atomic_set(&eb->spinning_writers, 0);
4724 eb->lock_nested = 0;
4725 init_waitqueue_head(&eb->write_lock_wq);
4726 init_waitqueue_head(&eb->read_lock_wq);
4727
4728 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4729
4730 spin_lock_init(&eb->refs_lock);
4731 atomic_set(&eb->refs, 1);
4732 atomic_set(&eb->io_pages, 0);
4733
4734
4735
4736
4737 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4738 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4739 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4740
4741 return eb;
4742}
4743
4744struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4745{
4746 unsigned long i;
4747 struct page *p;
4748 struct extent_buffer *new;
4749 unsigned long num_pages = num_extent_pages(src->start, src->len);
4750
4751 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4752 if (new == NULL)
4753 return NULL;
4754
4755 for (i = 0; i < num_pages; i++) {
4756 p = alloc_page(GFP_NOFS);
4757 if (!p) {
4758 btrfs_release_extent_buffer(new);
4759 return NULL;
4760 }
4761 attach_extent_buffer_page(new, p);
4762 WARN_ON(PageDirty(p));
4763 SetPageUptodate(p);
4764 new->pages[i] = p;
4765 copy_page(page_address(p), page_address(src->pages[i]));
4766 }
4767
4768 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4769 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4770
4771 return new;
4772}
4773
4774struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4775 u64 start, unsigned long len)
4776{
4777 struct extent_buffer *eb;
4778 unsigned long num_pages;
4779 unsigned long i;
4780
4781 num_pages = num_extent_pages(start, len);
4782
4783 eb = __alloc_extent_buffer(fs_info, start, len);
4784 if (!eb)
4785 return NULL;
4786
4787 for (i = 0; i < num_pages; i++) {
4788 eb->pages[i] = alloc_page(GFP_NOFS);
4789 if (!eb->pages[i])
4790 goto err;
4791 }
4792 set_extent_buffer_uptodate(eb);
4793 btrfs_set_header_nritems(eb, 0);
4794 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4795
4796 return eb;
4797err:
4798 for (; i > 0; i--)
4799 __free_page(eb->pages[i - 1]);
4800 __free_extent_buffer(eb);
4801 return NULL;
4802}
4803
4804struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4805 u64 start)
4806{
4807 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
4808}
4809
4810static void check_buffer_tree_ref(struct extent_buffer *eb)
4811{
4812 int refs;
4813
/*
 * The TREE_REF bit has to be set whenever the buffer is dirty, otherwise the
 * code that frees a buffer could end up dropping a dirty page.  Once set, the
 * bit stays while the buffer is dirty or under writeback, and while we hold a
 * reference on the eb.
 *
 * We must take the reference before setting the bit: free_extent_buffer()
 * could otherwise see the bit, clear it and drop what it believes is the
 * tree's reference, freeing the pages out from under us.  Reading the ref
 * count and then setting the bit is safe only because we hold our own
 * reference here.
 */
4833 refs = atomic_read(&eb->refs);
4834 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4835 return;
4836
4837 spin_lock(&eb->refs_lock);
4838 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4839 atomic_inc(&eb->refs);
4840 spin_unlock(&eb->refs_lock);
4841}
4842
4843static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4844 struct page *accessed)
4845{
4846 unsigned long num_pages, i;
4847
4848 check_buffer_tree_ref(eb);
4849
4850 num_pages = num_extent_pages(eb->start, eb->len);
4851 for (i = 0; i < num_pages; i++) {
4852 struct page *p = eb->pages[i];
4853
4854 if (p != accessed)
4855 mark_page_accessed(p);
4856 }
4857}
4858
4859struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4860 u64 start)
4861{
4862 struct extent_buffer *eb;
4863
4864 rcu_read_lock();
4865 eb = radix_tree_lookup(&fs_info->buffer_radix,
4866 start >> PAGE_SHIFT);
4867 if (eb && atomic_inc_not_zero(&eb->refs)) {
4868 rcu_read_unlock();

/*
 * Take and release the eb's refs_lock to serialize against
 * free_extent_buffer(): our eb may be flagged EXTENT_BUFFER_STALE while
 * another task, having seen that flag with refs == 2, no IO in flight and
 * TREE_REF set, is in the middle of dropping the buffer's last two
 * references.  Without this we could bump the ref count, clear the stale
 * flag and dirty the buffer while that task is still running, leading to an
 * attempt to free a dirty extent buffer.
 */
4884 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4885 spin_lock(&eb->refs_lock);
4886 spin_unlock(&eb->refs_lock);
4887 }
4888 mark_extent_buffer_accessed(eb, NULL);
4889 return eb;
4890 }
4891 rcu_read_unlock();
4892
4893 return NULL;
4894}
4895
4896#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4897struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4898 u64 start)
4899{
4900 struct extent_buffer *eb, *exists = NULL;
4901 int ret;
4902
4903 eb = find_extent_buffer(fs_info, start);
4904 if (eb)
4905 return eb;
4906 eb = alloc_dummy_extent_buffer(fs_info, start);
4907 if (!eb)
4908 return NULL;
4909 eb->fs_info = fs_info;
4910again:
4911 ret = radix_tree_preload(GFP_NOFS);
4912 if (ret)
4913 goto free_eb;
4914 spin_lock(&fs_info->buffer_lock);
4915 ret = radix_tree_insert(&fs_info->buffer_radix,
4916 start >> PAGE_SHIFT, eb);
4917 spin_unlock(&fs_info->buffer_lock);
4918 radix_tree_preload_end();
4919 if (ret == -EEXIST) {
4920 exists = find_extent_buffer(fs_info, start);
4921 if (exists)
4922 goto free_eb;
4923 else
4924 goto again;
4925 }
4926 check_buffer_tree_ref(eb);
4927 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4928
/*
 * Dummy extent buffers are freed when they come into free_extent_buffer()
 * with a ref count of 2, but the tests want them to stay around until they
 * are done with them, so take an extra reference here.
 */
4935 atomic_inc(&eb->refs);
4936 return eb;
4937free_eb:
4938 btrfs_release_extent_buffer(eb);
4939 return exists;
4940}
4941#endif
4942
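/*
 * Return the extent buffer covering the tree block at @start, allocating it
 * and its pages (from the btree inode's page cache) and inserting it into the
 * buffer radix tree if it does not exist yet.  Returns an ERR_PTR on
 * allocation failure or if @start is not sector aligned.
 */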
4943struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4944 u64 start)
4945{
4946 unsigned long len = fs_info->nodesize;
4947 unsigned long num_pages = num_extent_pages(start, len);
4948 unsigned long i;
4949 unsigned long index = start >> PAGE_SHIFT;
4950 struct extent_buffer *eb;
4951 struct extent_buffer *exists = NULL;
4952 struct page *p;
4953 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4954 int uptodate = 1;
4955 int ret;
4956
4957 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
4958 btrfs_err(fs_info, "bad tree block start %llu", start);
4959 return ERR_PTR(-EINVAL);
4960 }
4961
4962 eb = find_extent_buffer(fs_info, start);
4963 if (eb)
4964 return eb;
4965
4966 eb = __alloc_extent_buffer(fs_info, start, len);
4967 if (!eb)
4968 return ERR_PTR(-ENOMEM);
4969
4970 for (i = 0; i < num_pages; i++, index++) {
4971 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4972 if (!p) {
4973 exists = ERR_PTR(-ENOMEM);
4974 goto free_eb;
4975 }
4976
4977 spin_lock(&mapping->private_lock);
4978 if (PagePrivate(p)) {
/*
 * An eb may already have been allocated for and attached to this page.  If we
 * can grab a reference on it we can return that one; otherwise it is being
 * freed and we can safely overwrite page->private below.
 */
4986 exists = (struct extent_buffer *)p->private;
4987 if (atomic_inc_not_zero(&exists->refs)) {
4988 spin_unlock(&mapping->private_lock);
4989 unlock_page(p);
4990 put_page(p);
4991 mark_extent_buffer_accessed(exists, p);
4992 goto free_eb;
4993 }
4994 exists = NULL;

/*
 * Detach the old buffer's private data so attach_extent_buffer_page()
 * doesn't complain, and drop the page reference the old owner held.
 */
5000 ClearPagePrivate(p);
5001 WARN_ON(PageDirty(p));
5002 put_page(p);
5003 }
5004 attach_extent_buffer_page(eb, p);
5005 spin_unlock(&mapping->private_lock);
5006 WARN_ON(PageDirty(p));
5007 eb->pages[i] = p;
5008 if (!PageUptodate(p))
5009 uptodate = 0;

		/*
		 * We can't unlock the pages just yet since the extent buffer
		 * hasn't been properly inserted into the radix tree; that
		 * would open a race with btree_releasepage(), which can free
		 * a page while we are still filling in all pages for the
		 * buffer.
		 */
5015 }
5016 if (uptodate)
5017 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5018again:
5019 ret = radix_tree_preload(GFP_NOFS);
5020 if (ret) {
5021 exists = ERR_PTR(ret);
5022 goto free_eb;
5023 }
5024
5025 spin_lock(&fs_info->buffer_lock);
5026 ret = radix_tree_insert(&fs_info->buffer_radix,
5027 start >> PAGE_SHIFT, eb);
5028 spin_unlock(&fs_info->buffer_lock);
5029 radix_tree_preload_end();
5030 if (ret == -EEXIST) {
5031 exists = find_extent_buffer(fs_info, start);
5032 if (exists)
5033 goto free_eb;
5034 else
5035 goto again;
5036 }
5037
5038 check_buffer_tree_ref(eb);
5039 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	/*
	 * There is a race where the releasepage path may have tried to find
	 * this extent buffer in the radix tree but failed; it would then tell
	 * the VM it is safe to reclaim the page and clear the page private
	 * bit.  Make sure the page state is set up properly only after the
	 * extent buffer is in the radix tree, so it doesn't get lost.
	 */
5050 SetPageChecked(eb->pages[0]);
5051 for (i = 1; i < num_pages; i++) {
5052 p = eb->pages[i];
5053 ClearPageChecked(p);
5054 unlock_page(p);
5055 }
5056 unlock_page(eb->pages[0]);
5057 return eb;
5058
5059free_eb:
5060 WARN_ON(!atomic_dec_and_test(&eb->refs));
5061 for (i = 0; i < num_pages; i++) {
5062 if (eb->pages[i])
5063 unlock_page(eb->pages[i]);
5064 }
5065
5066 btrfs_release_extent_buffer(eb);
5067 return exists;
5068}
5069
5070static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5071{
5072 struct extent_buffer *eb =
5073 container_of(head, struct extent_buffer, rcu_head);
5074
5075 __free_extent_buffer(eb);
5076}
5077
/* Expects to have eb->refs_lock already held */
5079static int release_extent_buffer(struct extent_buffer *eb)
5080{
5081 WARN_ON(atomic_read(&eb->refs) == 0);
5082 if (atomic_dec_and_test(&eb->refs)) {
5083 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5084 struct btrfs_fs_info *fs_info = eb->fs_info;
5085
5086 spin_unlock(&eb->refs_lock);
5087
5088 spin_lock(&fs_info->buffer_lock);
5089 radix_tree_delete(&fs_info->buffer_radix,
5090 eb->start >> PAGE_SHIFT);
5091 spin_unlock(&fs_info->buffer_lock);
5092 } else {
5093 spin_unlock(&eb->refs_lock);
5094 }

		/* Should be safe to release our pages at this point */
5097 btrfs_release_extent_buffer_page(eb);
5098#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5099 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
5100 __free_extent_buffer(eb);
5101 return 1;
5102 }
5103#endif
5104 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5105 return 1;
5106 }
5107 spin_unlock(&eb->refs_lock);
5108
5109 return 0;
5110}
5111
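/*
 * Drop a reference on an extent buffer and tear it down if that was the last
 * reference.  Stale and dummy buffers also give up their implicit tree
 * reference here so they don't linger in memory.
 */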
5112void free_extent_buffer(struct extent_buffer *eb)
5113{
5114 int refs;
5115 int old;
5116 if (!eb)
5117 return;
5118
5119 while (1) {
5120 refs = atomic_read(&eb->refs);
5121 if (refs <= 3)
5122 break;
5123 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5124 if (old == refs)
5125 return;
5126 }
5127
5128 spin_lock(&eb->refs_lock);
5129 if (atomic_read(&eb->refs) == 2 &&
5130 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5131 atomic_dec(&eb->refs);
5132
5133 if (atomic_read(&eb->refs) == 2 &&
5134 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5135 !extent_buffer_under_io(eb) &&
5136 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5137 atomic_dec(&eb->refs);
5138
	/*
	 * release_extent_buffer() drops the reference we still hold and, if
	 * it was the last one, tears down the extent buffer.
	 */
5143 release_extent_buffer(eb);
5144}
5145
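/*
 * Mark an extent buffer stale and drop a reference on it, also releasing the
 * tree reference if nothing else holds the buffer and no IO is in flight.
 */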
5146void free_extent_buffer_stale(struct extent_buffer *eb)
5147{
5148 if (!eb)
5149 return;
5150
5151 spin_lock(&eb->refs_lock);
5152 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5153
5154 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5155 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5156 atomic_dec(&eb->refs);
5157 release_extent_buffer(eb);
5158}
5159
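/*
 * Clear the dirty bit on every page backing an extent buffer and drop the
 * PAGECACHE_TAG_DIRTY tag for those pages in the btree inode's page cache.
 */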
5160void clear_extent_buffer_dirty(struct extent_buffer *eb)
5161{
5162 unsigned long i;
5163 unsigned long num_pages;
5164 struct page *page;
5165
5166 num_pages = num_extent_pages(eb->start, eb->len);
5167
5168 for (i = 0; i < num_pages; i++) {
5169 page = eb->pages[i];
5170 if (!PageDirty(page))
5171 continue;
5172
5173 lock_page(page);
5174 WARN_ON(!PagePrivate(page));
5175
5176 clear_page_dirty_for_io(page);
5177 xa_lock_irq(&page->mapping->i_pages);
5178 if (!PageDirty(page)) {
5179 radix_tree_tag_clear(&page->mapping->i_pages,
5180 page_index(page),
5181 PAGECACHE_TAG_DIRTY);
5182 }
5183 xa_unlock_irq(&page->mapping->i_pages);
5184 ClearPageError(page);
5185 unlock_page(page);
5186 }
5187 WARN_ON(atomic_read(&eb->refs) == 0);
5188}
5189
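/*
 * Mark an extent buffer and all of its pages dirty.  Returns whether the
 * buffer was already dirty.
 */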
5190int set_extent_buffer_dirty(struct extent_buffer *eb)
5191{
5192 unsigned long i;
5193 unsigned long num_pages;
5194 int was_dirty = 0;
5195
5196 check_buffer_tree_ref(eb);
5197
5198 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5199
5200 num_pages = num_extent_pages(eb->start, eb->len);
5201 WARN_ON(atomic_read(&eb->refs) == 0);
5202 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5203
5204 for (i = 0; i < num_pages; i++)
5205 set_page_dirty(eb->pages[i]);
5206 return was_dirty;
5207}
5208
5209void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5210{
5211 unsigned long i;
5212 struct page *page;
5213 unsigned long num_pages;
5214
5215 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5216 num_pages = num_extent_pages(eb->start, eb->len);
5217 for (i = 0; i < num_pages; i++) {
5218 page = eb->pages[i];
5219 if (page)
5220 ClearPageUptodate(page);
5221 }
5222}
5223
5224void set_extent_buffer_uptodate(struct extent_buffer *eb)
5225{
5226 unsigned long i;
5227 struct page *page;
5228 unsigned long num_pages;
5229
5230 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5231 num_pages = num_extent_pages(eb->start, eb->len);
5232 for (i = 0; i < num_pages; i++) {
5233 page = eb->pages[i];
5234 SetPageUptodate(page);
5235 }
5236}
5237
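/*
 * Read any not-uptodate pages of an extent buffer from disk.  With
 * @wait == WAIT_NONE the pages are only trylocked and the reads complete
 * asynchronously; with @wait == WAIT_COMPLETE we wait for every submitted
 * read and return -EIO if any page failed to become uptodate.
 */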
5238int read_extent_buffer_pages(struct extent_io_tree *tree,
5239 struct extent_buffer *eb, int wait, int mirror_num)
5240{
5241 unsigned long i;
5242 struct page *page;
5243 int err;
5244 int ret = 0;
5245 int locked_pages = 0;
5246 int all_uptodate = 1;
5247 unsigned long num_pages;
5248 unsigned long num_reads = 0;
5249 struct bio *bio = NULL;
5250 unsigned long bio_flags = 0;
5251
5252 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5253 return 0;
5254
5255 num_pages = num_extent_pages(eb->start, eb->len);
5256 for (i = 0; i < num_pages; i++) {
5257 page = eb->pages[i];
5258 if (wait == WAIT_NONE) {
5259 if (!trylock_page(page))
5260 goto unlock_exit;
5261 } else {
5262 lock_page(page);
5263 }
5264 locked_pages++;
5265 }
5266
	/*
	 * We need to lock all the pages first, to make sure the uptodate bit
	 * of our pages won't be affected by a concurrent
	 * clear_extent_buffer_uptodate().
	 */
5271 for (i = 0; i < num_pages; i++) {
5272 page = eb->pages[i];
5273 if (!PageUptodate(page)) {
5274 num_reads++;
5275 all_uptodate = 0;
5276 }
5277 }
5278
5279 if (all_uptodate) {
5280 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5281 goto unlock_exit;
5282 }
5283
5284 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5285 eb->read_mirror = 0;
5286 atomic_set(&eb->io_pages, num_reads);
5287 for (i = 0; i < num_pages; i++) {
5288 page = eb->pages[i];
5289
5290 if (!PageUptodate(page)) {
5291 if (ret) {
5292 atomic_dec(&eb->io_pages);
5293 unlock_page(page);
5294 continue;
5295 }
5296
5297 ClearPageError(page);
5298 err = __extent_read_full_page(tree, page,
5299 btree_get_extent, &bio,
5300 mirror_num, &bio_flags,
5301 REQ_META);
5302 if (err) {
5303 ret = err;
				/*
				 * We passed &bio to __extent_read_full_page()
				 * above, so if it returned an error the current
				 * page failed to add itself to the bio and has
				 * already been unlocked.  That means its read
				 * end_io handler will never run, so drop the
				 * io_pages count for it ourselves.
				 */
5312 atomic_dec(&eb->io_pages);
5313 }
5314 } else {
5315 unlock_page(page);
5316 }
5317 }
5318
5319 if (bio) {
5320 err = submit_one_bio(bio, mirror_num, bio_flags);
5321 if (err)
5322 return err;
5323 }
5324
5325 if (ret || wait != WAIT_COMPLETE)
5326 return ret;
5327
5328 for (i = 0; i < num_pages; i++) {
5329 page = eb->pages[i];
5330 wait_on_page_locked(page);
5331 if (!PageUptodate(page))
5332 ret = -EIO;
5333 }
5334
5335 return ret;
5336
5337unlock_exit:
5338 while (locked_pages > 0) {
5339 locked_pages--;
5340 page = eb->pages[locked_pages];
5341 unlock_page(page);
5342 }
5343 return ret;
5344}
5345
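/*
 * read_extent_buffer() - copy a byte range out of an extent buffer
 * @eb: source extent buffer
 * @dstv: destination kernel buffer
 * @start: byte offset within the extent buffer
 * @len: number of bytes to copy
 *
 * Copies @len bytes starting at @start out of the (possibly multi-page)
 * extent buffer into @dstv.  Out-of-range requests warn and zero-fill the
 * destination.  Illustrative use only, e.g. pulling a raw copy of the block
 * header out of a tree block:
 *
 *	struct btrfs_header hdr;
 *
 *	read_extent_buffer(eb, &hdr, 0, sizeof(hdr));
 */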
5346void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
5347 unsigned long start, unsigned long len)
5348{
5349 size_t cur;
5350 size_t offset;
5351 struct page *page;
5352 char *kaddr;
5353 char *dst = (char *)dstv;
5354 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5355 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5356
5357 if (start + len > eb->len) {
5358 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5359 eb->start, eb->len, start, len);
5360 memset(dst, 0, len);
5361 return;
5362 }
5363
5364 offset = (start_offset + start) & (PAGE_SIZE - 1);
5365
5366 while (len > 0) {
5367 page = eb->pages[i];
5368
5369 cur = min(len, (PAGE_SIZE - offset));
5370 kaddr = page_address(page);
5371 memcpy(dst, kaddr + offset, cur);
5372
5373 dst += cur;
5374 len -= cur;
5375 offset = 0;
5376 i++;
5377 }
5378}
5379
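/*
 * Same as read_extent_buffer() but copies into a user space buffer.  Returns
 * 0 on success or -EFAULT if a copy_to_user() call faulted.
 */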
5380int read_extent_buffer_to_user(const struct extent_buffer *eb,
5381 void __user *dstv,
5382 unsigned long start, unsigned long len)
5383{
5384 size_t cur;
5385 size_t offset;
5386 struct page *page;
5387 char *kaddr;
5388 char __user *dst = (char __user *)dstv;
5389 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5390 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5391 int ret = 0;
5392
5393 WARN_ON(start > eb->len);
5394 WARN_ON(start + len > eb->start + eb->len);
5395
5396 offset = (start_offset + start) & (PAGE_SIZE - 1);
5397
5398 while (len > 0) {
5399 page = eb->pages[i];
5400
5401 cur = min(len, (PAGE_SIZE - offset));
5402 kaddr = page_address(page);
5403 if (copy_to_user(dst, kaddr + offset, cur)) {
5404 ret = -EFAULT;
5405 break;
5406 }
5407
5408 dst += cur;
5409 len -= cur;
5410 offset = 0;
5411 i++;
5412 }
5413
5414 return ret;
5415}
5416
/*
 * Return 0 if the requested item fits within a single page,
 * 1 if it spans two pages, or
 * -EINVAL if the requested range is outside the extent buffer.
 */
5422int map_private_extent_buffer(const struct extent_buffer *eb,
5423 unsigned long start, unsigned long min_len,
5424 char **map, unsigned long *map_start,
5425 unsigned long *map_len)
5426{
5427 size_t offset = start & (PAGE_SIZE - 1);
5428 char *kaddr;
5429 struct page *p;
5430 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5431 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5432 unsigned long end_i = (start_offset + start + min_len - 1) >>
5433 PAGE_SHIFT;
5434
5435 if (start + min_len > eb->len) {
5436 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5437 eb->start, eb->len, start, min_len);
5438 return -EINVAL;
5439 }
5440
5441 if (i != end_i)
5442 return 1;
5443
5444 if (i == 0) {
5445 offset = start_offset;
5446 *map_start = 0;
5447 } else {
5448 offset = 0;
5449 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
5450 }
5451
5452 p = eb->pages[i];
5453 kaddr = page_address(p);
5454 *map = kaddr + offset;
5455 *map_len = PAGE_SIZE - offset;
5456 return 0;
5457}
5458
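/*
 * Compare @len bytes of an extent buffer, starting at @start, against the
 * kernel buffer @ptrv.  Returns the memcmp() result of the first differing
 * chunk, or 0 if the ranges match.
 */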
5459int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
5460 unsigned long start, unsigned long len)
5461{
5462 size_t cur;
5463 size_t offset;
5464 struct page *page;
5465 char *kaddr;
5466 char *ptr = (char *)ptrv;
5467 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5468 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5469 int ret = 0;
5470
5471 WARN_ON(start > eb->len);
5472 WARN_ON(start + len > eb->start + eb->len);
5473
5474 offset = (start_offset + start) & (PAGE_SIZE - 1);
5475
5476 while (len > 0) {
5477 page = eb->pages[i];
5478
5479 cur = min(len, (PAGE_SIZE - offset));
5480
5481 kaddr = page_address(page);
5482 ret = memcmp(ptr, kaddr + offset, cur);
5483 if (ret)
5484 break;
5485
5486 ptr += cur;
5487 len -= cur;
5488 offset = 0;
5489 i++;
5490 }
5491 return ret;
5492}
5493
5494void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
5495 const void *srcv)
5496{
5497 char *kaddr;
5498
5499 WARN_ON(!PageUptodate(eb->pages[0]));
5500 kaddr = page_address(eb->pages[0]);
5501 memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
5502 BTRFS_FSID_SIZE);
5503}
5504
5505void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
5506{
5507 char *kaddr;
5508
5509 WARN_ON(!PageUptodate(eb->pages[0]));
5510 kaddr = page_address(eb->pages[0]);
5511 memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
5512 BTRFS_FSID_SIZE);
5513}
5514
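/*
 * Copy @len bytes from the kernel buffer @srcv into an extent buffer at
 * offset @start, crossing page boundaries as needed.  The affected pages are
 * expected to be uptodate.
 */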
5515void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5516 unsigned long start, unsigned long len)
5517{
5518 size_t cur;
5519 size_t offset;
5520 struct page *page;
5521 char *kaddr;
5522 char *src = (char *)srcv;
5523 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5524 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5525
5526 WARN_ON(start > eb->len);
5527 WARN_ON(start + len > eb->start + eb->len);
5528
5529 offset = (start_offset + start) & (PAGE_SIZE - 1);
5530
5531 while (len > 0) {
5532 page = eb->pages[i];
5533 WARN_ON(!PageUptodate(page));
5534
5535 cur = min(len, PAGE_SIZE - offset);
5536 kaddr = page_address(page);
5537 memcpy(kaddr + offset, src, cur);
5538
5539 src += cur;
5540 len -= cur;
5541 offset = 0;
5542 i++;
5543 }
5544}
5545
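/* Zero @len bytes of an extent buffer starting at offset @start. */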
5546void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
5547 unsigned long len)
5548{
5549 size_t cur;
5550 size_t offset;
5551 struct page *page;
5552 char *kaddr;
5553 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5554 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5555
5556 WARN_ON(start > eb->len);
5557 WARN_ON(start + len > eb->start + eb->len);
5558
5559 offset = (start_offset + start) & (PAGE_SIZE - 1);
5560
5561 while (len > 0) {
5562 page = eb->pages[i];
5563 WARN_ON(!PageUptodate(page));
5564
5565 cur = min(len, PAGE_SIZE - offset);
5566 kaddr = page_address(page);
5567 memset(kaddr + offset, 0, cur);
5568
5569 len -= cur;
5570 offset = 0;
5571 i++;
5572 }
5573}
5574
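/* Copy the full contents of one extent buffer into another of equal length. */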
5575void copy_extent_buffer_full(struct extent_buffer *dst,
5576 struct extent_buffer *src)
5577{
5578 int i;
5579 unsigned num_pages;
5580
5581 ASSERT(dst->len == src->len);
5582
5583 num_pages = num_extent_pages(dst->start, dst->len);
5584 for (i = 0; i < num_pages; i++)
5585 copy_page(page_address(dst->pages[i]),
5586 page_address(src->pages[i]));
5587}
5588
5589void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5590 unsigned long dst_offset, unsigned long src_offset,
5591 unsigned long len)
5592{
5593 u64 dst_len = dst->len;
5594 size_t cur;
5595 size_t offset;
5596 struct page *page;
5597 char *kaddr;
5598 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5599 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
5600
5601 WARN_ON(src->len != dst_len);
5602
5603 offset = (start_offset + dst_offset) &
5604 (PAGE_SIZE - 1);
5605
5606 while (len > 0) {
5607 page = dst->pages[i];
5608 WARN_ON(!PageUptodate(page));
5609
5610 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
5611
5612 kaddr = page_address(page);
5613 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5614
5615 src_offset += cur;
5616 len -= cur;
5617 offset = 0;
5618 i++;
5619 }
5620}
5621
/*
 * eb_bitmap_offset() - calculate the page index and offset of the byte
 * containing a given bit of a bitmap item
 * @eb: the extent buffer
 * @start: byte offset of the bitmap item in the extent buffer
 * @nr: bit number within the bitmap item
 * @page_index: return index of the page in the extent buffer that contains
 * the given bit
 * @page_offset: return offset of the byte within the page given by
 * @page_index
 *
 * Hides the details of finding the byte in an extent buffer that contains a
 * given bit.
 */
5635static inline void eb_bitmap_offset(struct extent_buffer *eb,
5636 unsigned long start, unsigned long nr,
5637 unsigned long *page_index,
5638 size_t *page_offset)
5639{
5640 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5641 size_t byte_offset = BIT_BYTE(nr);
5642 size_t offset;
5643
	/*
	 * The byte we want is at: the offset of the extent buffer within its
	 * first page, plus the offset of the bitmap item in the extent
	 * buffer, plus the offset of the byte within the bitmap item.
	 */
5649 offset = start_offset + start + byte_offset;
5650
5651 *page_index = offset >> PAGE_SHIFT;
5652 *page_offset = offset & (PAGE_SIZE - 1);
5653}
5654
/*
 * extent_buffer_test_bit() - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: byte offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */
5661int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5662 unsigned long nr)
5663{
5664 u8 *kaddr;
5665 struct page *page;
5666 unsigned long i;
5667 size_t offset;
5668
5669 eb_bitmap_offset(eb, start, nr, &i, &offset);
5670 page = eb->pages[i];
5671 WARN_ON(!PageUptodate(page));
5672 kaddr = page_address(page);
5673 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5674}
5675
/*
 * extent_buffer_bitmap_set() - set a range of bits in a bitmap item
 * @eb: the extent buffer
 * @start: byte offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to set
 * @len: number of bits to set
 */
5683void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5684 unsigned long pos, unsigned long len)
5685{
5686 u8 *kaddr;
5687 struct page *page;
5688 unsigned long i;
5689 size_t offset;
5690 const unsigned int size = pos + len;
5691 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5692 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5693
5694 eb_bitmap_offset(eb, start, pos, &i, &offset);
5695 page = eb->pages[i];
5696 WARN_ON(!PageUptodate(page));
5697 kaddr = page_address(page);
5698
5699 while (len >= bits_to_set) {
5700 kaddr[offset] |= mask_to_set;
5701 len -= bits_to_set;
5702 bits_to_set = BITS_PER_BYTE;
5703 mask_to_set = ~0;
5704 if (++offset >= PAGE_SIZE && len > 0) {
5705 offset = 0;
5706 page = eb->pages[++i];
5707 WARN_ON(!PageUptodate(page));
5708 kaddr = page_address(page);
5709 }
5710 }
5711 if (len) {
5712 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5713 kaddr[offset] |= mask_to_set;
5714 }
5715}
5716

/*
 * extent_buffer_bitmap_clear() - clear a range of bits in a bitmap item
 * @eb: the extent buffer
 * @start: byte offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to clear
 * @len: number of bits to clear
 */
5725void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5726 unsigned long pos, unsigned long len)
5727{
5728 u8 *kaddr;
5729 struct page *page;
5730 unsigned long i;
5731 size_t offset;
5732 const unsigned int size = pos + len;
5733 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5734 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5735
5736 eb_bitmap_offset(eb, start, pos, &i, &offset);
5737 page = eb->pages[i];
5738 WARN_ON(!PageUptodate(page));
5739 kaddr = page_address(page);
5740
5741 while (len >= bits_to_clear) {
5742 kaddr[offset] &= ~mask_to_clear;
5743 len -= bits_to_clear;
5744 bits_to_clear = BITS_PER_BYTE;
5745 mask_to_clear = ~0;
5746 if (++offset >= PAGE_SIZE && len > 0) {
5747 offset = 0;
5748 page = eb->pages[++i];
5749 WARN_ON(!PageUptodate(page));
5750 kaddr = page_address(page);
5751 }
5752 }
5753 if (len) {
5754 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5755 kaddr[offset] &= ~mask_to_clear;
5756 }
5757}
5758
5759static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5760{
5761 unsigned long distance = (src > dst) ? src - dst : dst - src;
5762 return distance < len;
5763}
5764
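/*
 * Copy @len bytes between two (possibly identical) pages, using memmove()
 * when source and destination ranges overlap within the same page.
 */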
5765static void copy_pages(struct page *dst_page, struct page *src_page,
5766 unsigned long dst_off, unsigned long src_off,
5767 unsigned long len)
5768{
5769 char *dst_kaddr = page_address(dst_page);
5770 char *src_kaddr;
5771 int must_memmove = 0;
5772
5773 if (dst_page != src_page) {
5774 src_kaddr = page_address(src_page);
5775 } else {
5776 src_kaddr = dst_kaddr;
5777 if (areas_overlap(src_off, dst_off, len))
5778 must_memmove = 1;
5779 }
5780
5781 if (must_memmove)
5782 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5783 else
5784 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5785}
5786
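/*
 * Copy a byte range within a single extent buffer from @src_offset to
 * @dst_offset, page by page.  Both ranges must fit inside the buffer.
 */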
5787void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5788 unsigned long src_offset, unsigned long len)
5789{
5790 struct btrfs_fs_info *fs_info = dst->fs_info;
5791 size_t cur;
5792 size_t dst_off_in_page;
5793 size_t src_off_in_page;
5794 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5795 unsigned long dst_i;
5796 unsigned long src_i;
5797
5798 if (src_offset + len > dst->len) {
5799 btrfs_err(fs_info,
5800 "memmove bogus src_offset %lu move len %lu dst len %lu",
5801 src_offset, len, dst->len);
5802 BUG_ON(1);
5803 }
5804 if (dst_offset + len > dst->len) {
5805 btrfs_err(fs_info,
5806 "memmove bogus dst_offset %lu move len %lu dst len %lu",
5807 dst_offset, len, dst->len);
5808 BUG_ON(1);
5809 }
5810
5811 while (len > 0) {
5812 dst_off_in_page = (start_offset + dst_offset) &
5813 (PAGE_SIZE - 1);
5814 src_off_in_page = (start_offset + src_offset) &
5815 (PAGE_SIZE - 1);
5816
5817 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
5818 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
5819
5820 cur = min(len, (unsigned long)(PAGE_SIZE -
5821 src_off_in_page));
5822 cur = min_t(unsigned long, cur,
5823 (unsigned long)(PAGE_SIZE - dst_off_in_page));
5824
5825 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5826 dst_off_in_page, src_off_in_page, cur);
5827
5828 src_offset += cur;
5829 dst_offset += cur;
5830 len -= cur;
5831 }
5832}
5833
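/*
 * memmove() equivalent for a range within a single extent buffer: when the
 * destination lies above the source, the copy runs backwards so overlapping
 * ranges are handled correctly.
 */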
5834void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5835 unsigned long src_offset, unsigned long len)
5836{
5837 struct btrfs_fs_info *fs_info = dst->fs_info;
5838 size_t cur;
5839 size_t dst_off_in_page;
5840 size_t src_off_in_page;
5841 unsigned long dst_end = dst_offset + len - 1;
5842 unsigned long src_end = src_offset + len - 1;
5843 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5844 unsigned long dst_i;
5845 unsigned long src_i;
5846
5847 if (src_offset + len > dst->len) {
5848 btrfs_err(fs_info,
5849 "memmove bogus src_offset %lu move len %lu len %lu",
5850 src_offset, len, dst->len);
5851 BUG_ON(1);
5852 }
5853 if (dst_offset + len > dst->len) {
5854 btrfs_err(fs_info,
5855 "memmove bogus dst_offset %lu move len %lu len %lu",
5856 dst_offset, len, dst->len);
5857 BUG_ON(1);
5858 }
5859 if (dst_offset < src_offset) {
5860 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5861 return;
5862 }
5863 while (len > 0) {
5864 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
5865 src_i = (start_offset + src_end) >> PAGE_SHIFT;
5866
5867 dst_off_in_page = (start_offset + dst_end) &
5868 (PAGE_SIZE - 1);
5869 src_off_in_page = (start_offset + src_end) &
5870 (PAGE_SIZE - 1);
5871
5872 cur = min_t(unsigned long, len, src_off_in_page + 1);
5873 cur = min(cur, dst_off_in_page + 1);
5874 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5875 dst_off_in_page - cur + 1,
5876 src_off_in_page - cur + 1, cur);
5877
5878 dst_end -= cur;
5879 src_end -= cur;
5880 len -= cur;
5881 }
5882}
5883
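/*
 * Called when the VM wants to release a btree page: drop the extent buffer
 * attached to @page if nothing else holds a reference to it and it is not
 * under IO.  Returns nonzero if the page is free to be released.
 */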
5884int try_release_extent_buffer(struct page *page)
5885{
5886 struct extent_buffer *eb;
5887
	/*
	 * We need to make sure nobody is currently attaching this page to an
	 * extent buffer via attach_extent_buffer_page().
	 */
5892 spin_lock(&page->mapping->private_lock);
5893 if (!PagePrivate(page)) {
5894 spin_unlock(&page->mapping->private_lock);
5895 return 1;
5896 }
5897
5898 eb = (struct extent_buffer *)page->private;
5899 BUG_ON(!eb);
5900
	/*
	 * The page pins the eb via page->private, but other tasks may still
	 * find it in the radix tree and take references, so grab refs_lock
	 * and only proceed if this is the single remaining reference and the
	 * buffer is not under IO.
	 */
5906 spin_lock(&eb->refs_lock);
5907 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5908 spin_unlock(&eb->refs_lock);
5909 spin_unlock(&page->mapping->private_lock);
5910 return 0;
5911 }
5912 spin_unlock(&page->mapping->private_lock);
5913
	/*
	 * If the tree ref isn't set then we know the ref on this eb is a real
	 * ref, so just return; this page will likely be freed soon anyway.
	 */
5918 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
5919 spin_unlock(&eb->refs_lock);
5920 return 0;
5921 }
5922
5923 return release_extent_buffer(eb);
5924}
5925