// SPDX-License-Identifier: GPL-2.0

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;

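/* Return true if the extent state is currently linked into a tree. */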
static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add(spinlock_t *lock,
					struct list_head *new,
					struct list_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_add(new, head);
	spin_unlock_irqrestore(lock, flags);
}

static inline void btrfs_leak_debug_del(spinlock_t *lock,
					struct list_head *entry)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_del(entry);
	spin_unlock_irqrestore(lock, flags);
}

void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
{
	struct extent_buffer *eb;
	unsigned long flags;

	/*
	 * If we didn't get into open_ctree our allocated_ebs will not be
	 * initialized, so just skip this.
	 */
	if (!fs_info->allocated_ebs.next)
		return;

	spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
	while (!list_empty(&fs_info->allocated_ebs)) {
		eb = list_first_entry(&fs_info->allocated_ebs,
				      struct extent_buffer, leak_list);
		pr_err(
	"BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
		       eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
		       btrfs_header_owner(eb));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
		struct extent_io_tree *tree, u64 start, u64 end)
{
	struct inode *inode = tree->private_data;
	u64 isize;

	if (!inode || !is_data_inode(inode))
		return;

	isize = i_size_read(inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
		    "%s: ino %llu isize %llu odd range [%llu,%llu]",
			caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add(lock, new, head)	do {} while (0)
#define btrfs_leak_debug_del(lock, entry)	do {} while (0)
#define btrfs_extent_state_leak_debug_check()	do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	/*
	 * Tells writepage not to lock the state bits for this range, it
	 * still does the unlocking.
	 */
	unsigned int extent_locked:1;

	/* Tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
};

static int add_extent_changeset(struct extent_state *state, u32 bits,
				struct extent_changeset *changeset,
				int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

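/*
 * Hand the bio to the data or metadata submission hook, depending on the
 * inode the io tree belongs to, and convert the returned blk_status_t to a
 * negative errno.
 */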
int __must_check submit_one_bio(struct bio *bio, int mirror_num,
				unsigned long bio_flags)
{
	blk_status_t ret = 0;
	struct extent_io_tree *tree = bio->bi_private;

	bio->bi_private = NULL;

	if (is_data_inode(tree->private_data))
		ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
					    bio_flags);
	else
		ret = btrfs_submit_metadata_bio(tree->private_data, bio,
						mirror_num, bio_flags);

	return blk_status_to_errno(ret);
}

/* Cleanup unsubmitted bios */
static void end_write_bio(struct extent_page_data *epd, int ret)
{
	if (epd->bio) {
		epd->bio->bi_status = errno_to_blk_status(ret);
		bio_endio(epd->bio);
		epd->bio = NULL;
	}
}

/*
 * Submit bio from extent page data via submit_one_bio
 *
 * Return 0 if everything is OK.
 * Return <0 for error.
 */
static int __must_check flush_write_bio(struct extent_page_data *epd)
{
	int ret = 0;

	if (epd->bio) {
		ret = submit_one_bio(epd->bio, 0, 0);
		/*
		 * Clean up of epd->bio is handled by its endio function.
		 * And endio is either triggered by successful bio execution
		 * or the error handler of submit bio hook.
		 * So at this point, no matter what happened, we don't need
		 * to clean up epd->bio.
		 */
		epd->bio = NULL;
	}
	return ret;
}

int __init extent_state_cache_init(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
			sizeof(struct extent_state), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;
	return 0;
}

int __init extent_io_init(void)
{
	extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
			sizeof(struct extent_buffer), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		return -ENOMEM;

	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_io_bio, bio),
			BIOSET_NEED_BVECS))
		goto free_buffer_cache;

	if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
		goto free_bioset;

	return 0;

free_bioset:
	bioset_exit(&btrfs_bioset);

free_buffer_cache:
	kmem_cache_destroy(extent_buffer_cache);
	extent_buffer_cache = NULL;
	return -ENOMEM;
}

void __cold extent_state_cache_exit(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

void __cold extent_io_exit(void)
{
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(extent_buffer_cache);
	bioset_exit(&btrfs_bioset);
}

/*
 * For the file_extent_tree, we want to be able to hold the inode lock when
 * we lookup and update the disk_i_size, but lockdep will complain because
 * our io_tree we hold the tree lock and get the inode lock when setting
 * delalloc.  These two things are only done in the fast path, so just don't
 * lock the io_tree when doing file extent items, to avoid the lockdep splat.
 */
static struct lock_class_key file_extent_tree_class;

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner,
			 void *private_data)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	tree->private_data = private_data;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

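/*
 * Empty the io tree, removing and freeing every extent state still linked
 * into it.  Callers must guarantee there are no concurrent users of the tree.
 */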
void extent_io_tree_release(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once extent_io_tree_release is
	 * called.
	 */
	smp_mb();
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might be not appropriate for the slab allocator,
	 * drop the gfp flags that aren't allowed
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->failrec = NULL;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del(&leak_lock, &state->leak_list);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

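/*
 * Link @node into @root.  When @p_in/@parent_in are given the caller already
 * did the search and we insert at that slot, otherwise walk down from
 * @search_start (or the root) comparing @offset against each entry's range.
 * Returns the existing node if one already covers @offset, else NULL.
 */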
static struct rb_node *tree_insert(struct rb_root *root,
				   struct rb_node *search_start,
				   u64 offset,
				   struct rb_node *node,
				   struct rb_node ***p_in,
				   struct rb_node **parent_in)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	if (p_in && parent_in) {
		p = *p_in;
		parent = *parent_in;
		goto do_insert;
	}

	p = search_start ? &search_start : &root->rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

do_insert:
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

/**
 * __etree_search - search @tree for an entry that contains @offset. Such
 * entry would have entry->start <= offset && entry->end >= offset.
 *
 * @tree - the tree to search
 * @offset - offset that should fall within an entry in @tree
 * @next_ret - pointer to the first entry whose range ends after @offset
 * @prev_ret - pointer to the first entry whose range begins before @offset
 * @p_ret - pointer where new node should be anchored (used when inserting an
 *	    entry in the tree)
 * @parent_ret - points to entry which would have been the parent of the entry,
 *               containing @offset
 *
 * This function returns a pointer to the entry that contains @offset byte
 * address. If no such entry exists, then NULL is returned and the other
 * pointer arguments to the function are filled, otherwise the found entry is
 * returned and other pointers are left untouched.
 */
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **next_ret,
				      struct rb_node **prev_ret,
				      struct rb_node ***p_ret,
				      struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **n = &root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (*n) {
		prev = *n;
		entry = rb_entry(prev, struct tree_entry, rb_node);
		prev_entry = entry;

		if (offset < entry->start)
			n = &(*n)->rb_left;
		else if (offset > entry->end)
			n = &(*n)->rb_right;
		else
			return *n;
	}

	if (p_ret)
		*p_ret = n;
	if (parent_ret)
		*parent_ret = prev;

	if (next_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
		prev = orig_prev;
	}

	if (prev_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *
tree_search_for_insert(struct extent_io_tree *tree,
		       u64 offset,
		       struct rb_node ***p_ret,
		       struct rb_node **parent_ret)
{
	struct rb_node *next = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
	if (!ret)
		return next;
	return ret;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree,
			struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			if (tree->private_data &&
			    is_data_inode(tree->private_data))
				btrfs_merge_delalloc_extent(tree->private_data,
							    state, other);
			state->start = other->start;
			rb_erase(&other->rb_node, &tree->state);
			RB_CLEAR_NODE(&other->rb_node);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			if (tree->private_data &&
			    is_data_inode(tree->private_data))
				btrfs_merge_delalloc_extent(tree->private_data,
							    state, other);
			state->end = other->end;
			rb_erase(&other->rb_node, &tree->state);
			RB_CLEAR_NODE(&other->rb_node);
			free_extent_state(other);
		}
	}
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state, u32 *bits,
			   struct extent_changeset *changeset);

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			struct rb_node ***p,
			struct rb_node **parent,
			u32 *bits, struct extent_changeset *changeset)
{
	struct rb_node *node;

	if (end < start) {
		btrfs_err(tree->fs_info,
			"insert state: end < start %llu %llu", end, start);
		WARN_ON(1);
	}
	state->start = start;
	state->end = end;

	set_state_bits(tree, state, bits, changeset);

	node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
	if (node) {
		struct extent_state *found;

		found = rb_entry(node, struct extent_state, rb_node);
		btrfs_err(tree->fs_info,
		       "found node %llu %llu on insert of %llu %llu",
		       found->start, found->end, start, end);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_split_delalloc_extent(tree->private_data, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
			   &prealloc->rb_node, NULL, NULL);
	if (node) {
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

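/* Return the state immediately after @state in the tree, or NULL if none. */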
static struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);

	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 *bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = *bits & ~EXTENT_CTLBITS;
	int ret;

	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;

		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_clear_delalloc_extent(tree->private_data, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

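/*
 * Return the preallocated state if we have one, otherwise fall back to a
 * GFP_ATOMIC allocation, which may fail (callers check for NULL or BUG_ON).
 */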
static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
	"locking error: extent tree was modified by another thread while locked");
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, int wake, int delete,
		       struct extent_state **cached_state,
		       gfp_t mask, struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	u64 last_end;
	int err;
	int clear = 0;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}

	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* the state doesn't have the wanted bits, go ahead */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, &bits, wake,
						changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, &bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, &bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}

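/*
 * Sleep until a wakeup arrives on @state->wq, dropping the tree lock while
 * asleep and retaking it before returning.
 */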
static void wait_on_state(struct extent_io_tree *tree,
			  struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits)
{
	struct extent_state *state;
	struct rb_node *node;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
process_node:
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			refcount_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			node = rb_next(node);
			goto process_node;
		}
	}
out:
	spin_unlock(&tree->lock);
}

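/*
 * Set the given bits on @state, update the tree's dirty byte accounting and
 * record the change in @changeset when one is supplied.
 */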
static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 *bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = *bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_set_delalloc_extent(tree->private_data, state, bits);

	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;

		tree->dirty_bytes += range;
	}
	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

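/*
 * Cache @state in *@cached_ptr (taking a reference) if nothing is cached yet
 * and the state carries one of @flags, or unconditionally when @flags is 0.
 */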
static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

/*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive This takes the tree lock.
 */
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
		   u32 exclusive_bits, u64 *failed_start,
		   struct extent_state **cached_state, gfp_t mask,
		   struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL);
again:
	if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, end,
				   &p, &parent, &bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, &bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   NULL, NULL, &bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

}

/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 * 			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range.  If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits.  This is only meant to be used by things that are mergeable, ie
 * converting from say DELALLOC to DIRTY.  This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = insert_state(tree, prealloc, start, end,
				   &p, &parent, &bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, &bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, &clear_bits, 0,
						NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   NULL, NULL, &bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}

/* wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	BUG_ON(bits & EXTENT_LOCKED);

	return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
			      changeset);
}

int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
			   u32 bits)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
			      GFP_NOWAIT, NULL);
}

int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     u32 bits, int wake, int delete,
		     struct extent_state **cached)
{
	return __clear_extent_bit(tree, start, end, bits, wake, delete,
				  cached, GFP_NOFS, NULL);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     u32 bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	BUG_ON(bits & EXTENT_LOCKED);

	return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
				  changeset);
}

/*
 * either insert or lock state struct between start and end use mask to tell
 * us if waiting is desired.
 */
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		     struct extent_state **cached_state)
{
	int err;
	u64 failed_start;

	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED,
				     EXTENT_LOCKED, &failed_start,
				     cached_state, GFP_NOFS, NULL);
		if (err == -EEXIST) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else
			break;
		WARN_ON(start > end);
	}
	return err;
}

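/*
 * Try to lock the range without waiting; returns 1 on success and 0 if any
 * part of it was already locked, undoing whatever was locked along the way.
 */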
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	int err;
	u64 failed_start;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
			     &failed_start, NULL, GFP_NOFS, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, NULL);
		return 0;
	}
	return 1;
}

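/*
 * Clear, or re-establish, the dirty flag on every page backing the byte
 * range; both helpers expect every page in [start, end] to be present.
 */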
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		BUG_ON(!page); /* Pages should be in the extent_io_tree */
		clear_page_dirty_for_io(page);
		put_page(page);
		index++;
	}
}

void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		BUG_ON(!page); /* Pages should be in the extent_io_tree */
		__set_page_dirty_nobuffers(page);
		account_page_redirty(page);
		put_page(page);
		index++;
	}
}

/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will returned if
 * nothing was found after 'start'
 */
static struct extent_state *
find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, u32 bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}

/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned. If found something, return 0.
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, u32 bits,
			  struct extent_state **cached_state)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					goto got_it;
			}
			free_extent_state(*cached_state);
			*cached_state = NULL;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = 0;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/**
 * find_contiguous_extent_bit: find a contiguous area of bits
 * @tree - io tree to check
 * @start - offset to start the search from
 * @start_ret - the first offset we found with the bits set
 * @end_ret - the final contiguous range of the bits that were set
 * @bits - bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again.  During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits.  We will search to the first bit we find,
 * and then walk down the tree until we find a non-contiguous area.  The area
 * returned will be the full contiguous area with the bits set.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}

/**
 * find_first_clear_extent_bit - find the first range that has @bits not set.
 * This range could start before @start.
 *
 * @tree - the tree to search
 * @start - the offset at/after which the found extent should start
 * @start_ret - records the beginning of the range
 * @end_ret - records the end of the range (inclusive)
 * @bits - the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, u32 bits)
{
	struct extent_state *state;
	struct rb_node *node, *prev = NULL, *next;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		node = __etree_search(tree, start, &next, &prev, NULL, NULL);
		if (!node && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!node && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			state = rb_entry(prev, struct extent_state, rb_node);
			*start_ret = state->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!node) {
			node = next;
		}
		/*
		 * At this point 'node' either contains 'start' or start is
		 * before 'node'
		 */
		state = rb_entry(node, struct extent_state, rb_node);

		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *    |
				 *    start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as
				 * the beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                        start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 * 0   |
			 *    start
			 */
			if (prev) {
				state = rb_entry(prev, struct extent_state,
						 rb_node);
				*start_ret = state->end + 1;
			} else {
				*start_ret = 0;
			}
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
}

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * true is returned if we find something, false if nothing was in the tree
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

static int __process_pages_contig(struct address_space *mapping,
				  struct page *locked_page,
				  pgoff_t start_index, pgoff_t end_index,
				  unsigned long page_ops, pgoff_t *index_ret);

static noinline void __unlock_for_delalloc(struct inode *inode,
					   struct page *locked_page,
					   u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;

	ASSERT(locked_page);
	if (index == locked_page->index && end_index == index)
		return;

	__process_pages_contig(inode->i_mapping, locked_page, index, end_index,
			       PAGE_UNLOCK, NULL);
}

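/*
 * Lock every page of the delalloc range except @locked_page, which the
 * caller already holds.  On -EAGAIN the pages locked so far are unlocked.
 */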
static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,
					u64 delalloc_start,
					u64 delalloc_end)
{
	unsigned long index = delalloc_start >> PAGE_SHIFT;
	unsigned long index_ret = index;
	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
	int ret;

	ASSERT(locked_page);
	if (index == locked_page->index && index == end_index)
		return 0;

	ret = __process_pages_contig(inode->i_mapping, locked_page, index,
				     end_index, PAGE_LOCK, &index_ret);
	if (ret == -EAGAIN)
		__unlock_for_delalloc(inode, locked_page, delalloc_start,
				      (u64)index_ret << PAGE_SHIFT);
	return ret;
}

/*
 * Find and lock a contiguous range of bytes in the file marked as delalloc,
 * not more than @max_bytes.
 *
 * @start and @end are used to return the range,
 *
 * Return: true if we find something
 *         false if nothing was in the tree
 */
EXPORT_FOR_TESTS
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
				    struct page *locked_page, u64 *start,
				    u64 *end)
{
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
	u64 delalloc_start;
	u64 delalloc_end;
	bool found;
	struct extent_state *cached_state = NULL;
	int ret;
	int loops = 0;

again:
	/* step one, find a bunch of delalloc bytes starting at start */
	delalloc_start = *start;
	delalloc_end = 0;
	found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
					  max_bytes, &cached_state);
	if (!found || delalloc_end <= *start) {
		*start = delalloc_start;
		*end = delalloc_end;
		free_extent_state(cached_state);
		return false;
	}

	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page
	 */
	if (delalloc_start < *start)
		delalloc_start = *start;

	/*
	 * make sure to limit the number of pages we try to lock down
	 */
	if (delalloc_end + 1 - delalloc_start > max_bytes)
		delalloc_end = delalloc_start + max_bytes - 1;

	/* step two, lock all the pages after the page that has start */
	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);
	ASSERT(!ret || ret == -EAGAIN);
	if (ret == -EAGAIN) {
		/* some of the pages are gone, lets avoid looping by
		 * shortening the size of the delalloc range we're searching
		 */
		free_extent_state(cached_state);
		cached_state = NULL;
		if (!loops) {
			max_bytes = PAGE_SIZE;
			loops = 1;
			goto again;
		} else {
			found = false;
			goto out_failed;
		}
	}

	/* step three, lock the state bits for the whole range */
	lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);

	/* then test to make sure it is all still delalloc */
	ret = test_range_bit(tree, delalloc_start, delalloc_end,
			     EXTENT_DELALLOC, 1, cached_state);
	if (!ret) {
		unlock_extent_cached(tree, delalloc_start, delalloc_end,
				     &cached_state);
		__unlock_for_delalloc(inode, locked_page,
				      delalloc_start, delalloc_end);
		cond_resched();
		goto again;
	}
	free_extent_state(cached_state);
	*start = delalloc_start;
	*end = delalloc_end;
out_failed:
	return found;
}

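/*
 * Walk the pages in [start_index, end_index] in batches and apply the
 * requested @page_ops to each of them.  With PAGE_LOCK, *index_ret reports
 * how far we got before a page was found missing, clean or remapped.
 */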
static int __process_pages_contig(struct address_space *mapping,
				  struct page *locked_page,
				  pgoff_t start_index, pgoff_t end_index,
				  unsigned long page_ops, pgoff_t *index_ret)
{
	unsigned long nr_pages = end_index - start_index + 1;
	unsigned long pages_processed = 0;
	pgoff_t index = start_index;
	struct page *pages[16];
	unsigned ret;
	int err = 0;
	int i;

	if (page_ops & PAGE_LOCK) {
		ASSERT(page_ops == PAGE_LOCK);
		ASSERT(index_ret && *index_ret == start_index);
	}

	if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
		mapping_set_error(mapping, -EIO);

	while (nr_pages > 0) {
		ret = find_get_pages_contig(mapping, index,
				     min_t(unsigned long,
				     nr_pages, ARRAY_SIZE(pages)), pages);
		if (ret == 0) {
			/*
			 * Only if we're going to lock these pages,
			 * can we find nothing at @index.
			 */
			ASSERT(page_ops & PAGE_LOCK);
			err = -EAGAIN;
			goto out;
		}

		for (i = 0; i < ret; i++) {
			if (page_ops & PAGE_SET_PRIVATE2)
				SetPagePrivate2(pages[i]);

			if (locked_page && pages[i] == locked_page) {
				put_page(pages[i]);
				pages_processed++;
				continue;
			}
			if (page_ops & PAGE_CLEAR_DIRTY)
				clear_page_dirty_for_io(pages[i]);
			if (page_ops & PAGE_SET_WRITEBACK)
				set_page_writeback(pages[i]);
			if (page_ops & PAGE_SET_ERROR)
				SetPageError(pages[i]);
			if (page_ops & PAGE_END_WRITEBACK)
				end_page_writeback(pages[i]);
			if (page_ops & PAGE_UNLOCK)
				unlock_page(pages[i]);
			if (page_ops & PAGE_LOCK) {
				lock_page(pages[i]);
				if (!PageDirty(pages[i]) ||
				    pages[i]->mapping != mapping) {
					unlock_page(pages[i]);
					for (; i < ret; i++)
						put_page(pages[i]);
					err = -EAGAIN;
					goto out;
				}
			}
			put_page(pages[i]);
			pages_processed++;
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
out:
	if (err && index_ret)
		*index_ret = start_index + pages_processed - 1;
	return err;
}

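/*
 * Clear the given bits on the io tree for the range and then apply the page
 * operations to all pages backing it, e.g. to unlock them after delalloc.
 */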
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
				  struct page *locked_page,
				  u32 clear_bits, unsigned long page_ops)
{
	clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);

	__process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
			       start >> PAGE_SHIFT, end >> PAGE_SHIFT,
			       page_ops, NULL);
}

/*
 * count the number of bytes in the tree that have a given bit(s)
 * set.  This can be fairly slow, except for EXTENT_DIRTY which is
 * cached.  The total number found is returned.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     u32 bits, int contig)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end <= cur_start))
		return 0;

	spin_lock(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {
		total_bytes = tree->dirty_bytes;
		goto out;
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return total_bytes;
}

/*
 * set the private field for a given byte offset in the tree.  If there isn't
 * an extent_state there already, this does nothing.
 */
int set_state_failrec(struct extent_io_tree *tree, u64 start,
		      struct io_failure_record *failrec)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->failrec = failrec;
out:
	spin_unlock(&tree->lock);
	return ret;
}

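/*
 * Look up the failure record stored at exactly @start, returning
 * ERR_PTR(-ENOENT) when no state starts at that offset.
 */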
struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start)
{
	struct rb_node *node;
	struct extent_state *state;
	struct io_failure_record *failrec;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		failrec = ERR_PTR(-ENOENT);
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		failrec = ERR_PTR(-ENOENT);
		goto out;
	}

	failrec = state->failrec;
out:
	spin_unlock(&tree->lock);
	return failrec;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   u32 bits, int filled, struct extent_state *cached)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		node = &cached->rb_node;
	else
		node = tree_search(tree, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}

		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}

		if (state->end == (u64)-1)
			break;

		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	spin_unlock(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
{
	u64 start = page_offset(page);
	u64 end = start + PAGE_SIZE - 1;

	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
		SetPageUptodate(page);
}

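/*
 * Drop the failure record for a range: detach it from the failure tree,
 * clear the tracking bits in both trees and free the record itself.
 */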
int free_io_failure(struct extent_io_tree *failure_tree,
		    struct extent_io_tree *io_tree,
		    struct io_failure_record *rec)
{
	int ret;
	int err = 0;

	set_state_failrec(failure_tree, rec->start, NULL);
	ret = clear_extent_bits(failure_tree, rec->start,
				rec->start + rec->len - 1,
				EXTENT_LOCKED | EXTENT_DIRTY);
	if (ret)
		err = ret;

	ret = clear_extent_bits(io_tree, rec->start,
				rec->start + rec->len - 1,
				EXTENT_DAMAGED);
	if (ret && !err)
		err = ret;

	kfree(rec);
	return err;
}

/*
 * this bypasses the standard btrfs submit functions deliberately, as
 * the standard behavior is to write all copies in a raid setup. here we only
 * want to write the one bad copy. so we do the mapping for ourselves and issue
 * submit_bio directly.
 * to avoid any synchronization issues, wait for the data after writing, which
 * actually prevents the read that triggered the error from finishing.
 * currently, there can be no more than two copies of every data bit. thus,
 * exactly one rewrite is required.
 */
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
		      u64 length, u64 logical, struct page *page,
		      unsigned int pg_offset, int mirror_num)
{
	struct bio *bio;
	struct btrfs_device *dev;
	u64 map_length = 0;
	u64 sector;
	struct btrfs_bio *bbio = NULL;
	int ret;

	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
	BUG_ON(!mirror_num);

	bio = btrfs_io_bio_alloc(1);
	bio->bi_iter.bi_size = 0;
	map_length = length;

	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are doing the
	 * read repair operation.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	if (btrfs_is_parity_mirror(fs_info, logical, length)) {
		/*
		 * For RAID56 map the range as a read: that returns the
		 * device holding the data stripe (mirror 1), which is the
		 * one we want to overwrite here, while a write mapping
		 * would target the whole stripe.
		 */
		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
				      &map_length, &bbio, 0);
		if (ret) {
			btrfs_bio_counter_dec(fs_info);
			bio_put(bio);
			return -EIO;
		}
		ASSERT(bbio->mirror_num == 1);
	} else {
		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
				      &map_length, &bbio, mirror_num);
		if (ret) {
			btrfs_bio_counter_dec(fs_info);
			bio_put(bio);
			return -EIO;
		}
		BUG_ON(mirror_num != bbio->mirror_num);
	}

	sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
	bio->bi_iter.bi_sector = sector;
	dev = bbio->stripes[bbio->mirror_num - 1].dev;
	btrfs_put_bbio(bbio);
	if (!dev || !dev->bdev ||
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		btrfs_bio_counter_dec(fs_info);
		bio_put(bio);
		return -EIO;
	}
	bio_set_dev(bio, dev->bdev);
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
	bio_add_page(bio, page, length, pg_offset);

	if (btrfsic_submit_bio_wait(bio)) {
		/* try to remap that extent elsewhere? */
		btrfs_bio_counter_dec(fs_info);
		bio_put(bio);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
		return -EIO;
	}

	btrfs_info_rl_in_rcu(fs_info,
		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
			     ino, start,
			     rcu_str_deref(dev->name), sector);
	btrfs_bio_counter_dec(fs_info);
	bio_put(bio);
	return 0;
}

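/*
 * Write the in-memory pages of an extent buffer back over the given mirror
 * to repair a bad copy on disk.
 */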
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	u64 start = eb->start;
	int i, num_pages = num_extent_pages(eb);
	int ret = 0;

	if (sb_rdonly(fs_info->sb))
		return -EROFS;

	for (i = 0; i < num_pages; i++) {
		struct page *p = eb->pages[i];

		ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
					start - page_offset(p), mirror_num);
		if (ret)
			break;
		start += PAGE_SIZE;
	}

	return ret;
}

/*
 * each time an IO finishes, we do a fast check in the IO failure tree
 * to see if we need to process or clean up an io_failure_record
 */
int clean_io_failure(struct btrfs_fs_info *fs_info,
		     struct extent_io_tree *failure_tree,
		     struct extent_io_tree *io_tree, u64 start,
		     struct page *page, u64 ino, unsigned int pg_offset)
{
	u64 private;
	struct io_failure_record *failrec;
	struct extent_state *state;
	int num_copies;
	int ret;

	private = 0;
	ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
			       EXTENT_DIRTY, 0);
	if (!ret)
		return 0;

	failrec = get_state_failrec(failure_tree, start);
	if (IS_ERR(failrec))
		return 0;

	BUG_ON(!failrec->this_mirror);

	if (failrec->in_validation) {
		/* there was no real error, just free the record */
		btrfs_debug(fs_info,
			"clean_io_failure: freeing dummy error at %llu",
			failrec->start);
		goto out;
	}
	if (sb_rdonly(fs_info->sb))
		goto out;

	spin_lock(&io_tree->lock);
	state = find_first_extent_bit_state(io_tree,
					    failrec->start,
					    EXTENT_LOCKED);
	spin_unlock(&io_tree->lock);

	if (state && state->start <= failrec->start &&
	    state->end >= failrec->start + failrec->len - 1) {
		num_copies = btrfs_num_copies(fs_info, failrec->logical,
					      failrec->len);
		if (num_copies > 1) {
			repair_io_failure(fs_info, ino, start, failrec->len,
					  failrec->logical, page, pg_offset,
					  failrec->failed_mirror);
		}
	}

out:
	free_io_failure(failure_tree, io_tree, failrec);

	return 0;
}

/*
 * Can be called when
 * - hold extent lock
 * - under ordered extent
 * - the inode is freeing
 */
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
{
	struct extent_io_tree *failure_tree = &inode->io_failure_tree;
	struct io_failure_record *failrec;
	struct extent_state *state, *next;

	if (RB_EMPTY_ROOT(&failure_tree->state))
		return;

	spin_lock(&failure_tree->lock);
	state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
	while (state) {
		if (state->start > end)
			break;

		ASSERT(state->end <= end);

		next = next_state(state);

		failrec = state->failrec;
		free_extent_state(state);
		kfree(failrec);

		state = next;
	}
	spin_unlock(&failure_tree->lock);
}

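/*
 * Return the failure record for @start, creating and registering a new one
 * (including resolving the logical address of the extent) if none exists.
 */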
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
							     u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct io_failure_record *failrec;
	struct extent_map *em;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	int ret;
	u64 logical;

	failrec = get_state_failrec(failure_tree, start);
	if (!IS_ERR(failrec)) {
		btrfs_debug(fs_info,
	"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
			failrec->logical, failrec->start, failrec->len,
			failrec->in_validation);
		/*
		 * when data can be on disk more than twice, add to failrec here
		 * (e.g. with a list for failed_mirror) to make
		 * clean_io_failure() clean all those errors at once.
		 */
		return failrec;
	}

	failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
	if (!failrec)
		return ERR_PTR(-ENOMEM);

	failrec->start = start;
	failrec->len = end - start + 1;
	failrec->this_mirror = 0;
	failrec->bio_flags = 0;
	failrec->in_validation = 0;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, failrec->len);
	if (!em) {
		read_unlock(&em_tree->lock);
		kfree(failrec);
		return ERR_PTR(-EIO);
	}

	if (em->start > start || em->start + em->len <= start) {
		free_extent_map(em);
		em = NULL;
	}
	read_unlock(&em_tree->lock);
	if (!em) {
		kfree(failrec);
		return ERR_PTR(-EIO);
	}

	logical = start - em->start;
	logical = em->block_start + logical;
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		logical = em->block_start;
		failrec->bio_flags = EXTENT_BIO_COMPRESSED;
		extent_set_compress_type(&failrec->bio_flags, em->compress_type);
	}

	btrfs_debug(fs_info,
		    "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
		    logical, start, failrec->len);

	failrec->logical = logical;
	free_extent_map(em);

	/* Set the bits in the private failure tree */
	ret = set_extent_bits(failure_tree, start, end,
			      EXTENT_LOCKED | EXTENT_DIRTY);
	if (ret >= 0) {
		ret = set_state_failrec(failure_tree, start, failrec);
		/* Set the bits in the inode's tree */
		ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
	} else if (ret < 0) {
		kfree(failrec);
		return ERR_PTR(ret);
	}

	return failrec;
}

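/*
 * Decide whether another mirror is available to retry from and prime the
 * failure record with the mirror number to use for the resubmitted read.
 */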
2528static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
2529 struct io_failure_record *failrec,
2530 int failed_mirror)
2531{
2532 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2533 int num_copies;
2534
2535 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2536 if (num_copies == 1) {
2537
2538
2539
2540
2541
2542 btrfs_debug(fs_info,
2543 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2544 num_copies, failrec->this_mirror, failed_mirror);
2545 return false;
2546 }
2547
2548
2549
2550
2551
2552
2553 if (needs_validation) {
2554
2555
2556
2557
2558
2559
2560
2561
2562 BUG_ON(failrec->in_validation);
2563 failrec->in_validation = 1;
2564 failrec->this_mirror = failed_mirror;
2565 } else {
2566
2567
2568
2569
2570
2571 if (failrec->in_validation) {
2572 BUG_ON(failrec->this_mirror != failed_mirror);
2573 failrec->in_validation = 0;
2574 failrec->this_mirror = 0;
2575 }
2576 failrec->failed_mirror = failed_mirror;
2577 failrec->this_mirror++;
2578 if (failrec->this_mirror == failed_mirror)
2579 failrec->this_mirror++;
2580 }
2581
2582 if (failrec->this_mirror > num_copies) {
2583 btrfs_debug(fs_info,
2584 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2585 num_copies, failrec->this_mirror, failed_mirror);
2586 return false;
2587 }
2588
2589 return true;
2590}
2591
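/*
 * Check whether the failed bio spans more than one sector.  If it does, we
 * don't know which sector is bad, so the repair read must re-validate each
 * sector individually.
 */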
2592static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
2593{
2594 u64 len = 0;
2595 const u32 blocksize = inode->i_sb->s_blocksize;
2596
	/*
	 * If bi_status is BLK_STS_OK, then this was a checksum failure, not
	 * an I/O error.  In that case we already know exactly which sector
	 * was bad, so there is nothing to validate.
	 */
2602 if (bio->bi_status == BLK_STS_OK)
2603 return false;
2604
	/*
	 * We need to validate each sector individually if the failed I/O was
	 * for multiple sectors.
	 *
	 * There are a few possible bios that can end up here:
	 * 1. A buffered read bio, which is not cloned.
	 * 2. A direct I/O read bio, which is cloned.
	 * 3. A (buffered or direct) repair bio, which is not cloned.
	 *
	 * For cloned bios (case 2), we can get the size from
	 * btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
	 * it from the bvecs.
	 */
2618 if (bio_flagged(bio, BIO_CLONED)) {
2619 if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
2620 return true;
2621 } else {
2622 struct bio_vec *bvec;
2623 int i;
2624
2625 bio_for_each_bvec_all(bvec, bio, i) {
2626 len += bvec->bv_len;
2627 if (len > blocksize)
2628 return true;
2629 }
2630 }
2631 return false;
2632}
2633
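/*
 * Build and submit a repair read for one page range, targeting the next
 * mirror chosen by btrfs_check_repairable().  The repair bio reuses the
 * end_io context of the failed bio, so its completion re-enters the normal
 * read endio path for checksum verification.
 */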
2634blk_status_t btrfs_submit_read_repair(struct inode *inode,
2635 struct bio *failed_bio, u32 bio_offset,
2636 struct page *page, unsigned int pgoff,
2637 u64 start, u64 end, int failed_mirror,
2638 submit_bio_hook_t *submit_bio_hook)
2639{
2640 struct io_failure_record *failrec;
2641 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2642 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2643 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2644 struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
2645 const int icsum = bio_offset >> fs_info->sectorsize_bits;
2646 bool need_validation;
2647 struct bio *repair_bio;
2648 struct btrfs_io_bio *repair_io_bio;
2649 blk_status_t status;
2650
2651 btrfs_debug(fs_info,
2652 "repair read error: read error at %llu", start);
2653
2654 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2655
2656 failrec = btrfs_get_io_failure_record(inode, start, end);
2657 if (IS_ERR(failrec))
2658 return errno_to_blk_status(PTR_ERR(failrec));
2659
2660 need_validation = btrfs_io_needs_validation(inode, failed_bio);
2661
2662 if (!btrfs_check_repairable(inode, need_validation, failrec,
2663 failed_mirror)) {
2664 free_io_failure(failure_tree, tree, failrec);
2665 return BLK_STS_IOERR;
2666 }
2667
2668 repair_bio = btrfs_io_bio_alloc(1);
2669 repair_io_bio = btrfs_io_bio(repair_bio);
2670 repair_bio->bi_opf = REQ_OP_READ;
2671 if (need_validation)
2672 repair_bio->bi_opf |= REQ_FAILFAST_DEV;
2673 repair_bio->bi_end_io = failed_bio->bi_end_io;
2674 repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
2675 repair_bio->bi_private = failed_bio->bi_private;
2676
2677 if (failed_io_bio->csum) {
2678 const u32 csum_size = fs_info->csum_size;
2679
2680 repair_io_bio->csum = repair_io_bio->csum_inline;
2681 memcpy(repair_io_bio->csum,
2682 failed_io_bio->csum + csum_size * icsum, csum_size);
2683 }
2684
2685 bio_add_page(repair_bio, page, failrec->len, pgoff);
2686 repair_io_bio->logical = failrec->start;
2687 repair_io_bio->iter = repair_bio->bi_iter;
2688
2689 btrfs_debug(btrfs_sb(inode->i_sb),
2690"repair read error: submitting new read to mirror %d, in_validation=%d",
2691 failrec->this_mirror, failrec->in_validation);
2692
2693 status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
2694 failrec->bio_flags);
2695 if (status) {
2696 free_io_failure(failure_tree, tree, failrec);
2697 bio_put(repair_bio);
2698 }
2699 return status;
2700}
2701
/* Lots and lots of room for performance fixes in the end_bio funcs */

2704void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2705{
2706 int uptodate = (err == 0);
2707 int ret = 0;
2708
2709 btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
2710
2711 if (!uptodate) {
2712 ClearPageUptodate(page);
2713 SetPageError(page);
2714 ret = err < 0 ? err : -EIO;
2715 mapping_set_error(page->mapping, ret);
2716 }
2717}
2718
/*
 * After a writepage IO is done, we need to:
 * - clear the uptodate bits on error
 * - clear the writeback bits in the extent tree for this IO
 * - end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2728static void end_bio_extent_writepage(struct bio *bio)
2729{
2730 int error = blk_status_to_errno(bio->bi_status);
2731 struct bio_vec *bvec;
2732 u64 start;
2733 u64 end;
2734 struct bvec_iter_all iter_all;
2735
2736 ASSERT(!bio_flagged(bio, BIO_CLONED));
2737 bio_for_each_segment_all(bvec, bio, iter_all) {
2738 struct page *page = bvec->bv_page;
2739 struct inode *inode = page->mapping->host;
2740 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2741
		/*
		 * We always issue full-page writes, but if some block in a
		 * page fails, blk_update_request() will advance bv_offset and
		 * adjust bv_len to compensate.  Print a warning for nonzero
		 * offsets, and an error if they don't add up to a full page.
		 */
2747 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2748 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2749 btrfs_err(fs_info,
2750 "partial page write in btrfs with offset %u and length %u",
2751 bvec->bv_offset, bvec->bv_len);
2752 else
2753 btrfs_info(fs_info,
2754 "incomplete page write in btrfs with offset %u and length %u",
2755 bvec->bv_offset, bvec->bv_len);
2756 }
2757
2758 start = page_offset(page);
2759 end = start + bvec->bv_offset + bvec->bv_len - 1;
2760
2761 end_extent_writepage(page, error, start, end);
2762 end_page_writeback(page);
2763 }
2764
2765 bio_put(bio);
2766}
2767
/*
 * Record previously processed extent range.
 *
 * For endio_readpage_release_extent() to handle a full extent range, reducing
 * the extent io operations.
 */
2774struct processed_extent {
2775 struct btrfs_inode *inode;
	/* Start of the range in @inode */
2777 u64 start;
	/* End of the range in @inode */
2779 u64 end;
2780 bool uptodate;
2781};
2782
/*
 * Try to release the processed extent range.
 *
 * May not release the extent range right now if the current range is
 * contiguous to the processed extent.
 *
 * Will release the processed extent when any of @inode, @uptodate, or the
 * range is no longer contiguous to the processed range.
 *
 * Passing @inode == NULL will force the processed extent to be released.
 */
2794static void endio_readpage_release_extent(struct processed_extent *processed,
2795 struct btrfs_inode *inode, u64 start, u64 end,
2796 bool uptodate)
2797{
2798 struct extent_state *cached = NULL;
2799 struct extent_io_tree *tree;

	/* The first extent in the bio, initialize @processed */
2802 if (!processed->inode)
2803 goto update;
2804
	/*
	 * Contiguous to processed extent, just uptodate the end.
	 *
	 * Several things to notice:
	 *
	 * - bios can be merged as long as the on-disk bytenr is contiguous.
	 *   This means we can have pages belonging to other inodes, thus we
	 *   need to check if the inode still matches.
	 * - a bvec can contain a range beyond the current page for multi-page
	 *   bvecs, thus we need the processed->end + 1 >= start check.
	 */
2816 if (processed->inode == inode && processed->uptodate == uptodate &&
2817 processed->end + 1 >= start && end >= processed->end) {
2818 processed->end = end;
2819 return;
2820 }
2821
2822 tree = &processed->inode->io_tree;

	/*
	 * Now we don't have a range contiguous to the processed range,
	 * release the processed range now.
	 */
2827 if (processed->uptodate && tree->track_uptodate)
2828 set_extent_uptodate(tree, processed->start, processed->end,
2829 &cached, GFP_ATOMIC);
2830 unlock_extent_cached_atomic(tree, processed->start, processed->end,
2831 &cached);
2832
2833update:
	/* Update @processed to the current range */
2835 processed->inode = inode;
2836 processed->start = start;
2837 processed->end = end;
2838 processed->uptodate = uptodate;
2839}
2840
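/* Mark the page uptodate or flag the error, then drop the page lock */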
2841static void endio_readpage_update_page_status(struct page *page, bool uptodate)
2842{
2843 if (uptodate) {
2844 SetPageUptodate(page);
2845 } else {
2846 ClearPageUptodate(page);
2847 SetPageError(page);
2848 }
2849 unlock_page(page);
2850}
2851
/*
 * After a readpage IO is done, we need to:
 * - clear the uptodate bits on error
 * - set the uptodate bits if things worked
 * - set the page up to date if all extents in the tree are uptodate
 * - clear the lock bit in the extent tree
 * - unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2863static void end_bio_extent_readpage(struct bio *bio)
2864{
2865 struct bio_vec *bvec;
2866 int uptodate = !bio->bi_status;
2867 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2868 struct extent_io_tree *tree, *failure_tree;
2869 struct processed_extent processed = { 0 };
	/*
	 * The offset to the beginning of a bio; since one bio can never be
	 * larger than UINT_MAX, u32 is enough to cover all data.
	 */
2874 u32 bio_offset = 0;
2875 int mirror;
2876 int ret;
2877 struct bvec_iter_all iter_all;
2878
2879 ASSERT(!bio_flagged(bio, BIO_CLONED));
2880 bio_for_each_segment_all(bvec, bio, iter_all) {
2881 struct page *page = bvec->bv_page;
2882 struct inode *inode = page->mapping->host;
2883 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2884 const u32 sectorsize = fs_info->sectorsize;
2885 u64 start;
2886 u64 end;
2887 u32 len;
2888
2889 btrfs_debug(fs_info,
2890 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2891 bio->bi_iter.bi_sector, bio->bi_status,
2892 io_bio->mirror_num);
2893 tree = &BTRFS_I(inode)->io_tree;
2894 failure_tree = &BTRFS_I(inode)->io_failure_tree;

		/*
		 * We always issue full-sector reads, but if some block in a
		 * page fails to read, blk_update_request() will advance
		 * bv_offset and adjust bv_len to compensate.  Print a warning
		 * for unaligned offsets, and an error if they don't add up to
		 * a full sector.
		 */
2903 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
2904 btrfs_err(fs_info,
2905 "partial page read in btrfs with offset %u and length %u",
2906 bvec->bv_offset, bvec->bv_len);
2907 else if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len,
2908 sectorsize))
2909 btrfs_info(fs_info,
2910 "incomplete page read with offset %u and length %u",
2911 bvec->bv_offset, bvec->bv_len);
2912
2913 start = page_offset(page) + bvec->bv_offset;
2914 end = start + bvec->bv_len - 1;
2915 len = bvec->bv_len;
2916
2917 mirror = io_bio->mirror_num;
2918 if (likely(uptodate)) {
2919 if (is_data_inode(inode))
2920 ret = btrfs_verify_data_csum(io_bio,
2921 bio_offset, page, start, end,
2922 mirror);
2923 else
2924 ret = btrfs_validate_metadata_buffer(io_bio,
2925 page, start, end, mirror);
2926 if (ret)
2927 uptodate = 0;
2928 else
2929 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2930 failure_tree, tree, start,
2931 page,
2932 btrfs_ino(BTRFS_I(inode)), 0);
2933 }
2934
2935 if (likely(uptodate))
2936 goto readpage_ok;
2937
2938 if (is_data_inode(inode)) {
			/*
			 * If the repair read is submitted successfully
			 * (return value 0), just continue with the next bvec;
			 * the repair bio will re-enter this endio handler on
			 * completion.  If the range can't be repaired, fall
			 * through so the page gets flagged with an error
			 * below.
			 */
2950 if (!btrfs_submit_read_repair(inode, bio, bio_offset,
2951 page,
2952 start - page_offset(page),
2953 start, end, mirror,
2954 btrfs_submit_data_bio)) {
2955 uptodate = !bio->bi_status;
2956 ASSERT(bio_offset + len > bio_offset);
2957 bio_offset += len;
2958 continue;
2959 }
2960 } else {
2961 struct extent_buffer *eb;
2962
2963 eb = (struct extent_buffer *)page->private;
2964 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
2965 eb->read_mirror = mirror;
2966 atomic_dec(&eb->io_pages);
2967 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
2968 &eb->bflags))
2969 btree_readahead_hook(eb, -EIO);
2970 }
2971readpage_ok:
2972 if (likely(uptodate)) {
2973 loff_t i_size = i_size_read(inode);
2974 pgoff_t end_index = i_size >> PAGE_SHIFT;
2975 unsigned off;

			/* Zero out the tail if this page straddles i_size */
2978 off = offset_in_page(i_size);
2979 if (page->index == end_index && off)
2980 zero_user_segment(page, off, PAGE_SIZE);
2981 }
2982 ASSERT(bio_offset + len > bio_offset);
2983 bio_offset += len;

		/* Update page status and unlock */
2986 endio_readpage_update_page_status(page, uptodate);
2987 endio_readpage_release_extent(&processed, BTRFS_I(inode),
2988 start, end, uptodate);
2989 }
	/* Release the last extent */
2991 endio_readpage_release_extent(&processed, NULL, 0, 0, false);
2992 btrfs_io_bio_free_csum(io_bio);
2993 bio_put(bio);
2994}
2995
/*
 * Initialize the members up to but not including 'bio'.  Use after allocating
 * a new bio by bio_alloc_bioset, as it does not initialize the bytes outside
 * of 'bio' because 'bio' is the last member of the structure.
 */
3001static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
3002{
3003 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
3004}
3005
/*
 * The following helpers allocate a bio.  As it's backed by a bioset, it'll
 * never fail.  The caller can use btrfs_io_bio() to get at the btrfs-specific
 * wrapper around the returned bio.
 */
3011struct bio *btrfs_bio_alloc(u64 first_byte)
3012{
3013 struct bio *bio;
3014
3015 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
3016 bio->bi_iter.bi_sector = first_byte >> 9;
3017 btrfs_io_bio_init(btrfs_io_bio(bio));
3018 return bio;
3019}
3020
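/* Clone a bio, sharing its pages, and record the original iter in the clone */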
3021struct bio *btrfs_bio_clone(struct bio *bio)
3022{
3023 struct btrfs_io_bio *btrfs_bio;
3024 struct bio *new;
3025
	/* Bio allocation backed by a bioset does not fail */
3027 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
3028 btrfs_bio = btrfs_io_bio(new);
3029 btrfs_io_bio_init(btrfs_bio);
3030 btrfs_bio->iter = bio->bi_iter;
3031 return new;
3032}
3033
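/* Allocate a bio with room for @nr_iovecs segments, backed by btrfs_bioset */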
3034struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
3035{
3036 struct bio *bio;
3037
	/* Bio allocation backed by a bioset does not fail */
3039 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
3040 btrfs_io_bio_init(btrfs_io_bio(bio));
3041 return bio;
3042}
3043
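/*
 * Clone @orig and trim the clone down to the range given by @offset and
 * @size.  Both are byte counts and are converted to 512-byte sectors for
 * bio_trim().  A hypothetical caller splitting off the tail at 64K:
 *
 *	tail = btrfs_bio_clone_partial(orig, SZ_64K, orig_size - SZ_64K);
 *
 * (illustrative only; 'tail' and 'orig_size' are not names used elsewhere in
 * this file)
 */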
3044struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
3045{
3046 struct bio *bio;
3047 struct btrfs_io_bio *btrfs_bio;
3048
	/* Bio allocation backed by a bioset does not fail */
3050 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
3051 ASSERT(bio);
3052
3053 btrfs_bio = btrfs_io_bio(bio);
3054 btrfs_io_bio_init(btrfs_bio);
3055
3056 bio_trim(bio, offset >> 9, size >> 9);
3057 btrfs_bio->iter = bio->bi_iter;
3058 return bio;
3059}
3060
/*
 * @opf:	bio REQ_OP_* and REQ_* flags as one value
 * @wbc:	optional writeback control for io accounting
 * @page:	page to add to the bio
 * @offset:	logical offset in the file, used to set bi_sector
 * @size:	portion of page that we are writing
 * @pg_offset:	offset of the new bio or to check whether we are adding
 *		a contiguous page to the previous one
 * @bio_ret:	must be valid pointer, the bio being built or NULL
 * @end_io_func:	end_io callback for new bio
 * @mirror_num:	desired mirror to read/write
 * @prev_bio_flags:	flags of previous bio to see if we can merge the current one
 * @bio_flags:	flags of the current bio
 * @force_bio_submit:	submit the previous bio even if it could be merged
 */
3075static int submit_extent_page(unsigned int opf,
3076 struct writeback_control *wbc,
3077 struct page *page, u64 offset,
3078 size_t size, unsigned long pg_offset,
3079 struct bio **bio_ret,
3080 bio_end_io_t end_io_func,
3081 int mirror_num,
3082 unsigned long prev_bio_flags,
3083 unsigned long bio_flags,
3084 bool force_bio_submit)
3085{
3086 int ret = 0;
3087 struct bio *bio;
3088 size_t io_size = min_t(size_t, size, PAGE_SIZE);
3089 sector_t sector = offset >> 9;
3090 struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
3091
3092 ASSERT(bio_ret);
3093
3094 if (*bio_ret) {
3095 bool contig;
3096 bool can_merge = true;
3097
3098 bio = *bio_ret;
3099 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
3100 contig = bio->bi_iter.bi_sector == sector;
3101 else
3102 contig = bio_end_sector(bio) == sector;
3103
3104 if (btrfs_bio_fits_in_stripe(page, io_size, bio, bio_flags))
3105 can_merge = false;
3106
3107 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
3108 force_bio_submit ||
3109 bio_add_page(bio, page, io_size, pg_offset) < io_size) {
3110 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
3111 if (ret < 0) {
3112 *bio_ret = NULL;
3113 return ret;
3114 }
3115 bio = NULL;
3116 } else {
3117 if (wbc)
3118 wbc_account_cgroup_owner(wbc, page, io_size);
3119 return 0;
3120 }
3121 }
3122
3123 bio = btrfs_bio_alloc(offset);
3124 bio_add_page(bio, page, io_size, pg_offset);
3125 bio->bi_end_io = end_io_func;
3126 bio->bi_private = tree;
3127 bio->bi_write_hint = page->mapping->host->i_write_hint;
3128 bio->bi_opf = opf;
3129 if (wbc) {
3130 struct block_device *bdev;
3131
3132 bdev = BTRFS_I(page->mapping->host)->root->fs_info->fs_devices->latest_bdev;
3133 bio_set_dev(bio, bdev);
3134 wbc_init_bio(wbc, bio);
3135 wbc_account_cgroup_owner(wbc, page, io_size);
3136 }
3137
3138 *bio_ret = bio;
3139
3140 return ret;
3141}
3142
3143static void attach_extent_buffer_page(struct extent_buffer *eb,
3144 struct page *page)
3145{
	/*
	 * If the page is mapped to the btree inode, we should hold the
	 * private lock to prevent races.
	 * For cloned or dummy extent buffers, their pages are not mapped and
	 * will not race with any other ebs.
	 */
3152 if (page->mapping)
3153 lockdep_assert_held(&page->mapping->private_lock);
3154
3155 if (!PagePrivate(page))
3156 attach_page_private(page, eb);
3157 else
3158 WARN_ON(page->private != (unsigned long)eb);
3159}
3160
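/* Tag a data page with EXTENT_PAGE_PRIVATE so releasepage knows about it */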
3161void set_page_extent_mapped(struct page *page)
3162{
3163 if (!PagePrivate(page))
3164 attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
3165}
3166
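/*
 * Return the cached extent map if it is still in the tree and covers @start;
 * otherwise drop the cached one and look up (and cache) a fresh map.
 */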
3167static struct extent_map *
3168__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
3169 u64 start, u64 len, struct extent_map **em_cached)
3170{
3171 struct extent_map *em;
3172
3173 if (em_cached && *em_cached) {
3174 em = *em_cached;
3175 if (extent_map_in_tree(em) && start >= em->start &&
3176 start < extent_map_end(em)) {
3177 refcount_inc(&em->refs);
3178 return em;
3179 }
3180
3181 free_extent_map(em);
3182 *em_cached = NULL;
3183 }
3184
3185 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
3186 if (em_cached && !IS_ERR_OR_NULL(em)) {
3187 BUG_ON(*em_cached);
3188 refcount_inc(&em->refs);
3189 *em_cached = em;
3190 }
3191 return em;
3192}
3193
/*
 * Basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers).
 *
 * @bio_ret: the last bio in an existing chain, or NULL if this is the first
 */
3200int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
3201 struct bio **bio, unsigned long *bio_flags,
3202 unsigned int read_flags, u64 *prev_em_start)
3203{
3204 struct inode *inode = page->mapping->host;
3205 u64 start = page_offset(page);
3206 const u64 end = start + PAGE_SIZE - 1;
3207 u64 cur = start;
3208 u64 extent_offset;
3209 u64 last_byte = i_size_read(inode);
3210 u64 block_start;
3211 u64 cur_end;
3212 struct extent_map *em;
3213 int ret = 0;
3214 int nr = 0;
3215 size_t pg_offset = 0;
3216 size_t iosize;
3217 size_t blocksize = inode->i_sb->s_blocksize;
3218 unsigned long this_bio_flag = 0;
3219 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
3220
3221 set_page_extent_mapped(page);
3222
3223 if (!PageUptodate(page)) {
3224 if (cleancache_get_page(page) == 0) {
3225 BUG_ON(blocksize != PAGE_SIZE);
3226 unlock_extent(tree, start, end);
3227 goto out;
3228 }
3229 }
3230
3231 if (page->index == last_byte >> PAGE_SHIFT) {
3232 char *userpage;
3233 size_t zero_offset = offset_in_page(last_byte);
3234
3235 if (zero_offset) {
3236 iosize = PAGE_SIZE - zero_offset;
3237 userpage = kmap_atomic(page);
3238 memset(userpage + zero_offset, 0, iosize);
3239 flush_dcache_page(page);
3240 kunmap_atomic(userpage);
3241 }
3242 }
3243 while (cur <= end) {
3244 bool force_bio_submit = false;
3245 u64 offset;
3246
3247 if (cur >= last_byte) {
3248 char *userpage;
3249 struct extent_state *cached = NULL;
3250
3251 iosize = PAGE_SIZE - pg_offset;
3252 userpage = kmap_atomic(page);
3253 memset(userpage + pg_offset, 0, iosize);
3254 flush_dcache_page(page);
3255 kunmap_atomic(userpage);
3256 set_extent_uptodate(tree, cur, cur + iosize - 1,
3257 &cached, GFP_NOFS);
3258 unlock_extent_cached(tree, cur,
3259 cur + iosize - 1, &cached);
3260 break;
3261 }
3262 em = __get_extent_map(inode, page, pg_offset, cur,
3263 end - cur + 1, em_cached);
3264 if (IS_ERR_OR_NULL(em)) {
3265 SetPageError(page);
3266 unlock_extent(tree, cur, end);
3267 break;
3268 }
3269 extent_offset = cur - em->start;
3270 BUG_ON(extent_map_end(em) <= cur);
3271 BUG_ON(end < cur);
3272
3273 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
3274 this_bio_flag |= EXTENT_BIO_COMPRESSED;
3275 extent_set_compress_type(&this_bio_flag,
3276 em->compress_type);
3277 }
3278
3279 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3280 cur_end = min(extent_map_end(em) - 1, end);
3281 iosize = ALIGN(iosize, blocksize);
3282 if (this_bio_flag & EXTENT_BIO_COMPRESSED)
3283 offset = em->block_start;
3284 else
3285 offset = em->block_start + extent_offset;
3286 block_start = em->block_start;
3287 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3288 block_start = EXTENT_MAP_HOLE;
3289
		/*
		 * If we have a file range that points to a compressed extent
		 * and it's followed by a consecutive file range that points
		 * to the same compressed extent (possibly with a different
		 * offset and/or length, so it either points to the whole
		 * extent or only part of it), we must make sure we do not
		 * submit a single bio to populate the pages for the 2 ranges
		 * because this makes the compressed extent read zero out the
		 * pages belonging to the 2nd range.  If the bio to read the
		 * compressed extent covers both ranges, it will decompress
		 * the extent into the pages belonging to the first range and
		 * then stop, zeroing out the remaining pages that belong to
		 * the other range pointing to the same extent.
		 *
		 * So here we make sure we submit 2 bios, one for the first
		 * range and another one for the second range.  Both will
		 * target the same physical extent from disk, but we can't
		 * currently make the compressed bio endio callback populate
		 * the pages for both ranges because each compressed bio is
		 * tightly coupled with a single extent map, and each range
		 * can have an extent map with a different offset value
		 * relative to the uncompressed data of our extent and
		 * different lengths.  This is a corner case so we prioritize
		 * correctness over non-optimal behavior (submitting 2 or 3
		 * bios for the same extent).
		 */
3324 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3325 prev_em_start && *prev_em_start != (u64)-1 &&
3326 *prev_em_start != em->start)
3327 force_bio_submit = true;
3328
3329 if (prev_em_start)
3330 *prev_em_start = em->start;
3331
3332 free_extent_map(em);
3333 em = NULL;

		/* We've found a hole, just zero and go on */
3336 if (block_start == EXTENT_MAP_HOLE) {
3337 char *userpage;
3338 struct extent_state *cached = NULL;
3339
3340 userpage = kmap_atomic(page);
3341 memset(userpage + pg_offset, 0, iosize);
3342 flush_dcache_page(page);
3343 kunmap_atomic(userpage);
3344
3345 set_extent_uptodate(tree, cur, cur + iosize - 1,
3346 &cached, GFP_NOFS);
3347 unlock_extent_cached(tree, cur,
3348 cur + iosize - 1, &cached);
3349 cur = cur + iosize;
3350 pg_offset += iosize;
3351 continue;
3352 }
		/* The get_extent function already copied into the page */
3354 if (test_range_bit(tree, cur, cur_end,
3355 EXTENT_UPTODATE, 1, NULL)) {
3356 check_page_uptodate(tree, page);
3357 unlock_extent(tree, cur, cur + iosize - 1);
3358 cur = cur + iosize;
3359 pg_offset += iosize;
3360 continue;
3361 }

		/*
		 * We have an inline extent but it didn't get marked up to
		 * date.  Error out.
		 */
3365 if (block_start == EXTENT_MAP_INLINE) {
3366 SetPageError(page);
3367 unlock_extent(tree, cur, cur + iosize - 1);
3368 cur = cur + iosize;
3369 pg_offset += iosize;
3370 continue;
3371 }
3372
3373 ret = submit_extent_page(REQ_OP_READ | read_flags, NULL,
3374 page, offset, iosize,
3375 pg_offset, bio,
3376 end_bio_extent_readpage, 0,
3377 *bio_flags,
3378 this_bio_flag,
3379 force_bio_submit);
3380 if (!ret) {
3381 nr++;
3382 *bio_flags = this_bio_flag;
3383 } else {
3384 SetPageError(page);
3385 unlock_extent(tree, cur, cur + iosize - 1);
3386 goto out;
3387 }
3388 cur = cur + iosize;
3389 pg_offset += iosize;
3390 }
3391out:
3392 if (!nr) {
3393 if (!PageError(page))
3394 SetPageUptodate(page);
3395 unlock_page(page);
3396 }
3397 return ret;
3398}
3399
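/*
 * Read a batch of pages that are contiguous in the file.  The range is locked
 * and any ordered extents in it are flushed first, so the reads see stable
 * data.
 */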
3400static inline void contiguous_readpages(struct page *pages[], int nr_pages,
3401 u64 start, u64 end,
3402 struct extent_map **em_cached,
3403 struct bio **bio,
3404 unsigned long *bio_flags,
3405 u64 *prev_em_start)
3406{
3407 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
3408 int index;
3409
3410 btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
3411
3412 for (index = 0; index < nr_pages; index++) {
3413 btrfs_do_readpage(pages[index], em_cached, bio, bio_flags,
3414 REQ_RAHEAD, prev_em_start);
3415 put_page(pages[index]);
3416 }
3417}
3418
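/* Bookkeeping helper: charge @nr_written pages against the writeback quota */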
3419static void update_nr_written(struct writeback_control *wbc,
3420 unsigned long nr_written)
3421{
3422 wbc->nr_to_write -= nr_written;
3423}
3424
/*
 * Helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if btrfs_run_delalloc_range did all the work required to
 * write the page (copy into inline extent).  In this case the IO has been
 * started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked).
 * This returns < 0 if there were errors (page still locked).
 */
3435static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
3436 struct page *page, struct writeback_control *wbc,
3437 u64 delalloc_start, unsigned long *nr_written)
3438{
3439 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3440 bool found;
3441 u64 delalloc_to_write = 0;
3442 u64 delalloc_end = 0;
3443 int ret;
3444 int page_started = 0;
3445
3446
3447 while (delalloc_end < page_end) {
3448 found = find_lock_delalloc_range(&inode->vfs_inode, page,
3449 &delalloc_start,
3450 &delalloc_end);
3451 if (!found) {
3452 delalloc_start = delalloc_end + 1;
3453 continue;
3454 }
3455 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3456 delalloc_end, &page_started, nr_written, wbc);
3457 if (ret) {
3458 SetPageError(page);
			/*
			 * btrfs_run_delalloc_range should return < 0 for
			 * error but just in case, we use > 0 here meaning the
			 * IO is started, so we don't want to return > 0
			 * unless things are going well.
			 */
3465 return ret < 0 ? ret : -EIO;
3466 }

		/*
		 * delalloc_end is already one less than the total length, so
		 * we don't subtract one from PAGE_SIZE.
		 */
3471 delalloc_to_write += (delalloc_end - delalloc_start +
3472 PAGE_SIZE) >> PAGE_SHIFT;
3473 delalloc_start = delalloc_end + 1;
3474 }
3475 if (wbc->nr_to_write < delalloc_to_write) {
3476 int thresh = 8192;
3477
3478 if (delalloc_to_write < thresh * 2)
3479 thresh = delalloc_to_write;
3480 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3481 thresh);
3482 }

	/* Did btrfs_run_delalloc_range() already unlock and start the IO? */
3487 if (page_started) {
		/*
		 * We've unlocked the page, so we can't update the mapping's
		 * writeback index, just update nr_to_write.
		 */
3493 wbc->nr_to_write -= *nr_written;
3494 return 1;
3495 }
3496
3497 return 0;
3498}
3499
/*
 * Helper for __extent_writepage.  This calls the writepage start hooks,
 * and does the loop to map the page into extents and bios.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked),
 * < 0 if there were errors (page still locked).
 */
3508static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
3509 struct page *page,
3510 struct writeback_control *wbc,
3511 struct extent_page_data *epd,
3512 loff_t i_size,
3513 unsigned long nr_written,
3514 int *nr_ret)
3515{
3516 struct extent_io_tree *tree = &inode->io_tree;
3517 u64 start = page_offset(page);
3518 u64 page_end = start + PAGE_SIZE - 1;
3519 u64 end;
3520 u64 cur = start;
3521 u64 extent_offset;
3522 u64 block_start;
3523 u64 iosize;
3524 struct extent_map *em;
3525 size_t pg_offset = 0;
3526 size_t blocksize;
3527 int ret = 0;
3528 int nr = 0;
3529 const unsigned int write_flags = wbc_to_write_flags(wbc);
3530 bool compressed;
3531
3532 ret = btrfs_writepage_cow_fixup(page, start, page_end);
3533 if (ret) {
		/* Fixup worker will requeue */
3535 redirty_page_for_writepage(wbc, page);
3536 update_nr_written(wbc, nr_written);
3537 unlock_page(page);
3538 return 1;
3539 }
3540
	/*
	 * We don't want to touch the inode after unlocking the page, so we
	 * update the mapping writeback index now.
	 */
3545 update_nr_written(wbc, nr_written + 1);
3546
3547 end = page_end;
3548 blocksize = inode->vfs_inode.i_sb->s_blocksize;
3549
3550 while (cur <= end) {
3551 u64 em_end;
3552 u64 offset;
3553
3554 if (cur >= i_size) {
3555 btrfs_writepage_endio_finish_ordered(page, cur,
3556 page_end, 1);
3557 break;
3558 }
3559 em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
3560 if (IS_ERR_OR_NULL(em)) {
3561 SetPageError(page);
3562 ret = PTR_ERR_OR_ZERO(em);
3563 break;
3564 }
3565
3566 extent_offset = cur - em->start;
3567 em_end = extent_map_end(em);
3568 BUG_ON(em_end <= cur);
3569 BUG_ON(end < cur);
3570 iosize = min(em_end - cur, end - cur + 1);
3571 iosize = ALIGN(iosize, blocksize);
3572 offset = em->block_start + extent_offset;
3573 block_start = em->block_start;
3574 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3575 free_extent_map(em);
3576 em = NULL;

		/*
		 * Compressed and inline extents are written through other
		 * paths in the FS.
		 */
3582 if (compressed || block_start == EXTENT_MAP_HOLE ||
3583 block_start == EXTENT_MAP_INLINE) {
3584 if (compressed)
3585 nr++;
3586 else
3587 btrfs_writepage_endio_finish_ordered(page, cur,
3588 cur + iosize - 1, 1);
3589 cur += iosize;
3590 pg_offset += iosize;
3591 continue;
3592 }
3593
3594 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
3595 if (!PageWriteback(page)) {
3596 btrfs_err(inode->root->fs_info,
3597 "page %lu not writeback, cur %llu end %llu",
3598 page->index, cur, end);
3599 }
3600
3601 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
3602 page, offset, iosize, pg_offset,
3603 &epd->bio,
3604 end_bio_extent_writepage,
3605 0, 0, 0, false);
3606 if (ret) {
3607 SetPageError(page);
3608 if (PageWriteback(page))
3609 end_page_writeback(page);
3610 }
3611
3612 cur = cur + iosize;
3613 pg_offset += iosize;
3614 nr++;
3615 }
3616 *nr_ret = nr;
3617 return ret;
3618}
3619
/*
 * The writepage semantics are similar to regular writepage.  extent records
 * are inserted to lock ranges in the tree, and as dirty areas are found, they
 * are marked writeback.  Then the lock bits are removed and the end_io
 * handler clears the writeback ranges.
 *
 * Return 0 if everything goes well.
 * Return <0 for error.
 */
3629static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3630 struct extent_page_data *epd)
3631{
3632 struct inode *inode = page->mapping->host;
3633 u64 start = page_offset(page);
3634 u64 page_end = start + PAGE_SIZE - 1;
3635 int ret;
3636 int nr = 0;
3637 size_t pg_offset;
3638 loff_t i_size = i_size_read(inode);
3639 unsigned long end_index = i_size >> PAGE_SHIFT;
3640 unsigned long nr_written = 0;
3641
3642 trace___extent_writepage(page, inode, wbc);
3643
3644 WARN_ON(!PageLocked(page));
3645
3646 ClearPageError(page);
3647
3648 pg_offset = offset_in_page(i_size);
3649 if (page->index > end_index ||
3650 (page->index == end_index && !pg_offset)) {
3651 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3652 unlock_page(page);
3653 return 0;
3654 }
3655
3656 if (page->index == end_index) {
3657 char *userpage;
3658
3659 userpage = kmap_atomic(page);
3660 memset(userpage + pg_offset, 0,
3661 PAGE_SIZE - pg_offset);
3662 kunmap_atomic(userpage);
3663 flush_dcache_page(page);
3664 }
3665
3666 set_page_extent_mapped(page);
3667
3668 if (!epd->extent_locked) {
3669 ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
3670 &nr_written);
3671 if (ret == 1)
3672 return 0;
3673 if (ret)
3674 goto done;
3675 }
3676
3677 ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
3678 nr_written, &nr);
3679 if (ret == 1)
3680 return 0;
3681
3682done:
3683 if (nr == 0) {
		/* Make sure the mapping tag for page dirty gets cleared */
3685 set_page_writeback(page);
3686 end_page_writeback(page);
3687 }
3688 if (PageError(page)) {
3689 ret = ret < 0 ? ret : -EIO;
3690 end_extent_writepage(page, ret, start, page_end);
3691 }
3692 unlock_page(page);
3693 ASSERT(ret <= 0);
3694 return ret;
3695}
3696
3697void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3698{
3699 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3700 TASK_UNINTERRUPTIBLE);
3701}
3702
3703static void end_extent_buffer_writeback(struct extent_buffer *eb)
3704{
3705 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3706 smp_mb__after_atomic();
3707 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3708}
3709
/*
 * Lock extent buffer status and pages for writeback.
 *
 * May try to flush the write bio if we can't get the lock.
 *
 * Return  0 if the extent buffer doesn't need to be submitted
 * (e.g. the extent buffer is not dirty).
 * Return >0 if the extent buffer is submitted to bio.
 * Return <0 if something went wrong, no page is locked.
 */
3720static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
3721 struct extent_page_data *epd)
3722{
3723 struct btrfs_fs_info *fs_info = eb->fs_info;
3724 int i, num_pages, failed_page_nr;
3725 int flush = 0;
3726 int ret = 0;
3727
3728 if (!btrfs_try_tree_write_lock(eb)) {
3729 ret = flush_write_bio(epd);
3730 if (ret < 0)
3731 return ret;
3732 flush = 1;
3733 btrfs_tree_lock(eb);
3734 }
3735
3736 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3737 btrfs_tree_unlock(eb);
3738 if (!epd->sync_io)
3739 return 0;
3740 if (!flush) {
3741 ret = flush_write_bio(epd);
3742 if (ret < 0)
3743 return ret;
3744 flush = 1;
3745 }
3746 while (1) {
3747 wait_on_extent_buffer_writeback(eb);
3748 btrfs_tree_lock(eb);
3749 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3750 break;
3751 btrfs_tree_unlock(eb);
3752 }
3753 }
3754
	/*
	 * We need to do this to prevent races in people who check if the eb
	 * is under IO, since we can end up having no IO bits set for a short
	 * period of time.
	 */
3760 spin_lock(&eb->refs_lock);
3761 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3762 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3763 spin_unlock(&eb->refs_lock);
3764 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3765 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3766 -eb->len,
3767 fs_info->dirty_metadata_batch);
3768 ret = 1;
3769 } else {
3770 spin_unlock(&eb->refs_lock);
3771 }
3772
3773 btrfs_tree_unlock(eb);
3774
3775 if (!ret)
3776 return ret;
3777
3778 num_pages = num_extent_pages(eb);
3779 for (i = 0; i < num_pages; i++) {
3780 struct page *p = eb->pages[i];
3781
3782 if (!trylock_page(p)) {
3783 if (!flush) {
3784 int err;
3785
3786 err = flush_write_bio(epd);
3787 if (err < 0) {
3788 ret = err;
3789 failed_page_nr = i;
3790 goto err_unlock;
3791 }
3792 flush = 1;
3793 }
3794 lock_page(p);
3795 }
3796 }
3797
3798 return ret;
3799err_unlock:
	/* Unlock already locked pages */
3801 for (i = 0; i < failed_page_nr; i++)
3802 unlock_page(eb->pages[i]);
	/*
	 * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it.
	 * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can
	 * be made and undo everything done before.
	 */
3808 btrfs_tree_lock(eb);
3809 spin_lock(&eb->refs_lock);
3810 set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
3811 end_extent_buffer_writeback(eb);
3812 spin_unlock(&eb->refs_lock);
3813 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
3814 fs_info->dirty_metadata_batch);
3815 btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3816 btrfs_tree_unlock(eb);
3817 return ret;
3818}
3819
3820static void set_btree_ioerr(struct page *page)
3821{
3822 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3823 struct btrfs_fs_info *fs_info;
3824
3825 SetPageError(page);
3826 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3827 return;
	/*
	 * If we error out, we should add back the dirty_metadata_bytes
	 * to make it consistent.
	 */
3833 fs_info = eb->fs_info;
3834 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3835 eb->len, fs_info->dirty_metadata_batch);

	/*
	 * If writeback for a btree extent that doesn't belong to a log tree
	 * failed, we must flag the error at transaction commit time (and for
	 * log trees, at fsync time), otherwise we could commit a super block
	 * with btree roots that point to btree nodes/leafs whose content on
	 * disk is invalid - either garbage or the content of some node/leaf
	 * from a past generation that got cowed or deleted and is no longer
	 * valid.
	 *
	 * While the transaction is running and before it's committing (when
	 * we call filemap_fdata[write|wait]_range against the btree inode),
	 * the VM might call writepages() on us, and a subsequent modification
	 * to the extent buffer could make the pages clean again before the
	 * commit checks them, so the AS_EIO/AS_ENOSPC flags in the btree
	 * inode's mapping are not reliable here.  Instead we record the error
	 * in fs_info flags, with separate bits for the two log trees and for
	 * everything else, so each consumer can check exactly the writes it
	 * cares about.
	 */
3875 switch (eb->log_index) {
3876 case -1:
3877 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3878 break;
3879 case 0:
3880 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3881 break;
3882 case 1:
3883 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3884 break;
3885 default:
3886 BUG();
3887 }
3888}
3889
3890static void end_bio_extent_buffer_writepage(struct bio *bio)
3891{
3892 struct bio_vec *bvec;
3893 struct extent_buffer *eb;
3894 int done;
3895 struct bvec_iter_all iter_all;
3896
3897 ASSERT(!bio_flagged(bio, BIO_CLONED));
3898 bio_for_each_segment_all(bvec, bio, iter_all) {
3899 struct page *page = bvec->bv_page;
3900
3901 eb = (struct extent_buffer *)page->private;
3902 BUG_ON(!eb);
3903 done = atomic_dec_and_test(&eb->io_pages);
3904
3905 if (bio->bi_status ||
3906 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3907 ClearPageUptodate(page);
3908 set_btree_ioerr(page);
3909 }
3910
3911 end_page_writeback(page);
3912
3913 if (!done)
3914 continue;
3915
3916 end_extent_buffer_writeback(eb);
3917 }
3918
3919 bio_put(bio);
3920}
3921
3922static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3923 struct writeback_control *wbc,
3924 struct extent_page_data *epd)
3925{
3926 u64 offset = eb->start;
3927 u32 nritems;
3928 int i, num_pages;
3929 unsigned long start, end;
3930 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3931 int ret = 0;
3932
3933 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3934 num_pages = num_extent_pages(eb);
3935 atomic_set(&eb->io_pages, num_pages);

	/* Set btree blocks beyond nritems with 0 to avoid stale content */
3938 nritems = btrfs_header_nritems(eb);
3939 if (btrfs_header_level(eb) > 0) {
3940 end = btrfs_node_key_ptr_offset(nritems);
3941
3942 memzero_extent_buffer(eb, end, eb->len - end);
3943 } else {
		/*
		 * Leaf:
		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
		 */
3948 start = btrfs_item_nr_offset(nritems);
3949 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
3950 memzero_extent_buffer(eb, start, end - start);
3951 }
3952
3953 for (i = 0; i < num_pages; i++) {
3954 struct page *p = eb->pages[i];
3955
3956 clear_page_dirty_for_io(p);
3957 set_page_writeback(p);
3958 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
3959 p, offset, PAGE_SIZE, 0,
3960 &epd->bio,
3961 end_bio_extent_buffer_writepage,
3962 0, 0, 0, false);
3963 if (ret) {
3964 set_btree_ioerr(p);
3965 if (PageWriteback(p))
3966 end_page_writeback(p);
3967 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3968 end_extent_buffer_writeback(eb);
3969 ret = -EIO;
3970 break;
3971 }
3972 offset += PAGE_SIZE;
3973 update_nr_written(wbc, 1);
3974 unlock_page(p);
3975 }
3976
3977 if (unlikely(ret)) {
3978 for (; i < num_pages; i++) {
3979 struct page *p = eb->pages[i];
3980 clear_page_dirty_for_io(p);
3981 unlock_page(p);
3982 }
3983 }
3984
3985 return ret;
3986}
3987
/*
 * Submit all page(s) of one extent buffer.
 *
 * @page:	the page of one extent buffer
 * @eb_context:	to determine if we need to submit this page; if the current
 *		page belongs to this eb, we don't need to submit
 *
 * The caller should pass each page in their bytenr order, and here we use
 * @eb_context to determine if we have submitted the pages of one extent
 * buffer.
 *
 * If we have, we just skip until we hit a new page that doesn't belong to
 * current @eb_context.
 *
 * If not, we submit all the page(s) of the extent buffer.
 *
 * Return >0 if we have submitted the extent buffer successfully.
 * Return 0 if we don't need to submit the page, as it's already submitted by
 * a previous call.
 * Return <0 for fatal error.
 */
4008static int submit_eb_page(struct page *page, struct writeback_control *wbc,
4009 struct extent_page_data *epd,
4010 struct extent_buffer **eb_context)
4011{
4012 struct address_space *mapping = page->mapping;
4013 struct extent_buffer *eb;
4014 int ret;
4015
4016 if (!PagePrivate(page))
4017 return 0;
4018
4019 spin_lock(&mapping->private_lock);
4020 if (!PagePrivate(page)) {
4021 spin_unlock(&mapping->private_lock);
4022 return 0;
4023 }
4024
4025 eb = (struct extent_buffer *)page->private;

	/*
	 * Shouldn't happen and normally this would be a BUG_ON but no point
	 * in crashing the machine for something we can survive anyway.
	 */
4031 if (WARN_ON(!eb)) {
4032 spin_unlock(&mapping->private_lock);
4033 return 0;
4034 }
4035
4036 if (eb == *eb_context) {
4037 spin_unlock(&mapping->private_lock);
4038 return 0;
4039 }
4040 ret = atomic_inc_not_zero(&eb->refs);
4041 spin_unlock(&mapping->private_lock);
4042 if (!ret)
4043 return 0;
4044
4045 *eb_context = eb;
4046
4047 ret = lock_extent_buffer_for_io(eb, epd);
4048 if (ret <= 0) {
4049 free_extent_buffer(eb);
4050 return ret;
4051 }
4052 ret = write_one_eb(eb, wbc, epd);
4053 free_extent_buffer(eb);
4054 if (ret < 0)
4055 return ret;
4056 return 1;
4057}
4058
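/*
 * Walk the dirty pages of the btree inode and submit the extent buffers they
 * belong to, one buffer at a time, honoring the sync mode and page quota in
 * @wbc.
 */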
4059int btree_write_cache_pages(struct address_space *mapping,
4060 struct writeback_control *wbc)
4061{
4062 struct extent_buffer *eb_context = NULL;
4063 struct extent_page_data epd = {
4064 .bio = NULL,
4065 .extent_locked = 0,
4066 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4067 };
4068 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
4069 int ret = 0;
4070 int done = 0;
4071 int nr_to_write_done = 0;
4072 struct pagevec pvec;
4073 int nr_pages;
4074 pgoff_t index;
4075 pgoff_t end;
4076 int scanned = 0;
4077 xa_mark_t tag;
4078
4079 pagevec_init(&pvec);
4080 if (wbc->range_cyclic) {
4081 index = mapping->writeback_index;
4082 end = -1;

		/*
		 * Start from the beginning does not need to cycle over the
		 * range, mark it as scanned.
		 */
4087 scanned = (index == 0);
4088 } else {
4089 index = wbc->range_start >> PAGE_SHIFT;
4090 end = wbc->range_end >> PAGE_SHIFT;
4091 scanned = 1;
4092 }
4093 if (wbc->sync_mode == WB_SYNC_ALL)
4094 tag = PAGECACHE_TAG_TOWRITE;
4095 else
4096 tag = PAGECACHE_TAG_DIRTY;
4097retry:
4098 if (wbc->sync_mode == WB_SYNC_ALL)
4099 tag_pages_for_writeback(mapping, index, end);
4100 while (!done && !nr_to_write_done && (index <= end) &&
4101 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
4102 tag))) {
4103 unsigned i;
4104
4105 for (i = 0; i < nr_pages; i++) {
4106 struct page *page = pvec.pages[i];
4107
4108 ret = submit_eb_page(page, wbc, &epd, &eb_context);
4109 if (ret == 0)
4110 continue;
4111 if (ret < 0) {
4112 done = 1;
4113 break;
4114 }
4115
			/*
			 * The filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time.
			 */
4121 nr_to_write_done = wbc->nr_to_write <= 0;
4122 }
4123 pagevec_release(&pvec);
4124 cond_resched();
4125 }
4126 if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file.
		 */
4131 scanned = 1;
4132 index = 0;
4133 goto retry;
4134 }
4135 if (ret < 0) {
4136 end_write_bio(&epd, ret);
4137 return ret;
4138 }
4139
	/*
	 * If something went wrong, don't allow any metadata write bio to be
	 * submitted.
	 *
	 * This would prevent use-after-free if we had dirty pages not cleaned
	 * up, which can still happen by fuzzed images.
	 *
	 * - Bad extent tree
	 *   Allowing existing tree blocks to be allocated for other trees.
	 *
	 * - Log tree operations
	 *   Existing tree blocks get allocated to the log tree, bump its
	 *   generation, then get cleaned in tree re-balance.  Such a tree
	 *   block will not be written back, since it's clean, thus no WRITTEN
	 *   flag is set.  And after the log writes back, this tree block is
	 *   not traced by any dirty extent_io_tree.
	 *
	 * - Offending tree block gets re-dirtied from its original owner
	 *   Since it has a bumped generation and no WRITTEN flag, it can be
	 *   reused without COWing, and will not be traced by
	 *   btrfs_transaction::dirty_pages either.  Thus we don't want to
	 *   submit such a wild eb if the fs already has an error.
	 */
4166 if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
4167 ret = flush_write_bio(&epd);
4168 } else {
4169 ret = -EROFS;
4170 end_write_bio(&epd, ret);
4171 }
4172 return ret;
4173}
4174
/*
 * Walk the list of dirty pages of the given address space and write all of
 * them.
 *
 * @mapping: address space structure to write
 * @wbc:     subtract the number of written pages from *@wbc->nr_to_write
 * @epd:     holds context for the write, namely the bio
 *
 * If a page is already under I/O, this skips it, even if it's dirty.  This is
 * desirable behaviour for memory-cleaning writeback, but it is INCORRECT for
 * data-integrity system calls such as fsync().  fsync() and msync() need to
 * guarantee that all the data which was dirty at the time the call was made
 * gets new I/O started against it.  If wbc->sync_mode is WB_SYNC_ALL then we
 * were called for data integrity and we must wait for existing IO to
 * complete.
 */
4189static int extent_write_cache_pages(struct address_space *mapping,
4190 struct writeback_control *wbc,
4191 struct extent_page_data *epd)
4192{
4193 struct inode *inode = mapping->host;
4194 int ret = 0;
4195 int done = 0;
4196 int nr_to_write_done = 0;
4197 struct pagevec pvec;
4198 int nr_pages;
4199 pgoff_t index;
4200 pgoff_t end;
4201 pgoff_t done_index;
4202 int range_whole = 0;
4203 int scanned = 0;
4204 xa_mark_t tag;
4205
	/*
	 * We have to hold onto the inode so that ordered extents can do their
	 * work when the IO finishes.  The alternative to this is failing to
	 * add an ordered extent if the igrab() fails there and that is a huge
	 * pain to deal with, so instead just hold onto the inode throughout
	 * the writepages operation.  If it fails here we are freeing up the
	 * inode anyway and we'd rather not waste our time writing out stuff
	 * that is going to be truncated anyway.
	 */
4215 if (!igrab(inode))
4216 return 0;
4217
4218 pagevec_init(&pvec);
4219 if (wbc->range_cyclic) {
4220 index = mapping->writeback_index;
4221 end = -1;

		/*
		 * Start from the beginning does not need to cycle over the
		 * range, mark it as scanned.
		 */
4226 scanned = (index == 0);
4227 } else {
4228 index = wbc->range_start >> PAGE_SHIFT;
4229 end = wbc->range_end >> PAGE_SHIFT;
4230 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4231 range_whole = 1;
4232 scanned = 1;
4233 }
4234
	/*
	 * We do the tagged writepage as long as the snapshot flush bit is set
	 * and we are the first one who does the filemap_flush() on this
	 * inode.
	 *
	 * The nr_to_write == LONG_MAX is needed to make sure other flushers
	 * do not race in and drop the bit.
	 */
4242 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4243 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4244 &BTRFS_I(inode)->runtime_flags))
4245 wbc->tagged_writepages = 1;
4246
4247 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4248 tag = PAGECACHE_TAG_TOWRITE;
4249 else
4250 tag = PAGECACHE_TAG_DIRTY;
4251retry:
4252 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4253 tag_pages_for_writeback(mapping, index, end);
4254 done_index = index;
4255 while (!done && !nr_to_write_done && (index <= end) &&
4256 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4257 &index, end, tag))) {
4258 unsigned i;
4259
4260 for (i = 0; i < nr_pages; i++) {
4261 struct page *page = pvec.pages[i];
4262
4263 done_index = page->index + 1;
			/*
			 * At this point we hold neither the i_pages lock nor
			 * the page lock: the page may be truncated or
			 * invalidated (changing page->mapping to NULL),
			 * or even swizzled back from swapper_space to
			 * tmpfs file mapping.
			 */
4271 if (!trylock_page(page)) {
4272 ret = flush_write_bio(epd);
4273 BUG_ON(ret < 0);
4274 lock_page(page);
4275 }
4276
4277 if (unlikely(page->mapping != mapping)) {
4278 unlock_page(page);
4279 continue;
4280 }
4281
4282 if (wbc->sync_mode != WB_SYNC_NONE) {
4283 if (PageWriteback(page)) {
4284 ret = flush_write_bio(epd);
4285 BUG_ON(ret < 0);
4286 }
4287 wait_on_page_writeback(page);
4288 }
4289
4290 if (PageWriteback(page) ||
4291 !clear_page_dirty_for_io(page)) {
4292 unlock_page(page);
4293 continue;
4294 }
4295
4296 ret = __extent_writepage(page, wbc, epd);
4297 if (ret < 0) {
4298 done = 1;
4299 break;
4300 }
4301
			/*
			 * The filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time.
			 */
4307 nr_to_write_done = wbc->nr_to_write <= 0;
4308 }
4309 pagevec_release(&pvec);
4310 cond_resched();
4311 }
4312 if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file.
		 */
4317 scanned = 1;
4318 index = 0;

		/*
		 * If we're looping we could run into a page that is locked by
		 * a writer and that writer could be waiting on writeback.  If
		 * we waited on its writeback we could deadlock, so flush the
		 * write bio before retrying.
		 */
4326 ret = flush_write_bio(epd);
4327 if (!ret)
4328 goto retry;
4329 }
4330
4331 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4332 mapping->writeback_index = done_index;
4333
4334 btrfs_add_delayed_iput(inode);
4335 return ret;
4336}
4337
4338int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4339{
4340 int ret;
4341 struct extent_page_data epd = {
4342 .bio = NULL,
4343 .extent_locked = 0,
4344 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4345 };
4346
4347 ret = __extent_writepage(page, wbc, &epd);
4348 ASSERT(ret <= 0);
4349 if (ret < 0) {
4350 end_write_bio(&epd, ret);
4351 return ret;
4352 }
4353
4354 ret = flush_write_bio(&epd);
4355 ASSERT(ret <= 0);
4356 return ret;
4357}
4358
4359int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4360 int mode)
4361{
4362 int ret = 0;
4363 struct address_space *mapping = inode->i_mapping;
4364 struct page *page;
4365 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4366 PAGE_SHIFT;
4367
4368 struct extent_page_data epd = {
4369 .bio = NULL,
4370 .extent_locked = 1,
4371 .sync_io = mode == WB_SYNC_ALL,
4372 };
4373 struct writeback_control wbc_writepages = {
4374 .sync_mode = mode,
4375 .nr_to_write = nr_pages * 2,
4376 .range_start = start,
4377 .range_end = end + 1,
		/* We're called from an async helper function */
4379 .punt_to_cgroup = 1,
4380 .no_cgroup_owner = 1,
4381 };
4382
4383 wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
4384 while (start <= end) {
4385 page = find_get_page(mapping, start >> PAGE_SHIFT);
4386 if (clear_page_dirty_for_io(page))
4387 ret = __extent_writepage(page, &wbc_writepages, &epd);
4388 else {
4389 btrfs_writepage_endio_finish_ordered(page, start,
4390 start + PAGE_SIZE - 1, 1);
4391 unlock_page(page);
4392 }
4393 put_page(page);
4394 start += PAGE_SIZE;
4395 }
4396
4397 ASSERT(ret <= 0);
4398 if (ret == 0)
4399 ret = flush_write_bio(&epd);
4400 else
4401 end_write_bio(&epd, ret);
4402
4403 wbc_detach_inode(&wbc_writepages);
4404 return ret;
4405}
4406
4407int extent_writepages(struct address_space *mapping,
4408 struct writeback_control *wbc)
4409{
4410 int ret = 0;
4411 struct extent_page_data epd = {
4412 .bio = NULL,
4413 .extent_locked = 0,
4414 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4415 };
4416
4417 ret = extent_write_cache_pages(mapping, wbc, &epd);
4418 ASSERT(ret <= 0);
4419 if (ret < 0) {
4420 end_write_bio(&epd, ret);
4421 return ret;
4422 }
4423 ret = flush_write_bio(&epd);
4424 return ret;
4425}
4426
4427void extent_readahead(struct readahead_control *rac)
4428{
4429 struct bio *bio = NULL;
4430 unsigned long bio_flags = 0;
4431 struct page *pagepool[16];
4432 struct extent_map *em_cached = NULL;
4433 u64 prev_em_start = (u64)-1;
4434 int nr;
4435
4436 while ((nr = readahead_page_batch(rac, pagepool))) {
4437 u64 contig_start = page_offset(pagepool[0]);
4438 u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;
4439
4440 ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
4441
4442 contiguous_readpages(pagepool, nr, contig_start, contig_end,
4443 &em_cached, &bio, &bio_flags, &prev_em_start);
4444 }
4445
4446 if (em_cached)
4447 free_extent_map(em_cached);
4448
4449 if (bio) {
4450 if (submit_one_bio(bio, 0, bio_flags))
4451 return;
4452 }
4453}
4454
/*
 * Basic invalidatepage code.  This waits on any locked or writeback ranges
 * corresponding to the page, and then deletes any extent state records from
 * the tree.
 */
4460int extent_invalidatepage(struct extent_io_tree *tree,
4461 struct page *page, unsigned long offset)
4462{
4463 struct extent_state *cached_state = NULL;
4464 u64 start = page_offset(page);
4465 u64 end = start + PAGE_SIZE - 1;
4466 size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	/* This function is only called for the btree inode */
4469 ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
4470
4471 start += ALIGN(offset, blocksize);
4472 if (start > end)
4473 return 0;
4474
4475 lock_extent_bits(tree, start, end, &cached_state);
4476 wait_on_page_writeback(page);

	/*
	 * Currently for the btree io tree, only EXTENT_LOCKED is utilized,
	 * so here we only need to unlock the extent range.
	 */
4483 unlock_extent_cached(tree, start, end, &cached_state);
4484 return 0;
4485}
4486
/*
 * A helper for releasepage.  This tests for areas of the page that are locked
 * or under IO and drops the related state bits if it is safe to drop the
 * page.
 */
4492static int try_release_extent_state(struct extent_io_tree *tree,
4493 struct page *page, gfp_t mask)
4494{
4495 u64 start = page_offset(page);
4496 u64 end = start + PAGE_SIZE - 1;
4497 int ret = 1;
4498
4499 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
4500 ret = 0;
4501 } else {
		/*
		 * At this point we can safely clear everything except the
		 * locked bit, the nodatasum bit and the delalloc new bit.
		 * The delalloc new bit will be cleared by ordered extent
		 * completion.
		 */
4508 ret = __clear_extent_bit(tree, start, end,
4509 ~(EXTENT_LOCKED | EXTENT_NODATASUM | EXTENT_DELALLOC_NEW),
4510 0, 0, NULL, mask, NULL);

		/*
		 * If clear_extent_bit failed for enomem reasons, we can't
		 * allow the release to continue.
		 */
4515 if (ret < 0)
4516 ret = 0;
4517 else
4518 ret = 1;
4519 }
4520 return ret;
4521}
4522
/*
 * A helper for releasepage.  As long as there are no locked extents in the
 * range corresponding to the page, both state records and extent map records
 * are removed.
 */
4528int try_release_extent_mapping(struct page *page, gfp_t mask)
4529{
4530 struct extent_map *em;
4531 u64 start = page_offset(page);
4532 u64 end = start + PAGE_SIZE - 1;
4533 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4534 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4535 struct extent_map_tree *map = &btrfs_inode->extent_tree;
4536
4537 if (gfpflags_allow_blocking(mask) &&
4538 page->mapping->host->i_size > SZ_16M) {
4539 u64 len;
4540 while (start <= end) {
4541 struct btrfs_fs_info *fs_info;
4542 u64 cur_gen;
4543
4544 len = end - start + 1;
4545 write_lock(&map->lock);
4546 em = lookup_extent_mapping(map, start, len);
4547 if (!em) {
4548 write_unlock(&map->lock);
4549 break;
4550 }
4551 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4552 em->start != start) {
4553 write_unlock(&map->lock);
4554 free_extent_map(em);
4555 break;
4556 }
4557 if (test_range_bit(tree, em->start,
4558 extent_map_end(em) - 1,
4559 EXTENT_LOCKED, 0, NULL))
4560 goto next;

			/*
			 * If it's not in the list of modified extents, used
			 * by a fast fsync, we can remove it.  If it's being
			 * logged we can safely remove it since fsync took an
			 * extra reference on the em.
			 */
4567 if (list_empty(&em->list) ||
4568 test_bit(EXTENT_FLAG_LOGGING, &em->flags))
4569 goto remove_em;

			/*
			 * If it's in the list of modified extents, remove it
			 * only if its generation is older than the current
			 * one, in which case we don't need it for a fast
			 * fsync.  Otherwise don't remove it, we could be
			 * racing with an ongoing fast fsync that could miss
			 * the new extent.
			 */
4577 fs_info = btrfs_inode->root->fs_info;
4578 spin_lock(&fs_info->trans_lock);
4579 cur_gen = fs_info->generation;
4580 spin_unlock(&fs_info->trans_lock);
4581 if (em->generation >= cur_gen)
4582 goto next;
4583remove_em:
			/*
			 * We only remove extent maps that are not in the list
			 * of modified extents or that are in the list but
			 * with a generation lower than the current one, so
			 * there is no need to set the full fsync flag on the
			 * inode (it hurts the fsync performance for workloads
			 * with a data size that exceeds or is close to the
			 * system's memory).
			 */
4592 remove_extent_mapping(map, em);
			/* Once for the rb tree */
4594 free_extent_map(em);
4595next:
4596 start = extent_map_end(em);
4597 write_unlock(&map->lock);

			/* Once for us, from the lookup above */
4600 free_extent_map(em);
4601
4602 cond_resched();
4603 }
4604 }
4605 return try_release_extent_state(tree, page, mask);
4606}
4607
/*
 * Helper function for fiemap, which doesn't want to see any holes.
 * This maps until we find something past 'last'.
 */
4612static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
4613 u64 offset, u64 last)
4614{
4615 u64 sectorsize = btrfs_inode_sectorsize(inode);
4616 struct extent_map *em;
4617 u64 len;
4618
4619 if (offset >= last)
4620 return NULL;
4621
4622 while (1) {
4623 len = last - offset;
4624 if (len == 0)
4625 break;
4626 len = ALIGN(len, sectorsize);
4627 em = btrfs_get_extent_fiemap(inode, offset, len);
4628 if (IS_ERR_OR_NULL(em))
4629 return em;

		/* If this isn't a hole return it */
4632 if (em->block_start != EXTENT_MAP_HOLE)
4633 return em;

		/* This is a hole, advance to the next extent */
4636 offset = extent_map_end(em);
4637 free_extent_map(em);
4638 if (offset >= last)
4639 break;
4640 }
4641 return NULL;
4642}
4643
/*
 * To cache the previous fiemap extent.
 *
 * Will be used for merging fiemap extents.
 */
4649struct fiemap_cache {
4650 u64 offset;
4651 u64 phys;
4652 u64 len;
4653 u32 flags;
4654 bool cached;
4655};
4656
/*
 * Helper to submit a fiemap extent.
 *
 * Will try to merge the current fiemap extent specified by @offset, @phys,
 * @len and @flags with the cached one.
 * Only when we fail to merge is the cached one submitted as a fiemap extent.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
4667static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4668 struct fiemap_cache *cache,
4669 u64 offset, u64 phys, u64 len, u32 flags)
4670{
4671 int ret = 0;
4672
4673 if (!cache->cached)
4674 goto assign;
4675
	/*
	 * Sanity check, extent_fiemap() should have ensured that the new
	 * fiemap extent won't overlap with the cached one.
	 * Not recoverable.
	 *
	 * NOTE: Physical addresses can overlap, due to compression.
	 */
4683 if (cache->offset + cache->len > offset) {
4684 WARN_ON(1);
4685 return -EINVAL;
4686 }
4687
	/*
	 * Only merge fiemap extents if
	 * 1) Their logical addresses are continuous
	 *
	 * 2) Their physical addresses are continuous
	 *    So truly compressed (physical size smaller than logical size)
	 *    extents won't get merged with each other
	 *
	 * 3) Share same flags except FIEMAP_EXTENT_LAST
	 *    So regular extents won't get merged with prealloc extents
	 */
4699 if (cache->offset + cache->len == offset &&
4700 cache->phys + cache->len == phys &&
4701 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4702 (flags & ~FIEMAP_EXTENT_LAST)) {
4703 cache->len += len;
4704 cache->flags |= flags;
4705 goto try_submit_last;
4706 }

	/* Not mergeable, need to submit the cached one */
4709 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4710 cache->len, cache->flags);
4711 cache->cached = false;
4712 if (ret)
4713 return ret;
4714assign:
4715 cache->cached = true;
4716 cache->offset = offset;
4717 cache->phys = phys;
4718 cache->len = len;
4719 cache->flags = flags;
4720try_submit_last:
4721 if (cache->flags & FIEMAP_EXTENT_LAST) {
4722 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4723 cache->phys, cache->len, cache->flags);
4724 cache->cached = false;
4725 }
4726 return ret;
4727}
4728
/*
 * Emit the last fiemap cache.
 *
 * The last fiemap cache may still be cached in the following case:
 * 0		      4k		    8k
 * |<- Fiemap range ->|
 * |<------------  First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */
4740static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
4741 struct fiemap_cache *cache)
4742{
4743 int ret;
4744
4745 if (!cache->cached)
4746 return 0;
4747
4748 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4749 cache->len, cache->flags);
4750 cache->cached = false;
4751 if (ret > 0)
4752 ret = 0;
4753 return ret;
4754}
4755
4756int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
4757 u64 start, u64 len)
4758{
4759 int ret = 0;
4760 u64 off = start;
4761 u64 max = start + len;
4762 u32 flags = 0;
4763 u32 found_type;
4764 u64 last;
4765 u64 last_for_get_extent = 0;
4766 u64 disko = 0;
4767 u64 isize = i_size_read(&inode->vfs_inode);
4768 struct btrfs_key found_key;
4769 struct extent_map *em = NULL;
4770 struct extent_state *cached_state = NULL;
4771 struct btrfs_path *path;
4772 struct btrfs_root *root = inode->root;
4773 struct fiemap_cache cache = { 0 };
4774 struct ulist *roots;
4775 struct ulist *tmp_ulist;
4776 int end = 0;
4777 u64 em_start = 0;
4778 u64 em_len = 0;
4779 u64 em_end = 0;
4780
4781 if (len == 0)
4782 return -EINVAL;
4783
4784 path = btrfs_alloc_path();
4785 if (!path)
4786 return -ENOMEM;
4787
4788 roots = ulist_alloc(GFP_KERNEL);
4789 tmp_ulist = ulist_alloc(GFP_KERNEL);
4790 if (!roots || !tmp_ulist) {
4791 ret = -ENOMEM;
4792 goto out_free_ulist;
4793 }
4794
4795 start = round_down(start, btrfs_inode_sectorsize(inode));
4796 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4797
	/*
	 * Lookup the last file extent.  We're not using i_size here because
	 * there might be preallocation past i_size.
	 */
4802 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
4803 0);
4804 if (ret < 0) {
4805 goto out_free_ulist;
4806 } else {
4807 WARN_ON(!ret);
4808 if (ret == 1)
4809 ret = 0;
4810 }
4811
4812 path->slots[0]--;
4813 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4814 found_type = found_key.type;
4815
	/* No extents, but there might be delalloc bits */
4817 if (found_key.objectid != btrfs_ino(inode) ||
4818 found_type != BTRFS_EXTENT_DATA_KEY) {
		/* Have to trust i_size as the end */
4820 last = (u64)-1;
4821 last_for_get_extent = isize;
4822 } else {
		/*
		 * Remember the start of the last extent.  There are a bunch
		 * of different factors that go into the length of the extent,
		 * so it's much less complex to remember where it started.
		 */
4828 last = found_key.offset;
4829 last_for_get_extent = last + 1;
4830 }
4831 btrfs_release_path(path);
4832
	/*
	 * We might have some extents allocated but more delalloc past those
	 * extents.  So, we trust isize unless the start of the last extent is
	 * beyond isize.
	 */
4838 if (last < isize) {
4839 last = (u64)-1;
4840 last_for_get_extent = isize;
4841 }
4842
4843 lock_extent_bits(&inode->io_tree, start, start + len - 1,
4844 &cached_state);
4845
4846 em = get_extent_skip_holes(inode, start, last_for_get_extent);
4847 if (!em)
4848 goto out;
4849 if (IS_ERR(em)) {
4850 ret = PTR_ERR(em);
4851 goto out;
4852 }
4853
4854 while (!end) {
4855 u64 offset_in_extent = 0;

		/* Break if the extent we found is outside the range */
4858 if (em->start >= max || extent_map_end(em) < off)
4859 break;
4860
		/*
		 * get_extent may return an extent that starts before our
		 * requested range.  We have to make sure the ranges we
		 * return to fiemap always move forward and don't overlap, so
		 * adjust the offsets here.
		 */
4867 em_start = max(em->start, off);
4868
		/*
		 * Record the offset from the start of the extent for
		 * adjusting the disk offset below.  Only do this if the
		 * extent isn't compressed, since our in-ram offset may be
		 * past what we have actually allocated on disk.
		 */
4875 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4876 offset_in_extent = em_start - em->start;
4877 em_end = extent_map_end(em);
4878 em_len = em_end - em_start;
4879 flags = 0;
4880 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4881 disko = em->block_start + offset_in_extent;
4882 else
4883 disko = 0;
4884
		/*
		 * Bump 'off' for our next call to get_extent.
		 */
4888 off = extent_map_end(em);
4889 if (off >= max)
4890 end = 1;
4891
4892 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4893 end = 1;
4894 flags |= FIEMAP_EXTENT_LAST;
4895 } else if (em->block_start == EXTENT_MAP_INLINE) {
4896 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4897 FIEMAP_EXTENT_NOT_ALIGNED);
4898 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4899 flags |= (FIEMAP_EXTENT_DELALLOC |
4900 FIEMAP_EXTENT_UNKNOWN);
4901 } else if (fieinfo->fi_extents_max) {
4902 u64 bytenr = em->block_start -
4903 (em->start - em->orig_start);
4904
			/*
			 * As btrfs supports shared space, this information
			 * can be exported to userspace tools via
			 * flag FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
			 * then we're just getting a count and we can skip the
			 * lookup stuff.
			 */
4912 ret = btrfs_check_shared(root, btrfs_ino(inode),
4913 bytenr, roots, tmp_ulist);
4914 if (ret < 0)
4915 goto out_free;
4916 if (ret)
4917 flags |= FIEMAP_EXTENT_SHARED;
4918 ret = 0;
4919 }
4920 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4921 flags |= FIEMAP_EXTENT_ENCODED;
4922 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4923 flags |= FIEMAP_EXTENT_UNWRITTEN;
4924
4925 free_extent_map(em);
4926 em = NULL;
4927 if ((em_start >= last) || em_len == (u64)-1 ||
4928 (last == (u64)-1 && isize <= em_end)) {
4929 flags |= FIEMAP_EXTENT_LAST;
4930 end = 1;
4931 }

		/* Now scan forward to see if this is really the last extent */
4934 em = get_extent_skip_holes(inode, off, last_for_get_extent);
4935 if (IS_ERR(em)) {
4936 ret = PTR_ERR(em);
4937 goto out;
4938 }
4939 if (!em) {
4940 flags |= FIEMAP_EXTENT_LAST;
4941 end = 1;
4942 }
4943 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4944 em_len, flags);
4945 if (ret) {
4946 if (ret == 1)
4947 ret = 0;
4948 goto out_free;
4949 }
4950 }
4951out_free:
4952 if (!ret)
4953 ret = emit_last_fiemap_cache(fieinfo, &cache);
4954 free_extent_map(em);
4955out:
4956 unlock_extent_cached(&inode->io_tree, start, start + len - 1,
4957 &cached_state);
4958
4959out_free_ulist:
4960 btrfs_free_path(path);
4961 ulist_free(roots);
4962 ulist_free(tmp_ulist);
4963 return ret;
4964}
4965
4966static void __free_extent_buffer(struct extent_buffer *eb)
4967{
4968 kmem_cache_free(extent_buffer_cache, eb);
4969}
4970
4971int extent_buffer_under_io(const struct extent_buffer *eb)
4972{
4973 return (atomic_read(&eb->io_pages) ||
4974 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4975 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4976}
4977
/*
 * Release all pages attached to the extent buffer.
 */
4981static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
4982{
4983 int i;
4984 int num_pages;
4985 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4986
4987 BUG_ON(extent_buffer_under_io(eb));
4988
4989 num_pages = num_extent_pages(eb);
4990 for (i = 0; i < num_pages; i++) {
4991 struct page *page = eb->pages[i];
4992
4993 if (!page)
4994 continue;
4995 if (mapped)
4996 spin_lock(&page->mapping->private_lock);
		/*
		 * We do this since we'll remove the pages after we've removed
		 * the eb from the radix tree, so we could race and have this
		 * page now attached to the new eb.  So only clear
		 * page->private if it's still connected to this eb.
		 */
5004 if (PagePrivate(page) &&
5005 page->private == (unsigned long)eb) {
5006 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
5007 BUG_ON(PageDirty(page));
5008 BUG_ON(PageWriteback(page));
			/*
			 * We need to make sure we haven't been attached to a
			 * new eb.
			 */
5013 detach_page_private(page);
5014 }
5015
5016 if (mapped)
5017 spin_unlock(&page->mapping->private_lock);

		/* One for when we allocated the page */
5020 put_page(page);
5021 }
5022}
5023
/*
 * Helper for releasing the extent buffer.
 */
5027static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
5028{
5029 btrfs_release_extent_buffer_pages(eb);
5030 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
5031 __free_extent_buffer(eb);
5032}
5033
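/* Allocate and initialize a bare extent buffer, without any backing pages */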
5034static struct extent_buffer *
5035__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
5036 unsigned long len)
5037{
5038 struct extent_buffer *eb = NULL;
5039
5040 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
5041 eb->start = start;
5042 eb->len = len;
5043 eb->fs_info = fs_info;
5044 eb->bflags = 0;
5045 init_rwsem(&eb->lock);
5046
5047 btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
5048 &fs_info->allocated_ebs);
5049
5050 spin_lock_init(&eb->refs_lock);
5051 atomic_set(&eb->refs, 1);
5052 atomic_set(&eb->io_pages, 0);
5053
5054 ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
5055
5056 return eb;
5057}
5058
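/*
 * Make a private, unmapped copy of @src backed by freshly allocated pages,
 * so callers can work on a stable snapshot of a tree block's contents.
 */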
5059struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
5060{
5061 int i;
5062 struct page *p;
5063 struct extent_buffer *new;
5064 int num_pages = num_extent_pages(src);
5065
5066 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
5067 if (new == NULL)
5068 return NULL;
5069
5070 for (i = 0; i < num_pages; i++) {
5071 p = alloc_page(GFP_NOFS);
5072 if (!p) {
5073 btrfs_release_extent_buffer(new);
5074 return NULL;
5075 }
5076 attach_extent_buffer_page(new, p);
5077 WARN_ON(PageDirty(p));
5078 SetPageUptodate(p);
5079 new->pages[i] = p;
5080 copy_page(page_address(p), page_address(src->pages[i]));
5081 }
5082
5083 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
5084 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
5085
5086 return new;
5087}
5088
5089struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5090 u64 start, unsigned long len)
5091{
5092 struct extent_buffer *eb;
5093 int num_pages;
5094 int i;
5095
5096 eb = __alloc_extent_buffer(fs_info, start, len);
5097 if (!eb)
5098 return NULL;
5099
5100 num_pages = num_extent_pages(eb);
5101 for (i = 0; i < num_pages; i++) {
5102 eb->pages[i] = alloc_page(GFP_NOFS);
5103 if (!eb->pages[i])
5104 goto err;
5105 }
5106 set_extent_buffer_uptodate(eb);
5107 btrfs_set_header_nritems(eb, 0);
5108 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5109
5110 return eb;
5111err:
5112 for (; i > 0; i--)
5113 __free_page(eb->pages[i - 1]);
5114 __free_extent_buffer(eb);
5115 return NULL;
5116}
5117
5118struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5119 u64 start)
5120{
5121 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
5122}
5123
5124static void check_buffer_tree_ref(struct extent_buffer *eb)
5125{
5126 int refs;
5127
	/*
	 * The TREE_REF bit is first set when the extent_buffer is added to
	 * the radix tree.  It is also reset, if unset, when a new reference
	 * is created by find_extent_buffer.
	 *
	 * It is only cleared in two cases: freeing the last non-tree
	 * reference to the extent_buffer when its STALE bit is set, or
	 * calling releasepage when the tree reference is the only reference.
	 *
	 * In both cases, care is taken to ensure that the extent_buffer's
	 * pages are not under io.  However, releasepage can be concurrently
	 * called with creating new references, which is prone to race
	 * conditions between the calls to check_buffer_tree_ref in those
	 * codepaths and clearing TREE_REF in try_release_extent_buffer.
	 *
	 * The actual lifetime of the extent_buffer in the radix tree is
	 * adequately protected by the refcount, but the TREE_REF bit and its
	 * corresponding reference are not.  To protect against this class of
	 * races, we call check_buffer_tree_ref from the codepaths which
	 * trigger io.  Note that once io is initiated, TREE_REF can no
	 * longer be cleared, so that is the moment at which any such race is
	 * best fixed.
	 */
5150 refs = atomic_read(&eb->refs);
5151 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5152 return;
5153
5154 spin_lock(&eb->refs_lock);
5155 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5156 atomic_inc(&eb->refs);
5157 spin_unlock(&eb->refs_lock);
5158}
5159
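/*
 * Take a tree ref if needed and mark all the eb's pages accessed for the
 * LRU, except @accessed, which the caller has already touched.
 */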
5160static void mark_extent_buffer_accessed(struct extent_buffer *eb,
5161 struct page *accessed)
5162{
5163 int num_pages, i;
5164
5165 check_buffer_tree_ref(eb);
5166
5167 num_pages = num_extent_pages(eb);
5168 for (i = 0; i < num_pages; i++) {
5169 struct page *p = eb->pages[i];
5170
5171 if (p != accessed)
5172 mark_page_accessed(p);
5173 }
5174}
5175
5176struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
5177 u64 start)
5178{
5179 struct extent_buffer *eb;
5180
5181 rcu_read_lock();
5182 eb = radix_tree_lookup(&fs_info->buffer_radix,
5183 start >> fs_info->sectorsize_bits);
5184 if (eb && atomic_inc_not_zero(&eb->refs)) {
5185 rcu_read_unlock();
		/*
		 * Lock our eb's refs_lock to avoid races with
		 * free_extent_buffer().  When we get our eb it might be
		 * flagged with EXTENT_BUFFER_STALE and another task running
		 * free_extent_buffer() might have seen that flag set,
		 * eb->refs == 2, that the buffer isn't under IO (dirty and
		 * writeback flags not set) and that it's still in the tree
		 * (flag EXTENT_BUFFER_TREE_REF set), therefore being in the
		 * process of decrementing the extent buffer's reference count
		 * twice.  So here we could race and increment the eb's
		 * reference count, clear its stale flag, mark it as dirty and
		 * drop our reference before the other task finishes executing
		 * free_extent_buffer, which would later result in an attempt
		 * to free an extent buffer that is dirty.
		 */
5201 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
5202 spin_lock(&eb->refs_lock);
5203 spin_unlock(&eb->refs_lock);
5204 }
5205 mark_extent_buffer_accessed(eb, NULL);
5206 return eb;
5207 }
5208 rcu_read_unlock();
5209
5210 return NULL;
5211}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
					       u64 start)
{
	struct extent_buffer *eb, *exists = NULL;
	int ret;

	eb = find_extent_buffer(fs_info, start);
	if (eb)
		return eb;
	eb = alloc_dummy_extent_buffer(fs_info, start);
	if (!eb)
		return ERR_PTR(-ENOMEM);
	eb->fs_info = fs_info;
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		exists = ERR_PTR(ret);
		goto free_eb;
	}
	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> fs_info->sectorsize_bits, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}
	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	return eb;
free_eb:
	btrfs_release_extent_buffer(eb);
	return exists;
}
#endif

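/*
 * Find an extent buffer for the given bytenr or create a new one backed by
 * pages from the btree inode's page cache and insert it into the buffer
 * radix tree.  Returns an ERR_PTR on bad alignment or allocation failure;
 * if we race with another allocator the existing buffer is returned instead.
 */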
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start, u64 owner_root, int level)
{
	unsigned long len = fs_info->nodesize;
	int num_pages;
	int i;
	unsigned long index = start >> PAGE_SHIFT;
	struct extent_buffer *eb;
	struct extent_buffer *exists = NULL;
	struct page *p;
	struct address_space *mapping = fs_info->btree_inode->i_mapping;
	int uptodate = 1;
	int ret;

	if (!IS_ALIGNED(start, fs_info->sectorsize)) {
		btrfs_err(fs_info, "bad tree block start %llu", start);
		return ERR_PTR(-EINVAL);
	}

	if (fs_info->sectorsize < PAGE_SIZE &&
	    offset_in_page(start) + len > PAGE_SIZE) {
		btrfs_err(fs_info,
		"tree block crosses page boundary, start %llu nodesize %lu",
			  start, len);
		return ERR_PTR(-EINVAL);
	}

	eb = find_extent_buffer(fs_info, start);
	if (eb)
		return eb;

	eb = __alloc_extent_buffer(fs_info, start, len);
	if (!eb)
		return ERR_PTR(-ENOMEM);
	btrfs_set_buffer_lockdep_class(owner_root, eb, level);

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++, index++) {
		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
		if (!p) {
			exists = ERR_PTR(-ENOMEM);
			goto free_eb;
		}

		spin_lock(&mapping->private_lock);
		if (PagePrivate(p)) {
			/*
			 * We could have already allocated an eb for this page
			 * and attached one so lets see if we can get a ref on
			 * the existing eb, and if we can we know it's good and
			 * we can just return that one, else we know we can
			 * just overwrite page->private.
			 */
			exists = (struct extent_buffer *)p->private;
			if (atomic_inc_not_zero(&exists->refs)) {
				spin_unlock(&mapping->private_lock);
				unlock_page(p);
				put_page(p);
				mark_extent_buffer_accessed(exists, p);
				goto free_eb;
			}
			exists = NULL;

			WARN_ON(PageDirty(p));
			detach_page_private(p);
		}
		attach_extent_buffer_page(eb, p);
		spin_unlock(&mapping->private_lock);
		WARN_ON(PageDirty(p));
		eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;

		/*
		 * We can't unlock the pages just yet since the extent buffer
		 * hasn't been properly inserted in the radix tree, this
		 * opens a race with btree_releasepage which can free a page
		 * while we are still filling in all pages for the buffer and
		 * we could crash.
		 */
	}
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		exists = ERR_PTR(ret);
		goto free_eb;
	}

	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> fs_info->sectorsize_bits, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}

	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	/*
	 * Now it's safe to unlock the pages because any calls to
	 * btree_releasepage will correctly detect that a page belongs to a
	 * live buffer and won't free them prematurely.
	 */
	for (i = 0; i < num_pages; i++)
		unlock_page(eb->pages[i]);
	return eb;

free_eb:
	WARN_ON(!atomic_dec_and_test(&eb->refs));
	for (i = 0; i < num_pages; i++) {
		if (eb->pages[i])
			unlock_page(eb->pages[i]);
	}

	btrfs_release_extent_buffer(eb);
	return exists;
}

static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
	struct extent_buffer *eb =
			container_of(head, struct extent_buffer, rcu_head);

	__free_extent_buffer(eb);
}

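/* Expects eb->refs_lock to be held; drops it on all paths. */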
static int release_extent_buffer(struct extent_buffer *eb)
	__releases(&eb->refs_lock)
{
	lockdep_assert_held(&eb->refs_lock);

	WARN_ON(atomic_read(&eb->refs) == 0);
	if (atomic_dec_and_test(&eb->refs)) {
		if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
			struct btrfs_fs_info *fs_info = eb->fs_info;

			spin_unlock(&eb->refs_lock);

			spin_lock(&fs_info->buffer_lock);
			radix_tree_delete(&fs_info->buffer_radix,
					  eb->start >> fs_info->sectorsize_bits);
			spin_unlock(&fs_info->buffer_lock);
		} else {
			spin_unlock(&eb->refs_lock);
		}

		btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);

		/* Should be safe to release our pages at this point */
		btrfs_release_extent_buffer_pages(eb);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
		if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
			__free_extent_buffer(eb);
			return 1;
		}
#endif
		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
		return 1;
	}
	spin_unlock(&eb->refs_lock);

	return 0;
}

void free_extent_buffer(struct extent_buffer *eb)
{
	int refs;
	int old;
	if (!eb)
		return;

	while (1) {
		refs = atomic_read(&eb->refs);
		if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
		    || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
			refs == 1))
			break;
		old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
		if (old == refs)
			return;
	}

	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) == 2 &&
	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
	    !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);

	/*
	 * I know this is terrible, but it's temporary until we stop tracking
	 * the uptodate bits and such for the extent buffers.
	 */
	release_extent_buffer(eb);
}

void free_extent_buffer_stale(struct extent_buffer *eb)
{
	if (!eb)
		return;

	spin_lock(&eb->refs_lock);
	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);

	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);
	release_extent_buffer(eb);
}

void clear_extent_buffer_dirty(const struct extent_buffer *eb)
{
	int i;
	int num_pages;
	struct page *page;

	num_pages = num_extent_pages(eb);

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageDirty(page))
			continue;

		lock_page(page);
		WARN_ON(!PagePrivate(page));

		clear_page_dirty_for_io(page);
		xa_lock_irq(&page->mapping->i_pages);
		if (!PageDirty(page))
			__xa_clear_mark(&page->mapping->i_pages,
					page_index(page), PAGECACHE_TAG_DIRTY);
		xa_unlock_irq(&page->mapping->i_pages);
		ClearPageError(page);
		unlock_page(page);
	}
	WARN_ON(atomic_read(&eb->refs) == 0);
}

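/*
 * Mark the extent buffer and all of its pages dirty.  Returns true if the
 * buffer was already dirty, so callers can tell whether this is a redirty.
 */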
bool set_extent_buffer_dirty(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	bool was_dirty;

	check_buffer_tree_ref(eb);

	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);

	num_pages = num_extent_pages(eb);
	WARN_ON(atomic_read(&eb->refs) == 0);
	WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));

	if (!was_dirty)
		for (i = 0; i < num_pages; i++)
			set_page_dirty(eb->pages[i]);

#ifdef CONFIG_BTRFS_DEBUG
	for (i = 0; i < num_pages; i++)
		ASSERT(PageDirty(eb->pages[i]));
#endif

	return was_dirty;
}

void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (page)
			ClearPageUptodate(page);
	}
}

void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		SetPageUptodate(page);
	}
}

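/*
 * Read the pages of an extent buffer from disk.  With WAIT_NONE the pages
 * are only trylocked and the reads are fired off asynchronously; with
 * WAIT_COMPLETE we wait for every page to be unlocked and return -EIO if
 * any of them failed to become uptodate.
 */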
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
{
	int i;
	struct page *page;
	int err;
	int ret = 0;
	int locked_pages = 0;
	int all_uptodate = 1;
	int num_pages;
	unsigned long num_reads = 0;
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		return 0;

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (wait == WAIT_NONE) {
			if (!trylock_page(page))
				goto unlock_exit;
		} else {
			lock_page(page);
		}
		locked_pages++;
	}
	/*
	 * We need to firstly lock all pages to make sure that
	 * the uptodate bit of our pages won't be affected by
	 * clear_extent_buffer_uptodate().
	 */
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageUptodate(page)) {
			num_reads++;
			all_uptodate = 0;
		}
	}

	if (all_uptodate) {
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
		goto unlock_exit;
	}

	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
	eb->read_mirror = 0;
	atomic_set(&eb->io_pages, num_reads);
	/*
	 * It is possible for releasepage to clear the TREE_REF bit before we
	 * set io_pages. See check_buffer_tree_ref for a more detailed comment.
	 */
	check_buffer_tree_ref(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];

		if (!PageUptodate(page)) {
			if (ret) {
				atomic_dec(&eb->io_pages);
				unlock_page(page);
				continue;
			}

			ClearPageError(page);
			err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
					 page, page_offset(page), PAGE_SIZE, 0,
					 &bio, end_bio_extent_readpage,
					 mirror_num, 0, 0, false);
			if (err) {
				/*
				 * We failed to submit the bio so it's the
				 * caller's responsibility to perform cleanup
				 * i.e unlock page/set error bit.
				 */
				ret = err;
				SetPageError(page);
				unlock_page(page);
				atomic_dec(&eb->io_pages);
			}
		} else {
			unlock_page(page);
		}
	}

	if (bio) {
		err = submit_one_bio(bio, mirror_num, bio_flags);
		if (err)
			return err;
	}

	if (ret || wait != WAIT_COMPLETE)
		return ret;

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			ret = -EIO;
	}

	return ret;

unlock_exit:
	while (locked_pages > 0) {
		locked_pages--;
		page = eb->pages[locked_pages];
		unlock_page(page);
	}
	return ret;
}

static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
			    unsigned long len)
{
	btrfs_warn(eb->fs_info,
		"access to eb bytenr %llu len %lu out of range start %lu len %lu",
		   eb->start, eb->len, start, len);
	WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));

	return true;
}

/*
 * Check if the [start, start + len) range is valid before reading/writing
 * the eb.
 * NOTE: @start and @len are offset inside the eb, not logical address.
 *
 * Caller should not touch the dst/src memory if this function returns error.
 */
static inline int check_eb_range(const struct extent_buffer *eb,
				 unsigned long start, unsigned long len)
{
	unsigned long offset;

	/* start, start + len should not go beyond eb->len nor overflow */
	if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len))
		return report_eb_range(eb, start, len);

	return false;
}

void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
			unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *dst = (char *)dstv;
	unsigned long i = get_eb_page_index(start);

	if (check_eb_range(eb, start, len))
		return;

	offset = get_eb_offset_in_page(eb, start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));
		kaddr = page_address(page);
		memcpy(dst, kaddr + offset, cur);

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
				       void __user *dstv,
				       unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char __user *dst = (char __user *)dstv;
	unsigned long i = get_eb_page_index(start);
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = get_eb_offset_in_page(eb, start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));
		kaddr = page_address(page);
		if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
			ret = -EFAULT;
			break;
		}

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}

	return ret;
}

int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *ptr = (char *)ptrv;
	unsigned long i = get_eb_page_index(start);
	int ret = 0;

	if (check_eb_range(eb, start, len))
		return -EINVAL;

	offset = get_eb_offset_in_page(eb, start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));

		kaddr = page_address(page);
		ret = memcmp(ptr, kaddr + offset, cur);
		if (ret)
			break;

		ptr += cur;
		len -= cur;
		offset = 0;
		i++;
	}
	return ret;
}

void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
					 const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]) + get_eb_offset_in_page(eb, 0);
	memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
	       BTRFS_FSID_SIZE);
}

void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]) + get_eb_offset_in_page(eb, 0);
	memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
	       BTRFS_FSID_SIZE);
}

void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	unsigned long i = get_eb_page_index(start);

	if (check_eb_range(eb, start, len))
		return;

	offset = get_eb_offset_in_page(eb, start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memcpy(kaddr + offset, src, cur);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
			   unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	unsigned long i = get_eb_page_index(start);

	if (check_eb_range(eb, start, len))
		return;

	offset = get_eb_offset_in_page(eb, start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memset(kaddr + offset, 0, cur);

		len -= cur;
		offset = 0;
		i++;
	}
}

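/*
 * Copy the whole source buffer into the destination buffer.  Both must have
 * the same length; for subpage (sectorsize < PAGE_SIZE) buffers everything
 * lives in a single page, so only the offsets inside that page may differ.
 */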
void copy_extent_buffer_full(const struct extent_buffer *dst,
			     const struct extent_buffer *src)
{
	int i;
	int num_pages;

	ASSERT(dst->len == src->len);

	if (dst->fs_info->sectorsize == PAGE_SIZE) {
		num_pages = num_extent_pages(dst);
		for (i = 0; i < num_pages; i++)
			copy_page(page_address(dst->pages[i]),
				  page_address(src->pages[i]));
	} else {
		size_t src_offset = get_eb_offset_in_page(src, 0);
		size_t dst_offset = get_eb_offset_in_page(dst, 0);

		ASSERT(src->fs_info->sectorsize < PAGE_SIZE);
		memcpy(page_address(dst->pages[0]) + dst_offset,
		       page_address(src->pages[0]) + src_offset,
		       src->len);
	}
}

void copy_extent_buffer(const struct extent_buffer *dst,
			const struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	unsigned long i = get_eb_page_index(dst_offset);

	if (check_eb_range(dst, dst_offset, len) ||
	    check_eb_range(src, src_offset, len))
		return;

	WARN_ON(src->len != dst_len);

	offset = get_eb_offset_in_page(dst, dst_offset);

	while (len > 0) {
		page = dst->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, (unsigned long)(PAGE_SIZE - offset));

		kaddr = page_address(page);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

/*
 * eb_bitmap_offset - calculate the page and offset of the byte containing the
 * given bit number
 * @eb:          the extent buffer
 * @start:       offset of the bitmap item in the extent buffer
 * @nr:          bit number
 * @page_index:  return index of the page in the extent buffer that contains
 *               the given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer
 * which contains a given bit.
 */
static inline void eb_bitmap_offset(const struct extent_buffer *eb,
				    unsigned long start, unsigned long nr,
				    unsigned long *page_index,
				    size_t *page_offset)
{
	size_t byte_offset = BIT_BYTE(nr);
	size_t offset;

	/*
	 * The byte we want is the offset of the extent buffer's start within
	 * the page, plus the offset of the bitmap item in the extent buffer,
	 * plus the offset of the byte in the bitmap item.
	 */
	offset = start + offset_in_page(eb->start) + byte_offset;

	*page_index = offset >> PAGE_SHIFT;
	*page_offset = offset_in_page(offset);
}

/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb:    the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr:    bit number to test
 */
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;

	eb_bitmap_offset(eb, start, nr, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);
	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
}

/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb:    the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos:   bit number of the first bit
 * @len:   number of bits to set
 */
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	while (len >= bits_to_set) {
		kaddr[offset] |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] |= mask_to_set;
	}
}

/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb:    the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos:   bit number of the first bit
 * @len:   number of bits to clear
 */
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
				unsigned long start, unsigned long pos,
				unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	while (len >= bits_to_clear) {
		kaddr[offset] &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_BYTE;
		mask_to_clear = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] &= ~mask_to_clear;
	}
}

static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

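/*
 * Copy @len bytes from @src_page into @dst_page, falling back to memmove()
 * when both offsets land in the same page and the ranges overlap.
 */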
static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = page_address(dst_page);
	char *src_kaddr;
	int must_memmove = 0;

	if (dst_page != src_page) {
		src_kaddr = page_address(src_page);
	} else {
		src_kaddr = dst_kaddr;
		if (areas_overlap(src_off, dst_off, len))
			must_memmove = 1;
	}

	if (must_memmove)
		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
	else
		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}

void memcpy_extent_buffer(const struct extent_buffer *dst,
			  unsigned long dst_offset, unsigned long src_offset,
			  unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_i;
	unsigned long src_i;

	if (check_eb_range(dst, dst_offset, len) ||
	    check_eb_range(dst, src_offset, len))
		return;

	while (len > 0) {
		dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
		src_off_in_page = get_eb_offset_in_page(dst, src_offset);

		dst_i = get_eb_page_index(dst_offset);
		src_i = get_eb_page_index(src_offset);

		cur = min(len, (unsigned long)(PAGE_SIZE - src_off_in_page));
		cur = min_t(unsigned long, cur,
			    (unsigned long)(PAGE_SIZE - dst_off_in_page));

		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}

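/*
 * Overlap-safe move inside one extent buffer: a forward move is delegated to
 * memcpy_extent_buffer(), otherwise we copy backwards, page by page.
 */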
void memmove_extent_buffer(const struct extent_buffer *dst,
			   unsigned long dst_offset, unsigned long src_offset,
			   unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	unsigned long dst_i;
	unsigned long src_i;

	if (check_eb_range(dst, dst_offset, len) ||
	    check_eb_range(dst, src_offset, len))
		return;
	if (dst_offset < src_offset) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while (len > 0) {
		dst_i = get_eb_page_index(dst_end);
		src_i = get_eb_page_index(src_end);

		dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
		src_off_in_page = get_eb_offset_in_page(dst, src_end);

		cur = min_t(unsigned long, len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur;
		src_end -= cur;
		len -= cur;
	}
}

int try_release_extent_buffer(struct page *page)
{
	struct extent_buffer *eb;

	/*
	 * We need to make sure nobody is attaching this page to an eb right
	 * now.
	 */
	spin_lock(&page->mapping->private_lock);
	if (!PagePrivate(page)) {
		spin_unlock(&page->mapping->private_lock);
		return 1;
	}

	eb = (struct extent_buffer *)page->private;
	BUG_ON(!eb);

	/*
	 * This is a little awkward, page->mapping->private_lock is actually
	 * protecting the private pointer, not the eb.  Take eb->refs_lock
	 * while still holding it so the buffer cannot be freed or put under
	 * IO behind our back.
	 */
	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
		spin_unlock(&eb->refs_lock);
		spin_unlock(&page->mapping->private_lock);
		return 0;
	}
	spin_unlock(&page->mapping->private_lock);

	/*
	 * If tree ref isn't set then we know the ref on this eb is a real ref,
	 * so just return, this page will likely be freed soon anyway.
	 */
	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
		spin_unlock(&eb->refs_lock);
		return 0;
	}

	return release_extent_buffer(eb);
}

/*
 * btrfs_readahead_tree_block - attempt to readahead a child block
 * @fs_info:    the fs_info
 * @bytenr:     bytenr to read
 * @owner_root: objectid of the root that owns this eb
 * @gen:        generation for the uptodate check, can be 0
 * @level:      level for the eb
 *
 * Attempt to readahead a tree block at @bytenr.  If @gen is 0 then we do a
 * normal uptodate check of the eb, without checking the generation.  If we
 * have to read the block we will not block on anything.
 */
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
				u64 bytenr, u64 owner_root, u64 gen, int level)
{
	struct extent_buffer *eb;
	int ret;

	eb = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
	if (IS_ERR(eb))
		return;

	if (btrfs_buffer_uptodate(eb, gen, 1)) {
		free_extent_buffer(eb);
		return;
	}

	ret = read_extent_buffer_pages(eb, WAIT_NONE, 0);
	if (ret < 0)
		free_extent_buffer_stale(eb);
	else
		free_extent_buffer(eb);
}

/*
 * btrfs_readahead_node_child - readahead a node's child block
 * @node:  parent node we're reading from
 * @slot:  slot in the parent node for the child we want to read
 *
 * A helper for btrfs_readahead_tree_block, we simply read the bytenr pointed
 * at the slot in the node provided.
 */
void btrfs_readahead_node_child(struct extent_buffer *node, int slot)
{
	btrfs_readahead_tree_block(node->fs_info,
				   btrfs_node_blockptr(node, slot),
				   btrfs_header_owner(node),
				   btrfs_node_ptr_generation(node, slot),
				   btrfs_header_level(node) - 1);
}