1
2
3#include <linux/bitops.h>
4#include <linux/slab.h>
5#include <linux/bio.h>
6#include <linux/mm.h>
7#include <linux/pagemap.h>
8#include <linux/page-flags.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
12#include <linux/writeback.h>
13#include <linux/pagevec.h>
14#include <linux/prefetch.h>
15#include <linux/fsverity.h>
16#include "misc.h"
17#include "extent_io.h"
18#include "extent-io-tree.h"
19#include "extent_map.h"
20#include "ctree.h"
21#include "btrfs_inode.h"
22#include "volumes.h"
23#include "check-integrity.h"
24#include "locking.h"
25#include "rcu-string.h"
26#include "backref.h"
27#include "disk-io.h"
28#include "subpage.h"
29#include "zoned.h"
30#include "block-group.h"
31
32static struct kmem_cache *extent_state_cache;
33static struct kmem_cache *extent_buffer_cache;
34static struct bio_set btrfs_bioset;
35
36static inline bool extent_state_in_tree(const struct extent_state *state)
37{
38 return !RB_EMPTY_NODE(&state->rb_node);
39}
40
41#ifdef CONFIG_BTRFS_DEBUG
42static LIST_HEAD(states);
43static DEFINE_SPINLOCK(leak_lock);
44
45static inline void btrfs_leak_debug_add(spinlock_t *lock,
46 struct list_head *new,
47 struct list_head *head)
48{
49 unsigned long flags;
50
51 spin_lock_irqsave(lock, flags);
52 list_add(new, head);
53 spin_unlock_irqrestore(lock, flags);
54}
55
56static inline void btrfs_leak_debug_del(spinlock_t *lock,
57 struct list_head *entry)
58{
59 unsigned long flags;
60
61 spin_lock_irqsave(lock, flags);
62 list_del(entry);
63 spin_unlock_irqrestore(lock, flags);
64}
65
66void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
67{
68 struct extent_buffer *eb;
69 unsigned long flags;
70
71
72
73
74
75 if (!fs_info->allocated_ebs.next)
76 return;
77
78 spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
79 while (!list_empty(&fs_info->allocated_ebs)) {
80 eb = list_first_entry(&fs_info->allocated_ebs,
81 struct extent_buffer, leak_list);
82 pr_err(
83 "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
84 eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
85 btrfs_header_owner(eb));
86 list_del(&eb->leak_list);
87 kmem_cache_free(extent_buffer_cache, eb);
88 }
89 spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
90}
91
92static inline void btrfs_extent_state_leak_debug_check(void)
93{
94 struct extent_state *state;
95
96 while (!list_empty(&states)) {
97 state = list_entry(states.next, struct extent_state, leak_list);
98 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
99 state->start, state->end, state->state,
100 extent_state_in_tree(state),
101 refcount_read(&state->refs));
102 list_del(&state->leak_list);
103 kmem_cache_free(extent_state_cache, state);
104 }
105}
106
107#define btrfs_debug_check_extent_io_range(tree, start, end) \
108 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
109static inline void __btrfs_debug_check_extent_io_range(const char *caller,
110 struct extent_io_tree *tree, u64 start, u64 end)
111{
112 struct inode *inode = tree->private_data;
113 u64 isize;
114
115 if (!inode || !is_data_inode(inode))
116 return;
117
118 isize = i_size_read(inode);
119 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
120 btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
121 "%s: ino %llu isize %llu odd range [%llu,%llu]",
122 caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
123 }
124}
125#else
126#define btrfs_leak_debug_add(lock, new, head) do {} while (0)
127#define btrfs_leak_debug_del(lock, entry) do {} while (0)
128#define btrfs_extent_state_leak_debug_check() do {} while (0)
129#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
130#endif
131
132struct tree_entry {
133 u64 start;
134 u64 end;
135 struct rb_node rb_node;
136};
137
138struct extent_page_data {
139 struct btrfs_bio_ctrl bio_ctrl;
140
141
142
143 unsigned int extent_locked:1;
144
145
146 unsigned int sync_io:1;
147};
148
149static int add_extent_changeset(struct extent_state *state, u32 bits,
150 struct extent_changeset *changeset,
151 int set)
152{
153 int ret;
154
155 if (!changeset)
156 return 0;
157 if (set && (state->state & bits) == bits)
158 return 0;
159 if (!set && (state->state & bits) == 0)
160 return 0;
161 changeset->bytes_changed += state->end - state->start + 1;
162 ret = ulist_add(&changeset->range_changed, state->start, state->end,
163 GFP_ATOMIC);
164 return ret;
165}
166
167int __must_check submit_one_bio(struct bio *bio, int mirror_num,
168 unsigned long bio_flags)
169{
170 blk_status_t ret = 0;
171 struct extent_io_tree *tree = bio->bi_private;
172
173 bio->bi_private = NULL;
174
175
176 ASSERT(bio->bi_iter.bi_size);
177 if (is_data_inode(tree->private_data))
178 ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
179 bio_flags);
180 else
181 ret = btrfs_submit_metadata_bio(tree->private_data, bio,
182 mirror_num, bio_flags);
183
184 return blk_status_to_errno(ret);
185}
186
187
188static void end_write_bio(struct extent_page_data *epd, int ret)
189{
190 struct bio *bio = epd->bio_ctrl.bio;
191
192 if (bio) {
193 bio->bi_status = errno_to_blk_status(ret);
194 bio_endio(bio);
195 epd->bio_ctrl.bio = NULL;
196 }
197}
198
199
200
201
202
203
204
205static int __must_check flush_write_bio(struct extent_page_data *epd)
206{
207 int ret = 0;
208 struct bio *bio = epd->bio_ctrl.bio;
209
210 if (bio) {
211 ret = submit_one_bio(bio, 0, 0);
212
213
214
215
216
217
218
219 epd->bio_ctrl.bio = NULL;
220 }
221 return ret;
222}
223
224int __init extent_state_cache_init(void)
225{
226 extent_state_cache = kmem_cache_create("btrfs_extent_state",
227 sizeof(struct extent_state), 0,
228 SLAB_MEM_SPREAD, NULL);
229 if (!extent_state_cache)
230 return -ENOMEM;
231 return 0;
232}
233
234int __init extent_io_init(void)
235{
236 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
237 sizeof(struct extent_buffer), 0,
238 SLAB_MEM_SPREAD, NULL);
239 if (!extent_buffer_cache)
240 return -ENOMEM;
241
242 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
243 offsetof(struct btrfs_bio, bio),
244 BIOSET_NEED_BVECS))
245 goto free_buffer_cache;
246
247 if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
248 goto free_bioset;
249
250 return 0;
251
252free_bioset:
253 bioset_exit(&btrfs_bioset);
254
255free_buffer_cache:
256 kmem_cache_destroy(extent_buffer_cache);
257 extent_buffer_cache = NULL;
258 return -ENOMEM;
259}
260
261void __cold extent_state_cache_exit(void)
262{
263 btrfs_extent_state_leak_debug_check();
264 kmem_cache_destroy(extent_state_cache);
265}
266
267void __cold extent_io_exit(void)
268{
269
270
271
272
273 rcu_barrier();
274 kmem_cache_destroy(extent_buffer_cache);
275 bioset_exit(&btrfs_bioset);
276}
277
278
279
280
281
282
283
284
285static struct lock_class_key file_extent_tree_class;
286
287void extent_io_tree_init(struct btrfs_fs_info *fs_info,
288 struct extent_io_tree *tree, unsigned int owner,
289 void *private_data)
290{
291 tree->fs_info = fs_info;
292 tree->state = RB_ROOT;
293 tree->dirty_bytes = 0;
294 spin_lock_init(&tree->lock);
295 tree->private_data = private_data;
296 tree->owner = owner;
297 if (owner == IO_TREE_INODE_FILE_EXTENT)
298 lockdep_set_class(&tree->lock, &file_extent_tree_class);
299}
300
301void extent_io_tree_release(struct extent_io_tree *tree)
302{
303 spin_lock(&tree->lock);
304
305
306
307
308
309 smp_mb();
310 while (!RB_EMPTY_ROOT(&tree->state)) {
311 struct rb_node *node;
312 struct extent_state *state;
313
314 node = rb_first(&tree->state);
315 state = rb_entry(node, struct extent_state, rb_node);
316 rb_erase(&state->rb_node, &tree->state);
317 RB_CLEAR_NODE(&state->rb_node);
318
319
320
321
322 ASSERT(!waitqueue_active(&state->wq));
323 free_extent_state(state);
324
325 cond_resched_lock(&tree->lock);
326 }
327 spin_unlock(&tree->lock);
328}
329
330static struct extent_state *alloc_extent_state(gfp_t mask)
331{
332 struct extent_state *state;
333
334
335
336
337
338 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
339 state = kmem_cache_alloc(extent_state_cache, mask);
340 if (!state)
341 return state;
342 state->state = 0;
343 state->failrec = NULL;
344 RB_CLEAR_NODE(&state->rb_node);
345 btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
346 refcount_set(&state->refs, 1);
347 init_waitqueue_head(&state->wq);
348 trace_alloc_extent_state(state, mask, _RET_IP_);
349 return state;
350}
351
352void free_extent_state(struct extent_state *state)
353{
354 if (!state)
355 return;
356 if (refcount_dec_and_test(&state->refs)) {
357 WARN_ON(extent_state_in_tree(state));
358 btrfs_leak_debug_del(&leak_lock, &state->leak_list);
359 trace_free_extent_state(state, _RET_IP_);
360 kmem_cache_free(extent_state_cache, state);
361 }
362}
363
364static struct rb_node *tree_insert(struct rb_root *root,
365 struct rb_node *search_start,
366 u64 offset,
367 struct rb_node *node,
368 struct rb_node ***p_in,
369 struct rb_node **parent_in)
370{
371 struct rb_node **p;
372 struct rb_node *parent = NULL;
373 struct tree_entry *entry;
374
375 if (p_in && parent_in) {
376 p = *p_in;
377 parent = *parent_in;
378 goto do_insert;
379 }
380
381 p = search_start ? &search_start : &root->rb_node;
382 while (*p) {
383 parent = *p;
384 entry = rb_entry(parent, struct tree_entry, rb_node);
385
386 if (offset < entry->start)
387 p = &(*p)->rb_left;
388 else if (offset > entry->end)
389 p = &(*p)->rb_right;
390 else
391 return parent;
392 }
393
394do_insert:
395 rb_link_node(node, parent, p);
396 rb_insert_color(node, root);
397 return NULL;
398}
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
419 struct rb_node **next_ret,
420 struct rb_node **prev_ret,
421 struct rb_node ***p_ret,
422 struct rb_node **parent_ret)
423{
424 struct rb_root *root = &tree->state;
425 struct rb_node **n = &root->rb_node;
426 struct rb_node *prev = NULL;
427 struct rb_node *orig_prev = NULL;
428 struct tree_entry *entry;
429 struct tree_entry *prev_entry = NULL;
430
431 while (*n) {
432 prev = *n;
433 entry = rb_entry(prev, struct tree_entry, rb_node);
434 prev_entry = entry;
435
436 if (offset < entry->start)
437 n = &(*n)->rb_left;
438 else if (offset > entry->end)
439 n = &(*n)->rb_right;
440 else
441 return *n;
442 }
443
444 if (p_ret)
445 *p_ret = n;
446 if (parent_ret)
447 *parent_ret = prev;
448
449 if (next_ret) {
450 orig_prev = prev;
451 while (prev && offset > prev_entry->end) {
452 prev = rb_next(prev);
453 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
454 }
455 *next_ret = prev;
456 prev = orig_prev;
457 }
458
459 if (prev_ret) {
460 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
461 while (prev && offset < prev_entry->start) {
462 prev = rb_prev(prev);
463 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
464 }
465 *prev_ret = prev;
466 }
467 return NULL;
468}
469
470static inline struct rb_node *
471tree_search_for_insert(struct extent_io_tree *tree,
472 u64 offset,
473 struct rb_node ***p_ret,
474 struct rb_node **parent_ret)
475{
476 struct rb_node *next= NULL;
477 struct rb_node *ret;
478
479 ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
480 if (!ret)
481 return next;
482 return ret;
483}
484
485static inline struct rb_node *tree_search(struct extent_io_tree *tree,
486 u64 offset)
487{
488 return tree_search_for_insert(tree, offset, NULL, NULL);
489}
490
491
492
493
494
495
496
497
498
499
500static void merge_state(struct extent_io_tree *tree,
501 struct extent_state *state)
502{
503 struct extent_state *other;
504 struct rb_node *other_node;
505
506 if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
507 return;
508
509 other_node = rb_prev(&state->rb_node);
510 if (other_node) {
511 other = rb_entry(other_node, struct extent_state, rb_node);
512 if (other->end == state->start - 1 &&
513 other->state == state->state) {
514 if (tree->private_data &&
515 is_data_inode(tree->private_data))
516 btrfs_merge_delalloc_extent(tree->private_data,
517 state, other);
518 state->start = other->start;
519 rb_erase(&other->rb_node, &tree->state);
520 RB_CLEAR_NODE(&other->rb_node);
521 free_extent_state(other);
522 }
523 }
524 other_node = rb_next(&state->rb_node);
525 if (other_node) {
526 other = rb_entry(other_node, struct extent_state, rb_node);
527 if (other->start == state->end + 1 &&
528 other->state == state->state) {
529 if (tree->private_data &&
530 is_data_inode(tree->private_data))
531 btrfs_merge_delalloc_extent(tree->private_data,
532 state, other);
533 state->end = other->end;
534 rb_erase(&other->rb_node, &tree->state);
535 RB_CLEAR_NODE(&other->rb_node);
536 free_extent_state(other);
537 }
538 }
539}
540
541static void set_state_bits(struct extent_io_tree *tree,
542 struct extent_state *state, u32 *bits,
543 struct extent_changeset *changeset);
544
545
546
547
548
549
550
551
552
553
554
555static int insert_state(struct extent_io_tree *tree,
556 struct extent_state *state, u64 start, u64 end,
557 struct rb_node ***p,
558 struct rb_node **parent,
559 u32 *bits, struct extent_changeset *changeset)
560{
561 struct rb_node *node;
562
563 if (end < start) {
564 btrfs_err(tree->fs_info,
565 "insert state: end < start %llu %llu", end, start);
566 WARN_ON(1);
567 }
568 state->start = start;
569 state->end = end;
570
571 set_state_bits(tree, state, bits, changeset);
572
573 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
574 if (node) {
575 struct extent_state *found;
576 found = rb_entry(node, struct extent_state, rb_node);
577 btrfs_err(tree->fs_info,
578 "found node %llu %llu on insert of %llu %llu",
579 found->start, found->end, start, end);
580 return -EEXIST;
581 }
582 merge_state(tree, state);
583 return 0;
584}
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
601 struct extent_state *prealloc, u64 split)
602{
603 struct rb_node *node;
604
605 if (tree->private_data && is_data_inode(tree->private_data))
606 btrfs_split_delalloc_extent(tree->private_data, orig, split);
607
608 prealloc->start = orig->start;
609 prealloc->end = split - 1;
610 prealloc->state = orig->state;
611 orig->start = split;
612
613 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
614 &prealloc->rb_node, NULL, NULL);
615 if (node) {
616 free_extent_state(prealloc);
617 return -EEXIST;
618 }
619 return 0;
620}
621
622static struct extent_state *next_state(struct extent_state *state)
623{
624 struct rb_node *next = rb_next(&state->rb_node);
625 if (next)
626 return rb_entry(next, struct extent_state, rb_node);
627 else
628 return NULL;
629}
630
631
632
633
634
635
636
637
638static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
639 struct extent_state *state,
640 u32 *bits, int wake,
641 struct extent_changeset *changeset)
642{
643 struct extent_state *next;
644 u32 bits_to_clear = *bits & ~EXTENT_CTLBITS;
645 int ret;
646
647 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
648 u64 range = state->end - state->start + 1;
649 WARN_ON(range > tree->dirty_bytes);
650 tree->dirty_bytes -= range;
651 }
652
653 if (tree->private_data && is_data_inode(tree->private_data))
654 btrfs_clear_delalloc_extent(tree->private_data, state, bits);
655
656 ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
657 BUG_ON(ret < 0);
658 state->state &= ~bits_to_clear;
659 if (wake)
660 wake_up(&state->wq);
661 if (state->state == 0) {
662 next = next_state(state);
663 if (extent_state_in_tree(state)) {
664 rb_erase(&state->rb_node, &tree->state);
665 RB_CLEAR_NODE(&state->rb_node);
666 free_extent_state(state);
667 } else {
668 WARN_ON(1);
669 }
670 } else {
671 merge_state(tree, state);
672 next = next_state(state);
673 }
674 return next;
675}
676
677static struct extent_state *
678alloc_extent_state_atomic(struct extent_state *prealloc)
679{
680 if (!prealloc)
681 prealloc = alloc_extent_state(GFP_ATOMIC);
682
683 return prealloc;
684}
685
686static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
687{
688 btrfs_panic(tree->fs_info, err,
689 "locking error: extent tree was modified by another thread while locked");
690}
691
692
693
694
695
696
697
698
699
700
701
702
703
704int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
705 u32 bits, int wake, int delete,
706 struct extent_state **cached_state,
707 gfp_t mask, struct extent_changeset *changeset)
708{
709 struct extent_state *state;
710 struct extent_state *cached;
711 struct extent_state *prealloc = NULL;
712 struct rb_node *node;
713 u64 last_end;
714 int err;
715 int clear = 0;
716
717 btrfs_debug_check_extent_io_range(tree, start, end);
718 trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
719
720 if (bits & EXTENT_DELALLOC)
721 bits |= EXTENT_NORESERVE;
722
723 if (delete)
724 bits |= ~EXTENT_CTLBITS;
725
726 if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
727 clear = 1;
728again:
729 if (!prealloc && gfpflags_allow_blocking(mask)) {
730
731
732
733
734
735
736
737 prealloc = alloc_extent_state(mask);
738 }
739
740 spin_lock(&tree->lock);
741 if (cached_state) {
742 cached = *cached_state;
743
744 if (clear) {
745 *cached_state = NULL;
746 cached_state = NULL;
747 }
748
749 if (cached && extent_state_in_tree(cached) &&
750 cached->start <= start && cached->end > start) {
751 if (clear)
752 refcount_dec(&cached->refs);
753 state = cached;
754 goto hit_next;
755 }
756 if (clear)
757 free_extent_state(cached);
758 }
759
760
761
762
763 node = tree_search(tree, start);
764 if (!node)
765 goto out;
766 state = rb_entry(node, struct extent_state, rb_node);
767hit_next:
768 if (state->start > end)
769 goto out;
770 WARN_ON(state->end < start);
771 last_end = state->end;
772
773
774 if (!(state->state & bits)) {
775 state = next_state(state);
776 goto next;
777 }
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795 if (state->start < start) {
796 prealloc = alloc_extent_state_atomic(prealloc);
797 BUG_ON(!prealloc);
798 err = split_state(tree, state, prealloc, start);
799 if (err)
800 extent_io_tree_panic(tree, err);
801
802 prealloc = NULL;
803 if (err)
804 goto out;
805 if (state->end <= end) {
806 state = clear_state_bit(tree, state, &bits, wake,
807 changeset);
808 goto next;
809 }
810 goto search_again;
811 }
812
813
814
815
816
817
818 if (state->start <= end && state->end > end) {
819 prealloc = alloc_extent_state_atomic(prealloc);
820 BUG_ON(!prealloc);
821 err = split_state(tree, state, prealloc, end + 1);
822 if (err)
823 extent_io_tree_panic(tree, err);
824
825 if (wake)
826 wake_up(&state->wq);
827
828 clear_state_bit(tree, prealloc, &bits, wake, changeset);
829
830 prealloc = NULL;
831 goto out;
832 }
833
834 state = clear_state_bit(tree, state, &bits, wake, changeset);
835next:
836 if (last_end == (u64)-1)
837 goto out;
838 start = last_end + 1;
839 if (start <= end && state && !need_resched())
840 goto hit_next;
841
842search_again:
843 if (start > end)
844 goto out;
845 spin_unlock(&tree->lock);
846 if (gfpflags_allow_blocking(mask))
847 cond_resched();
848 goto again;
849
850out:
851 spin_unlock(&tree->lock);
852 if (prealloc)
853 free_extent_state(prealloc);
854
855 return 0;
856
857}
858
859static void wait_on_state(struct extent_io_tree *tree,
860 struct extent_state *state)
861 __releases(tree->lock)
862 __acquires(tree->lock)
863{
864 DEFINE_WAIT(wait);
865 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
866 spin_unlock(&tree->lock);
867 schedule();
868 spin_lock(&tree->lock);
869 finish_wait(&state->wq, &wait);
870}
871
872
873
874
875
876
877static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
878 u32 bits)
879{
880 struct extent_state *state;
881 struct rb_node *node;
882
883 btrfs_debug_check_extent_io_range(tree, start, end);
884
885 spin_lock(&tree->lock);
886again:
887 while (1) {
888
889
890
891
892 node = tree_search(tree, start);
893process_node:
894 if (!node)
895 break;
896
897 state = rb_entry(node, struct extent_state, rb_node);
898
899 if (state->start > end)
900 goto out;
901
902 if (state->state & bits) {
903 start = state->start;
904 refcount_inc(&state->refs);
905 wait_on_state(tree, state);
906 free_extent_state(state);
907 goto again;
908 }
909 start = state->end + 1;
910
911 if (start > end)
912 break;
913
914 if (!cond_resched_lock(&tree->lock)) {
915 node = rb_next(node);
916 goto process_node;
917 }
918 }
919out:
920 spin_unlock(&tree->lock);
921}
922
923static void set_state_bits(struct extent_io_tree *tree,
924 struct extent_state *state,
925 u32 *bits, struct extent_changeset *changeset)
926{
927 u32 bits_to_set = *bits & ~EXTENT_CTLBITS;
928 int ret;
929
930 if (tree->private_data && is_data_inode(tree->private_data))
931 btrfs_set_delalloc_extent(tree->private_data, state, bits);
932
933 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
934 u64 range = state->end - state->start + 1;
935 tree->dirty_bytes += range;
936 }
937 ret = add_extent_changeset(state, bits_to_set, changeset, 1);
938 BUG_ON(ret < 0);
939 state->state |= bits_to_set;
940}
941
942static void cache_state_if_flags(struct extent_state *state,
943 struct extent_state **cached_ptr,
944 unsigned flags)
945{
946 if (cached_ptr && !(*cached_ptr)) {
947 if (!flags || (state->state & flags)) {
948 *cached_ptr = state;
949 refcount_inc(&state->refs);
950 }
951 }
952}
953
954static void cache_state(struct extent_state *state,
955 struct extent_state **cached_ptr)
956{
957 return cache_state_if_flags(state, cached_ptr,
958 EXTENT_LOCKED | EXTENT_BOUNDARY);
959}
960
961
962
963
964
965
966
967
968
969
970
971int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
972 u32 exclusive_bits, u64 *failed_start,
973 struct extent_state **cached_state, gfp_t mask,
974 struct extent_changeset *changeset)
975{
976 struct extent_state *state;
977 struct extent_state *prealloc = NULL;
978 struct rb_node *node;
979 struct rb_node **p;
980 struct rb_node *parent;
981 int err = 0;
982 u64 last_start;
983 u64 last_end;
984
985 btrfs_debug_check_extent_io_range(tree, start, end);
986 trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
987
988 if (exclusive_bits)
989 ASSERT(failed_start);
990 else
991 ASSERT(failed_start == NULL);
992again:
993 if (!prealloc && gfpflags_allow_blocking(mask)) {
994
995
996
997
998
999
1000
1001 prealloc = alloc_extent_state(mask);
1002 }
1003
1004 spin_lock(&tree->lock);
1005 if (cached_state && *cached_state) {
1006 state = *cached_state;
1007 if (state->start <= start && state->end > start &&
1008 extent_state_in_tree(state)) {
1009 node = &state->rb_node;
1010 goto hit_next;
1011 }
1012 }
1013
1014
1015
1016
1017 node = tree_search_for_insert(tree, start, &p, &parent);
1018 if (!node) {
1019 prealloc = alloc_extent_state_atomic(prealloc);
1020 BUG_ON(!prealloc);
1021 err = insert_state(tree, prealloc, start, end,
1022 &p, &parent, &bits, changeset);
1023 if (err)
1024 extent_io_tree_panic(tree, err);
1025
1026 cache_state(prealloc, cached_state);
1027 prealloc = NULL;
1028 goto out;
1029 }
1030 state = rb_entry(node, struct extent_state, rb_node);
1031hit_next:
1032 last_start = state->start;
1033 last_end = state->end;
1034
1035
1036
1037
1038
1039
1040
1041 if (state->start == start && state->end <= end) {
1042 if (state->state & exclusive_bits) {
1043 *failed_start = state->start;
1044 err = -EEXIST;
1045 goto out;
1046 }
1047
1048 set_state_bits(tree, state, &bits, changeset);
1049 cache_state(state, cached_state);
1050 merge_state(tree, state);
1051 if (last_end == (u64)-1)
1052 goto out;
1053 start = last_end + 1;
1054 state = next_state(state);
1055 if (start < end && state && state->start == start &&
1056 !need_resched())
1057 goto hit_next;
1058 goto search_again;
1059 }
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077 if (state->start < start) {
1078 if (state->state & exclusive_bits) {
1079 *failed_start = start;
1080 err = -EEXIST;
1081 goto out;
1082 }
1083
1084
1085
1086
1087
1088 if ((state->state & bits) == bits) {
1089 start = state->end + 1;
1090 cache_state(state, cached_state);
1091 goto search_again;
1092 }
1093
1094 prealloc = alloc_extent_state_atomic(prealloc);
1095 BUG_ON(!prealloc);
1096 err = split_state(tree, state, prealloc, start);
1097 if (err)
1098 extent_io_tree_panic(tree, err);
1099
1100 prealloc = NULL;
1101 if (err)
1102 goto out;
1103 if (state->end <= end) {
1104 set_state_bits(tree, state, &bits, changeset);
1105 cache_state(state, cached_state);
1106 merge_state(tree, state);
1107 if (last_end == (u64)-1)
1108 goto out;
1109 start = last_end + 1;
1110 state = next_state(state);
1111 if (start < end && state && state->start == start &&
1112 !need_resched())
1113 goto hit_next;
1114 }
1115 goto search_again;
1116 }
1117
1118
1119
1120
1121
1122
1123
1124 if (state->start > start) {
1125 u64 this_end;
1126 if (end < last_start)
1127 this_end = end;
1128 else
1129 this_end = last_start - 1;
1130
1131 prealloc = alloc_extent_state_atomic(prealloc);
1132 BUG_ON(!prealloc);
1133
1134
1135
1136
1137
1138 err = insert_state(tree, prealloc, start, this_end,
1139 NULL, NULL, &bits, changeset);
1140 if (err)
1141 extent_io_tree_panic(tree, err);
1142
1143 cache_state(prealloc, cached_state);
1144 prealloc = NULL;
1145 start = this_end + 1;
1146 goto search_again;
1147 }
1148
1149
1150
1151
1152
1153
1154 if (state->start <= end && state->end > end) {
1155 if (state->state & exclusive_bits) {
1156 *failed_start = start;
1157 err = -EEXIST;
1158 goto out;
1159 }
1160
1161 prealloc = alloc_extent_state_atomic(prealloc);
1162 BUG_ON(!prealloc);
1163 err = split_state(tree, state, prealloc, end + 1);
1164 if (err)
1165 extent_io_tree_panic(tree, err);
1166
1167 set_state_bits(tree, prealloc, &bits, changeset);
1168 cache_state(prealloc, cached_state);
1169 merge_state(tree, prealloc);
1170 prealloc = NULL;
1171 goto out;
1172 }
1173
1174search_again:
1175 if (start > end)
1176 goto out;
1177 spin_unlock(&tree->lock);
1178 if (gfpflags_allow_blocking(mask))
1179 cond_resched();
1180 goto again;
1181
1182out:
1183 spin_unlock(&tree->lock);
1184 if (prealloc)
1185 free_extent_state(prealloc);
1186
1187 return err;
1188
1189}
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1210 u32 bits, u32 clear_bits,
1211 struct extent_state **cached_state)
1212{
1213 struct extent_state *state;
1214 struct extent_state *prealloc = NULL;
1215 struct rb_node *node;
1216 struct rb_node **p;
1217 struct rb_node *parent;
1218 int err = 0;
1219 u64 last_start;
1220 u64 last_end;
1221 bool first_iteration = true;
1222
1223 btrfs_debug_check_extent_io_range(tree, start, end);
1224 trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
1225 clear_bits);
1226
1227again:
1228 if (!prealloc) {
1229
1230
1231
1232
1233
1234
1235
1236 prealloc = alloc_extent_state(GFP_NOFS);
1237 if (!prealloc && !first_iteration)
1238 return -ENOMEM;
1239 }
1240
1241 spin_lock(&tree->lock);
1242 if (cached_state && *cached_state) {
1243 state = *cached_state;
1244 if (state->start <= start && state->end > start &&
1245 extent_state_in_tree(state)) {
1246 node = &state->rb_node;
1247 goto hit_next;
1248 }
1249 }
1250
1251
1252
1253
1254
1255 node = tree_search_for_insert(tree, start, &p, &parent);
1256 if (!node) {
1257 prealloc = alloc_extent_state_atomic(prealloc);
1258 if (!prealloc) {
1259 err = -ENOMEM;
1260 goto out;
1261 }
1262 err = insert_state(tree, prealloc, start, end,
1263 &p, &parent, &bits, NULL);
1264 if (err)
1265 extent_io_tree_panic(tree, err);
1266 cache_state(prealloc, cached_state);
1267 prealloc = NULL;
1268 goto out;
1269 }
1270 state = rb_entry(node, struct extent_state, rb_node);
1271hit_next:
1272 last_start = state->start;
1273 last_end = state->end;
1274
1275
1276
1277
1278
1279
1280
1281 if (state->start == start && state->end <= end) {
1282 set_state_bits(tree, state, &bits, NULL);
1283 cache_state(state, cached_state);
1284 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1285 if (last_end == (u64)-1)
1286 goto out;
1287 start = last_end + 1;
1288 if (start < end && state && state->start == start &&
1289 !need_resched())
1290 goto hit_next;
1291 goto search_again;
1292 }
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310 if (state->start < start) {
1311 prealloc = alloc_extent_state_atomic(prealloc);
1312 if (!prealloc) {
1313 err = -ENOMEM;
1314 goto out;
1315 }
1316 err = split_state(tree, state, prealloc, start);
1317 if (err)
1318 extent_io_tree_panic(tree, err);
1319 prealloc = NULL;
1320 if (err)
1321 goto out;
1322 if (state->end <= end) {
1323 set_state_bits(tree, state, &bits, NULL);
1324 cache_state(state, cached_state);
1325 state = clear_state_bit(tree, state, &clear_bits, 0,
1326 NULL);
1327 if (last_end == (u64)-1)
1328 goto out;
1329 start = last_end + 1;
1330 if (start < end && state && state->start == start &&
1331 !need_resched())
1332 goto hit_next;
1333 }
1334 goto search_again;
1335 }
1336
1337
1338
1339
1340
1341
1342
1343 if (state->start > start) {
1344 u64 this_end;
1345 if (end < last_start)
1346 this_end = end;
1347 else
1348 this_end = last_start - 1;
1349
1350 prealloc = alloc_extent_state_atomic(prealloc);
1351 if (!prealloc) {
1352 err = -ENOMEM;
1353 goto out;
1354 }
1355
1356
1357
1358
1359
1360 err = insert_state(tree, prealloc, start, this_end,
1361 NULL, NULL, &bits, NULL);
1362 if (err)
1363 extent_io_tree_panic(tree, err);
1364 cache_state(prealloc, cached_state);
1365 prealloc = NULL;
1366 start = this_end + 1;
1367 goto search_again;
1368 }
1369
1370
1371
1372
1373
1374
1375 if (state->start <= end && state->end > end) {
1376 prealloc = alloc_extent_state_atomic(prealloc);
1377 if (!prealloc) {
1378 err = -ENOMEM;
1379 goto out;
1380 }
1381
1382 err = split_state(tree, state, prealloc, end + 1);
1383 if (err)
1384 extent_io_tree_panic(tree, err);
1385
1386 set_state_bits(tree, prealloc, &bits, NULL);
1387 cache_state(prealloc, cached_state);
1388 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1389 prealloc = NULL;
1390 goto out;
1391 }
1392
1393search_again:
1394 if (start > end)
1395 goto out;
1396 spin_unlock(&tree->lock);
1397 cond_resched();
1398 first_iteration = false;
1399 goto again;
1400
1401out:
1402 spin_unlock(&tree->lock);
1403 if (prealloc)
1404 free_extent_state(prealloc);
1405
1406 return err;
1407}
1408
1409
1410int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1411 u32 bits, struct extent_changeset *changeset)
1412{
1413
1414
1415
1416
1417
1418
1419 BUG_ON(bits & EXTENT_LOCKED);
1420
1421 return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1422 changeset);
1423}
1424
1425int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
1426 u32 bits)
1427{
1428 return set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
1429 GFP_NOWAIT, NULL);
1430}
1431
1432int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1433 u32 bits, int wake, int delete,
1434 struct extent_state **cached)
1435{
1436 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1437 cached, GFP_NOFS, NULL);
1438}
1439
1440int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1441 u32 bits, struct extent_changeset *changeset)
1442{
1443
1444
1445
1446
1447 BUG_ON(bits & EXTENT_LOCKED);
1448
1449 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1450 changeset);
1451}
1452
1453
1454
1455
1456
1457int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1458 struct extent_state **cached_state)
1459{
1460 int err;
1461 u64 failed_start;
1462
1463 while (1) {
1464 err = set_extent_bit(tree, start, end, EXTENT_LOCKED,
1465 EXTENT_LOCKED, &failed_start,
1466 cached_state, GFP_NOFS, NULL);
1467 if (err == -EEXIST) {
1468 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1469 start = failed_start;
1470 } else
1471 break;
1472 WARN_ON(start > end);
1473 }
1474 return err;
1475}
1476
1477int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1478{
1479 int err;
1480 u64 failed_start;
1481
1482 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1483 &failed_start, NULL, GFP_NOFS, NULL);
1484 if (err == -EEXIST) {
1485 if (failed_start > start)
1486 clear_extent_bit(tree, start, failed_start - 1,
1487 EXTENT_LOCKED, 1, 0, NULL);
1488 return 0;
1489 }
1490 return 1;
1491}
1492
1493void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1494{
1495 unsigned long index = start >> PAGE_SHIFT;
1496 unsigned long end_index = end >> PAGE_SHIFT;
1497 struct page *page;
1498
1499 while (index <= end_index) {
1500 page = find_get_page(inode->i_mapping, index);
1501 BUG_ON(!page);
1502 clear_page_dirty_for_io(page);
1503 put_page(page);
1504 index++;
1505 }
1506}
1507
1508void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1509{
1510 unsigned long index = start >> PAGE_SHIFT;
1511 unsigned long end_index = end >> PAGE_SHIFT;
1512 struct page *page;
1513
1514 while (index <= end_index) {
1515 page = find_get_page(inode->i_mapping, index);
1516 BUG_ON(!page);
1517 __set_page_dirty_nobuffers(page);
1518 account_page_redirty(page);
1519 put_page(page);
1520 index++;
1521 }
1522}
1523
1524
1525
1526
1527
1528static struct extent_state *
1529find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, u32 bits)
1530{
1531 struct rb_node *node;
1532 struct extent_state *state;
1533
1534
1535
1536
1537
1538 node = tree_search(tree, start);
1539 if (!node)
1540 goto out;
1541
1542 while (1) {
1543 state = rb_entry(node, struct extent_state, rb_node);
1544 if (state->end >= start && (state->state & bits))
1545 return state;
1546
1547 node = rb_next(node);
1548 if (!node)
1549 break;
1550 }
1551out:
1552 return NULL;
1553}
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1564 u64 *start_ret, u64 *end_ret, u32 bits,
1565 struct extent_state **cached_state)
1566{
1567 struct extent_state *state;
1568 int ret = 1;
1569
1570 spin_lock(&tree->lock);
1571 if (cached_state && *cached_state) {
1572 state = *cached_state;
1573 if (state->end == start - 1 && extent_state_in_tree(state)) {
1574 while ((state = next_state(state)) != NULL) {
1575 if (state->state & bits)
1576 goto got_it;
1577 }
1578 free_extent_state(*cached_state);
1579 *cached_state = NULL;
1580 goto out;
1581 }
1582 free_extent_state(*cached_state);
1583 *cached_state = NULL;
1584 }
1585
1586 state = find_first_extent_bit_state(tree, start, bits);
1587got_it:
1588 if (state) {
1589 cache_state_if_flags(state, cached_state, 0);
1590 *start_ret = state->start;
1591 *end_ret = state->end;
1592 ret = 0;
1593 }
1594out:
1595 spin_unlock(&tree->lock);
1596 return ret;
1597}
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
1616 u64 *start_ret, u64 *end_ret, u32 bits)
1617{
1618 struct extent_state *state;
1619 int ret = 1;
1620
1621 spin_lock(&tree->lock);
1622 state = find_first_extent_bit_state(tree, start, bits);
1623 if (state) {
1624 *start_ret = state->start;
1625 *end_ret = state->end;
1626 while ((state = next_state(state)) != NULL) {
1627 if (state->start > (*end_ret + 1))
1628 break;
1629 *end_ret = state->end;
1630 }
1631 ret = 0;
1632 }
1633 spin_unlock(&tree->lock);
1634 return ret;
1635}
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1653 u64 *start_ret, u64 *end_ret, u32 bits)
1654{
1655 struct extent_state *state;
1656 struct rb_node *node, *prev = NULL, *next;
1657
1658 spin_lock(&tree->lock);
1659
1660
1661 while (1) {
1662 node = __etree_search(tree, start, &next, &prev, NULL, NULL);
1663 if (!node && !next && !prev) {
1664
1665
1666
1667
1668 *start_ret = 0;
1669 *end_ret = -1;
1670 goto out;
1671 } else if (!node && !next) {
1672
1673
1674
1675
1676 state = rb_entry(prev, struct extent_state, rb_node);
1677 *start_ret = state->end + 1;
1678 *end_ret = -1;
1679 goto out;
1680 } else if (!node) {
1681 node = next;
1682 }
1683
1684
1685
1686
1687 state = rb_entry(node, struct extent_state, rb_node);
1688
1689 if (in_range(start, state->start, state->end - state->start + 1)) {
1690 if (state->state & bits) {
1691
1692
1693
1694
1695
1696 start = state->end + 1;
1697 } else {
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707 *start_ret = state->start;
1708 break;
1709 }
1710 } else {
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722 if (prev) {
1723 state = rb_entry(prev, struct extent_state,
1724 rb_node);
1725 *start_ret = state->end + 1;
1726 } else {
1727 *start_ret = 0;
1728 }
1729 break;
1730 }
1731 }
1732
1733
1734
1735
1736
1737 while (1) {
1738 state = rb_entry(node, struct extent_state, rb_node);
1739 if (state->end >= start && !(state->state & bits)) {
1740 *end_ret = state->end;
1741 } else {
1742 *end_ret = state->start - 1;
1743 break;
1744 }
1745
1746 node = rb_next(node);
1747 if (!node)
1748 break;
1749 }
1750out:
1751 spin_unlock(&tree->lock);
1752}
1753
1754
1755
1756
1757
1758
1759
1760bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
1761 u64 *end, u64 max_bytes,
1762 struct extent_state **cached_state)
1763{
1764 struct rb_node *node;
1765 struct extent_state *state;
1766 u64 cur_start = *start;
1767 bool found = false;
1768 u64 total_bytes = 0;
1769
1770 spin_lock(&tree->lock);
1771
1772
1773
1774
1775
1776 node = tree_search(tree, cur_start);
1777 if (!node) {
1778 *end = (u64)-1;
1779 goto out;
1780 }
1781
1782 while (1) {
1783 state = rb_entry(node, struct extent_state, rb_node);
1784 if (found && (state->start != cur_start ||
1785 (state->state & EXTENT_BOUNDARY))) {
1786 goto out;
1787 }
1788 if (!(state->state & EXTENT_DELALLOC)) {
1789 if (!found)
1790 *end = state->end;
1791 goto out;
1792 }
1793 if (!found) {
1794 *start = state->start;
1795 *cached_state = state;
1796 refcount_inc(&state->refs);
1797 }
1798 found = true;
1799 *end = state->end;
1800 cur_start = state->end + 1;
1801 node = rb_next(node);
1802 total_bytes += state->end - state->start + 1;
1803 if (total_bytes >= max_bytes)
1804 break;
1805 if (!node)
1806 break;
1807 }
1808out:
1809 spin_unlock(&tree->lock);
1810 return found;
1811}
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821static int process_one_page(struct btrfs_fs_info *fs_info,
1822 struct address_space *mapping,
1823 struct page *page, struct page *locked_page,
1824 unsigned long page_ops, u64 start, u64 end)
1825{
1826 u32 len;
1827
1828 ASSERT(end + 1 - start != 0 && end + 1 - start < U32_MAX);
1829 len = end + 1 - start;
1830
1831 if (page_ops & PAGE_SET_ORDERED)
1832 btrfs_page_clamp_set_ordered(fs_info, page, start, len);
1833 if (page_ops & PAGE_SET_ERROR)
1834 btrfs_page_clamp_set_error(fs_info, page, start, len);
1835 if (page_ops & PAGE_START_WRITEBACK) {
1836 btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
1837 btrfs_page_clamp_set_writeback(fs_info, page, start, len);
1838 }
1839 if (page_ops & PAGE_END_WRITEBACK)
1840 btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
1841
1842 if (page == locked_page)
1843 return 1;
1844
1845 if (page_ops & PAGE_LOCK) {
1846 int ret;
1847
1848 ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
1849 if (ret)
1850 return ret;
1851 if (!PageDirty(page) || page->mapping != mapping) {
1852 btrfs_page_end_writer_lock(fs_info, page, start, len);
1853 return -EAGAIN;
1854 }
1855 }
1856 if (page_ops & PAGE_UNLOCK)
1857 btrfs_page_end_writer_lock(fs_info, page, start, len);
1858 return 0;
1859}
1860
1861static int __process_pages_contig(struct address_space *mapping,
1862 struct page *locked_page,
1863 u64 start, u64 end, unsigned long page_ops,
1864 u64 *processed_end)
1865{
1866 struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
1867 pgoff_t start_index = start >> PAGE_SHIFT;
1868 pgoff_t end_index = end >> PAGE_SHIFT;
1869 pgoff_t index = start_index;
1870 unsigned long nr_pages = end_index - start_index + 1;
1871 unsigned long pages_processed = 0;
1872 struct page *pages[16];
1873 int err = 0;
1874 int i;
1875
1876 if (page_ops & PAGE_LOCK) {
1877 ASSERT(page_ops == PAGE_LOCK);
1878 ASSERT(processed_end && *processed_end == start);
1879 }
1880
1881 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1882 mapping_set_error(mapping, -EIO);
1883
1884 while (nr_pages > 0) {
1885 int found_pages;
1886
1887 found_pages = find_get_pages_contig(mapping, index,
1888 min_t(unsigned long,
1889 nr_pages, ARRAY_SIZE(pages)), pages);
1890 if (found_pages == 0) {
1891
1892
1893
1894
1895 ASSERT(page_ops & PAGE_LOCK);
1896 err = -EAGAIN;
1897 goto out;
1898 }
1899
1900 for (i = 0; i < found_pages; i++) {
1901 int process_ret;
1902
1903 process_ret = process_one_page(fs_info, mapping,
1904 pages[i], locked_page, page_ops,
1905 start, end);
1906 if (process_ret < 0) {
1907 for (; i < found_pages; i++)
1908 put_page(pages[i]);
1909 err = -EAGAIN;
1910 goto out;
1911 }
1912 put_page(pages[i]);
1913 pages_processed++;
1914 }
1915 nr_pages -= found_pages;
1916 index += found_pages;
1917 cond_resched();
1918 }
1919out:
1920 if (err && processed_end) {
1921
1922
1923
1924
1925
1926
1927
1928
1929 if (pages_processed)
1930 *processed_end = min(end,
1931 ((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
1932 else
1933 *processed_end = start;
1934 }
1935 return err;
1936}
1937
1938static noinline void __unlock_for_delalloc(struct inode *inode,
1939 struct page *locked_page,
1940 u64 start, u64 end)
1941{
1942 unsigned long index = start >> PAGE_SHIFT;
1943 unsigned long end_index = end >> PAGE_SHIFT;
1944
1945 ASSERT(locked_page);
1946 if (index == locked_page->index && end_index == index)
1947 return;
1948
1949 __process_pages_contig(inode->i_mapping, locked_page, start, end,
1950 PAGE_UNLOCK, NULL);
1951}
1952
1953static noinline int lock_delalloc_pages(struct inode *inode,
1954 struct page *locked_page,
1955 u64 delalloc_start,
1956 u64 delalloc_end)
1957{
1958 unsigned long index = delalloc_start >> PAGE_SHIFT;
1959 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1960 u64 processed_end = delalloc_start;
1961 int ret;
1962
1963 ASSERT(locked_page);
1964 if (index == locked_page->index && index == end_index)
1965 return 0;
1966
1967 ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
1968 delalloc_end, PAGE_LOCK, &processed_end);
1969 if (ret == -EAGAIN && processed_end > delalloc_start)
1970 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1971 processed_end);
1972 return ret;
1973}
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990EXPORT_FOR_TESTS
1991noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
1992 struct page *locked_page, u64 *start,
1993 u64 *end)
1994{
1995 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1996 const u64 orig_start = *start;
1997 const u64 orig_end = *end;
1998 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
1999 u64 delalloc_start;
2000 u64 delalloc_end;
2001 bool found;
2002 struct extent_state *cached_state = NULL;
2003 int ret;
2004 int loops = 0;
2005
2006
2007 ASSERT(orig_end > orig_start);
2008
2009
2010 ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
2011 orig_end <= page_offset(locked_page)));
2012again:
2013
2014 delalloc_start = *start;
2015 delalloc_end = 0;
2016 found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
2017 max_bytes, &cached_state);
2018 if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
2019 *start = delalloc_start;
2020
2021
2022 *end = min(delalloc_end, orig_end);
2023 free_extent_state(cached_state);
2024 return false;
2025 }
2026
2027
2028
2029
2030
2031
2032 if (delalloc_start < *start)
2033 delalloc_start = *start;
2034
2035
2036
2037
2038 if (delalloc_end + 1 - delalloc_start > max_bytes)
2039 delalloc_end = delalloc_start + max_bytes - 1;
2040
2041
2042 ret = lock_delalloc_pages(inode, locked_page,
2043 delalloc_start, delalloc_end);
2044 ASSERT(!ret || ret == -EAGAIN);
2045 if (ret == -EAGAIN) {
2046
2047
2048
2049 free_extent_state(cached_state);
2050 cached_state = NULL;
2051 if (!loops) {
2052 max_bytes = PAGE_SIZE;
2053 loops = 1;
2054 goto again;
2055 } else {
2056 found = false;
2057 goto out_failed;
2058 }
2059 }
2060
2061
2062 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
2063
2064
2065 ret = test_range_bit(tree, delalloc_start, delalloc_end,
2066 EXTENT_DELALLOC, 1, cached_state);
2067 if (!ret) {
2068 unlock_extent_cached(tree, delalloc_start, delalloc_end,
2069 &cached_state);
2070 __unlock_for_delalloc(inode, locked_page,
2071 delalloc_start, delalloc_end);
2072 cond_resched();
2073 goto again;
2074 }
2075 free_extent_state(cached_state);
2076 *start = delalloc_start;
2077 *end = delalloc_end;
2078out_failed:
2079 return found;
2080}
2081
2082void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
2083 struct page *locked_page,
2084 u32 clear_bits, unsigned long page_ops)
2085{
2086 clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);
2087
2088 __process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
2089 start, end, page_ops, NULL);
2090}
2091
2092
2093
2094
2095
2096
2097u64 count_range_bits(struct extent_io_tree *tree,
2098 u64 *start, u64 search_end, u64 max_bytes,
2099 u32 bits, int contig)
2100{
2101 struct rb_node *node;
2102 struct extent_state *state;
2103 u64 cur_start = *start;
2104 u64 total_bytes = 0;
2105 u64 last = 0;
2106 int found = 0;
2107
2108 if (WARN_ON(search_end <= cur_start))
2109 return 0;
2110
2111 spin_lock(&tree->lock);
2112 if (cur_start == 0 && bits == EXTENT_DIRTY) {
2113 total_bytes = tree->dirty_bytes;
2114 goto out;
2115 }
2116
2117
2118
2119
2120 node = tree_search(tree, cur_start);
2121 if (!node)
2122 goto out;
2123
2124 while (1) {
2125 state = rb_entry(node, struct extent_state, rb_node);
2126 if (state->start > search_end)
2127 break;
2128 if (contig && found && state->start > last + 1)
2129 break;
2130 if (state->end >= cur_start && (state->state & bits) == bits) {
2131 total_bytes += min(search_end, state->end) + 1 -
2132 max(cur_start, state->start);
2133 if (total_bytes >= max_bytes)
2134 break;
2135 if (!found) {
2136 *start = max(cur_start, state->start);
2137 found = 1;
2138 }
2139 last = state->end;
2140 } else if (contig && found) {
2141 break;
2142 }
2143 node = rb_next(node);
2144 if (!node)
2145 break;
2146 }
2147out:
2148 spin_unlock(&tree->lock);
2149 return total_bytes;
2150}
2151
2152
2153
2154
2155
2156int set_state_failrec(struct extent_io_tree *tree, u64 start,
2157 struct io_failure_record *failrec)
2158{
2159 struct rb_node *node;
2160 struct extent_state *state;
2161 int ret = 0;
2162
2163 spin_lock(&tree->lock);
2164
2165
2166
2167
2168 node = tree_search(tree, start);
2169 if (!node) {
2170 ret = -ENOENT;
2171 goto out;
2172 }
2173 state = rb_entry(node, struct extent_state, rb_node);
2174 if (state->start != start) {
2175 ret = -ENOENT;
2176 goto out;
2177 }
2178 state->failrec = failrec;
2179out:
2180 spin_unlock(&tree->lock);
2181 return ret;
2182}
2183
2184struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start)
2185{
2186 struct rb_node *node;
2187 struct extent_state *state;
2188 struct io_failure_record *failrec;
2189
2190 spin_lock(&tree->lock);
2191
2192
2193
2194
2195 node = tree_search(tree, start);
2196 if (!node) {
2197 failrec = ERR_PTR(-ENOENT);
2198 goto out;
2199 }
2200 state = rb_entry(node, struct extent_state, rb_node);
2201 if (state->start != start) {
2202 failrec = ERR_PTR(-ENOENT);
2203 goto out;
2204 }
2205
2206 failrec = state->failrec;
2207out:
2208 spin_unlock(&tree->lock);
2209 return failrec;
2210}
2211
2212
2213
2214
2215
2216
2217
2218int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
2219 u32 bits, int filled, struct extent_state *cached)
2220{
2221 struct extent_state *state = NULL;
2222 struct rb_node *node;
2223 int bitset = 0;
2224
2225 spin_lock(&tree->lock);
2226 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
2227 cached->end > start)
2228 node = &cached->rb_node;
2229 else
2230 node = tree_search(tree, start);
2231 while (node && start <= end) {
2232 state = rb_entry(node, struct extent_state, rb_node);
2233
2234 if (filled && state->start > start) {
2235 bitset = 0;
2236 break;
2237 }
2238
2239 if (state->start > end)
2240 break;
2241
2242 if (state->state & bits) {
2243 bitset = 1;
2244 if (!filled)
2245 break;
2246 } else if (filled) {
2247 bitset = 0;
2248 break;
2249 }
2250
2251 if (state->end == (u64)-1)
2252 break;
2253
2254 start = state->end + 1;
2255 if (start > end)
2256 break;
2257 node = rb_next(node);
2258 if (!node) {
2259 if (filled)
2260 bitset = 0;
2261 break;
2262 }
2263 }
2264 spin_unlock(&tree->lock);
2265 return bitset;
2266}
2267
2268int free_io_failure(struct extent_io_tree *failure_tree,
2269 struct extent_io_tree *io_tree,
2270 struct io_failure_record *rec)
2271{
2272 int ret;
2273 int err = 0;
2274
2275 set_state_failrec(failure_tree, rec->start, NULL);
2276 ret = clear_extent_bits(failure_tree, rec->start,
2277 rec->start + rec->len - 1,
2278 EXTENT_LOCKED | EXTENT_DIRTY);
2279 if (ret)
2280 err = ret;
2281
2282 ret = clear_extent_bits(io_tree, rec->start,
2283 rec->start + rec->len - 1,
2284 EXTENT_DAMAGED);
2285 if (ret && !err)
2286 err = ret;
2287
2288 kfree(rec);
2289 return err;
2290}
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
2303 u64 length, u64 logical, struct page *page,
2304 unsigned int pg_offset, int mirror_num)
2305{
2306 struct bio *bio;
2307 struct btrfs_device *dev;
2308 u64 map_length = 0;
2309 u64 sector;
2310 struct btrfs_io_context *bioc = NULL;
2311 int ret;
2312
2313 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
2314 BUG_ON(!mirror_num);
2315
2316 if (btrfs_repair_one_zone(fs_info, logical))
2317 return 0;
2318
2319 bio = btrfs_bio_alloc(1);
2320 bio->bi_iter.bi_size = 0;
2321 map_length = length;
2322
2323
2324
2325
2326
2327
2328 btrfs_bio_counter_inc_blocked(fs_info);
2329 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
2330
2331
2332
2333
2334
2335
2336 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2337 &map_length, &bioc, 0);
2338 if (ret) {
2339 btrfs_bio_counter_dec(fs_info);
2340 bio_put(bio);
2341 return -EIO;
2342 }
2343 ASSERT(bioc->mirror_num == 1);
2344 } else {
2345 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2346 &map_length, &bioc, mirror_num);
2347 if (ret) {
2348 btrfs_bio_counter_dec(fs_info);
2349 bio_put(bio);
2350 return -EIO;
2351 }
2352 BUG_ON(mirror_num != bioc->mirror_num);
2353 }
2354
2355 sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
2356 bio->bi_iter.bi_sector = sector;
2357 dev = bioc->stripes[bioc->mirror_num - 1].dev;
2358 btrfs_put_bioc(bioc);
2359 if (!dev || !dev->bdev ||
2360 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
2361 btrfs_bio_counter_dec(fs_info);
2362 bio_put(bio);
2363 return -EIO;
2364 }
2365 bio_set_dev(bio, dev->bdev);
2366 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2367 bio_add_page(bio, page, length, pg_offset);
2368
2369 if (btrfsic_submit_bio_wait(bio)) {
2370
2371 btrfs_bio_counter_dec(fs_info);
2372 bio_put(bio);
2373 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2374 return -EIO;
2375 }
2376
2377 btrfs_info_rl_in_rcu(fs_info,
2378 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2379 ino, start,
2380 rcu_str_deref(dev->name), sector);
2381 btrfs_bio_counter_dec(fs_info);
2382 bio_put(bio);
2383 return 0;
2384}
2385
2386int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
2387{
2388 struct btrfs_fs_info *fs_info = eb->fs_info;
2389 u64 start = eb->start;
2390 int i, num_pages = num_extent_pages(eb);
2391 int ret = 0;
2392
2393 if (sb_rdonly(fs_info->sb))
2394 return -EROFS;
2395
2396 for (i = 0; i < num_pages; i++) {
2397 struct page *p = eb->pages[i];
2398
2399 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2400 start - page_offset(p), mirror_num);
2401 if (ret)
2402 break;
2403 start += PAGE_SIZE;
2404 }
2405
2406 return ret;
2407}
2408
2409
2410
2411
2412
2413int clean_io_failure(struct btrfs_fs_info *fs_info,
2414 struct extent_io_tree *failure_tree,
2415 struct extent_io_tree *io_tree, u64 start,
2416 struct page *page, u64 ino, unsigned int pg_offset)
2417{
2418 u64 private;
2419 struct io_failure_record *failrec;
2420 struct extent_state *state;
2421 int num_copies;
2422 int ret;
2423
2424 private = 0;
2425 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2426 EXTENT_DIRTY, 0);
2427 if (!ret)
2428 return 0;
2429
2430 failrec = get_state_failrec(failure_tree, start);
2431 if (IS_ERR(failrec))
2432 return 0;
2433
2434 BUG_ON(!failrec->this_mirror);
2435
2436 if (sb_rdonly(fs_info->sb))
2437 goto out;
2438
2439 spin_lock(&io_tree->lock);
2440 state = find_first_extent_bit_state(io_tree,
2441 failrec->start,
2442 EXTENT_LOCKED);
2443 spin_unlock(&io_tree->lock);
2444
2445 if (state && state->start <= failrec->start &&
2446 state->end >= failrec->start + failrec->len - 1) {
2447 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2448 failrec->len);
2449 if (num_copies > 1) {
2450 repair_io_failure(fs_info, ino, start, failrec->len,
2451 failrec->logical, page, pg_offset,
2452 failrec->failed_mirror);
2453 }
2454 }
2455
2456out:
2457 free_io_failure(failure_tree, io_tree, failrec);
2458
2459 return 0;
2460}
2461
2462
2463
2464
2465
2466
2467
2468void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2469{
2470 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2471 struct io_failure_record *failrec;
2472 struct extent_state *state, *next;
2473
2474 if (RB_EMPTY_ROOT(&failure_tree->state))
2475 return;
2476
2477 spin_lock(&failure_tree->lock);
2478 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2479 while (state) {
2480 if (state->start > end)
2481 break;
2482
2483 ASSERT(state->end <= end);
2484
2485 next = next_state(state);
2486
2487 failrec = state->failrec;
2488 free_extent_state(state);
2489 kfree(failrec);
2490
2491 state = next;
2492 }
2493 spin_unlock(&failure_tree->lock);
2494}
2495
2496static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
2497 u64 start)
2498{
2499 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2500 struct io_failure_record *failrec;
2501 struct extent_map *em;
2502 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2503 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2504 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2505 const u32 sectorsize = fs_info->sectorsize;
2506 int ret;
2507 u64 logical;
2508
2509 failrec = get_state_failrec(failure_tree, start);
2510 if (!IS_ERR(failrec)) {
2511 btrfs_debug(fs_info,
2512 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
2513 failrec->logical, failrec->start, failrec->len);
2514
2515
2516
2517
2518
2519
2520 return failrec;
2521 }
2522
2523 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2524 if (!failrec)
2525 return ERR_PTR(-ENOMEM);
2526
2527 failrec->start = start;
2528 failrec->len = sectorsize;
2529 failrec->this_mirror = 0;
2530 failrec->bio_flags = 0;
2531
2532 read_lock(&em_tree->lock);
2533 em = lookup_extent_mapping(em_tree, start, failrec->len);
2534 if (!em) {
2535 read_unlock(&em_tree->lock);
2536 kfree(failrec);
2537 return ERR_PTR(-EIO);
2538 }
2539
2540 if (em->start > start || em->start + em->len <= start) {
2541 free_extent_map(em);
2542 em = NULL;
2543 }
2544 read_unlock(&em_tree->lock);
2545 if (!em) {
2546 kfree(failrec);
2547 return ERR_PTR(-EIO);
2548 }
2549
2550 logical = start - em->start;
2551 logical = em->block_start + logical;
2552 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2553 logical = em->block_start;
2554 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2555 extent_set_compress_type(&failrec->bio_flags, em->compress_type);
2556 }
2557
2558 btrfs_debug(fs_info,
2559 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2560 logical, start, failrec->len);
2561
2562 failrec->logical = logical;
2563 free_extent_map(em);
2564
2565
2566 ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
2567 EXTENT_LOCKED | EXTENT_DIRTY);
2568 if (ret >= 0) {
2569 ret = set_state_failrec(failure_tree, start, failrec);
2570
2571 ret = set_extent_bits(tree, start, start + sectorsize - 1,
2572 EXTENT_DAMAGED);
2573 } else if (ret < 0) {
2574 kfree(failrec);
2575 return ERR_PTR(ret);
2576 }
2577
2578 return failrec;
2579}
2580
2581static bool btrfs_check_repairable(struct inode *inode,
2582 struct io_failure_record *failrec,
2583 int failed_mirror)
2584{
2585 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2586 int num_copies;
2587
2588 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2589 if (num_copies == 1) {
2590
2591
2592
2593
2594
2595 btrfs_debug(fs_info,
2596 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2597 num_copies, failrec->this_mirror, failed_mirror);
2598 return false;
2599 }
2600
2601
2602 ASSERT(failrec->len == fs_info->sectorsize);
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613 failrec->failed_mirror = failed_mirror;
2614 failrec->this_mirror++;
2615 if (failrec->this_mirror == failed_mirror)
2616 failrec->this_mirror++;
2617
2618 if (failrec->this_mirror > num_copies) {
2619 btrfs_debug(fs_info,
2620 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2621 num_copies, failrec->this_mirror, failed_mirror);
2622 return false;
2623 }
2624
2625 return true;
2626}
2627
2628int btrfs_repair_one_sector(struct inode *inode,
2629 struct bio *failed_bio, u32 bio_offset,
2630 struct page *page, unsigned int pgoff,
2631 u64 start, int failed_mirror,
2632 submit_bio_hook_t *submit_bio_hook)
2633{
2634 struct io_failure_record *failrec;
2635 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2636 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2637 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2638 struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
2639 const int icsum = bio_offset >> fs_info->sectorsize_bits;
2640 struct bio *repair_bio;
2641 struct btrfs_bio *repair_bbio;
2642 blk_status_t status;
2643
2644 btrfs_debug(fs_info,
2645 "repair read error: read error at %llu", start);
2646
2647 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2648
2649 failrec = btrfs_get_io_failure_record(inode, start);
2650 if (IS_ERR(failrec))
2651 return PTR_ERR(failrec);
2652
2653
2654 if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
2655 free_io_failure(failure_tree, tree, failrec);
2656 return -EIO;
2657 }
2658
2659 repair_bio = btrfs_bio_alloc(1);
2660 repair_bbio = btrfs_bio(repair_bio);
2661 repair_bio->bi_opf = REQ_OP_READ;
2662 repair_bio->bi_end_io = failed_bio->bi_end_io;
2663 repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
2664 repair_bio->bi_private = failed_bio->bi_private;
2665
2666 if (failed_bbio->csum) {
2667 const u32 csum_size = fs_info->csum_size;
2668
2669 repair_bbio->csum = repair_bbio->csum_inline;
2670 memcpy(repair_bbio->csum,
2671 failed_bbio->csum + csum_size * icsum, csum_size);
2672 }
2673
2674 bio_add_page(repair_bio, page, failrec->len, pgoff);
2675 repair_bbio->iter = repair_bio->bi_iter;
2676
2677 btrfs_debug(btrfs_sb(inode->i_sb),
2678 "repair read error: submitting new read to mirror %d",
2679 failrec->this_mirror);
2680
2681 status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
2682 failrec->bio_flags);
2683 if (status) {
2684 free_io_failure(failure_tree, tree, failrec);
2685 bio_put(repair_bio);
2686 }
2687 return blk_status_to_errno(status);
2688}
2689
2690static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
2691{
2692 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
2693
2694 ASSERT(page_offset(page) <= start &&
2695 start + len <= page_offset(page) + PAGE_SIZE);
2696
2697 if (uptodate) {
2698 if (fsverity_active(page->mapping->host) &&
2699 !PageError(page) &&
2700 !PageUptodate(page) &&
2701 start < i_size_read(page->mapping->host) &&
2702 !fsverity_verify_page(page)) {
2703 btrfs_page_set_error(fs_info, page, start, len);
2704 } else {
2705 btrfs_page_set_uptodate(fs_info, page, start, len);
2706 }
2707 } else {
2708 btrfs_page_clear_uptodate(fs_info, page, start, len);
2709 btrfs_page_set_error(fs_info, page, start, len);
2710 }
2711
2712 if (fs_info->sectorsize == PAGE_SIZE)
2713 unlock_page(page);
2714 else
2715 btrfs_subpage_end_reader(fs_info, page, start, len);
2716}
2717
2718static blk_status_t submit_read_repair(struct inode *inode,
2719 struct bio *failed_bio, u32 bio_offset,
2720 struct page *page, unsigned int pgoff,
2721 u64 start, u64 end, int failed_mirror,
2722 unsigned int error_bitmap,
2723 submit_bio_hook_t *submit_bio_hook)
2724{
2725 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2726 const u32 sectorsize = fs_info->sectorsize;
2727 const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
2728 int error = 0;
2729 int i;
2730
2731 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2732
2733
2734 ASSERT(error_bitmap);
2735
2736
2737
2738
2739
2740 ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
2741
2742
2743 for (i = 0; i < nr_bits; i++) {
2744 const unsigned int offset = i * sectorsize;
2745 struct extent_state *cached = NULL;
2746 bool uptodate = false;
2747 int ret;
2748
2749 if (!(error_bitmap & (1U << i))) {
2750
2751
2752
2753
2754 uptodate = true;
2755 goto next;
2756 }
2757
2758 ret = btrfs_repair_one_sector(inode, failed_bio,
2759 bio_offset + offset,
2760 page, pgoff + offset, start + offset,
2761 failed_mirror, submit_bio_hook);
2762 if (!ret) {
2763
2764
2765
2766
2767
2768
2769 continue;
2770 }
2771
2772
2773
2774
2775 if (!error)
2776 error = ret;
2777next:
2778 end_page_read(page, uptodate, start + offset, sectorsize);
2779 if (uptodate)
2780 set_extent_uptodate(&BTRFS_I(inode)->io_tree,
2781 start + offset,
2782 start + offset + sectorsize - 1,
2783 &cached, GFP_ATOMIC);
2784 unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree,
2785 start + offset,
2786 start + offset + sectorsize - 1,
2787 &cached);
2788 }
2789 return errno_to_blk_status(error);
2790}
2791
2792
2793
2794void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2795{
2796 struct btrfs_inode *inode;
2797 const bool uptodate = (err == 0);
2798 int ret = 0;
2799
2800 ASSERT(page && page->mapping);
2801 inode = BTRFS_I(page->mapping->host);
2802 btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
2803
2804 if (!uptodate) {
2805 const struct btrfs_fs_info *fs_info = inode->root->fs_info;
2806 u32 len;
2807
2808 ASSERT(end + 1 - start <= U32_MAX);
2809 len = end + 1 - start;
2810
2811 btrfs_page_clear_uptodate(fs_info, page, start, len);
2812 btrfs_page_set_error(fs_info, page, start, len);
2813 ret = err < 0 ? err : -EIO;
2814 mapping_set_error(page->mapping, ret);
2815 }
2816}
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827static void end_bio_extent_writepage(struct bio *bio)
2828{
2829 int error = blk_status_to_errno(bio->bi_status);
2830 struct bio_vec *bvec;
2831 u64 start;
2832 u64 end;
2833 struct bvec_iter_all iter_all;
2834 bool first_bvec = true;
2835
2836 ASSERT(!bio_flagged(bio, BIO_CLONED));
2837 bio_for_each_segment_all(bvec, bio, iter_all) {
2838 struct page *page = bvec->bv_page;
2839 struct inode *inode = page->mapping->host;
2840 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2841 const u32 sectorsize = fs_info->sectorsize;
2842
2843
2844 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
2845 btrfs_err(fs_info,
2846 "partial page write in btrfs with offset %u and length %u",
2847 bvec->bv_offset, bvec->bv_len);
2848 else if (!IS_ALIGNED(bvec->bv_len, sectorsize))
2849 btrfs_info(fs_info,
2850 "incomplete page write with offset %u and length %u",
2851 bvec->bv_offset, bvec->bv_len);
2852
2853 start = page_offset(page) + bvec->bv_offset;
2854 end = start + bvec->bv_len - 1;
2855
2856 if (first_bvec) {
2857 btrfs_record_physical_zoned(inode, start, bio);
2858 first_bvec = false;
2859 }
2860
2861 end_extent_writepage(page, error, start, end);
2862
2863 btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
2864 }
2865
2866 bio_put(bio);
2867}
2868
2869
2870
2871
2872
2873
2874
2875struct processed_extent {
2876 struct btrfs_inode *inode;
2877
2878 u64 start;
2879
2880 u64 end;
2881 bool uptodate;
2882};
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895static void endio_readpage_release_extent(struct processed_extent *processed,
2896 struct btrfs_inode *inode, u64 start, u64 end,
2897 bool uptodate)
2898{
2899 struct extent_state *cached = NULL;
2900 struct extent_io_tree *tree;
2901
2902
2903 if (!processed->inode)
2904 goto update;
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917 if (processed->inode == inode && processed->uptodate == uptodate &&
2918 processed->end + 1 >= start && end >= processed->end) {
2919 processed->end = end;
2920 return;
2921 }
2922
2923 tree = &processed->inode->io_tree;
2924
2925
2926
2927
2928 if (processed->uptodate && tree->track_uptodate)
2929 set_extent_uptodate(tree, processed->start, processed->end,
2930 &cached, GFP_ATOMIC);
2931 unlock_extent_cached_atomic(tree, processed->start, processed->end,
2932 &cached);
2933
2934update:
2935
2936 processed->inode = inode;
2937 processed->start = start;
2938 processed->end = end;
2939 processed->uptodate = uptodate;
2940}
2941
2942static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
2943{
2944 ASSERT(PageLocked(page));
2945 if (fs_info->sectorsize == PAGE_SIZE)
2946 return;
2947
2948 ASSERT(PagePrivate(page));
2949 btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
2950}
2951
2952
2953
2954
2955
2956
2957
2958static struct extent_buffer *find_extent_buffer_readpage(
2959 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
2960{
2961 struct extent_buffer *eb;
2962
2963
2964
2965
2966
2967 if (fs_info->sectorsize == PAGE_SIZE) {
2968 ASSERT(PagePrivate(page) && page->private);
2969 return (struct extent_buffer *)page->private;
2970 }
2971
2972
2973 rcu_read_lock();
2974 eb = radix_tree_lookup(&fs_info->buffer_radix,
2975 bytenr >> fs_info->sectorsize_bits);
2976 rcu_read_unlock();
2977 ASSERT(eb);
2978 return eb;
2979}
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992static void end_bio_extent_readpage(struct bio *bio)
2993{
2994 struct bio_vec *bvec;
2995 struct btrfs_bio *bbio = btrfs_bio(bio);
2996 struct extent_io_tree *tree, *failure_tree;
2997 struct processed_extent processed = { 0 };
2998
2999
3000
3001
3002 u32 bio_offset = 0;
3003 int mirror;
3004 int ret;
3005 struct bvec_iter_all iter_all;
3006
3007 ASSERT(!bio_flagged(bio, BIO_CLONED));
3008 bio_for_each_segment_all(bvec, bio, iter_all) {
3009 bool uptodate = !bio->bi_status;
3010 struct page *page = bvec->bv_page;
3011 struct inode *inode = page->mapping->host;
3012 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3013 const u32 sectorsize = fs_info->sectorsize;
3014 unsigned int error_bitmap = (unsigned int)-1;
3015 u64 start;
3016 u64 end;
3017 u32 len;
3018
3019 btrfs_debug(fs_info,
3020 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
3021 bio->bi_iter.bi_sector, bio->bi_status,
3022 bbio->mirror_num);
3023 tree = &BTRFS_I(inode)->io_tree;
3024 failure_tree = &BTRFS_I(inode)->io_failure_tree;
3025
3026
3027
3028
3029
3030
3031
3032
3033 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
3034 btrfs_err(fs_info,
3035 "partial page read in btrfs with offset %u and length %u",
3036 bvec->bv_offset, bvec->bv_len);
3037 else if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len,
3038 sectorsize))
3039 btrfs_info(fs_info,
3040 "incomplete page read with offset %u and length %u",
3041 bvec->bv_offset, bvec->bv_len);
3042
3043 start = page_offset(page) + bvec->bv_offset;
3044 end = start + bvec->bv_len - 1;
3045 len = bvec->bv_len;
3046
3047 mirror = bbio->mirror_num;
3048 if (likely(uptodate)) {
3049 if (is_data_inode(inode)) {
3050 error_bitmap = btrfs_verify_data_csum(bbio,
3051 bio_offset, page, start, end);
3052 ret = error_bitmap;
3053 } else {
3054 ret = btrfs_validate_metadata_buffer(bbio,
3055 page, start, end, mirror);
3056 }
3057 if (ret)
3058 uptodate = false;
3059 else
3060 clean_io_failure(BTRFS_I(inode)->root->fs_info,
3061 failure_tree, tree, start,
3062 page,
3063 btrfs_ino(BTRFS_I(inode)), 0);
3064 }
3065
3066 if (likely(uptodate))
3067 goto readpage_ok;
3068
3069 if (is_data_inode(inode)) {
3070
3071
3072
3073
3074 submit_read_repair(inode, bio, bio_offset, page,
3075 start - page_offset(page), start,
3076 end, mirror, error_bitmap,
3077 btrfs_submit_data_bio);
3078
3079 ASSERT(bio_offset + len > bio_offset);
3080 bio_offset += len;
3081 continue;
3082 } else {
3083 struct extent_buffer *eb;
3084
3085 eb = find_extent_buffer_readpage(fs_info, page, start);
3086 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
3087 eb->read_mirror = mirror;
3088 atomic_dec(&eb->io_pages);
3089 }
3090readpage_ok:
3091 if (likely(uptodate)) {
3092 loff_t i_size = i_size_read(inode);
3093 pgoff_t end_index = i_size >> PAGE_SHIFT;
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104 if (page->index == end_index && i_size <= end) {
3105 u32 zero_start = max(offset_in_page(i_size),
3106 offset_in_page(start));
3107
3108 zero_user_segment(page, zero_start,
3109 offset_in_page(end) + 1);
3110 }
3111 }
3112 ASSERT(bio_offset + len > bio_offset);
3113 bio_offset += len;
3114
3115
3116 end_page_read(page, uptodate, start, len);
3117 endio_readpage_release_extent(&processed, BTRFS_I(inode),
3118 start, end, PageUptodate(page));
3119 }
3120
3121 endio_readpage_release_extent(&processed, NULL, 0, 0, false);
3122 btrfs_bio_free_csum(bbio);
3123 bio_put(bio);
3124}
3125
3126
3127
3128
3129
3130
3131static inline void btrfs_bio_init(struct btrfs_bio *bbio)
3132{
3133 memset(bbio, 0, offsetof(struct btrfs_bio, bio));
3134}
3135
3136
3137
3138
3139
3140
3141struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
3142{
3143 struct bio *bio;
3144
3145 ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
3146 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
3147 btrfs_bio_init(btrfs_bio(bio));
3148 return bio;
3149}
3150
3151struct bio *btrfs_bio_clone(struct bio *bio)
3152{
3153 struct btrfs_bio *bbio;
3154 struct bio *new;
3155
3156
3157 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
3158 bbio = btrfs_bio(new);
3159 btrfs_bio_init(bbio);
3160 bbio->iter = bio->bi_iter;
3161 return new;
3162}
3163
3164struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
3165{
3166 struct bio *bio;
3167 struct btrfs_bio *bbio;
3168
3169 ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
3170
3171
3172 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
3173 ASSERT(bio);
3174
3175 bbio = btrfs_bio(bio);
3176 btrfs_bio_init(bbio);
3177
3178 bio_trim(bio, offset >> 9, size >> 9);
3179 bbio->iter = bio->bi_iter;
3180 return bio;
3181}
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
3201 struct page *page,
3202 u64 disk_bytenr, unsigned int size,
3203 unsigned int pg_offset,
3204 unsigned long bio_flags)
3205{
3206 struct bio *bio = bio_ctrl->bio;
3207 u32 bio_size = bio->bi_iter.bi_size;
3208 u32 real_size;
3209 const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
3210 bool contig;
3211 int ret;
3212
3213 ASSERT(bio);
3214
3215 ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
3216 if (bio_ctrl->bio_flags != bio_flags)
3217 return 0;
3218
3219 if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
3220 contig = bio->bi_iter.bi_sector == sector;
3221 else
3222 contig = bio_end_sector(bio) == sector;
3223 if (!contig)
3224 return 0;
3225
3226 real_size = min(bio_ctrl->len_to_oe_boundary,
3227 bio_ctrl->len_to_stripe_boundary) - bio_size;
3228 real_size = min(real_size, size);
3229
3230
3231
3232
3233
3234 if (real_size == 0)
3235 return 0;
3236
3237 if (bio_op(bio) == REQ_OP_ZONE_APPEND)
3238 ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
3239 else
3240 ret = bio_add_page(bio, page, real_size, pg_offset);
3241
3242 return ret;
3243}
3244
3245static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
3246 struct btrfs_inode *inode, u64 file_offset)
3247{
3248 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3249 struct btrfs_io_geometry geom;
3250 struct btrfs_ordered_extent *ordered;
3251 struct extent_map *em;
3252 u64 logical = (bio_ctrl->bio->bi_iter.bi_sector << SECTOR_SHIFT);
3253 int ret;
3254
3255
3256
3257
3258
3259
3260
3261
3262 if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED) {
3263 bio_ctrl->len_to_oe_boundary = U32_MAX;
3264 bio_ctrl->len_to_stripe_boundary = U32_MAX;
3265 return 0;
3266 }
3267 em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
3268 if (IS_ERR(em))
3269 return PTR_ERR(em);
3270 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio_ctrl->bio),
3271 logical, &geom);
3272 free_extent_map(em);
3273 if (ret < 0) {
3274 return ret;
3275 }
3276 if (geom.len > U32_MAX)
3277 bio_ctrl->len_to_stripe_boundary = U32_MAX;
3278 else
3279 bio_ctrl->len_to_stripe_boundary = (u32)geom.len;
3280
3281 if (bio_op(bio_ctrl->bio) != REQ_OP_ZONE_APPEND) {
3282 bio_ctrl->len_to_oe_boundary = U32_MAX;
3283 return 0;
3284 }
3285
3286
3287 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
3288 if (!ordered) {
3289 bio_ctrl->len_to_oe_boundary = U32_MAX;
3290 return 0;
3291 }
3292
3293 bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
3294 ordered->disk_bytenr + ordered->disk_num_bytes - logical);
3295 btrfs_put_ordered_extent(ordered);
3296 return 0;
3297}
3298
3299static int alloc_new_bio(struct btrfs_inode *inode,
3300 struct btrfs_bio_ctrl *bio_ctrl,
3301 struct writeback_control *wbc,
3302 unsigned int opf,
3303 bio_end_io_t end_io_func,
3304 u64 disk_bytenr, u32 offset, u64 file_offset,
3305 unsigned long bio_flags)
3306{
3307 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3308 struct bio *bio;
3309 int ret;
3310
3311 bio = btrfs_bio_alloc(BIO_MAX_VECS);
3312
3313
3314
3315
3316 if (bio_flags & EXTENT_BIO_COMPRESSED)
3317 bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
3318 else
3319 bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
3320 bio_ctrl->bio = bio;
3321 bio_ctrl->bio_flags = bio_flags;
3322 bio->bi_end_io = end_io_func;
3323 bio->bi_private = &inode->io_tree;
3324 bio->bi_write_hint = inode->vfs_inode.i_write_hint;
3325 bio->bi_opf = opf;
3326 ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
3327 if (ret < 0)
3328 goto error;
3329 if (wbc) {
3330 struct block_device *bdev;
3331
3332 bdev = fs_info->fs_devices->latest_dev->bdev;
3333 bio_set_dev(bio, bdev);
3334 wbc_init_bio(wbc, bio);
3335 }
3336 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
3337 struct btrfs_device *device;
3338
3339 device = btrfs_zoned_get_device(fs_info, disk_bytenr,
3340 fs_info->sectorsize);
3341 if (IS_ERR(device)) {
3342 ret = PTR_ERR(device);
3343 goto error;
3344 }
3345
3346 btrfs_bio(bio)->device = device;
3347 }
3348 return 0;
3349error:
3350 bio_ctrl->bio = NULL;
3351 bio->bi_status = errno_to_blk_status(ret);
3352 bio_endio(bio);
3353 return ret;
3354}
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370static int submit_extent_page(unsigned int opf,
3371 struct writeback_control *wbc,
3372 struct btrfs_bio_ctrl *bio_ctrl,
3373 struct page *page, u64 disk_bytenr,
3374 size_t size, unsigned long pg_offset,
3375 bio_end_io_t end_io_func,
3376 int mirror_num,
3377 unsigned long bio_flags,
3378 bool force_bio_submit)
3379{
3380 int ret = 0;
3381 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
3382 unsigned int cur = pg_offset;
3383
3384 ASSERT(bio_ctrl);
3385
3386 ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
3387 pg_offset + size <= PAGE_SIZE);
3388 if (force_bio_submit && bio_ctrl->bio) {
3389 ret = submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->bio_flags);
3390 bio_ctrl->bio = NULL;
3391 if (ret < 0)
3392 return ret;
3393 }
3394
3395 while (cur < pg_offset + size) {
3396 u32 offset = cur - pg_offset;
3397 int added;
3398
3399
3400 if (!bio_ctrl->bio) {
3401 ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
3402 end_io_func, disk_bytenr, offset,
3403 page_offset(page) + cur,
3404 bio_flags);
3405 if (ret < 0)
3406 return ret;
3407 }
3408
3409
3410
3411
3412 if (bio_flags & EXTENT_BIO_COMPRESSED)
3413 added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
3414 size - offset, pg_offset + offset,
3415 bio_flags);
3416 else
3417 added = btrfs_bio_add_page(bio_ctrl, page,
3418 disk_bytenr + offset, size - offset,
3419 pg_offset + offset, bio_flags);
3420
3421
3422 if (!is_data_inode(&inode->vfs_inode))
3423 ASSERT(added == 0 || added == size - offset);
3424
3425
3426 if (wbc && added)
3427 wbc_account_cgroup_owner(wbc, page, added);
3428
3429
3430 if (added < size - offset) {
3431
3432 ASSERT(bio_ctrl->bio->bi_iter.bi_size);
3433 ret = submit_one_bio(bio_ctrl->bio, mirror_num,
3434 bio_ctrl->bio_flags);
3435 bio_ctrl->bio = NULL;
3436 if (ret < 0)
3437 return ret;
3438 }
3439 cur += added;
3440 }
3441 return 0;
3442}
3443
3444static int attach_extent_buffer_page(struct extent_buffer *eb,
3445 struct page *page,
3446 struct btrfs_subpage *prealloc)
3447{
3448 struct btrfs_fs_info *fs_info = eb->fs_info;
3449 int ret = 0;
3450
3451
3452
3453
3454
3455
3456
3457 if (page->mapping)
3458 lockdep_assert_held(&page->mapping->private_lock);
3459
3460 if (fs_info->sectorsize == PAGE_SIZE) {
3461 if (!PagePrivate(page))
3462 attach_page_private(page, eb);
3463 else
3464 WARN_ON(page->private != (unsigned long)eb);
3465 return 0;
3466 }
3467
3468
3469 if (PagePrivate(page)) {
3470 btrfs_free_subpage(prealloc);
3471 return 0;
3472 }
3473
3474 if (prealloc)
3475
3476 attach_page_private(page, prealloc);
3477 else
3478
3479 ret = btrfs_attach_subpage(fs_info, page,
3480 BTRFS_SUBPAGE_METADATA);
3481 return ret;
3482}
3483
3484int set_page_extent_mapped(struct page *page)
3485{
3486 struct btrfs_fs_info *fs_info;
3487
3488 ASSERT(page->mapping);
3489
3490 if (PagePrivate(page))
3491 return 0;
3492
3493 fs_info = btrfs_sb(page->mapping->host->i_sb);
3494
3495 if (fs_info->sectorsize < PAGE_SIZE)
3496 return btrfs_attach_subpage(fs_info, page, BTRFS_SUBPAGE_DATA);
3497
3498 attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
3499 return 0;
3500}
3501
3502void clear_page_extent_mapped(struct page *page)
3503{
3504 struct btrfs_fs_info *fs_info;
3505
3506 ASSERT(page->mapping);
3507
3508 if (!PagePrivate(page))
3509 return;
3510
3511 fs_info = btrfs_sb(page->mapping->host->i_sb);
3512 if (fs_info->sectorsize < PAGE_SIZE)
3513 return btrfs_detach_subpage(fs_info, page);
3514
3515 detach_page_private(page);
3516}
3517
3518static struct extent_map *
3519__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
3520 u64 start, u64 len, struct extent_map **em_cached)
3521{
3522 struct extent_map *em;
3523
3524 if (em_cached && *em_cached) {
3525 em = *em_cached;
3526 if (extent_map_in_tree(em) && start >= em->start &&
3527 start < extent_map_end(em)) {
3528 refcount_inc(&em->refs);
3529 return em;
3530 }
3531
3532 free_extent_map(em);
3533 *em_cached = NULL;
3534 }
3535
3536 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
3537 if (em_cached && !IS_ERR_OR_NULL(em)) {
3538 BUG_ON(*em_cached);
3539 refcount_inc(&em->refs);
3540 *em_cached = em;
3541 }
3542 return em;
3543}
3544
3545
3546
3547
3548
3549
3550
3551int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
3552 struct btrfs_bio_ctrl *bio_ctrl,
3553 unsigned int read_flags, u64 *prev_em_start)
3554{
3555 struct inode *inode = page->mapping->host;
3556 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3557 u64 start = page_offset(page);
3558 const u64 end = start + PAGE_SIZE - 1;
3559 u64 cur = start;
3560 u64 extent_offset;
3561 u64 last_byte = i_size_read(inode);
3562 u64 block_start;
3563 u64 cur_end;
3564 struct extent_map *em;
3565 int ret = 0;
3566 int nr = 0;
3567 size_t pg_offset = 0;
3568 size_t iosize;
3569 size_t blocksize = inode->i_sb->s_blocksize;
3570 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
3571
3572 ret = set_page_extent_mapped(page);
3573 if (ret < 0) {
3574 unlock_extent(tree, start, end);
3575 btrfs_page_set_error(fs_info, page, start, PAGE_SIZE);
3576 unlock_page(page);
3577 goto out;
3578 }
3579
3580 if (page->index == last_byte >> PAGE_SHIFT) {
3581 size_t zero_offset = offset_in_page(last_byte);
3582
3583 if (zero_offset) {
3584 iosize = PAGE_SIZE - zero_offset;
3585 memzero_page(page, zero_offset, iosize);
3586 flush_dcache_page(page);
3587 }
3588 }
3589 begin_page_read(fs_info, page);
3590 while (cur <= end) {
3591 unsigned long this_bio_flag = 0;
3592 bool force_bio_submit = false;
3593 u64 disk_bytenr;
3594
3595 ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
3596 if (cur >= last_byte) {
3597 struct extent_state *cached = NULL;
3598
3599 iosize = PAGE_SIZE - pg_offset;
3600 memzero_page(page, pg_offset, iosize);
3601 flush_dcache_page(page);
3602 set_extent_uptodate(tree, cur, cur + iosize - 1,
3603 &cached, GFP_NOFS);
3604 unlock_extent_cached(tree, cur,
3605 cur + iosize - 1, &cached);
3606 end_page_read(page, true, cur, iosize);
3607 break;
3608 }
3609 em = __get_extent_map(inode, page, pg_offset, cur,
3610 end - cur + 1, em_cached);
3611 if (IS_ERR_OR_NULL(em)) {
3612 unlock_extent(tree, cur, end);
3613 end_page_read(page, false, cur, end + 1 - cur);
3614 break;
3615 }
3616 extent_offset = cur - em->start;
3617 BUG_ON(extent_map_end(em) <= cur);
3618 BUG_ON(end < cur);
3619
3620 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
3621 this_bio_flag |= EXTENT_BIO_COMPRESSED;
3622 extent_set_compress_type(&this_bio_flag,
3623 em->compress_type);
3624 }
3625
3626 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3627 cur_end = min(extent_map_end(em) - 1, end);
3628 iosize = ALIGN(iosize, blocksize);
3629 if (this_bio_flag & EXTENT_BIO_COMPRESSED)
3630 disk_bytenr = em->block_start;
3631 else
3632 disk_bytenr = em->block_start + extent_offset;
3633 block_start = em->block_start;
3634 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3635 block_start = EXTENT_MAP_HOLE;
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3672 prev_em_start && *prev_em_start != (u64)-1 &&
3673 *prev_em_start != em->start)
3674 force_bio_submit = true;
3675
3676 if (prev_em_start)
3677 *prev_em_start = em->start;
3678
3679 free_extent_map(em);
3680 em = NULL;
3681
3682
3683 if (block_start == EXTENT_MAP_HOLE) {
3684 struct extent_state *cached = NULL;
3685
3686 memzero_page(page, pg_offset, iosize);
3687 flush_dcache_page(page);
3688
3689 set_extent_uptodate(tree, cur, cur + iosize - 1,
3690 &cached, GFP_NOFS);
3691 unlock_extent_cached(tree, cur,
3692 cur + iosize - 1, &cached);
3693 end_page_read(page, true, cur, iosize);
3694 cur = cur + iosize;
3695 pg_offset += iosize;
3696 continue;
3697 }
3698
3699 if (test_range_bit(tree, cur, cur_end,
3700 EXTENT_UPTODATE, 1, NULL)) {
3701 unlock_extent(tree, cur, cur + iosize - 1);
3702 end_page_read(page, true, cur, iosize);
3703 cur = cur + iosize;
3704 pg_offset += iosize;
3705 continue;
3706 }
3707
3708
3709
3710 if (block_start == EXTENT_MAP_INLINE) {
3711 unlock_extent(tree, cur, cur + iosize - 1);
3712 end_page_read(page, false, cur, iosize);
3713 cur = cur + iosize;
3714 pg_offset += iosize;
3715 continue;
3716 }
3717
3718 ret = submit_extent_page(REQ_OP_READ | read_flags, NULL,
3719 bio_ctrl, page, disk_bytenr, iosize,
3720 pg_offset,
3721 end_bio_extent_readpage, 0,
3722 this_bio_flag,
3723 force_bio_submit);
3724 if (!ret) {
3725 nr++;
3726 } else {
3727 unlock_extent(tree, cur, cur + iosize - 1);
3728 end_page_read(page, false, cur, iosize);
3729 goto out;
3730 }
3731 cur = cur + iosize;
3732 pg_offset += iosize;
3733 }
3734out:
3735 return ret;
3736}
3737
3738static inline void contiguous_readpages(struct page *pages[], int nr_pages,
3739 u64 start, u64 end,
3740 struct extent_map **em_cached,
3741 struct btrfs_bio_ctrl *bio_ctrl,
3742 u64 *prev_em_start)
3743{
3744 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
3745 int index;
3746
3747 btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
3748
3749 for (index = 0; index < nr_pages; index++) {
3750 btrfs_do_readpage(pages[index], em_cached, bio_ctrl,
3751 REQ_RAHEAD, prev_em_start);
3752 put_page(pages[index]);
3753 }
3754}
3755
3756static void update_nr_written(struct writeback_control *wbc,
3757 unsigned long nr_written)
3758{
3759 wbc->nr_to_write -= nr_written;
3760}
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
3773 struct page *page, struct writeback_control *wbc)
3774{
3775 const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
3776 u64 delalloc_start = page_offset(page);
3777 u64 delalloc_to_write = 0;
3778
3779 unsigned long nr_written = 0;
3780 int ret;
3781 int page_started = 0;
3782
3783 while (delalloc_start < page_end) {
3784 u64 delalloc_end = page_end;
3785 bool found;
3786
3787 found = find_lock_delalloc_range(&inode->vfs_inode, page,
3788 &delalloc_start,
3789 &delalloc_end);
3790 if (!found) {
3791 delalloc_start = delalloc_end + 1;
3792 continue;
3793 }
3794 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3795 delalloc_end, &page_started, &nr_written, wbc);
3796 if (ret) {
3797 btrfs_page_set_error(inode->root->fs_info, page,
3798 page_offset(page), PAGE_SIZE);
3799 return ret;
3800 }
3801
3802
3803
3804
3805 delalloc_to_write += (delalloc_end - delalloc_start +
3806 PAGE_SIZE) >> PAGE_SHIFT;
3807 delalloc_start = delalloc_end + 1;
3808 }
3809 if (wbc->nr_to_write < delalloc_to_write) {
3810 int thresh = 8192;
3811
3812 if (delalloc_to_write < thresh * 2)
3813 thresh = delalloc_to_write;
3814 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3815 thresh);
3816 }
3817
3818
3819 if (page_started) {
3820
3821
3822
3823
3824 wbc->nr_to_write -= nr_written;
3825 return 1;
3826 }
3827
3828 return 0;
3829}
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
3847 struct page *page, u64 *start, u64 *end)
3848{
3849 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
3850 struct btrfs_subpage_info *spi = fs_info->subpage_info;
3851 u64 orig_start = *start;
3852
3853 unsigned long flags;
3854 int range_start_bit;
3855 int range_end_bit;
3856
3857
3858
3859
3860
3861 if (fs_info->sectorsize == PAGE_SIZE) {
3862 *start = page_offset(page);
3863 *end = page_offset(page) + PAGE_SIZE;
3864 return;
3865 }
3866
3867 range_start_bit = spi->dirty_offset +
3868 (offset_in_page(orig_start) >> fs_info->sectorsize_bits);
3869
3870
3871 spin_lock_irqsave(&subpage->lock, flags);
3872 bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
3873 spi->dirty_offset + spi->bitmap_nr_bits);
3874 spin_unlock_irqrestore(&subpage->lock, flags);
3875
3876 range_start_bit -= spi->dirty_offset;
3877 range_end_bit -= spi->dirty_offset;
3878
3879 *start = page_offset(page) + range_start_bit * fs_info->sectorsize;
3880 *end = page_offset(page) + range_end_bit * fs_info->sectorsize;
3881}
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
3892 struct page *page,
3893 struct writeback_control *wbc,
3894 struct extent_page_data *epd,
3895 loff_t i_size,
3896 int *nr_ret)
3897{
3898 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3899 u64 cur = page_offset(page);
3900 u64 end = cur + PAGE_SIZE - 1;
3901 u64 extent_offset;
3902 u64 block_start;
3903 struct extent_map *em;
3904 int ret = 0;
3905 int nr = 0;
3906 u32 opf = REQ_OP_WRITE;
3907 const unsigned int write_flags = wbc_to_write_flags(wbc);
3908 bool compressed;
3909
3910 ret = btrfs_writepage_cow_fixup(page);
3911 if (ret) {
3912
3913 redirty_page_for_writepage(wbc, page);
3914 unlock_page(page);
3915 return 1;
3916 }
3917
3918
3919
3920
3921
3922 update_nr_written(wbc, 1);
3923
3924 while (cur <= end) {
3925 u64 disk_bytenr;
3926 u64 em_end;
3927 u64 dirty_range_start = cur;
3928 u64 dirty_range_end;
3929 u32 iosize;
3930
3931 if (cur >= i_size) {
3932 btrfs_writepage_endio_finish_ordered(inode, page, cur,
3933 end, true);
3934
3935
3936
3937
3938
3939
3940
3941
3942 btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
3943 break;
3944 }
3945
3946 find_next_dirty_byte(fs_info, page, &dirty_range_start,
3947 &dirty_range_end);
3948 if (cur < dirty_range_start) {
3949 cur = dirty_range_start;
3950 continue;
3951 }
3952
3953 em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
3954 if (IS_ERR_OR_NULL(em)) {
3955 btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
3956 ret = PTR_ERR_OR_ZERO(em);
3957 break;
3958 }
3959
3960 extent_offset = cur - em->start;
3961 em_end = extent_map_end(em);
3962 ASSERT(cur <= em_end);
3963 ASSERT(cur < end);
3964 ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
3965 ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
3966 block_start = em->block_start;
3967 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3968 disk_bytenr = em->block_start + extent_offset;
3969
3970
3971
3972
3973
3974 iosize = min(min(em_end, end + 1), dirty_range_end) - cur;
3975
3976 if (btrfs_use_zone_append(inode, em->block_start))
3977 opf = REQ_OP_ZONE_APPEND;
3978
3979 free_extent_map(em);
3980 em = NULL;
3981
3982
3983
3984
3985
3986 if (compressed || block_start == EXTENT_MAP_HOLE ||
3987 block_start == EXTENT_MAP_INLINE) {
3988 if (compressed)
3989 nr++;
3990 else
3991 btrfs_writepage_endio_finish_ordered(inode,
3992 page, cur, cur + iosize - 1, true);
3993 btrfs_page_clear_dirty(fs_info, page, cur, iosize);
3994 cur += iosize;
3995 continue;
3996 }
3997
3998 btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
3999 if (!PageWriteback(page)) {
4000 btrfs_err(inode->root->fs_info,
4001 "page %lu not writeback, cur %llu end %llu",
4002 page->index, cur, end);
4003 }
4004
4005
4006
4007
4008
4009
4010
4011 btrfs_page_clear_dirty(fs_info, page, cur, iosize);
4012
4013 ret = submit_extent_page(opf | write_flags, wbc,
4014 &epd->bio_ctrl, page,
4015 disk_bytenr, iosize,
4016 cur - page_offset(page),
4017 end_bio_extent_writepage,
4018 0, 0, false);
4019 if (ret) {
4020 btrfs_page_set_error(fs_info, page, cur, iosize);
4021 if (PageWriteback(page))
4022 btrfs_page_clear_writeback(fs_info, page, cur,
4023 iosize);
4024 }
4025
4026 cur += iosize;
4027 nr++;
4028 }
4029
4030
4031
4032
4033 if (!ret)
4034 btrfs_page_assert_not_dirty(fs_info, page);
4035 *nr_ret = nr;
4036 return ret;
4037}
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048static int __extent_writepage(struct page *page, struct writeback_control *wbc,
4049 struct extent_page_data *epd)
4050{
4051 struct inode *inode = page->mapping->host;
4052 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4053 const u64 page_start = page_offset(page);
4054 const u64 page_end = page_start + PAGE_SIZE - 1;
4055 int ret;
4056 int nr = 0;
4057 size_t pg_offset;
4058 loff_t i_size = i_size_read(inode);
4059 unsigned long end_index = i_size >> PAGE_SHIFT;
4060
4061 trace___extent_writepage(page, inode, wbc);
4062
4063 WARN_ON(!PageLocked(page));
4064
4065 btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
4066 page_offset(page), PAGE_SIZE);
4067
4068 pg_offset = offset_in_page(i_size);
4069 if (page->index > end_index ||
4070 (page->index == end_index && !pg_offset)) {
4071 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
4072 unlock_page(page);
4073 return 0;
4074 }
4075
4076 if (page->index == end_index) {
4077 memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
4078 flush_dcache_page(page);
4079 }
4080
4081 ret = set_page_extent_mapped(page);
4082 if (ret < 0) {
4083 SetPageError(page);
4084 goto done;
4085 }
4086
4087 if (!epd->extent_locked) {
4088 ret = writepage_delalloc(BTRFS_I(inode), page, wbc);
4089 if (ret == 1)
4090 return 0;
4091 if (ret)
4092 goto done;
4093 }
4094
4095 ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
4096 &nr);
4097 if (ret == 1)
4098 return 0;
4099
4100done:
4101 if (nr == 0) {
4102
4103 set_page_writeback(page);
4104 end_page_writeback(page);
4105 }
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137 if (PageError(page))
4138 end_extent_writepage(page, ret, page_start, page_end);
4139 if (epd->extent_locked) {
4140
4141
4142
4143
4144
4145
4146 ASSERT(wbc);
4147 btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
4148 wbc->range_end + 1 - wbc->range_start);
4149 } else {
4150 unlock_page(page);
4151 }
4152 ASSERT(ret <= 0);
4153 return ret;
4154}
4155
4156void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
4157{
4158 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
4159 TASK_UNINTERRUPTIBLE);
4160}
4161
4162static void end_extent_buffer_writeback(struct extent_buffer *eb)
4163{
4164 if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
4165 btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
4166
4167 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
4168 smp_mb__after_atomic();
4169 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
4170}
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
4183 struct extent_page_data *epd)
4184{
4185 struct btrfs_fs_info *fs_info = eb->fs_info;
4186 int i, num_pages, failed_page_nr;
4187 int flush = 0;
4188 int ret = 0;
4189
4190 if (!btrfs_try_tree_write_lock(eb)) {
4191 ret = flush_write_bio(epd);
4192 if (ret < 0)
4193 return ret;
4194 flush = 1;
4195 btrfs_tree_lock(eb);
4196 }
4197
4198 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
4199 btrfs_tree_unlock(eb);
4200 if (!epd->sync_io)
4201 return 0;
4202 if (!flush) {
4203 ret = flush_write_bio(epd);
4204 if (ret < 0)
4205 return ret;
4206 flush = 1;
4207 }
4208 while (1) {
4209 wait_on_extent_buffer_writeback(eb);
4210 btrfs_tree_lock(eb);
4211 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
4212 break;
4213 btrfs_tree_unlock(eb);
4214 }
4215 }
4216
4217
4218
4219
4220
4221
4222 spin_lock(&eb->refs_lock);
4223 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
4224 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
4225 spin_unlock(&eb->refs_lock);
4226 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
4227 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
4228 -eb->len,
4229 fs_info->dirty_metadata_batch);
4230 ret = 1;
4231 } else {
4232 spin_unlock(&eb->refs_lock);
4233 }
4234
4235 btrfs_tree_unlock(eb);
4236
4237
4238
4239
4240
4241
4242
4243 if (!ret || fs_info->sectorsize < PAGE_SIZE)
4244 return ret;
4245
4246 num_pages = num_extent_pages(eb);
4247 for (i = 0; i < num_pages; i++) {
4248 struct page *p = eb->pages[i];
4249
4250 if (!trylock_page(p)) {
4251 if (!flush) {
4252 int err;
4253
4254 err = flush_write_bio(epd);
4255 if (err < 0) {
4256 ret = err;
4257 failed_page_nr = i;
4258 goto err_unlock;
4259 }
4260 flush = 1;
4261 }
4262 lock_page(p);
4263 }
4264 }
4265
4266 return ret;
4267err_unlock:
4268
4269 for (i = 0; i < failed_page_nr; i++)
4270 unlock_page(eb->pages[i]);
4271
4272
4273
4274
4275
4276 btrfs_tree_lock(eb);
4277 spin_lock(&eb->refs_lock);
4278 set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
4279 end_extent_buffer_writeback(eb);
4280 spin_unlock(&eb->refs_lock);
4281 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
4282 fs_info->dirty_metadata_batch);
4283 btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
4284 btrfs_tree_unlock(eb);
4285 return ret;
4286}
4287
4288static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
4289{
4290 struct btrfs_fs_info *fs_info = eb->fs_info;
4291
4292 btrfs_page_set_error(fs_info, page, eb->start, eb->len);
4293 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
4294 return;
4295
4296
4297
4298
4299
4300 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4301
4302
4303
4304
4305
4306
4307
4308 mapping_set_error(page->mapping, -EIO);
4309
4310
4311
4312
4313
4314 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
4315 eb->len, fs_info->dirty_metadata_batch);
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355 switch (eb->log_index) {
4356 case -1:
4357 set_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags);
4358 break;
4359 case 0:
4360 set_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
4361 break;
4362 case 1:
4363 set_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
4364 break;
4365 default:
4366 BUG();
4367 }
4368}
4369
4370
4371
4372
4373
4374static struct extent_buffer *find_extent_buffer_nolock(
4375 struct btrfs_fs_info *fs_info, u64 start)
4376{
4377 struct extent_buffer *eb;
4378
4379 rcu_read_lock();
4380 eb = radix_tree_lookup(&fs_info->buffer_radix,
4381 start >> fs_info->sectorsize_bits);
4382 if (eb && atomic_inc_not_zero(&eb->refs)) {
4383 rcu_read_unlock();
4384 return eb;
4385 }
4386 rcu_read_unlock();
4387 return NULL;
4388}
4389
4390
4391
4392
4393
4394
4395
4396static void end_bio_subpage_eb_writepage(struct bio *bio)
4397{
4398 struct btrfs_fs_info *fs_info;
4399 struct bio_vec *bvec;
4400 struct bvec_iter_all iter_all;
4401
4402 fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
4403 ASSERT(fs_info->sectorsize < PAGE_SIZE);
4404
4405 ASSERT(!bio_flagged(bio, BIO_CLONED));
4406 bio_for_each_segment_all(bvec, bio, iter_all) {
4407 struct page *page = bvec->bv_page;
4408 u64 bvec_start = page_offset(page) + bvec->bv_offset;
4409 u64 bvec_end = bvec_start + bvec->bv_len - 1;
4410 u64 cur_bytenr = bvec_start;
4411
4412 ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
4413
4414
4415 while (cur_bytenr <= bvec_end) {
4416 struct extent_buffer *eb;
4417 int done;
4418
4419
4420
4421
4422
4423
4424 eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
4425 ASSERT(eb);
4426
4427 cur_bytenr = eb->start + eb->len;
4428
4429 ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
4430 done = atomic_dec_and_test(&eb->io_pages);
4431 ASSERT(done);
4432
4433 if (bio->bi_status ||
4434 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
4435 ClearPageUptodate(page);
4436 set_btree_ioerr(page, eb);
4437 }
4438
4439 btrfs_subpage_clear_writeback(fs_info, page, eb->start,
4440 eb->len);
4441 end_extent_buffer_writeback(eb);
4442
4443
4444
4445
4446
4447 atomic_dec(&eb->refs);
4448 }
4449 }
4450 bio_put(bio);
4451}
4452
4453static void end_bio_extent_buffer_writepage(struct bio *bio)
4454{
4455 struct bio_vec *bvec;
4456 struct extent_buffer *eb;
4457 int done;
4458 struct bvec_iter_all iter_all;
4459
4460 ASSERT(!bio_flagged(bio, BIO_CLONED));
4461 bio_for_each_segment_all(bvec, bio, iter_all) {
4462 struct page *page = bvec->bv_page;
4463
4464 eb = (struct extent_buffer *)page->private;
4465 BUG_ON(!eb);
4466 done = atomic_dec_and_test(&eb->io_pages);
4467
4468 if (bio->bi_status ||
4469 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
4470 ClearPageUptodate(page);
4471 set_btree_ioerr(page, eb);
4472 }
4473
4474 end_page_writeback(page);
4475
4476 if (!done)
4477 continue;
4478
4479 end_extent_buffer_writeback(eb);
4480 }
4481
4482 bio_put(bio);
4483}
4484
4485static void prepare_eb_write(struct extent_buffer *eb)
4486{
4487 u32 nritems;
4488 unsigned long start;
4489 unsigned long end;
4490
4491 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
4492 atomic_set(&eb->io_pages, num_extent_pages(eb));
4493
4494
4495 nritems = btrfs_header_nritems(eb);
4496 if (btrfs_header_level(eb) > 0) {
4497 end = btrfs_node_key_ptr_offset(nritems);
4498 memzero_extent_buffer(eb, end, eb->len - end);
4499 } else {
4500
4501
4502
4503
4504 start = btrfs_item_nr_offset(nritems);
4505 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
4506 memzero_extent_buffer(eb, start, end - start);
4507 }
4508}
4509
4510
4511
4512
4513
4514static int write_one_subpage_eb(struct extent_buffer *eb,
4515 struct writeback_control *wbc,
4516 struct extent_page_data *epd)
4517{
4518 struct btrfs_fs_info *fs_info = eb->fs_info;
4519 struct page *page = eb->pages[0];
4520 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
4521 bool no_dirty_ebs = false;
4522 int ret;
4523
4524 prepare_eb_write(eb);
4525
4526
4527 lock_page(page);
4528 btrfs_subpage_set_writeback(fs_info, page, eb->start, eb->len);
4529
4530
4531 no_dirty_ebs = btrfs_subpage_clear_and_test_dirty(fs_info, page,
4532 eb->start, eb->len);
4533 if (no_dirty_ebs)
4534 clear_page_dirty_for_io(page);
4535
4536 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
4537 &epd->bio_ctrl, page, eb->start, eb->len,
4538 eb->start - page_offset(page),
4539 end_bio_subpage_eb_writepage, 0, 0, false);
4540 if (ret) {
4541 btrfs_subpage_clear_writeback(fs_info, page, eb->start, eb->len);
4542 set_btree_ioerr(page, eb);
4543 unlock_page(page);
4544
4545 if (atomic_dec_and_test(&eb->io_pages))
4546 end_extent_buffer_writeback(eb);
4547 return -EIO;
4548 }
4549 unlock_page(page);
4550
4551
4552
4553
4554 if (no_dirty_ebs)
4555 update_nr_written(wbc, 1);
4556 return ret;
4557}
4558
4559static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
4560 struct writeback_control *wbc,
4561 struct extent_page_data *epd)
4562{
4563 u64 disk_bytenr = eb->start;
4564 int i, num_pages;
4565 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
4566 int ret = 0;
4567
4568 prepare_eb_write(eb);
4569
4570 num_pages = num_extent_pages(eb);
4571 for (i = 0; i < num_pages; i++) {
4572 struct page *p = eb->pages[i];
4573
4574 clear_page_dirty_for_io(p);
4575 set_page_writeback(p);
4576 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
4577 &epd->bio_ctrl, p, disk_bytenr,
4578 PAGE_SIZE, 0,
4579 end_bio_extent_buffer_writepage,
4580 0, 0, false);
4581 if (ret) {
4582 set_btree_ioerr(p, eb);
4583 if (PageWriteback(p))
4584 end_page_writeback(p);
4585 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
4586 end_extent_buffer_writeback(eb);
4587 ret = -EIO;
4588 break;
4589 }
4590 disk_bytenr += PAGE_SIZE;
4591 update_nr_written(wbc, 1);
4592 unlock_page(p);
4593 }
4594
4595 if (unlikely(ret)) {
4596 for (; i < num_pages; i++) {
4597 struct page *p = eb->pages[i];
4598 clear_page_dirty_for_io(p);
4599 unlock_page(p);
4600 }
4601 }
4602
4603 return ret;
4604}
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620static int submit_eb_subpage(struct page *page,
4621 struct writeback_control *wbc,
4622 struct extent_page_data *epd)
4623{
4624 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
4625 int submitted = 0;
4626 u64 page_start = page_offset(page);
4627 int bit_start = 0;
4628 int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
4629 int ret;
4630
4631
4632 while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
4633 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
4634 struct extent_buffer *eb;
4635 unsigned long flags;
4636 u64 start;
4637
4638
4639
4640
4641
4642 spin_lock(&page->mapping->private_lock);
4643 if (!PagePrivate(page)) {
4644 spin_unlock(&page->mapping->private_lock);
4645 break;
4646 }
4647 spin_lock_irqsave(&subpage->lock, flags);
4648 if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
4649 subpage->bitmaps)) {
4650 spin_unlock_irqrestore(&subpage->lock, flags);
4651 spin_unlock(&page->mapping->private_lock);
4652 bit_start++;
4653 continue;
4654 }
4655
4656 start = page_start + bit_start * fs_info->sectorsize;
4657 bit_start += sectors_per_node;
4658
4659
4660
4661
4662
4663 eb = find_extent_buffer_nolock(fs_info, start);
4664 spin_unlock_irqrestore(&subpage->lock, flags);
4665 spin_unlock(&page->mapping->private_lock);
4666
4667
4668
4669
4670
4671
4672 if (!eb)
4673 continue;
4674
4675 ret = lock_extent_buffer_for_io(eb, epd);
4676 if (ret == 0) {
4677 free_extent_buffer(eb);
4678 continue;
4679 }
4680 if (ret < 0) {
4681 free_extent_buffer(eb);
4682 goto cleanup;
4683 }
4684 ret = write_one_subpage_eb(eb, wbc, epd);
4685 free_extent_buffer(eb);
4686 if (ret < 0)
4687 goto cleanup;
4688 submitted++;
4689 }
4690 return submitted;
4691
4692cleanup:
4693
4694 end_write_bio(epd, ret);
4695 return ret;
4696}
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718static int submit_eb_page(struct page *page, struct writeback_control *wbc,
4719 struct extent_page_data *epd,
4720 struct extent_buffer **eb_context)
4721{
4722 struct address_space *mapping = page->mapping;
4723 struct btrfs_block_group *cache = NULL;
4724 struct extent_buffer *eb;
4725 int ret;
4726
4727 if (!PagePrivate(page))
4728 return 0;
4729
4730 if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
4731 return submit_eb_subpage(page, wbc, epd);
4732
4733 spin_lock(&mapping->private_lock);
4734 if (!PagePrivate(page)) {
4735 spin_unlock(&mapping->private_lock);
4736 return 0;
4737 }
4738
4739 eb = (struct extent_buffer *)page->private;
4740
4741
4742
4743
4744
4745 if (WARN_ON(!eb)) {
4746 spin_unlock(&mapping->private_lock);
4747 return 0;
4748 }
4749
4750 if (eb == *eb_context) {
4751 spin_unlock(&mapping->private_lock);
4752 return 0;
4753 }
4754 ret = atomic_inc_not_zero(&eb->refs);
4755 spin_unlock(&mapping->private_lock);
4756 if (!ret)
4757 return 0;
4758
4759 if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
4760
4761
4762
4763
4764 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
4765 ret = -EAGAIN;
4766 else
4767 ret = 0;
4768 free_extent_buffer(eb);
4769 return ret;
4770 }
4771
4772 *eb_context = eb;
4773
4774 ret = lock_extent_buffer_for_io(eb, epd);
4775 if (ret <= 0) {
4776 btrfs_revert_meta_write_pointer(cache, eb);
4777 if (cache)
4778 btrfs_put_block_group(cache);
4779 free_extent_buffer(eb);
4780 return ret;
4781 }
4782 if (cache) {
4783
4784 btrfs_put_block_group(cache);
4785
4786 if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
4787 set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
4788 }
4789 ret = write_one_eb(eb, wbc, epd);
4790 free_extent_buffer(eb);
4791 if (ret < 0)
4792 return ret;
4793 return 1;
4794}
4795
4796int btree_write_cache_pages(struct address_space *mapping,
4797 struct writeback_control *wbc)
4798{
4799 struct extent_buffer *eb_context = NULL;
4800 struct extent_page_data epd = {
4801 .bio_ctrl = { 0 },
4802 .extent_locked = 0,
4803 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4804 };
4805 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
4806 int ret = 0;
4807 int done = 0;
4808 int nr_to_write_done = 0;
4809 struct pagevec pvec;
4810 int nr_pages;
4811 pgoff_t index;
4812 pgoff_t end;
4813 int scanned = 0;
4814 xa_mark_t tag;
4815
4816 pagevec_init(&pvec);
4817 if (wbc->range_cyclic) {
4818 index = mapping->writeback_index;
4819 end = -1;
4820
4821
4822
4823
4824 scanned = (index == 0);
4825 } else {
4826 index = wbc->range_start >> PAGE_SHIFT;
4827 end = wbc->range_end >> PAGE_SHIFT;
4828 scanned = 1;
4829 }
4830 if (wbc->sync_mode == WB_SYNC_ALL)
4831 tag = PAGECACHE_TAG_TOWRITE;
4832 else
4833 tag = PAGECACHE_TAG_DIRTY;
4834 btrfs_zoned_meta_io_lock(fs_info);
4835retry:
4836 if (wbc->sync_mode == WB_SYNC_ALL)
4837 tag_pages_for_writeback(mapping, index, end);
4838 while (!done && !nr_to_write_done && (index <= end) &&
4839 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
4840 tag))) {
4841 unsigned i;
4842
4843 for (i = 0; i < nr_pages; i++) {
4844 struct page *page = pvec.pages[i];
4845
4846 ret = submit_eb_page(page, wbc, &epd, &eb_context);
4847 if (ret == 0)
4848 continue;
4849 if (ret < 0) {
4850 done = 1;
4851 break;
4852 }
4853
4854
4855
4856
4857
4858
4859 nr_to_write_done = wbc->nr_to_write <= 0;
4860 }
4861 pagevec_release(&pvec);
4862 cond_resched();
4863 }
4864 if (!scanned && !done) {
4865
4866
4867
4868
4869 scanned = 1;
4870 index = 0;
4871 goto retry;
4872 }
4873 if (ret < 0) {
4874 end_write_bio(&epd, ret);
4875 goto out;
4876 }
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904 if (!BTRFS_FS_ERROR(fs_info)) {
4905 ret = flush_write_bio(&epd);
4906 } else {
4907 ret = -EROFS;
4908 end_write_bio(&epd, ret);
4909 }
4910out:
4911 btrfs_zoned_meta_io_unlock(fs_info);
4912 return ret;
4913}
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930static int extent_write_cache_pages(struct address_space *mapping,
4931 struct writeback_control *wbc,
4932 struct extent_page_data *epd)
4933{
4934 struct inode *inode = mapping->host;
4935 int ret = 0;
4936 int done = 0;
4937 int nr_to_write_done = 0;
4938 struct pagevec pvec;
4939 int nr_pages;
4940 pgoff_t index;
4941 pgoff_t end;
4942 pgoff_t done_index;
4943 int range_whole = 0;
4944 int scanned = 0;
4945 xa_mark_t tag;
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956 if (!igrab(inode))
4957 return 0;
4958
4959 pagevec_init(&pvec);
4960 if (wbc->range_cyclic) {
4961 index = mapping->writeback_index;
4962 end = -1;
4963
4964
4965
4966
4967 scanned = (index == 0);
4968 } else {
4969 index = wbc->range_start >> PAGE_SHIFT;
4970 end = wbc->range_end >> PAGE_SHIFT;
4971 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4972 range_whole = 1;
4973 scanned = 1;
4974 }
4975
4976
4977
4978
4979
4980
4981
4982
4983 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4984 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4985 &BTRFS_I(inode)->runtime_flags))
4986 wbc->tagged_writepages = 1;
4987
4988 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4989 tag = PAGECACHE_TAG_TOWRITE;
4990 else
4991 tag = PAGECACHE_TAG_DIRTY;
4992retry:
4993 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4994 tag_pages_for_writeback(mapping, index, end);
4995 done_index = index;
4996 while (!done && !nr_to_write_done && (index <= end) &&
4997 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4998 &index, end, tag))) {
4999 unsigned i;
5000
5001 for (i = 0; i < nr_pages; i++) {
5002 struct page *page = pvec.pages[i];
5003
5004 done_index = page->index + 1;
5005
5006
5007
5008
5009
5010
5011
5012 if (!trylock_page(page)) {
5013 ret = flush_write_bio(epd);
5014 BUG_ON(ret < 0);
5015 lock_page(page);
5016 }
5017
5018 if (unlikely(page->mapping != mapping)) {
5019 unlock_page(page);
5020 continue;
5021 }
5022
5023 if (wbc->sync_mode != WB_SYNC_NONE) {
5024 if (PageWriteback(page)) {
5025 ret = flush_write_bio(epd);
5026 BUG_ON(ret < 0);
5027 }
5028 wait_on_page_writeback(page);
5029 }
5030
5031 if (PageWriteback(page) ||
5032 !clear_page_dirty_for_io(page)) {
5033 unlock_page(page);
5034 continue;
5035 }
5036
5037 ret = __extent_writepage(page, wbc, epd);
5038 if (ret < 0) {
5039 done = 1;
5040 break;
5041 }
5042
5043
5044
5045
5046
5047
5048 nr_to_write_done = wbc->nr_to_write <= 0;
5049 }
5050 pagevec_release(&pvec);
5051 cond_resched();
5052 }
5053 if (!scanned && !done) {
5054
5055
5056
5057
5058 scanned = 1;
5059 index = 0;
5060
5061
5062
5063
5064
5065
5066
5067 ret = flush_write_bio(epd);
5068 if (!ret)
5069 goto retry;
5070 }
5071
5072 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
5073 mapping->writeback_index = done_index;
5074
5075 btrfs_add_delayed_iput(inode);
5076 return ret;
5077}
5078
5079int extent_write_full_page(struct page *page, struct writeback_control *wbc)
5080{
5081 int ret;
5082 struct extent_page_data epd = {
5083 .bio_ctrl = { 0 },
5084 .extent_locked = 0,
5085 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
5086 };
5087
5088 ret = __extent_writepage(page, wbc, &epd);
5089 ASSERT(ret <= 0);
5090 if (ret < 0) {
5091 end_write_bio(&epd, ret);
5092 return ret;
5093 }
5094
5095 ret = flush_write_bio(&epd);
5096 ASSERT(ret <= 0);
5097 return ret;
5098}
5099
5100
5101
5102
5103
5104
5105int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
5106{
5107 bool found_error = false;
5108 int first_error = 0;
5109 int ret = 0;
5110 struct address_space *mapping = inode->i_mapping;
5111 struct page *page;
5112 u64 cur = start;
5113 unsigned long nr_pages;
5114 const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
5115 struct extent_page_data epd = {
5116 .bio_ctrl = { 0 },
5117 .extent_locked = 1,
5118 .sync_io = 1,
5119 };
5120 struct writeback_control wbc_writepages = {
5121 .sync_mode = WB_SYNC_ALL,
5122 .range_start = start,
5123 .range_end = end + 1,
5124
5125 .punt_to_cgroup = 1,
5126 .no_cgroup_owner = 1,
5127 };
5128
5129 ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
5130 nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
5131 PAGE_SHIFT;
5132 wbc_writepages.nr_to_write = nr_pages * 2;
5133
5134 wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
5135 while (cur <= end) {
5136 u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
5137
5138 page = find_get_page(mapping, cur >> PAGE_SHIFT);
5139
5140
5141
5142
5143
5144 ASSERT(PageLocked(page));
5145 ASSERT(PageDirty(page));
5146 clear_page_dirty_for_io(page);
5147 ret = __extent_writepage(page, &wbc_writepages, &epd);
5148 ASSERT(ret <= 0);
5149 if (ret < 0) {
5150 found_error = true;
5151 first_error = ret;
5152 }
5153 put_page(page);
5154 cur = cur_end + 1;
5155 }
5156
5157 if (!found_error)
5158 ret = flush_write_bio(&epd);
5159 else
5160 end_write_bio(&epd, ret);
5161
5162 wbc_detach_inode(&wbc_writepages);
5163 if (found_error)
5164 return first_error;
5165 return ret;
5166}
5167
5168int extent_writepages(struct address_space *mapping,
5169 struct writeback_control *wbc)
5170{
5171 struct inode *inode = mapping->host;
5172 int ret = 0;
5173 struct extent_page_data epd = {
5174 .bio_ctrl = { 0 },
5175 .extent_locked = 0,
5176 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
5177 };
5178
5179
5180
5181
5182
5183 btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
5184 ret = extent_write_cache_pages(mapping, wbc, &epd);
5185 btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
5186 ASSERT(ret <= 0);
5187 if (ret < 0) {
5188 end_write_bio(&epd, ret);
5189 return ret;
5190 }
5191 ret = flush_write_bio(&epd);
5192 return ret;
5193}
5194
5195void extent_readahead(struct readahead_control *rac)
5196{
5197 struct btrfs_bio_ctrl bio_ctrl = { 0 };
5198 struct page *pagepool[16];
5199 struct extent_map *em_cached = NULL;
5200 u64 prev_em_start = (u64)-1;
5201 int nr;
5202
5203 while ((nr = readahead_page_batch(rac, pagepool))) {
5204 u64 contig_start = readahead_pos(rac);
5205 u64 contig_end = contig_start + readahead_batch_length(rac) - 1;
5206
5207 contiguous_readpages(pagepool, nr, contig_start, contig_end,
5208 &em_cached, &bio_ctrl, &prev_em_start);
5209 }
5210
5211 if (em_cached)
5212 free_extent_map(em_cached);
5213
5214 if (bio_ctrl.bio) {
5215 if (submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags))
5216 return;
5217 }
5218}
5219
5220
5221
5222
5223
5224
5225int extent_invalidatepage(struct extent_io_tree *tree,
5226 struct page *page, unsigned long offset)
5227{
5228 struct extent_state *cached_state = NULL;
5229 u64 start = page_offset(page);
5230 u64 end = start + PAGE_SIZE - 1;
5231 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
5232
5233
5234 ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
5235
5236 start += ALIGN(offset, blocksize);
5237 if (start > end)
5238 return 0;
5239
5240 lock_extent_bits(tree, start, end, &cached_state);
5241 wait_on_page_writeback(page);
5242
5243
5244
5245
5246
5247
5248 unlock_extent_cached(tree, start, end, &cached_state);
5249 return 0;
5250}
5251
5252
5253
5254
5255
5256
5257static int try_release_extent_state(struct extent_io_tree *tree,
5258 struct page *page, gfp_t mask)
5259{
5260 u64 start = page_offset(page);
5261 u64 end = start + PAGE_SIZE - 1;
5262 int ret = 1;
5263
5264 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
5265 ret = 0;
5266 } else {
5267
5268
5269
5270
5271
5272
5273 ret = __clear_extent_bit(tree, start, end,
5274 ~(EXTENT_LOCKED | EXTENT_NODATASUM | EXTENT_DELALLOC_NEW),
5275 0, 0, NULL, mask, NULL);
5276
5277
5278
5279
5280 if (ret < 0)
5281 ret = 0;
5282 else
5283 ret = 1;
5284 }
5285 return ret;
5286}
5287
5288
5289
5290
5291
5292
5293int try_release_extent_mapping(struct page *page, gfp_t mask)
5294{
5295 struct extent_map *em;
5296 u64 start = page_offset(page);
5297 u64 end = start + PAGE_SIZE - 1;
5298 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
5299 struct extent_io_tree *tree = &btrfs_inode->io_tree;
5300 struct extent_map_tree *map = &btrfs_inode->extent_tree;
5301
5302 if (gfpflags_allow_blocking(mask) &&
5303 page->mapping->host->i_size > SZ_16M) {
5304 u64 len;
5305 while (start <= end) {
5306 struct btrfs_fs_info *fs_info;
5307 u64 cur_gen;
5308
5309 len = end - start + 1;
5310 write_lock(&map->lock);
5311 em = lookup_extent_mapping(map, start, len);
5312 if (!em) {
5313 write_unlock(&map->lock);
5314 break;
5315 }
5316 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
5317 em->start != start) {
5318 write_unlock(&map->lock);
5319 free_extent_map(em);
5320 break;
5321 }
5322 if (test_range_bit(tree, em->start,
5323 extent_map_end(em) - 1,
5324 EXTENT_LOCKED, 0, NULL))
5325 goto next;
5326
5327
5328
5329
5330
5331
5332 if (list_empty(&em->list) ||
5333 test_bit(EXTENT_FLAG_LOGGING, &em->flags))
5334 goto remove_em;
5335
5336
5337
5338
5339
5340
5341
5342 fs_info = btrfs_inode->root->fs_info;
5343 spin_lock(&fs_info->trans_lock);
5344 cur_gen = fs_info->generation;
5345 spin_unlock(&fs_info->trans_lock);
5346 if (em->generation >= cur_gen)
5347 goto next;
5348remove_em:
5349
5350
5351
5352
5353
5354
5355
5356
5357 remove_extent_mapping(map, em);
5358
5359 free_extent_map(em);
5360next:
5361 start = extent_map_end(em);
5362 write_unlock(&map->lock);
5363
5364
5365 free_extent_map(em);
5366
5367 cond_resched();
5368 }
5369 }
5370 return try_release_extent_state(tree, page, mask);
5371}
5372
5373
5374
5375
5376
5377static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
5378 u64 offset, u64 last)
5379{
5380 u64 sectorsize = btrfs_inode_sectorsize(inode);
5381 struct extent_map *em;
5382 u64 len;
5383
5384 if (offset >= last)
5385 return NULL;
5386
5387 while (1) {
5388 len = last - offset;
5389 if (len == 0)
5390 break;
5391 len = ALIGN(len, sectorsize);
5392 em = btrfs_get_extent_fiemap(inode, offset, len);
5393 if (IS_ERR_OR_NULL(em))
5394 return em;
5395
5396
5397 if (em->block_start != EXTENT_MAP_HOLE)
5398 return em;
5399
5400
5401 offset = extent_map_end(em);
5402 free_extent_map(em);
5403 if (offset >= last)
5404 break;
5405 }
5406 return NULL;
5407}
5408
5409
5410
5411
5412
5413
5414struct fiemap_cache {
5415 u64 offset;
5416 u64 phys;
5417 u64 len;
5418 u32 flags;
5419 bool cached;
5420};
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
5433 struct fiemap_cache *cache,
5434 u64 offset, u64 phys, u64 len, u32 flags)
5435{
5436 int ret = 0;
5437
5438 if (!cache->cached)
5439 goto assign;
5440
5441
5442
5443
5444
5445
5446
5447
5448 if (cache->offset + cache->len > offset) {
5449 WARN_ON(1);
5450 return -EINVAL;
5451 }
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464 if (cache->offset + cache->len == offset &&
5465 cache->phys + cache->len == phys &&
5466 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
5467 (flags & ~FIEMAP_EXTENT_LAST)) {
5468 cache->len += len;
5469 cache->flags |= flags;
5470 goto try_submit_last;
5471 }
5472
5473
5474 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
5475 cache->len, cache->flags);
5476 cache->cached = false;
5477 if (ret)
5478 return ret;
5479assign:
5480 cache->cached = true;
5481 cache->offset = offset;
5482 cache->phys = phys;
5483 cache->len = len;
5484 cache->flags = flags;
5485try_submit_last:
5486 if (cache->flags & FIEMAP_EXTENT_LAST) {
5487 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
5488 cache->phys, cache->len, cache->flags);
5489 cache->cached = false;
5490 }
5491 return ret;
5492}
5493
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
5506 struct fiemap_cache *cache)
5507{
5508 int ret;
5509
5510 if (!cache->cached)
5511 return 0;
5512
5513 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
5514 cache->len, cache->flags);
5515 cache->cached = false;
5516 if (ret > 0)
5517 ret = 0;
5518 return ret;
5519}
5520
5521int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
5522 u64 start, u64 len)
5523{
5524 int ret = 0;
5525 u64 off;
5526 u64 max = start + len;
5527 u32 flags = 0;
5528 u32 found_type;
5529 u64 last;
5530 u64 last_for_get_extent = 0;
5531 u64 disko = 0;
5532 u64 isize = i_size_read(&inode->vfs_inode);
5533 struct btrfs_key found_key;
5534 struct extent_map *em = NULL;
5535 struct extent_state *cached_state = NULL;
5536 struct btrfs_path *path;
5537 struct btrfs_root *root = inode->root;
5538 struct fiemap_cache cache = { 0 };
5539 struct ulist *roots;
5540 struct ulist *tmp_ulist;
5541 int end = 0;
5542 u64 em_start = 0;
5543 u64 em_len = 0;
5544 u64 em_end = 0;
5545
5546 if (len == 0)
5547 return -EINVAL;
5548
5549 path = btrfs_alloc_path();
5550 if (!path)
5551 return -ENOMEM;
5552
5553 roots = ulist_alloc(GFP_KERNEL);
5554 tmp_ulist = ulist_alloc(GFP_KERNEL);
5555 if (!roots || !tmp_ulist) {
5556 ret = -ENOMEM;
5557 goto out_free_ulist;
5558 }
5559
5560
5561
5562
5563
5564 off = 0;
5565 start = round_down(start, btrfs_inode_sectorsize(inode));
5566 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
5567
5568
5569
5570
5571
5572 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
5573 0);
5574 if (ret < 0) {
5575 goto out_free_ulist;
5576 } else {
5577 WARN_ON(!ret);
5578 if (ret == 1)
5579 ret = 0;
5580 }
5581
5582 path->slots[0]--;
5583 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
5584 found_type = found_key.type;
5585
5586
5587 if (found_key.objectid != btrfs_ino(inode) ||
5588 found_type != BTRFS_EXTENT_DATA_KEY) {
5589
5590 last = (u64)-1;
5591 last_for_get_extent = isize;
5592 } else {
5593
5594
5595
5596
5597
5598 last = found_key.offset;
5599 last_for_get_extent = last + 1;
5600 }
5601 btrfs_release_path(path);
5602
5603
5604
5605
5606
5607
5608 if (last < isize) {
5609 last = (u64)-1;
5610 last_for_get_extent = isize;
5611 }
5612
5613 lock_extent_bits(&inode->io_tree, start, start + len - 1,
5614 &cached_state);
5615
5616 em = get_extent_skip_holes(inode, start, last_for_get_extent);
5617 if (!em)
5618 goto out;
5619 if (IS_ERR(em)) {
5620 ret = PTR_ERR(em);
5621 goto out;
5622 }
5623
5624 while (!end) {
5625 u64 offset_in_extent = 0;
5626
5627
5628 if (em->start >= max || extent_map_end(em) < off)
5629 break;
5630
5631
5632
5633
5634
5635
5636
5637 em_start = max(em->start, off);
5638
5639
5640
5641
5642
5643
5644
5645 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
5646 offset_in_extent = em_start - em->start;
5647 em_end = extent_map_end(em);
5648 em_len = em_end - em_start;
5649 flags = 0;
5650 if (em->block_start < EXTENT_MAP_LAST_BYTE)
5651 disko = em->block_start + offset_in_extent;
5652 else
5653 disko = 0;
5654
5655
5656
5657
5658 off = extent_map_end(em);
5659 if (off >= max)
5660 end = 1;
5661
5662 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
5663 end = 1;
5664 flags |= FIEMAP_EXTENT_LAST;
5665 } else if (em->block_start == EXTENT_MAP_INLINE) {
5666 flags |= (FIEMAP_EXTENT_DATA_INLINE |
5667 FIEMAP_EXTENT_NOT_ALIGNED);
5668 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
5669 flags |= (FIEMAP_EXTENT_DELALLOC |
5670 FIEMAP_EXTENT_UNKNOWN);
5671 } else if (fieinfo->fi_extents_max) {
5672 u64 bytenr = em->block_start -
5673 (em->start - em->orig_start);
5674
5675
5676
5677
5678
5679
5680
5681
5682 ret = btrfs_check_shared(root, btrfs_ino(inode),
5683 bytenr, roots, tmp_ulist);
5684 if (ret < 0)
5685 goto out_free;
5686 if (ret)
5687 flags |= FIEMAP_EXTENT_SHARED;
5688 ret = 0;
5689 }
5690 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
5691 flags |= FIEMAP_EXTENT_ENCODED;
5692 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5693 flags |= FIEMAP_EXTENT_UNWRITTEN;
5694
5695 free_extent_map(em);
5696 em = NULL;
5697 if ((em_start >= last) || em_len == (u64)-1 ||
5698 (last == (u64)-1 && isize <= em_end)) {
5699 flags |= FIEMAP_EXTENT_LAST;
5700 end = 1;
5701 }
5702
5703
5704 em = get_extent_skip_holes(inode, off, last_for_get_extent);
5705 if (IS_ERR(em)) {
5706 ret = PTR_ERR(em);
5707 goto out;
5708 }
5709 if (!em) {
5710 flags |= FIEMAP_EXTENT_LAST;
5711 end = 1;
5712 }
5713 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
5714 em_len, flags);
5715 if (ret) {
5716 if (ret == 1)
5717 ret = 0;
5718 goto out_free;
5719 }
5720 }
5721out_free:
5722 if (!ret)
5723 ret = emit_last_fiemap_cache(fieinfo, &cache);
5724 free_extent_map(em);
5725out:
5726 unlock_extent_cached(&inode->io_tree, start, start + len - 1,
5727 &cached_state);
5728
5729out_free_ulist:
5730 btrfs_free_path(path);
5731 ulist_free(roots);
5732 ulist_free(tmp_ulist);
5733 return ret;
5734}
5735
5736static void __free_extent_buffer(struct extent_buffer *eb)
5737{
5738 kmem_cache_free(extent_buffer_cache, eb);
5739}
5740
5741int extent_buffer_under_io(const struct extent_buffer *eb)
5742{
5743 return (atomic_read(&eb->io_pages) ||
5744 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
5745 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
5746}
5747
5748static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
5749{
5750 struct btrfs_subpage *subpage;
5751
5752 lockdep_assert_held(&page->mapping->private_lock);
5753
5754 if (PagePrivate(page)) {
5755 subpage = (struct btrfs_subpage *)page->private;
5756 if (atomic_read(&subpage->eb_refs))
5757 return true;
5758
5759
5760
5761
5762 if (atomic_read(&subpage->readers))
5763 return true;
5764 }
5765 return false;
5766}
5767
5768static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
5769{
5770 struct btrfs_fs_info *fs_info = eb->fs_info;
5771 const bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5772
5773
5774
5775
5776
5777 if (mapped)
5778 spin_lock(&page->mapping->private_lock);
5779
5780 if (!PagePrivate(page)) {
5781 if (mapped)
5782 spin_unlock(&page->mapping->private_lock);
5783 return;
5784 }
5785
5786 if (fs_info->sectorsize == PAGE_SIZE) {
5787
5788
5789
5790
5791
5792
5793
5794 if (PagePrivate(page) &&
5795 page->private == (unsigned long)eb) {
5796 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
5797 BUG_ON(PageDirty(page));
5798 BUG_ON(PageWriteback(page));
5799
5800
5801
5802
5803 detach_page_private(page);
5804 }
5805 if (mapped)
5806 spin_unlock(&page->mapping->private_lock);
5807 return;
5808 }
5809
5810
5811
5812
5813
5814
5815 if (!mapped) {
5816 btrfs_detach_subpage(fs_info, page);
5817 return;
5818 }
5819
5820 btrfs_page_dec_eb_refs(fs_info, page);
5821
5822
5823
5824
5825
5826 if (!page_range_has_eb(fs_info, page))
5827 btrfs_detach_subpage(fs_info, page);
5828
5829 spin_unlock(&page->mapping->private_lock);
5830}
5831
5832
5833static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
5834{
5835 int i;
5836 int num_pages;
5837
5838 ASSERT(!extent_buffer_under_io(eb));
5839
5840 num_pages = num_extent_pages(eb);
5841 for (i = 0; i < num_pages; i++) {
5842 struct page *page = eb->pages[i];
5843
5844 if (!page)
5845 continue;
5846
5847 detach_extent_buffer_page(eb, page);
5848
5849
5850 put_page(page);
5851 }
5852}
5853
5854
5855
5856
5857static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
5858{
5859 btrfs_release_extent_buffer_pages(eb);
5860 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
5861 __free_extent_buffer(eb);
5862}
5863
5864static struct extent_buffer *
5865__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
5866 unsigned long len)
5867{
5868 struct extent_buffer *eb = NULL;
5869
5870 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
5871 eb->start = start;
5872 eb->len = len;
5873 eb->fs_info = fs_info;
5874 eb->bflags = 0;
5875 init_rwsem(&eb->lock);
5876
5877 btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
5878 &fs_info->allocated_ebs);
5879 INIT_LIST_HEAD(&eb->release_list);
5880
5881 spin_lock_init(&eb->refs_lock);
5882 atomic_set(&eb->refs, 1);
5883 atomic_set(&eb->io_pages, 0);
5884
5885 ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
5886
5887 return eb;
5888}
5889
5890struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
5891{
5892 int i;
5893 struct page *p;
5894 struct extent_buffer *new;
5895 int num_pages = num_extent_pages(src);
5896
5897 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
5898 if (new == NULL)
5899 return NULL;
5900
5901
5902
5903
5904
5905
5906 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
5907
5908 for (i = 0; i < num_pages; i++) {
5909 int ret;
5910
5911 p = alloc_page(GFP_NOFS);
5912 if (!p) {
5913 btrfs_release_extent_buffer(new);
5914 return NULL;
5915 }
5916 ret = attach_extent_buffer_page(new, p, NULL);
5917 if (ret < 0) {
5918 put_page(p);
5919 btrfs_release_extent_buffer(new);
5920 return NULL;
5921 }
5922 WARN_ON(PageDirty(p));
5923 new->pages[i] = p;
5924 copy_page(page_address(p), page_address(src->pages[i]));
5925 }
5926 set_extent_buffer_uptodate(new);
5927
5928 return new;
5929}
5930
5931struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5932 u64 start, unsigned long len)
5933{
5934 struct extent_buffer *eb;
5935 int num_pages;
5936 int i;
5937
5938 eb = __alloc_extent_buffer(fs_info, start, len);
5939 if (!eb)
5940 return NULL;
5941
5942 num_pages = num_extent_pages(eb);
5943 for (i = 0; i < num_pages; i++) {
5944 int ret;
5945
5946 eb->pages[i] = alloc_page(GFP_NOFS);
5947 if (!eb->pages[i])
5948 goto err;
5949 ret = attach_extent_buffer_page(eb, eb->pages[i], NULL);
5950 if (ret < 0)
5951 goto err;
5952 }
5953 set_extent_buffer_uptodate(eb);
5954 btrfs_set_header_nritems(eb, 0);
5955 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5956
5957 return eb;
5958err:
5959 for (; i > 0; i--) {
5960 detach_extent_buffer_page(eb, eb->pages[i - 1]);
5961 __free_page(eb->pages[i - 1]);
5962 }
5963 __free_extent_buffer(eb);
5964 return NULL;
5965}
5966
5967struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5968 u64 start)
5969{
5970 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
5971}
5972
5973static void check_buffer_tree_ref(struct extent_buffer *eb)
5974{
5975 int refs;
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999 refs = atomic_read(&eb->refs);
6000 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6001 return;
6002
6003 spin_lock(&eb->refs_lock);
6004 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6005 atomic_inc(&eb->refs);
6006 spin_unlock(&eb->refs_lock);
6007}
6008
6009static void mark_extent_buffer_accessed(struct extent_buffer *eb,
6010 struct page *accessed)
6011{
6012 int num_pages, i;
6013
6014 check_buffer_tree_ref(eb);
6015
6016 num_pages = num_extent_pages(eb);
6017 for (i = 0; i < num_pages; i++) {
6018 struct page *p = eb->pages[i];
6019
6020 if (p != accessed)
6021 mark_page_accessed(p);
6022 }
6023}
6024
6025struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
6026 u64 start)
6027{
6028 struct extent_buffer *eb;
6029
6030 eb = find_extent_buffer_nolock(fs_info, start);
6031 if (!eb)
6032 return NULL;
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
6047 spin_lock(&eb->refs_lock);
6048 spin_unlock(&eb->refs_lock);
6049 }
6050 mark_extent_buffer_accessed(eb, NULL);
6051 return eb;
6052}
6053
6054#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
6055struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
6056 u64 start)
6057{
6058 struct extent_buffer *eb, *exists = NULL;
6059 int ret;
6060
6061 eb = find_extent_buffer(fs_info, start);
6062 if (eb)
6063 return eb;
6064 eb = alloc_dummy_extent_buffer(fs_info, start);
6065 if (!eb)
6066 return ERR_PTR(-ENOMEM);
6067 eb->fs_info = fs_info;
6068again:
6069 ret = radix_tree_preload(GFP_NOFS);
6070 if (ret) {
6071 exists = ERR_PTR(ret);
6072 goto free_eb;
6073 }
6074 spin_lock(&fs_info->buffer_lock);
6075 ret = radix_tree_insert(&fs_info->buffer_radix,
6076 start >> fs_info->sectorsize_bits, eb);
6077 spin_unlock(&fs_info->buffer_lock);
6078 radix_tree_preload_end();
6079 if (ret == -EEXIST) {
6080 exists = find_extent_buffer(fs_info, start);
6081 if (exists)
6082 goto free_eb;
6083 else
6084 goto again;
6085 }
6086 check_buffer_tree_ref(eb);
6087 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
6088
6089 return eb;
6090free_eb:
6091 btrfs_release_extent_buffer(eb);
6092 return exists;
6093}
6094#endif
6095
6096static struct extent_buffer *grab_extent_buffer(
6097 struct btrfs_fs_info *fs_info, struct page *page)
6098{
6099 struct extent_buffer *exists;
6100
6101
6102
6103
6104
6105
6106 if (fs_info->sectorsize < PAGE_SIZE)
6107 return NULL;
6108
6109
6110 if (!PagePrivate(page))
6111 return NULL;
6112
6113
6114
6115
6116
6117
6118
6119 exists = (struct extent_buffer *)page->private;
6120 if (atomic_inc_not_zero(&exists->refs))
6121 return exists;
6122
6123 WARN_ON(PageDirty(page));
6124 detach_page_private(page);
6125 return NULL;
6126}
6127
6128struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
6129 u64 start, u64 owner_root, int level)
6130{
6131 unsigned long len = fs_info->nodesize;
6132 int num_pages;
6133 int i;
6134 unsigned long index = start >> PAGE_SHIFT;
6135 struct extent_buffer *eb;
6136 struct extent_buffer *exists = NULL;
6137 struct page *p;
6138 struct address_space *mapping = fs_info->btree_inode->i_mapping;
6139 int uptodate = 1;
6140 int ret;
6141
6142 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
6143 btrfs_err(fs_info, "bad tree block start %llu", start);
6144 return ERR_PTR(-EINVAL);
6145 }
6146
6147#if BITS_PER_LONG == 32
6148 if (start >= MAX_LFS_FILESIZE) {
6149 btrfs_err_rl(fs_info,
6150 "extent buffer %llu is beyond 32bit page cache limit", start);
6151 btrfs_err_32bit_limit(fs_info);
6152 return ERR_PTR(-EOVERFLOW);
6153 }
6154 if (start >= BTRFS_32BIT_EARLY_WARN_THRESHOLD)
6155 btrfs_warn_32bit_limit(fs_info);
6156#endif
6157
6158 if (fs_info->sectorsize < PAGE_SIZE &&
6159 offset_in_page(start) + len > PAGE_SIZE) {
6160 btrfs_err(fs_info,
6161 "tree block crosses page boundary, start %llu nodesize %lu",
6162 start, len);
6163 return ERR_PTR(-EINVAL);
6164 }
6165
6166 eb = find_extent_buffer(fs_info, start);
6167 if (eb)
6168 return eb;
6169
6170 eb = __alloc_extent_buffer(fs_info, start, len);
6171 if (!eb)
6172 return ERR_PTR(-ENOMEM);
6173 btrfs_set_buffer_lockdep_class(owner_root, eb, level);
6174
6175 num_pages = num_extent_pages(eb);
6176 for (i = 0; i < num_pages; i++, index++) {
6177 struct btrfs_subpage *prealloc = NULL;
6178
6179 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
6180 if (!p) {
6181 exists = ERR_PTR(-ENOMEM);
6182 goto free_eb;
6183 }
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195 if (fs_info->sectorsize < PAGE_SIZE) {
6196 prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
6197 if (IS_ERR(prealloc)) {
6198 ret = PTR_ERR(prealloc);
6199 unlock_page(p);
6200 put_page(p);
6201 exists = ERR_PTR(ret);
6202 goto free_eb;
6203 }
6204 }
6205
6206 spin_lock(&mapping->private_lock);
6207 exists = grab_extent_buffer(fs_info, p);
6208 if (exists) {
6209 spin_unlock(&mapping->private_lock);
6210 unlock_page(p);
6211 put_page(p);
6212 mark_extent_buffer_accessed(exists, p);
6213 btrfs_free_subpage(prealloc);
6214 goto free_eb;
6215 }
6216
6217 ret = attach_extent_buffer_page(eb, p, prealloc);
6218 ASSERT(!ret);
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228 btrfs_page_inc_eb_refs(fs_info, p);
6229 spin_unlock(&mapping->private_lock);
6230
6231 WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
6232 eb->pages[i] = p;
6233 if (!PageUptodate(p))
6234 uptodate = 0;
6235
6236
6237
6238
6239
6240
6241
6242
6243 }
6244 if (uptodate)
6245 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6246again:
6247 ret = radix_tree_preload(GFP_NOFS);
6248 if (ret) {
6249 exists = ERR_PTR(ret);
6250 goto free_eb;
6251 }
6252
6253 spin_lock(&fs_info->buffer_lock);
6254 ret = radix_tree_insert(&fs_info->buffer_radix,
6255 start >> fs_info->sectorsize_bits, eb);
6256 spin_unlock(&fs_info->buffer_lock);
6257 radix_tree_preload_end();
6258 if (ret == -EEXIST) {
6259 exists = find_extent_buffer(fs_info, start);
6260 if (exists)
6261 goto free_eb;
6262 else
6263 goto again;
6264 }
6265
6266 check_buffer_tree_ref(eb);
6267 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
6268
6269
6270
6271
6272
6273
6274 for (i = 0; i < num_pages; i++)
6275 unlock_page(eb->pages[i]);
6276 return eb;
6277
6278free_eb:
6279 WARN_ON(!atomic_dec_and_test(&eb->refs));
6280 for (i = 0; i < num_pages; i++) {
6281 if (eb->pages[i])
6282 unlock_page(eb->pages[i]);
6283 }
6284
6285 btrfs_release_extent_buffer(eb);
6286 return exists;
6287}
6288
6289static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
6290{
6291 struct extent_buffer *eb =
6292 container_of(head, struct extent_buffer, rcu_head);
6293
6294 __free_extent_buffer(eb);
6295}
6296
6297static int release_extent_buffer(struct extent_buffer *eb)
6298 __releases(&eb->refs_lock)
6299{
6300 lockdep_assert_held(&eb->refs_lock);
6301
6302 WARN_ON(atomic_read(&eb->refs) == 0);
6303 if (atomic_dec_and_test(&eb->refs)) {
6304 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
6305 struct btrfs_fs_info *fs_info = eb->fs_info;
6306
6307 spin_unlock(&eb->refs_lock);
6308
6309 spin_lock(&fs_info->buffer_lock);
6310 radix_tree_delete(&fs_info->buffer_radix,
6311 eb->start >> fs_info->sectorsize_bits);
6312 spin_unlock(&fs_info->buffer_lock);
6313 } else {
6314 spin_unlock(&eb->refs_lock);
6315 }
6316
6317 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
6318
6319 btrfs_release_extent_buffer_pages(eb);
6320#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
6321 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
6322 __free_extent_buffer(eb);
6323 return 1;
6324 }
6325#endif
6326 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
6327 return 1;
6328 }
6329 spin_unlock(&eb->refs_lock);
6330
6331 return 0;
6332}
6333
6334void free_extent_buffer(struct extent_buffer *eb)
6335{
6336 int refs;
6337 int old;
6338 if (!eb)
6339 return;
6340
6341 while (1) {
6342 refs = atomic_read(&eb->refs);
6343 if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
6344 || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
6345 refs == 1))
6346 break;
6347 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
6348 if (old == refs)
6349 return;
6350 }
6351
6352 spin_lock(&eb->refs_lock);
6353 if (atomic_read(&eb->refs) == 2 &&
6354 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
6355 !extent_buffer_under_io(eb) &&
6356 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6357 atomic_dec(&eb->refs);
6358
6359
6360
6361
6362
6363 release_extent_buffer(eb);
6364}
6365
6366void free_extent_buffer_stale(struct extent_buffer *eb)
6367{
6368 if (!eb)
6369 return;
6370
6371 spin_lock(&eb->refs_lock);
6372 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
6373
6374 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
6375 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6376 atomic_dec(&eb->refs);
6377 release_extent_buffer(eb);
6378}
6379
6380static void btree_clear_page_dirty(struct page *page)
6381{
6382 ASSERT(PageDirty(page));
6383 ASSERT(PageLocked(page));
6384 clear_page_dirty_for_io(page);
6385 xa_lock_irq(&page->mapping->i_pages);
6386 if (!PageDirty(page))
6387 __xa_clear_mark(&page->mapping->i_pages,
6388 page_index(page), PAGECACHE_TAG_DIRTY);
6389 xa_unlock_irq(&page->mapping->i_pages);
6390}
6391
6392static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb)
6393{
6394 struct btrfs_fs_info *fs_info = eb->fs_info;
6395 struct page *page = eb->pages[0];
6396 bool last;
6397
6398
6399 lock_page(page);
6400 last = btrfs_subpage_clear_and_test_dirty(fs_info, page, eb->start,
6401 eb->len);
6402 if (last)
6403 btree_clear_page_dirty(page);
6404 unlock_page(page);
6405 WARN_ON(atomic_read(&eb->refs) == 0);
6406}
6407
6408void clear_extent_buffer_dirty(const struct extent_buffer *eb)
6409{
6410 int i;
6411 int num_pages;
6412 struct page *page;
6413
6414 if (eb->fs_info->sectorsize < PAGE_SIZE)
6415 return clear_subpage_extent_buffer_dirty(eb);
6416
6417 num_pages = num_extent_pages(eb);
6418
6419 for (i = 0; i < num_pages; i++) {
6420 page = eb->pages[i];
6421 if (!PageDirty(page))
6422 continue;
6423 lock_page(page);
6424 btree_clear_page_dirty(page);
6425 ClearPageError(page);
6426 unlock_page(page);
6427 }
6428 WARN_ON(atomic_read(&eb->refs) == 0);
6429}
6430
6431bool set_extent_buffer_dirty(struct extent_buffer *eb)
6432{
6433 int i;
6434 int num_pages;
6435 bool was_dirty;
6436
6437 check_buffer_tree_ref(eb);
6438
6439 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
6440
6441 num_pages = num_extent_pages(eb);
6442 WARN_ON(atomic_read(&eb->refs) == 0);
6443 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
6444
6445 if (!was_dirty) {
6446 bool subpage = eb->fs_info->sectorsize < PAGE_SIZE;
6447
6448
6449
6450
6451
6452
6453
6454
6455
6456
6457
6458
6459 if (subpage)
6460 lock_page(eb->pages[0]);
6461 for (i = 0; i < num_pages; i++)
6462 btrfs_page_set_dirty(eb->fs_info, eb->pages[i],
6463 eb->start, eb->len);
6464 if (subpage)
6465 unlock_page(eb->pages[0]);
6466 }
6467#ifdef CONFIG_BTRFS_DEBUG
6468 for (i = 0; i < num_pages; i++)
6469 ASSERT(PageDirty(eb->pages[i]));
6470#endif
6471
6472 return was_dirty;
6473}
6474
6475void clear_extent_buffer_uptodate(struct extent_buffer *eb)
6476{
6477 struct btrfs_fs_info *fs_info = eb->fs_info;
6478 struct page *page;
6479 int num_pages;
6480 int i;
6481
6482 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6483 num_pages = num_extent_pages(eb);
6484 for (i = 0; i < num_pages; i++) {
6485 page = eb->pages[i];
6486 if (page)
6487 btrfs_page_clear_uptodate(fs_info, page,
6488 eb->start, eb->len);
6489 }
6490}
6491
6492void set_extent_buffer_uptodate(struct extent_buffer *eb)
6493{
6494 struct btrfs_fs_info *fs_info = eb->fs_info;
6495 struct page *page;
6496 int num_pages;
6497 int i;
6498
6499 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6500 num_pages = num_extent_pages(eb);
6501 for (i = 0; i < num_pages; i++) {
6502 page = eb->pages[i];
6503 btrfs_page_set_uptodate(fs_info, page, eb->start, eb->len);
6504 }
6505}
6506
6507static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
6508 int mirror_num)
6509{
6510 struct btrfs_fs_info *fs_info = eb->fs_info;
6511 struct extent_io_tree *io_tree;
6512 struct page *page = eb->pages[0];
6513 struct btrfs_bio_ctrl bio_ctrl = { 0 };
6514 int ret = 0;
6515
6516 ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
6517 ASSERT(PagePrivate(page));
6518 io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
6519
6520 if (wait == WAIT_NONE) {
6521 if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1))
6522 return -EAGAIN;
6523 } else {
6524 ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1);
6525 if (ret < 0)
6526 return ret;
6527 }
6528
6529 ret = 0;
6530 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
6531 PageUptodate(page) ||
6532 btrfs_subpage_test_uptodate(fs_info, page, eb->start, eb->len)) {
6533 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6534 unlock_extent(io_tree, eb->start, eb->start + eb->len - 1);
6535 return ret;
6536 }
6537
6538 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
6539 eb->read_mirror = 0;
6540 atomic_set(&eb->io_pages, 1);
6541 check_buffer_tree_ref(eb);
6542 btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
6543
6544 btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
6545 ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
6546 page, eb->start, eb->len,
6547 eb->start - page_offset(page),
6548 end_bio_extent_readpage, mirror_num, 0,
6549 true);
6550 if (ret) {
6551
6552
6553
6554
6555
6556 atomic_dec(&eb->io_pages);
6557 }
6558 if (bio_ctrl.bio) {
6559 int tmp;
6560
6561 tmp = submit_one_bio(bio_ctrl.bio, mirror_num, 0);
6562 bio_ctrl.bio = NULL;
6563 if (tmp < 0)
6564 return tmp;
6565 }
6566 if (ret || wait != WAIT_COMPLETE)
6567 return ret;
6568
6569 wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1, EXTENT_LOCKED);
6570 if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
6571 ret = -EIO;
6572 return ret;
6573}
6574
6575int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
6576{
6577 int i;
6578 struct page *page;
6579 int err;
6580 int ret = 0;
6581 int locked_pages = 0;
6582 int all_uptodate = 1;
6583 int num_pages;
6584 unsigned long num_reads = 0;
6585 struct btrfs_bio_ctrl bio_ctrl = { 0 };
6586
6587 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
6588 return 0;
6589
6590
6591
6592
6593
6594
6595 if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
6596 return -EIO;
6597
6598 if (eb->fs_info->sectorsize < PAGE_SIZE)
6599 return read_extent_buffer_subpage(eb, wait, mirror_num);
6600
6601 num_pages = num_extent_pages(eb);
6602 for (i = 0; i < num_pages; i++) {
6603 page = eb->pages[i];
6604 if (wait == WAIT_NONE) {
6605
6606
6607
6608
6609
6610
6611
6612 if (!trylock_page(page))
6613 goto unlock_exit;
6614 } else {
6615 lock_page(page);
6616 }
6617 locked_pages++;
6618 }
6619
6620
6621
6622
6623
6624 for (i = 0; i < num_pages; i++) {
6625 page = eb->pages[i];
6626 if (!PageUptodate(page)) {
6627 num_reads++;
6628 all_uptodate = 0;
6629 }
6630 }
6631
6632 if (all_uptodate) {
6633 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6634 goto unlock_exit;
6635 }
6636
6637 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
6638 eb->read_mirror = 0;
6639 atomic_set(&eb->io_pages, num_reads);
6640
6641
6642
6643
6644 check_buffer_tree_ref(eb);
6645 for (i = 0; i < num_pages; i++) {
6646 page = eb->pages[i];
6647
6648 if (!PageUptodate(page)) {
6649 if (ret) {
6650 atomic_dec(&eb->io_pages);
6651 unlock_page(page);
6652 continue;
6653 }
6654
6655 ClearPageError(page);
6656 err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
6657 &bio_ctrl, page, page_offset(page),
6658 PAGE_SIZE, 0, end_bio_extent_readpage,
6659 mirror_num, 0, false);
6660 if (err) {
6661
6662
6663
6664
6665
6666 ret = err;
6667 SetPageError(page);
6668 unlock_page(page);
6669 atomic_dec(&eb->io_pages);
6670 }
6671 } else {
6672 unlock_page(page);
6673 }
6674 }
6675
6676 if (bio_ctrl.bio) {
6677 err = submit_one_bio(bio_ctrl.bio, mirror_num, bio_ctrl.bio_flags);
6678 bio_ctrl.bio = NULL;
6679 if (err)
6680 return err;
6681 }
6682
6683 if (ret || wait != WAIT_COMPLETE)
6684 return ret;
6685
6686 for (i = 0; i < num_pages; i++) {
6687 page = eb->pages[i];
6688 wait_on_page_locked(page);
6689 if (!PageUptodate(page))
6690 ret = -EIO;
6691 }
6692
6693 return ret;
6694
6695unlock_exit:
6696 while (locked_pages > 0) {
6697 locked_pages--;
6698 page = eb->pages[locked_pages];
6699 unlock_page(page);
6700 }
6701 return ret;
6702}
6703
6704static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
6705 unsigned long len)
6706{
6707 btrfs_warn(eb->fs_info,
6708 "access to eb bytenr %llu len %lu out of range start %lu len %lu",
6709 eb->start, eb->len, start, len);
6710 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
6711
6712 return true;
6713}
6714
6715
6716
6717
6718
6719
6720
6721
6722static inline int check_eb_range(const struct extent_buffer *eb,
6723 unsigned long start, unsigned long len)
6724{
6725 unsigned long offset;
6726
6727
6728 if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len))
6729 return report_eb_range(eb, start, len);
6730
6731 return false;
6732}
6733
6734void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
6735 unsigned long start, unsigned long len)
6736{
6737 size_t cur;
6738 size_t offset;
6739 struct page *page;
6740 char *kaddr;
6741 char *dst = (char *)dstv;
6742 unsigned long i = get_eb_page_index(start);
6743
6744 if (check_eb_range(eb, start, len))
6745 return;
6746
6747 offset = get_eb_offset_in_page(eb, start);
6748
6749 while (len > 0) {
6750 page = eb->pages[i];
6751
6752 cur = min(len, (PAGE_SIZE - offset));
6753 kaddr = page_address(page);
6754 memcpy(dst, kaddr + offset, cur);
6755
6756 dst += cur;
6757 len -= cur;
6758 offset = 0;
6759 i++;
6760 }
6761}
6762
6763int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
6764 void __user *dstv,
6765 unsigned long start, unsigned long len)
6766{
6767 size_t cur;
6768 size_t offset;
6769 struct page *page;
6770 char *kaddr;
6771 char __user *dst = (char __user *)dstv;
6772 unsigned long i = get_eb_page_index(start);
6773 int ret = 0;
6774
6775 WARN_ON(start > eb->len);
6776 WARN_ON(start + len > eb->start + eb->len);
6777
6778 offset = get_eb_offset_in_page(eb, start);
6779
6780 while (len > 0) {
6781 page = eb->pages[i];
6782
6783 cur = min(len, (PAGE_SIZE - offset));
6784 kaddr = page_address(page);
6785 if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
6786 ret = -EFAULT;
6787 break;
6788 }
6789
6790 dst += cur;
6791 len -= cur;
6792 offset = 0;
6793 i++;
6794 }
6795
6796 return ret;
6797}
6798
6799int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
6800 unsigned long start, unsigned long len)
6801{
6802 size_t cur;
6803 size_t offset;
6804 struct page *page;
6805 char *kaddr;
6806 char *ptr = (char *)ptrv;
6807 unsigned long i = get_eb_page_index(start);
6808 int ret = 0;
6809
6810 if (check_eb_range(eb, start, len))
6811 return -EINVAL;
6812
6813 offset = get_eb_offset_in_page(eb, start);
6814
6815 while (len > 0) {
6816 page = eb->pages[i];
6817
6818 cur = min(len, (PAGE_SIZE - offset));
6819
6820 kaddr = page_address(page);
6821 ret = memcmp(ptr, kaddr + offset, cur);
6822 if (ret)
6823 break;
6824
6825 ptr += cur;
6826 len -= cur;
6827 offset = 0;
6828 i++;
6829 }
6830 return ret;
6831}
6832
6833
6834
6835
6836
6837
6838
6839static void assert_eb_page_uptodate(const struct extent_buffer *eb,
6840 struct page *page)
6841{
6842 struct btrfs_fs_info *fs_info = eb->fs_info;
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853 if (fs_info->sectorsize < PAGE_SIZE) {
6854 bool uptodate, error;
6855
6856 uptodate = btrfs_subpage_test_uptodate(fs_info, page,
6857 eb->start, eb->len);
6858 error = btrfs_subpage_test_error(fs_info, page, eb->start, eb->len);
6859 WARN_ON(!uptodate && !error);
6860 } else {
6861 WARN_ON(!PageUptodate(page) && !PageError(page));
6862 }
6863}
6864
6865void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
6866 const void *srcv)
6867{
6868 char *kaddr;
6869
6870 assert_eb_page_uptodate(eb, eb->pages[0]);
6871 kaddr = page_address(eb->pages[0]) +
6872 get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
6873 chunk_tree_uuid));
6874 memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
6875}
6876
6877void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
6878{
6879 char *kaddr;
6880
6881 assert_eb_page_uptodate(eb, eb->pages[0]);
6882 kaddr = page_address(eb->pages[0]) +
6883 get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
6884 memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
6885}
6886
6887void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
6888 unsigned long start, unsigned long len)
6889{
6890 size_t cur;
6891 size_t offset;
6892 struct page *page;
6893 char *kaddr;
6894 char *src = (char *)srcv;
6895 unsigned long i = get_eb_page_index(start);
6896
6897 WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
6898
6899 if (check_eb_range(eb, start, len))
6900 return;
6901
6902 offset = get_eb_offset_in_page(eb, start);
6903
6904 while (len > 0) {
6905 page = eb->pages[i];
6906 assert_eb_page_uptodate(eb, page);
6907
6908 cur = min(len, PAGE_SIZE - offset);
6909 kaddr = page_address(page);
6910 memcpy(kaddr + offset, src, cur);
6911
6912 src += cur;
6913 len -= cur;
6914 offset = 0;
6915 i++;
6916 }
6917}
6918
6919void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
6920 unsigned long len)
6921{
6922 size_t cur;
6923 size_t offset;
6924 struct page *page;
6925 char *kaddr;
6926 unsigned long i = get_eb_page_index(start);
6927
6928 if (check_eb_range(eb, start, len))
6929 return;
6930
6931 offset = get_eb_offset_in_page(eb, start);
6932
6933 while (len > 0) {
6934 page = eb->pages[i];
6935 assert_eb_page_uptodate(eb, page);
6936
6937 cur = min(len, PAGE_SIZE - offset);
6938 kaddr = page_address(page);
6939 memset(kaddr + offset, 0, cur);
6940
6941 len -= cur;
6942 offset = 0;
6943 i++;
6944 }
6945}
6946
6947void copy_extent_buffer_full(const struct extent_buffer *dst,
6948 const struct extent_buffer *src)
6949{
6950 int i;
6951 int num_pages;
6952
6953 ASSERT(dst->len == src->len);
6954
6955 if (dst->fs_info->sectorsize == PAGE_SIZE) {
6956 num_pages = num_extent_pages(dst);
6957 for (i = 0; i < num_pages; i++)
6958 copy_page(page_address(dst->pages[i]),
6959 page_address(src->pages[i]));
6960 } else {
6961 size_t src_offset = get_eb_offset_in_page(src, 0);
6962 size_t dst_offset = get_eb_offset_in_page(dst, 0);
6963
6964 ASSERT(src->fs_info->sectorsize < PAGE_SIZE);
6965 memcpy(page_address(dst->pages[0]) + dst_offset,
6966 page_address(src->pages[0]) + src_offset,
6967 src->len);
6968 }
6969}
6970
6971void copy_extent_buffer(const struct extent_buffer *dst,
6972 const struct extent_buffer *src,
6973 unsigned long dst_offset, unsigned long src_offset,
6974 unsigned long len)
6975{
6976 u64 dst_len = dst->len;
6977 size_t cur;
6978 size_t offset;
6979 struct page *page;
6980 char *kaddr;
6981 unsigned long i = get_eb_page_index(dst_offset);
6982
6983 if (check_eb_range(dst, dst_offset, len) ||
6984 check_eb_range(src, src_offset, len))
6985 return;
6986
6987 WARN_ON(src->len != dst_len);
6988
6989 offset = get_eb_offset_in_page(dst, dst_offset);
6990
6991 while (len > 0) {
6992 page = dst->pages[i];
6993 assert_eb_page_uptodate(dst, page);
6994
6995 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
6996
6997 kaddr = page_address(page);
6998 read_extent_buffer(src, kaddr + offset, src_offset, cur);
6999
7000 src_offset += cur;
7001 len -= cur;
7002 offset = 0;
7003 i++;
7004 }
7005}
7006
7007
7008
7009
7010
7011
7012
7013
7014
7015
7016
7017
7018
7019
7020static inline void eb_bitmap_offset(const struct extent_buffer *eb,
7021 unsigned long start, unsigned long nr,
7022 unsigned long *page_index,
7023 size_t *page_offset)
7024{
7025 size_t byte_offset = BIT_BYTE(nr);
7026 size_t offset;
7027
7028
7029
7030
7031
7032
7033 offset = start + offset_in_page(eb->start) + byte_offset;
7034
7035 *page_index = offset >> PAGE_SHIFT;
7036 *page_offset = offset_in_page(offset);
7037}
7038
7039
7040
7041
7042
7043
7044
7045int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
7046 unsigned long nr)
7047{
7048 u8 *kaddr;
7049 struct page *page;
7050 unsigned long i;
7051 size_t offset;
7052
7053 eb_bitmap_offset(eb, start, nr, &i, &offset);
7054 page = eb->pages[i];
7055 assert_eb_page_uptodate(eb, page);
7056 kaddr = page_address(page);
7057 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
7058}
7059
7060
7061
7062
7063
7064
7065
7066
7067void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
7068 unsigned long pos, unsigned long len)
7069{
7070 u8 *kaddr;
7071 struct page *page;
7072 unsigned long i;
7073 size_t offset;
7074 const unsigned int size = pos + len;
7075 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
7076 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
7077
7078 eb_bitmap_offset(eb, start, pos, &i, &offset);
7079 page = eb->pages[i];
7080 assert_eb_page_uptodate(eb, page);
7081 kaddr = page_address(page);
7082
7083 while (len >= bits_to_set) {
7084 kaddr[offset] |= mask_to_set;
7085 len -= bits_to_set;
7086 bits_to_set = BITS_PER_BYTE;
7087 mask_to_set = ~0;
7088 if (++offset >= PAGE_SIZE && len > 0) {
7089 offset = 0;
7090 page = eb->pages[++i];
7091 assert_eb_page_uptodate(eb, page);
7092 kaddr = page_address(page);
7093 }
7094 }
7095 if (len) {
7096 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
7097 kaddr[offset] |= mask_to_set;
7098 }
7099}
7100
7101
7102
7103
7104
7105
7106
7107
7108
7109void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
7110 unsigned long start, unsigned long pos,
7111 unsigned long len)
7112{
7113 u8 *kaddr;
7114 struct page *page;
7115 unsigned long i;
7116 size_t offset;
7117 const unsigned int size = pos + len;
7118 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
7119 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
7120
7121 eb_bitmap_offset(eb, start, pos, &i, &offset);
7122 page = eb->pages[i];
7123 assert_eb_page_uptodate(eb, page);
7124 kaddr = page_address(page);
7125
7126 while (len >= bits_to_clear) {
7127 kaddr[offset] &= ~mask_to_clear;
7128 len -= bits_to_clear;
7129 bits_to_clear = BITS_PER_BYTE;
7130 mask_to_clear = ~0;
7131 if (++offset >= PAGE_SIZE && len > 0) {
7132 offset = 0;
7133 page = eb->pages[++i];
7134 assert_eb_page_uptodate(eb, page);
7135 kaddr = page_address(page);
7136 }
7137 }
7138 if (len) {
7139 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
7140 kaddr[offset] &= ~mask_to_clear;
7141 }
7142}
7143
7144static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
7145{
7146 unsigned long distance = (src > dst) ? src - dst : dst - src;
7147 return distance < len;
7148}
7149
7150static void copy_pages(struct page *dst_page, struct page *src_page,
7151 unsigned long dst_off, unsigned long src_off,
7152 unsigned long len)
7153{
7154 char *dst_kaddr = page_address(dst_page);
7155 char *src_kaddr;
7156 int must_memmove = 0;
7157
7158 if (dst_page != src_page) {
7159 src_kaddr = page_address(src_page);
7160 } else {
7161 src_kaddr = dst_kaddr;
7162 if (areas_overlap(src_off, dst_off, len))
7163 must_memmove = 1;
7164 }
7165
7166 if (must_memmove)
7167 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
7168 else
7169 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
7170}
7171
7172void memcpy_extent_buffer(const struct extent_buffer *dst,
7173 unsigned long dst_offset, unsigned long src_offset,
7174 unsigned long len)
7175{
7176 size_t cur;
7177 size_t dst_off_in_page;
7178 size_t src_off_in_page;
7179 unsigned long dst_i;
7180 unsigned long src_i;
7181
7182 if (check_eb_range(dst, dst_offset, len) ||
7183 check_eb_range(dst, src_offset, len))
7184 return;
7185
7186 while (len > 0) {
7187 dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
7188 src_off_in_page = get_eb_offset_in_page(dst, src_offset);
7189
7190 dst_i = get_eb_page_index(dst_offset);
7191 src_i = get_eb_page_index(src_offset);
7192
7193 cur = min(len, (unsigned long)(PAGE_SIZE -
7194 src_off_in_page));
7195 cur = min_t(unsigned long, cur,
7196 (unsigned long)(PAGE_SIZE - dst_off_in_page));
7197
7198 copy_pages(dst->pages[dst_i], dst->pages[src_i],
7199 dst_off_in_page, src_off_in_page, cur);
7200
7201 src_offset += cur;
7202 dst_offset += cur;
7203 len -= cur;
7204 }
7205}
7206
7207void memmove_extent_buffer(const struct extent_buffer *dst,
7208 unsigned long dst_offset, unsigned long src_offset,
7209 unsigned long len)
7210{
7211 size_t cur;
7212 size_t dst_off_in_page;
7213 size_t src_off_in_page;
7214 unsigned long dst_end = dst_offset + len - 1;
7215 unsigned long src_end = src_offset + len - 1;
7216 unsigned long dst_i;
7217 unsigned long src_i;
7218
7219 if (check_eb_range(dst, dst_offset, len) ||
7220 check_eb_range(dst, src_offset, len))
7221 return;
7222 if (dst_offset < src_offset) {
7223 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
7224 return;
7225 }
7226 while (len > 0) {
7227 dst_i = get_eb_page_index(dst_end);
7228 src_i = get_eb_page_index(src_end);
7229
7230 dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
7231 src_off_in_page = get_eb_offset_in_page(dst, src_end);
7232
7233 cur = min_t(unsigned long, len, src_off_in_page + 1);
7234 cur = min(cur, dst_off_in_page + 1);
7235 copy_pages(dst->pages[dst_i], dst->pages[src_i],
7236 dst_off_in_page - cur + 1,
7237 src_off_in_page - cur + 1, cur);
7238
7239 dst_end -= cur;
7240 src_end -= cur;
7241 len -= cur;
7242 }
7243}
7244
7245#define GANG_LOOKUP_SIZE 16
7246static struct extent_buffer *get_next_extent_buffer(
7247 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
7248{
7249 struct extent_buffer *gang[GANG_LOOKUP_SIZE];
7250 struct extent_buffer *found = NULL;
7251 u64 page_start = page_offset(page);
7252 u64 cur = page_start;
7253
7254 ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
7255 lockdep_assert_held(&fs_info->buffer_lock);
7256
7257 while (cur < page_start + PAGE_SIZE) {
7258 int ret;
7259 int i;
7260
7261 ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
7262 (void **)gang, cur >> fs_info->sectorsize_bits,
7263 min_t(unsigned int, GANG_LOOKUP_SIZE,
7264 PAGE_SIZE / fs_info->nodesize));
7265 if (ret == 0)
7266 goto out;
7267 for (i = 0; i < ret; i++) {
7268
7269 if (gang[i]->start >= page_start + PAGE_SIZE)
7270 goto out;
7271
7272 if (gang[i]->start >= bytenr) {
7273 found = gang[i];
7274 goto out;
7275 }
7276 }
7277 cur = gang[ret - 1]->start + gang[ret - 1]->len;
7278 }
7279out:
7280 return found;
7281}
7282
7283static int try_release_subpage_extent_buffer(struct page *page)
7284{
7285 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
7286 u64 cur = page_offset(page);
7287 const u64 end = page_offset(page) + PAGE_SIZE;
7288 int ret;
7289
7290 while (cur < end) {
7291 struct extent_buffer *eb = NULL;
7292
7293
7294
7295
7296
7297
7298
7299
7300
7301 spin_lock(&fs_info->buffer_lock);
7302 eb = get_next_extent_buffer(fs_info, page, cur);
7303 if (!eb) {
7304
7305 spin_unlock(&fs_info->buffer_lock);
7306 break;
7307 }
7308 cur = eb->start + eb->len;
7309
7310
7311
7312
7313
7314 spin_lock(&eb->refs_lock);
7315 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
7316 spin_unlock(&eb->refs_lock);
7317 spin_unlock(&fs_info->buffer_lock);
7318 break;
7319 }
7320 spin_unlock(&fs_info->buffer_lock);
7321
7322
7323
7324
7325
7326
7327 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
7328 spin_unlock(&eb->refs_lock);
7329 break;
7330 }
7331
7332
7333
7334
7335
7336
7337 release_extent_buffer(eb);
7338 }
7339
7340
7341
7342
7343 spin_lock(&page->mapping->private_lock);
7344 if (!PagePrivate(page))
7345 ret = 1;
7346 else
7347 ret = 0;
7348 spin_unlock(&page->mapping->private_lock);
7349 return ret;
7350
7351}
7352
7353int try_release_extent_buffer(struct page *page)
7354{
7355 struct extent_buffer *eb;
7356
7357 if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
7358 return try_release_subpage_extent_buffer(page);
7359
7360
7361
7362
7363
7364 spin_lock(&page->mapping->private_lock);
7365 if (!PagePrivate(page)) {
7366 spin_unlock(&page->mapping->private_lock);
7367 return 1;
7368 }
7369
7370 eb = (struct extent_buffer *)page->private;
7371 BUG_ON(!eb);
7372
7373
7374
7375
7376
7377
7378 spin_lock(&eb->refs_lock);
7379 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
7380 spin_unlock(&eb->refs_lock);
7381 spin_unlock(&page->mapping->private_lock);
7382 return 0;
7383 }
7384 spin_unlock(&page->mapping->private_lock);
7385
7386
7387
7388
7389
7390 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
7391 spin_unlock(&eb->refs_lock);
7392 return 0;
7393 }
7394
7395 return release_extent_buffer(eb);
7396}
7397
7398
7399
7400
7401
7402
7403
7404
7405
7406
7407
7408
7409
7410void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
7411 u64 bytenr, u64 owner_root, u64 gen, int level)
7412{
7413 struct extent_buffer *eb;
7414 int ret;
7415
7416 eb = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
7417 if (IS_ERR(eb))
7418 return;
7419
7420 if (btrfs_buffer_uptodate(eb, gen, 1)) {
7421 free_extent_buffer(eb);
7422 return;
7423 }
7424
7425 ret = read_extent_buffer_pages(eb, WAIT_NONE, 0);
7426 if (ret < 0)
7427 free_extent_buffer_stale(eb);
7428 else
7429 free_extent_buffer(eb);
7430}
7431
7432
7433
7434
7435
7436
7437
7438
7439
7440void btrfs_readahead_node_child(struct extent_buffer *node, int slot)
7441{
7442 btrfs_readahead_tree_block(node->fs_info,
7443 btrfs_node_blockptr(node, slot),
7444 btrfs_header_owner(node),
7445 btrfs_node_ptr_generation(node, slot),
7446 btrfs_header_level(node) - 1);
7447}
7448