// SPDX-License-Identifier: GPL-2.0

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"
#include "subpage.h"
#include "zoned.h"
#include "block-group.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
    return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

static inline void btrfs_leak_debug_add(spinlock_t *lock,
                                        struct list_head *new,
                                        struct list_head *head)
{
    unsigned long flags;

    spin_lock_irqsave(lock, flags);
    list_add(new, head);
    spin_unlock_irqrestore(lock, flags);
}

static inline void btrfs_leak_debug_del(spinlock_t *lock,
                                        struct list_head *entry)
{
    unsigned long flags;

    spin_lock_irqsave(lock, flags);
    list_del(entry);
    spin_unlock_irqrestore(lock, flags);
}

void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
{
    struct extent_buffer *eb;
    unsigned long flags;

    /*
     * The allocated_ebs list is only initialized once open_ctree has run,
     * so skip the check if it was never set up.
     */
    if (!fs_info->allocated_ebs.next)
        return;

    spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
    while (!list_empty(&fs_info->allocated_ebs)) {
        eb = list_first_entry(&fs_info->allocated_ebs,
                              struct extent_buffer, leak_list);
        pr_err(
    "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
               eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
               btrfs_header_owner(eb));
        list_del(&eb->leak_list);
        kmem_cache_free(extent_buffer_cache, eb);
    }
    spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
    struct extent_state *state;

    while (!list_empty(&states)) {
        state = list_entry(states.next, struct extent_state, leak_list);
        pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
               state->start, state->end, state->state,
               extent_state_in_tree(state),
               refcount_read(&state->refs));
        list_del(&state->leak_list);
        kmem_cache_free(extent_state_cache, state);
    }
}

#define btrfs_debug_check_extent_io_range(tree, start, end) \
    __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
        struct extent_io_tree *tree, u64 start, u64 end)
{
    struct inode *inode = tree->private_data;
    u64 isize;

    if (!inode || !is_data_inode(inode))
        return;

    isize = i_size_read(inode);
    if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
        btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
            "%s: ino %llu isize %llu odd range [%llu,%llu]",
            caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
    }
}
#else
#define btrfs_leak_debug_add(lock, new, head)        do {} while (0)
#define btrfs_leak_debug_del(lock, entry)            do {} while (0)
#define btrfs_extent_state_leak_debug_check()        do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)   do {} while (0)
#endif

struct tree_entry {
    u64 start;
    u64 end;
    struct rb_node rb_node;
};

struct extent_page_data {
    struct btrfs_bio_ctrl bio_ctrl;

    /*
     * Tells writepage not to lock the state bits for this range; it still
     * does the unlocking.
     */
    unsigned int extent_locked:1;

    /* Tells the submit_bio code to use REQ_SYNC */
    unsigned int sync_io:1;
};

/*
 * Record the range [state->start, state->end] in @changeset. No-op when the
 * operation would not actually change the tracked bits.
 */
static int add_extent_changeset(struct extent_state *state, u32 bits,
                                struct extent_changeset *changeset,
                                int set)
{
    int ret;

    if (!changeset)
        return 0;
    if (set && (state->state & bits) == bits)
        return 0;
    if (!set && (state->state & bits) == 0)
        return 0;
    changeset->bytes_changed += state->end - state->start + 1;
    ret = ulist_add(&changeset->range_changed, state->start, state->end,
                    GFP_ATOMIC);
    return ret;
}

int __must_check submit_one_bio(struct bio *bio, int mirror_num,
                                unsigned long bio_flags)
{
    blk_status_t ret = 0;
    struct extent_io_tree *tree = bio->bi_private;

    bio->bi_private = NULL;

    /* Caller should ensure the bio has at least some range added */
    ASSERT(bio->bi_iter.bi_size);
    if (is_data_inode(tree->private_data))
        ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
                                    bio_flags);
    else
        ret = btrfs_submit_metadata_bio(tree->private_data, bio,
                                        mirror_num, bio_flags);

    return blk_status_to_errno(ret);
}

/* Cleanup unsubmitted bios */
static void end_write_bio(struct extent_page_data *epd, int ret)
{
    struct bio *bio = epd->bio_ctrl.bio;

    if (bio) {
        bio->bi_status = errno_to_blk_status(ret);
        bio_endio(bio);
        epd->bio_ctrl.bio = NULL;
    }
}

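/*
 * Submit the bio held by @epd via submit_one_bio().
 *
 * Return 0 if everything is OK.
 * Return <0 for error.
 */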
static int __must_check flush_write_bio(struct extent_page_data *epd)
{
    int ret = 0;
    struct bio *bio = epd->bio_ctrl.bio;

    if (bio) {
        ret = submit_one_bio(bio, 0, 0);
        /*
         * Cleanup of the bio is handled by its endio function, which is
         * triggered either by successful submission or by the error path
         * of the submit hook. So no matter what happened, we don't need
         * to clean up epd->bio here.
         */
        epd->bio_ctrl.bio = NULL;
    }
    return ret;
}

int __init extent_state_cache_init(void)
{
    extent_state_cache = kmem_cache_create("btrfs_extent_state",
                                           sizeof(struct extent_state), 0,
                                           SLAB_MEM_SPREAD, NULL);
    if (!extent_state_cache)
        return -ENOMEM;
    return 0;
}

int __init extent_io_init(void)
{
    extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
                                            sizeof(struct extent_buffer), 0,
                                            SLAB_MEM_SPREAD, NULL);
    if (!extent_buffer_cache)
        return -ENOMEM;

    if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
                    offsetof(struct btrfs_io_bio, bio),
                    BIOSET_NEED_BVECS))
        goto free_buffer_cache;

    if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
        goto free_bioset;

    return 0;

free_bioset:
    bioset_exit(&btrfs_bioset);

free_buffer_cache:
    kmem_cache_destroy(extent_buffer_cache);
    extent_buffer_cache = NULL;
    return -ENOMEM;
}

void __cold extent_state_cache_exit(void)
{
    btrfs_extent_state_leak_debug_check();
    kmem_cache_destroy(extent_state_cache);
}

void __cold extent_io_exit(void)
{
    /*
     * Make sure all delayed rcu free are flushed before we
     * destroy caches.
     */
    rcu_barrier();
    kmem_cache_destroy(extent_buffer_cache);
    bioset_exit(&btrfs_bioset);
}

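/*
 * For the file_extent_tree, we want to hold the inode lock when we look up
 * and update the disk_i_size, but lockdep will complain because our io_tree
 * holds the tree lock while taking the inode lock when setting delalloc.
 * These two things are only done in specific contexts, so a separate lock
 * class for the file extent tree keeps lockdep happy.
 */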
static struct lock_class_key file_extent_tree_class;

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
                         struct extent_io_tree *tree, unsigned int owner,
                         void *private_data)
{
    tree->fs_info = fs_info;
    tree->state = RB_ROOT;
    tree->dirty_bytes = 0;
    spin_lock_init(&tree->lock);
    tree->private_data = private_data;
    tree->owner = owner;
    if (owner == IO_TREE_INODE_FILE_EXTENT)
        lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

void extent_io_tree_release(struct extent_io_tree *tree)
{
    spin_lock(&tree->lock);
    /*
     * Do a single barrier for the waitqueue_active check here, the state
     * of the waitqueue should not change once extent_io_tree_release is
     * called.
     */
    smp_mb();
    while (!RB_EMPTY_ROOT(&tree->state)) {
        struct rb_node *node;
        struct extent_state *state;

        node = rb_first(&tree->state);
        state = rb_entry(node, struct extent_state, rb_node);
        rb_erase(&state->rb_node, &tree->state);
        RB_CLEAR_NODE(&state->rb_node);
        /*
         * btree io trees aren't supposed to have tasks waiting for
         * changes in the flags of extent states ever.
         */
        ASSERT(!waitqueue_active(&state->wq));
        free_extent_state(state);

        cond_resched_lock(&tree->lock);
    }
    spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
    struct extent_state *state;

    /*
     * The given mask might be not appropriate for the slab allocator,
     * drop the unsupported bits.
     */
    mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
    state = kmem_cache_alloc(extent_state_cache, mask);
    if (!state)
        return state;
    state->state = 0;
    state->failrec = NULL;
    RB_CLEAR_NODE(&state->rb_node);
    btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
    refcount_set(&state->refs, 1);
    init_waitqueue_head(&state->wq);
    trace_alloc_extent_state(state, mask, _RET_IP_);
    return state;
}

void free_extent_state(struct extent_state *state)
{
    if (!state)
        return;
    if (refcount_dec_and_test(&state->refs)) {
        WARN_ON(extent_state_in_tree(state));
        btrfs_leak_debug_del(&leak_lock, &state->leak_list);
        trace_free_extent_state(state, _RET_IP_);
        kmem_cache_free(extent_state_cache, state);
    }
}

static struct rb_node *tree_insert(struct rb_root *root,
                                   struct rb_node *search_start,
                                   u64 offset,
                                   struct rb_node *node,
                                   struct rb_node ***p_in,
                                   struct rb_node **parent_in)
{
    struct rb_node **p;
    struct rb_node *parent = NULL;
    struct tree_entry *entry;

    if (p_in && parent_in) {
        p = *p_in;
        parent = *parent_in;
        goto do_insert;
    }

    p = search_start ? &search_start : &root->rb_node;
    while (*p) {
        parent = *p;
        entry = rb_entry(parent, struct tree_entry, rb_node);

        if (offset < entry->start)
            p = &(*p)->rb_left;
        else if (offset > entry->end)
            p = &(*p)->rb_right;
        else
            return parent;
    }

do_insert:
    rb_link_node(node, parent, p);
    rb_insert_color(node, root);
    return NULL;
}

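/*
 * Search @tree for an entry that contains @offset, i.e. where
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @next_ret:   returns the first entry whose range ends at or after @offset
 * @prev_ret:   returns the last entry whose range begins at or before @offset
 * @p_ret:      link where a new node would be anchored (used when inserting)
 * @parent_ret: entry which would be the parent of a newly inserted node
 *
 * Returns a pointer to the entry that contains @offset. If no such entry
 * exists, NULL is returned and the other return pointers are filled in;
 * otherwise they are left untouched.
 */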
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
                                      struct rb_node **next_ret,
                                      struct rb_node **prev_ret,
                                      struct rb_node ***p_ret,
                                      struct rb_node **parent_ret)
{
    struct rb_root *root = &tree->state;
    struct rb_node **n = &root->rb_node;
    struct rb_node *prev = NULL;
    struct rb_node *orig_prev = NULL;
    struct tree_entry *entry;
    struct tree_entry *prev_entry = NULL;

    while (*n) {
        prev = *n;
        entry = rb_entry(prev, struct tree_entry, rb_node);
        prev_entry = entry;

        if (offset < entry->start)
            n = &(*n)->rb_left;
        else if (offset > entry->end)
            n = &(*n)->rb_right;
        else
            return *n;
    }

    if (p_ret)
        *p_ret = n;
    if (parent_ret)
        *parent_ret = prev;

    if (next_ret) {
        orig_prev = prev;
        while (prev && offset > prev_entry->end) {
            prev = rb_next(prev);
            prev_entry = rb_entry(prev, struct tree_entry, rb_node);
        }
        *next_ret = prev;
        prev = orig_prev;
    }

    if (prev_ret) {
        prev_entry = rb_entry(prev, struct tree_entry, rb_node);
        while (prev && offset < prev_entry->start) {
            prev = rb_prev(prev);
            prev_entry = rb_entry(prev, struct tree_entry, rb_node);
        }
        *prev_ret = prev;
    }
    return NULL;
}

static inline struct rb_node *
tree_search_for_insert(struct extent_io_tree *tree,
                       u64 offset,
                       struct rb_node ***p_ret,
                       struct rb_node **parent_ret)
{
    struct rb_node *next = NULL;
    struct rb_node *ret;

    ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
    if (!ret)
        return next;
    return ret;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
                                          u64 offset)
{
    return tree_search_for_insert(tree, offset, NULL, NULL);
}

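/*
 * Utility function to look for merge candidates inside a given range. Any
 * extents with matching state are merged together into a single extent in
 * the tree. Extents with EXTENT_LOCKED or EXTENT_BOUNDARY in their state
 * are never merged.
 *
 * This should be called with the tree lock held.
 */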
static void merge_state(struct extent_io_tree *tree,
                        struct extent_state *state)
{
    struct extent_state *other;
    struct rb_node *other_node;

    if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
        return;

    other_node = rb_prev(&state->rb_node);
    if (other_node) {
        other = rb_entry(other_node, struct extent_state, rb_node);
        if (other->end == state->start - 1 &&
            other->state == state->state) {
            if (tree->private_data &&
                is_data_inode(tree->private_data))
                btrfs_merge_delalloc_extent(tree->private_data,
                                            state, other);
            state->start = other->start;
            rb_erase(&other->rb_node, &tree->state);
            RB_CLEAR_NODE(&other->rb_node);
            free_extent_state(other);
        }
    }
    other_node = rb_next(&state->rb_node);
    if (other_node) {
        other = rb_entry(other_node, struct extent_state, rb_node);
        if (other->start == state->end + 1 &&
            other->state == state->state) {
            if (tree->private_data &&
                is_data_inode(tree->private_data))
                btrfs_merge_delalloc_extent(tree->private_data,
                                            state, other);
            state->end = other->end;
            rb_erase(&other->rb_node, &tree->state);
            RB_CLEAR_NODE(&other->rb_node);
            free_extent_state(other);
        }
    }
}

static void set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state, u32 *bits,
                           struct extent_changeset *changeset);

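/*
 * Insert an extent_state struct into the tree. 'bits' are set on the struct
 * before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */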
static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
                        struct rb_node ***p,
                        struct rb_node **parent,
                        u32 *bits, struct extent_changeset *changeset)
{
    struct rb_node *node;

    if (end < start) {
        btrfs_err(tree->fs_info,
                  "insert state: end < start %llu %llu", end, start);
        WARN_ON(1);
    }
    state->start = start;
    state->end = end;

    set_state_bits(tree, state, bits, changeset);

    node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
    if (node) {
        struct extent_state *found;

        found = rb_entry(node, struct extent_state, rb_node);
        btrfs_err(tree->fs_info,
                  "found node %llu %llu on insert of %llu %llu",
                  found->start, found->end, start, end);
        return -EEXIST;
    }
    merge_state(tree, state);
    return 0;
}

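/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two tree entries:
 *     prealloc: [orig->start, split - 1]
 *     orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held by
 * the caller.
 */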
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
                       struct extent_state *prealloc, u64 split)
{
    struct rb_node *node;

    if (tree->private_data && is_data_inode(tree->private_data))
        btrfs_split_delalloc_extent(tree->private_data, orig, split);

    prealloc->start = orig->start;
    prealloc->end = split - 1;
    prealloc->state = orig->state;
    orig->start = split;

    node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
                       &prealloc->rb_node, NULL, NULL);
    if (node) {
        free_extent_state(prealloc);
        return -EEXIST;
    }
    return 0;
}

static struct extent_state *next_state(struct extent_state *state)
{
    struct rb_node *next = rb_next(&state->rb_node);

    if (next)
        return rb_entry(next, struct extent_state, rb_node);
    else
        return NULL;
}

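/*
 * Utility function to clear some bits in an extent state struct. It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the struct
 * is freed and removed from the tree.
 */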
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
                                            struct extent_state *state,
                                            u32 *bits, int wake,
                                            struct extent_changeset *changeset)
{
    struct extent_state *next;
    u32 bits_to_clear = *bits & ~EXTENT_CTLBITS;
    int ret;

    if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
        u64 range = state->end - state->start + 1;

        WARN_ON(range > tree->dirty_bytes);
        tree->dirty_bytes -= range;
    }

    if (tree->private_data && is_data_inode(tree->private_data))
        btrfs_clear_delalloc_extent(tree->private_data, state, bits);

    ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
    BUG_ON(ret < 0);
    state->state &= ~bits_to_clear;
    if (wake)
        wake_up(&state->wq);
    if (state->state == 0) {
        next = next_state(state);
        if (extent_state_in_tree(state)) {
            rb_erase(&state->rb_node, &tree->state);
            RB_CLEAR_NODE(&state->rb_node);
            free_extent_state(state);
        } else {
            WARN_ON(1);
        }
    } else {
        merge_state(tree, state);
        next = next_state(state);
    }
    return next;
}

static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
    if (!prealloc)
        prealloc = alloc_extent_state(GFP_ATOMIC);

    return prealloc;
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
    btrfs_panic(tree->fs_info, err,
        "locking error: extent tree was modified by another thread while locked");
}

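/*
 * Clear some bits on a range in the tree. This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
 * given range from the tree regardless of state (ie. for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */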
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                       u32 bits, int wake, int delete,
                       struct extent_state **cached_state,
                       gfp_t mask, struct extent_changeset *changeset)
{
    struct extent_state *state;
    struct extent_state *cached;
    struct extent_state *prealloc = NULL;
    struct rb_node *node;
    u64 last_end;
    int err;
    int clear = 0;

    btrfs_debug_check_extent_io_range(tree, start, end);
    trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

    if (bits & EXTENT_DELALLOC)
        bits |= EXTENT_NORESERVE;

    if (delete)
        bits |= ~EXTENT_CTLBITS;

    if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
        clear = 1;
again:
    if (!prealloc && gfpflags_allow_blocking(mask)) {
        /*
         * Don't care for allocation failure here because we might end
         * up not needing the pre-allocated extent state at all, which
         * is the case if we only have in the tree extent states that
         * cover our input range and don't cover too any other range.
         * If we end up needing a new extent state we allocate it later.
         */
        prealloc = alloc_extent_state(mask);
    }

    spin_lock(&tree->lock);
    if (cached_state) {
        cached = *cached_state;

        if (clear) {
            *cached_state = NULL;
            cached_state = NULL;
        }

        if (cached && extent_state_in_tree(cached) &&
            cached->start <= start && cached->end > start) {
            if (clear)
                refcount_dec(&cached->refs);
            state = cached;
            goto hit_next;
        }
        if (clear)
            free_extent_state(cached);
    }

    /*
     * This search will find the extents that end after our range
     * starts.
     */
    node = tree_search(tree, start);
    if (!node)
        goto out;
    state = rb_entry(node, struct extent_state, rb_node);
hit_next:
    if (state->start > end)
        goto out;
    WARN_ON(state->end < start);
    last_end = state->end;

    /* The state doesn't have the wanted bits, go ahead */
    if (!(state->state & bits)) {
        state = next_state(state);
        goto next;
    }

    /*
     *     | ---- desired range ---- |
     *  | state | or
     *  | ------------- state -------------- |
     *
     * We need to split the extent we found, and may flip bits on the
     * second half.
     *
     * If the extent we found extends past our range, we just split and
     * search again. It'll get split again the next time though.
     *
     * If the extent we found is inside our range, we clear the desired
     * bit on it.
     */
    if (state->start < start) {
        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);
        err = split_state(tree, state, prealloc, start);
        if (err)
            extent_io_tree_panic(tree, err);

        prealloc = NULL;
        if (err)
            goto out;
        if (state->end <= end) {
            state = clear_state_bit(tree, state, &bits, wake,
                                    changeset);
            goto next;
        }
        goto search_again;
    }
    /*
     *     | ---- desired range ---- |
     *                        | state |
     *
     * We need to split the extent, and clear the bit on the first half.
     */
    if (state->start <= end && state->end > end) {
        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);
        err = split_state(tree, state, prealloc, end + 1);
        if (err)
            extent_io_tree_panic(tree, err);

        if (wake)
            wake_up(&state->wq);

        clear_state_bit(tree, prealloc, &bits, wake, changeset);

        prealloc = NULL;
        goto out;
    }

    state = clear_state_bit(tree, state, &bits, wake, changeset);
next:
    if (last_end == (u64)-1)
        goto out;
    start = last_end + 1;
    if (start <= end && state && !need_resched())
        goto hit_next;

search_again:
    if (start > end)
        goto out;
    spin_unlock(&tree->lock);
    if (gfpflags_allow_blocking(mask))
        cond_resched();
    goto again;

out:
    spin_unlock(&tree->lock);
    if (prealloc)
        free_extent_state(prealloc);

    return 0;
}

static void wait_on_state(struct extent_io_tree *tree,
                          struct extent_state *state)
        __releases(tree->lock)
        __acquires(tree->lock)
{
    DEFINE_WAIT(wait);

    prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
    spin_unlock(&tree->lock);
    schedule();
    spin_lock(&tree->lock);
    finish_wait(&state->wq, &wait);
}

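/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */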
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                            u32 bits)
{
    struct extent_state *state;
    struct rb_node *node;

    btrfs_debug_check_extent_io_range(tree, start, end);

    spin_lock(&tree->lock);
again:
    while (1) {
        /*
         * This search will find all the extents that end after our
         * range starts.
         */
        node = tree_search(tree, start);
process_node:
        if (!node)
            break;

        state = rb_entry(node, struct extent_state, rb_node);

        if (state->start > end)
            goto out;

        if (state->state & bits) {
            start = state->start;
            refcount_inc(&state->refs);
            wait_on_state(tree, state);
            free_extent_state(state);
            goto again;
        }
        start = state->end + 1;

        if (start > end)
            break;

        if (!cond_resched_lock(&tree->lock)) {
            node = rb_next(node);
            goto process_node;
        }
    }
out:
    spin_unlock(&tree->lock);
}

static void set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state,
                           u32 *bits, struct extent_changeset *changeset)
{
    u32 bits_to_set = *bits & ~EXTENT_CTLBITS;
    int ret;

    if (tree->private_data && is_data_inode(tree->private_data))
        btrfs_set_delalloc_extent(tree->private_data, state, bits);

    if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
        u64 range = state->end - state->start + 1;

        tree->dirty_bytes += range;
    }
    ret = add_extent_changeset(state, bits_to_set, changeset, 1);
    BUG_ON(ret < 0);
    state->state |= bits_to_set;
}

static void cache_state_if_flags(struct extent_state *state,
                                 struct extent_state **cached_ptr,
                                 unsigned flags)
{
    if (cached_ptr && !(*cached_ptr)) {
        if (!flags || (state->state & flags)) {
            *cached_ptr = state;
            refcount_inc(&state->refs);
        }
    }
}

static void cache_state(struct extent_state *state,
                        struct extent_state **cached_ptr)
{
    return cache_state_if_flags(state, cached_ptr,
                                EXTENT_LOCKED | EXTENT_BOUNDARY);
}

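/*
 * Set some bits on a range in the tree. This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive. This takes the tree lock.
 */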
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
                   u32 exclusive_bits, u64 *failed_start,
                   struct extent_state **cached_state, gfp_t mask,
                   struct extent_changeset *changeset)
{
    struct extent_state *state;
    struct extent_state *prealloc = NULL;
    struct rb_node *node;
    struct rb_node **p;
    struct rb_node *parent;
    int err = 0;
    u64 last_start;
    u64 last_end;

    btrfs_debug_check_extent_io_range(tree, start, end);
    trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

    if (exclusive_bits)
        ASSERT(failed_start);
    else
        ASSERT(failed_start == NULL);
again:
    if (!prealloc && gfpflags_allow_blocking(mask)) {
        /*
         * Don't care for allocation failure here because we might end
         * up not needing the pre-allocated extent state at all, which
         * is the case if we only have in the tree extent states that
         * cover our input range and don't cover too any other range.
         * If we end up needing a new extent state we allocate it later.
         */
        prealloc = alloc_extent_state(mask);
    }

    spin_lock(&tree->lock);
    if (cached_state && *cached_state) {
        state = *cached_state;
        if (state->start <= start && state->end > start &&
            extent_state_in_tree(state)) {
            node = &state->rb_node;
            goto hit_next;
        }
    }
    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search_for_insert(tree, start, &p, &parent);
    if (!node) {
        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);
        err = insert_state(tree, prealloc, start, end,
                           &p, &parent, &bits, changeset);
        if (err)
            extent_io_tree_panic(tree, err);

        cache_state(prealloc, cached_state);
        prealloc = NULL;
        goto out;
    }
    state = rb_entry(node, struct extent_state, rb_node);
hit_next:
    last_start = state->start;
    last_end = state->end;

    /*
     * | ---- desired range ---- |
     * | state |
     *
     * Just lock what we found and keep going.
     */
    if (state->start == start && state->end <= end) {
        if (state->state & exclusive_bits) {
            *failed_start = state->start;
            err = -EEXIST;
            goto out;
        }

        set_state_bits(tree, state, &bits, changeset);
        cache_state(state, cached_state);
        merge_state(tree, state);
        if (last_end == (u64)-1)
            goto out;
        start = last_end + 1;
        state = next_state(state);
        if (start < end && state && state->start == start &&
            !need_resched())
            goto hit_next;
        goto search_again;
    }

    /*
     *     | ---- desired range ---- |
     * | state |
     *   or
     * | ------------- state -------------- |
     *
     * We need to split the extent we found, and may flip bits on the
     * second half.
     *
     * If the extent we found extends past our range, we just split and
     * search again. It'll get split again the next time though.
     *
     * If the extent we found is inside our range, we set the desired bit
     * on it.
     */
    if (state->start < start) {
        if (state->state & exclusive_bits) {
            *failed_start = start;
            err = -EEXIST;
            goto out;
        }

        /*
         * If this extent already has all the bits we want set, then
         * skip it, not necessary to split it or do anything with it.
         */
        if ((state->state & bits) == bits) {
            start = state->end + 1;
            cache_state(state, cached_state);
            goto search_again;
        }

        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);
        err = split_state(tree, state, prealloc, start);
        if (err)
            extent_io_tree_panic(tree, err);

        prealloc = NULL;
        if (err)
            goto out;
        if (state->end <= end) {
            set_state_bits(tree, state, &bits, changeset);
            cache_state(state, cached_state);
            merge_state(tree, state);
            if (last_end == (u64)-1)
                goto out;
            start = last_end + 1;
            state = next_state(state);
            if (start < end && state && state->start == start &&
                !need_resched())
                goto hit_next;
        }
        goto search_again;
    }
    /*
     *     | ---- desired range ---- |
     *     | state | or               | state |
     *
     * There's a hole, we need to insert something in it and ignore the
     * extent we found.
     */
    if (state->start > start) {
        u64 this_end;

        if (end < last_start)
            this_end = end;
        else
            this_end = last_start - 1;

        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);

        /*
         * Avoid freeing 'prealloc' if it can be merged with the later
         * extent.
         */
        err = insert_state(tree, prealloc, start, this_end,
                           NULL, NULL, &bits, changeset);
        if (err)
            extent_io_tree_panic(tree, err);

        cache_state(prealloc, cached_state);
        prealloc = NULL;
        start = this_end + 1;
        goto search_again;
    }
    /*
     *     | ---- desired range ---- |
     *                        | state |
     *
     * We need to split the extent, and set the bit on the first half.
     */
    if (state->start <= end && state->end > end) {
        if (state->state & exclusive_bits) {
            *failed_start = start;
            err = -EEXIST;
            goto out;
        }

        prealloc = alloc_extent_state_atomic(prealloc);
        BUG_ON(!prealloc);
        err = split_state(tree, state, prealloc, end + 1);
        if (err)
            extent_io_tree_panic(tree, err);

        set_state_bits(tree, prealloc, &bits, changeset);
        cache_state(prealloc, cached_state);
        merge_state(tree, prealloc);
        prealloc = NULL;
        goto out;
    }

search_again:
    if (start > end)
        goto out;
    spin_unlock(&tree->lock);
    if (gfpflags_allow_blocking(mask))
        cond_resched();
    goto again;

out:
    spin_unlock(&tree->lock);
    if (prealloc)
        free_extent_state(prealloc);

    return err;
}

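/*
 * convert_extent_bit - convert all bits in a given range from one bit to
 *                      another
 * @tree:         the io tree to search
 * @start:        the start offset in bytes
 * @end:          the end offset in bytes (inclusive)
 * @bits:         the bits to set in this range
 * @clear_bits:   the bits to clear in this range
 * @cached_state: state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states
 * exist already in this range they are set with the given bit and cleared
 * of the clear_bits. This is only meant to be used by things that are
 * mergeable, ie. converting from say DELALLOC to DIRTY. This is not meant
 * to be used with the exclusive locked bit.
 *
 * All allocations are done with GFP_NOFS.
 */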
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                       u32 bits, u32 clear_bits,
                       struct extent_state **cached_state)
{
    struct extent_state *state;
    struct extent_state *prealloc = NULL;
    struct rb_node *node;
    struct rb_node **p;
    struct rb_node *parent;
    int err = 0;
    u64 last_start;
    u64 last_end;
    bool first_iteration = true;

    btrfs_debug_check_extent_io_range(tree, start, end);
    trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
                                   clear_bits);

again:
    if (!prealloc) {
        /*
         * Best effort, don't worry if extent state allocation fails
         * here for the first iteration. We might have a cached state
         * that matches exactly the target range, in which case no
         * extent state allocations are needed. We'll only know this
         * after locking the tree.
         */
        prealloc = alloc_extent_state(GFP_NOFS);
        if (!prealloc && !first_iteration)
            return -ENOMEM;
    }

    spin_lock(&tree->lock);
    if (cached_state && *cached_state) {
        state = *cached_state;
        if (state->start <= start && state->end > start &&
            extent_state_in_tree(state)) {
            node = &state->rb_node;
            goto hit_next;
        }
    }

    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search_for_insert(tree, start, &p, &parent);
    if (!node) {
        prealloc = alloc_extent_state_atomic(prealloc);
        if (!prealloc) {
            err = -ENOMEM;
            goto out;
        }
        err = insert_state(tree, prealloc, start, end,
                           &p, &parent, &bits, NULL);
        if (err)
            extent_io_tree_panic(tree, err);
        cache_state(prealloc, cached_state);
        prealloc = NULL;
        goto out;
    }
    state = rb_entry(node, struct extent_state, rb_node);
hit_next:
    last_start = state->start;
    last_end = state->end;

    /*
     * | ---- desired range ---- |
     * | state |
     *
     * Just lock what we found and keep going.
     */
    if (state->start == start && state->end <= end) {
        set_state_bits(tree, state, &bits, NULL);
        cache_state(state, cached_state);
        state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
        if (last_end == (u64)-1)
            goto out;
        start = last_end + 1;
        if (start < end && state && state->start == start &&
            !need_resched())
            goto hit_next;
        goto search_again;
    }

    /*
     *     | ---- desired range ---- |
     * | state |
     *   or
     * | ------------- state -------------- |
     *
     * We need to split the extent we found, and may flip bits on the
     * second half.
     *
     * If the extent we found extends past our range, we just split and
     * search again. It'll get split again the next time though.
     *
     * If the extent we found is inside our range, we set the desired bit
     * on it.
     */
    if (state->start < start) {
        prealloc = alloc_extent_state_atomic(prealloc);
        if (!prealloc) {
            err = -ENOMEM;
            goto out;
        }
        err = split_state(tree, state, prealloc, start);
        if (err)
            extent_io_tree_panic(tree, err);
        prealloc = NULL;
        if (err)
            goto out;
        if (state->end <= end) {
            set_state_bits(tree, state, &bits, NULL);
            cache_state(state, cached_state);
            state = clear_state_bit(tree, state, &clear_bits, 0,
                                    NULL);
            if (last_end == (u64)-1)
                goto out;
            start = last_end + 1;
            if (start < end && state && state->start == start &&
                !need_resched())
                goto hit_next;
        }
        goto search_again;
    }
    /*
     *     | ---- desired range ---- |
     *     | state | or               | state |
     *
     * There's a hole, we need to insert something in it and ignore the
     * extent we found.
     */
    if (state->start > start) {
        u64 this_end;

        if (end < last_start)
            this_end = end;
        else
            this_end = last_start - 1;

        prealloc = alloc_extent_state_atomic(prealloc);
        if (!prealloc) {
            err = -ENOMEM;
            goto out;
        }

        /*
         * Avoid freeing 'prealloc' if it can be merged with the later
         * extent.
         */
        err = insert_state(tree, prealloc, start, this_end,
                           NULL, NULL, &bits, NULL);
        if (err)
            extent_io_tree_panic(tree, err);
        cache_state(prealloc, cached_state);
        prealloc = NULL;
        start = this_end + 1;
        goto search_again;
    }
    /*
     *     | ---- desired range ---- |
     *                        | state |
     *
     * We need to split the extent, and set the bit on the first half.
     */
    if (state->start <= end && state->end > end) {
        prealloc = alloc_extent_state_atomic(prealloc);
        if (!prealloc) {
            err = -ENOMEM;
            goto out;
        }

        err = split_state(tree, state, prealloc, end + 1);
        if (err)
            extent_io_tree_panic(tree, err);

        set_state_bits(tree, prealloc, &bits, NULL);
        cache_state(prealloc, cached_state);
        clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
        prealloc = NULL;
        goto out;
    }

search_again:
    if (start > end)
        goto out;
    spin_unlock(&tree->lock);
    cond_resched();
    first_iteration = false;
    goto again;

out:
    spin_unlock(&tree->lock);
    if (prealloc)
        free_extent_state(prealloc);

    return err;
}

/* Wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                           u32 bits, struct extent_changeset *changeset)
{
    /*
     * We don't support EXTENT_LOCKED yet, as the current changeset will
     * record any bits changed, so for the EXTENT_LOCKED case it would
     * either fail with -EEXIST or the changeset would record the whole
     * range.
     */
    BUG_ON(bits & EXTENT_LOCKED);

    return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
                          changeset);
}

int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
                           u32 bits)
{
    return set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
                          GFP_NOWAIT, NULL);
}

int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                     u32 bits, int wake, int delete,
                     struct extent_state **cached)
{
    return __clear_extent_bit(tree, start, end, bits, wake, delete,
                              cached, GFP_NOFS, NULL);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                             u32 bits, struct extent_changeset *changeset)
{
    /*
     * Don't support the EXTENT_LOCKED case, same reason as
     * set_record_extent_bits().
     */
    BUG_ON(bits & EXTENT_LOCKED);

    return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
                              changeset);
}

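/*
 * Either insert or lock the state struct between start and end. Waits until
 * the lock can be taken, returning after the whole range has been locked.
 */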
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                     struct extent_state **cached_state)
{
    int err;
    u64 failed_start;

    while (1) {
        err = set_extent_bit(tree, start, end, EXTENT_LOCKED,
                             EXTENT_LOCKED, &failed_start,
                             cached_state, GFP_NOFS, NULL);
        if (err == -EEXIST) {
            wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
            start = failed_start;
        } else
            break;
        WARN_ON(start > end);
    }
    return err;
}

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
    int err;
    u64 failed_start;

    err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
                         &failed_start, NULL, GFP_NOFS, NULL);
    if (err == -EEXIST) {
        if (failed_start > start)
            clear_extent_bit(tree, start, failed_start - 1,
                             EXTENT_LOCKED, 1, 0, NULL);
        return 0;
    }
    return 1;
}

void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
    unsigned long index = start >> PAGE_SHIFT;
    unsigned long end_index = end >> PAGE_SHIFT;
    struct page *page;

    while (index <= end_index) {
        page = find_get_page(inode->i_mapping, index);
        BUG_ON(!page); /* Pages should be in the extent_io_tree */
        clear_page_dirty_for_io(page);
        put_page(page);
        index++;
    }
}

void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
    unsigned long index = start >> PAGE_SHIFT;
    unsigned long end_index = end >> PAGE_SHIFT;
    struct page *page;

    while (index <= end_index) {
        page = find_get_page(inode->i_mapping, index);
        BUG_ON(!page); /* Pages should be in the extent_io_tree */
        __set_page_dirty_nobuffers(page);
        account_page_redirty(page);
        put_page(page);
        index++;
    }
}

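/*
 * Find the first state struct with 'bits' set after 'start', and return it.
 * tree->lock must be held. NULL will be returned if nothing was found after
 * 'start'.
 */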
static struct extent_state *
find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, u32 bits)
{
    struct rb_node *node;
    struct extent_state *state;

    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search(tree, start);
    if (!node)
        goto out;

    while (1) {
        state = rb_entry(node, struct extent_state, rb_node);
        if (state->end >= start && (state->state & bits))
            return state;

        node = rb_next(node);
        if (!node)
            break;
    }
out:
    return NULL;
}

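/*
 * Find the first offset in the io tree with one or more @bits set.
 *
 * Note: if there are multiple bits set in @bits, any of them will match.
 *
 * Return 0 if we find something, and update @start_ret and @end_ret.
 * Return 1 if we found nothing.
 */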
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
                          u64 *start_ret, u64 *end_ret, u32 bits,
                          struct extent_state **cached_state)
{
    struct extent_state *state;
    int ret = 1;

    spin_lock(&tree->lock);
    if (cached_state && *cached_state) {
        state = *cached_state;
        if (state->end == start - 1 && extent_state_in_tree(state)) {
            while ((state = next_state(state)) != NULL) {
                if (state->state & bits)
                    goto got_it;
            }
            free_extent_state(*cached_state);
            *cached_state = NULL;
            goto out;
        }
        free_extent_state(*cached_state);
        *cached_state = NULL;
    }

    state = find_first_extent_bit_state(tree, start, bits);
got_it:
    if (state) {
        cache_state_if_flags(state, cached_state, 0);
        *start_ret = state->start;
        *end_ret = state->end;
        ret = 0;
    }
out:
    spin_unlock(&tree->lock);
    return ret;
}

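/*
 * Find a contiguous area of bits.
 *
 * @tree:      io tree to check
 * @start:     offset to start the search from
 * @start_ret: the first offset we found with the bits set
 * @end_ret:   the final contiguous range of the bits that were set
 * @bits:      bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous
 * ranges to set bits appropriately, and then merge them again. During this
 * time it will drop the tree->lock, so use this helper if you want to find
 * the actual contiguous area for given bits. We will search to the first
 * bit we find, and then walk down the tree until we find a non-contiguous
 * area. The area returned will be the full contiguous area with the bits
 * set.
 */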
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
                               u64 *start_ret, u64 *end_ret, u32 bits)
{
    struct extent_state *state;
    int ret = 1;

    spin_lock(&tree->lock);
    state = find_first_extent_bit_state(tree, start, bits);
    if (state) {
        *start_ret = state->start;
        *end_ret = state->end;
        while ((state = next_state(state)) != NULL) {
            if (state->start > (*end_ret + 1))
                break;
            *end_ret = state->end;
        }
        ret = 0;
    }
    spin_unlock(&tree->lock);
    return ret;
}

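/*
 * Find the first range that has @bits not set. This range could start
 * before @start.
 *
 * @tree:      the tree to search
 * @start:     offset at/after which the found extent should start
 * @start_ret: records the beginning of the range
 * @end_ret:   records the end of the range (inclusive)
 * @bits:      the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the
 * bits set, it's possible that @end_ret contains -1; this happens in case
 * the range spans (last_range_end, end of device]. In this case it's up to
 * the caller to trim @end_ret to the appropriate size.
 */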
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
                                 u64 *start_ret, u64 *end_ret, u32 bits)
{
    struct extent_state *state;
    struct rb_node *node, *prev = NULL, *next;

    spin_lock(&tree->lock);

    /* Find first extent with bits cleared */
    while (1) {
        node = __etree_search(tree, start, &next, &prev, NULL, NULL);
        if (!node && !next && !prev) {
            /*
             * Tree is completely empty, send full range and let
             * caller deal with it.
             */
            *start_ret = 0;
            *end_ret = -1;
            goto out;
        } else if (!node && !next) {
            /*
             * We are past the last allocated chunk, set start at
             * the end of the last extent.
             */
            state = rb_entry(prev, struct extent_state, rb_node);
            *start_ret = state->end + 1;
            *end_ret = -1;
            goto out;
        } else if (!node) {
            node = next;
        }
        /*
         * At this point 'node' either contains 'start' or start is
         * before 'node'.
         */
        state = rb_entry(node, struct extent_state, rb_node);

        if (in_range(start, state->start, state->end - state->start + 1)) {
            if (state->state & bits) {
                /*
                 * |--range with bits set--|
                 *          |
                 *        start
                 */
                start = state->end + 1;
            } else {
                /*
                 * 'start' falls within a range that doesn't
                 * have the bits set, so take its start as the
                 * beginning of the desired range.
                 *
                 * |--range with bits cleared----|
                 *      |
                 *    start
                 */
                *start_ret = state->start;
                break;
            }
        } else {
            /*
             * |---prev range---|---hole/unset---|---node range---|
             *                          |
             *                        start
             *
             *                        or
             *
             * |---hole/unset--||--first node--|
             * 0   |
             *    start
             */
            if (prev) {
                state = rb_entry(prev, struct extent_state,
                                 rb_node);
                *start_ret = state->end + 1;
            } else {
                *start_ret = 0;
            }
            break;
        }
    }

    /*
     * Find the longest stretch from start until an entry which has the
     * bits set.
     */
    while (1) {
        state = rb_entry(node, struct extent_state, rb_node);
        if (state->end >= start && !(state->state & bits)) {
            *end_ret = state->end;
        } else {
            *end_ret = state->start - 1;
            break;
        }

        node = rb_next(node);
        if (!node)
            break;
    }
out:
    spin_unlock(&tree->lock);
}

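/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'. start and end are used to return the range.
 *
 * True is returned if we find something, false if nothing was in the tree.
 */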
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
                               u64 *end, u64 max_bytes,
                               struct extent_state **cached_state)
{
    struct rb_node *node;
    struct extent_state *state;
    u64 cur_start = *start;
    bool found = false;
    u64 total_bytes = 0;

    spin_lock(&tree->lock);

    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search(tree, cur_start);
    if (!node) {
        *end = (u64)-1;
        goto out;
    }

    while (1) {
        state = rb_entry(node, struct extent_state, rb_node);
        if (found && (state->start != cur_start ||
                      (state->state & EXTENT_BOUNDARY))) {
            goto out;
        }
        if (!(state->state & EXTENT_DELALLOC)) {
            if (!found)
                *end = state->end;
            goto out;
        }
        if (!found) {
            *start = state->start;
            *cached_state = state;
            refcount_inc(&state->refs);
        }
        found = true;
        *end = state->end;
        cur_start = state->end + 1;
        node = rb_next(node);
        total_bytes += state->end - state->start + 1;
        if (total_bytes >= max_bytes)
            break;
        if (!node)
            break;
    }
out:
    spin_unlock(&tree->lock);
    return found;
}

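/*
 * Process one page for __process_pages_contig().
 *
 * Return >0 if we hit @page == @locked_page.
 * Return 0 if we updated the page status.
 * Return -EAGAIN if we need to try again.
 * (For the PAGE_LOCK case, when we got a dirty page or a page that does not
 * belong to the mapping anymore.)
 */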
static int process_one_page(struct btrfs_fs_info *fs_info,
                            struct address_space *mapping,
                            struct page *page, struct page *locked_page,
                            unsigned long page_ops, u64 start, u64 end)
{
    u32 len;

    ASSERT(end + 1 - start != 0 && end + 1 - start < U32_MAX);
    len = end + 1 - start;

    if (page_ops & PAGE_SET_ORDERED)
        btrfs_page_clamp_set_ordered(fs_info, page, start, len);
    if (page_ops & PAGE_SET_ERROR)
        btrfs_page_clamp_set_error(fs_info, page, start, len);
    if (page_ops & PAGE_START_WRITEBACK) {
        btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
        btrfs_page_clamp_set_writeback(fs_info, page, start, len);
    }
    if (page_ops & PAGE_END_WRITEBACK)
        btrfs_page_clamp_clear_writeback(fs_info, page, start, len);

    if (page == locked_page)
        return 1;

    if (page_ops & PAGE_LOCK) {
        int ret;

        ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
        if (ret)
            return ret;
        if (!PageDirty(page) || page->mapping != mapping) {
            btrfs_page_end_writer_lock(fs_info, page, start, len);
            return -EAGAIN;
        }
    }
    if (page_ops & PAGE_UNLOCK)
        btrfs_page_end_writer_lock(fs_info, page, start, len);
    return 0;
}

static int __process_pages_contig(struct address_space *mapping,
                                  struct page *locked_page,
                                  u64 start, u64 end, unsigned long page_ops,
                                  u64 *processed_end)
{
    struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
    pgoff_t start_index = start >> PAGE_SHIFT;
    pgoff_t end_index = end >> PAGE_SHIFT;
    pgoff_t index = start_index;
    unsigned long nr_pages = end_index - start_index + 1;
    unsigned long pages_processed = 0;
    struct page *pages[16];
    int err = 0;
    int i;

    if (page_ops & PAGE_LOCK) {
        ASSERT(page_ops == PAGE_LOCK);
        ASSERT(processed_end && *processed_end == start);
    }

    if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
        mapping_set_error(mapping, -EIO);

    while (nr_pages > 0) {
        int found_pages;

        found_pages = find_get_pages_contig(mapping, index,
                                            min_t(unsigned long,
                                            nr_pages, ARRAY_SIZE(pages)), pages);
        if (found_pages == 0) {
            /*
             * Only if we're going to lock these pages, we can find
             * nothing at @index.
             */
            ASSERT(page_ops & PAGE_LOCK);
            err = -EAGAIN;
            goto out;
        }

        for (i = 0; i < found_pages; i++) {
            int process_ret;

            process_ret = process_one_page(fs_info, mapping,
                                           pages[i], locked_page, page_ops,
                                           start, end);
            if (process_ret < 0) {
                for (; i < found_pages; i++)
                    put_page(pages[i]);
                err = -EAGAIN;
                goto out;
            }
            put_page(pages[i]);
            pages_processed++;
        }
        nr_pages -= found_pages;
        index += found_pages;
        cond_resched();
    }
out:
    if (err && processed_end) {
        /*
         * Update @processed_end so the caller knows how far we got. If
         * no page was processed at all, fall back to @start to avoid an
         * underflow when @start is 0.
         */
        if (pages_processed)
            *processed_end = min(end,
                ((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
        else
            *processed_end = start;
    }
    return err;
}

static noinline void __unlock_for_delalloc(struct inode *inode,
                                           struct page *locked_page,
                                           u64 start, u64 end)
{
    unsigned long index = start >> PAGE_SHIFT;
    unsigned long end_index = end >> PAGE_SHIFT;

    ASSERT(locked_page);
    if (index == locked_page->index && end_index == index)
        return;

    __process_pages_contig(inode->i_mapping, locked_page, start, end,
                           PAGE_UNLOCK, NULL);
}

static noinline int lock_delalloc_pages(struct inode *inode,
                                        struct page *locked_page,
                                        u64 delalloc_start,
                                        u64 delalloc_end)
{
    unsigned long index = delalloc_start >> PAGE_SHIFT;
    unsigned long end_index = delalloc_end >> PAGE_SHIFT;
    u64 processed_end = delalloc_start;
    int ret;

    ASSERT(locked_page);
    if (index == locked_page->index && index == end_index)
        return 0;

    ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
                                 delalloc_end, PAGE_LOCK, &processed_end);
    if (ret == -EAGAIN && processed_end > delalloc_start)
        __unlock_for_delalloc(inode, locked_page, delalloc_start,
                              processed_end);
    return ret;
}

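/*
 * Find and lock a contiguous range of bytes in the file marked as delalloc,
 * not more than @max_bytes.
 *
 * @start and @end are used to return the range.
 *
 * Return: true if we find something, false if nothing was in the tree.
 */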
EXPORT_FOR_TESTS
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
                                                 struct page *locked_page,
                                                 u64 *start, u64 *end)
{
    struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
    u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
    u64 delalloc_start;
    u64 delalloc_end;
    bool found;
    struct extent_state *cached_state = NULL;
    int ret;
    int loops = 0;

again:
    /* Step one, find a bunch of delalloc bytes starting at start */
    delalloc_start = *start;
    delalloc_end = 0;
    found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
                                      max_bytes, &cached_state);
    if (!found || delalloc_end <= *start) {
        *start = delalloc_start;
        *end = delalloc_end;
        free_extent_state(cached_state);
        return false;
    }

    /*
     * start comes from the offset of locked_page. We have to lock pages
     * in order, so we can't process delalloc bytes before locked_page.
     */
    if (delalloc_start < *start)
        delalloc_start = *start;

    /*
     * Make sure to limit the number of pages we try to lock down.
     */
    if (delalloc_end + 1 - delalloc_start > max_bytes)
        delalloc_end = delalloc_start + max_bytes - 1;

    /* Step two, lock all the pages after the page that has start */
    ret = lock_delalloc_pages(inode, locked_page,
                              delalloc_start, delalloc_end);
    ASSERT(!ret || ret == -EAGAIN);
    if (ret == -EAGAIN) {
        /*
         * Some of the pages are gone, lets avoid looping by
         * shortening the size of the delalloc range we're searching.
         */
        free_extent_state(cached_state);
        cached_state = NULL;
        if (!loops) {
            max_bytes = PAGE_SIZE;
            loops = 1;
            goto again;
        } else {
            found = false;
            goto out_failed;
        }
    }

    /* Step three, lock the state bits for the whole range */
    lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);

    /* Then test to make sure it is all still delalloc */
    ret = test_range_bit(tree, delalloc_start, delalloc_end,
                         EXTENT_DELALLOC, 1, cached_state);
    if (!ret) {
        unlock_extent_cached(tree, delalloc_start, delalloc_end,
                             &cached_state);
        __unlock_for_delalloc(inode, locked_page,
                              delalloc_start, delalloc_end);
        cond_resched();
        goto again;
    }
    free_extent_state(cached_state);
    *start = delalloc_start;
    *end = delalloc_end;
out_failed:
    return found;
}

void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                                  struct page *locked_page,
                                  u32 clear_bits, unsigned long page_ops)
{
    clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);

    __process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
                           start, end, page_ops, NULL);
}

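/*
 * Count the number of bytes in the tree that have a given bit(s) set. This
 * can be fairly slow, except for EXTENT_DIRTY which is cached. The total
 * number found is returned.
 */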
u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end, u64 max_bytes,
                     u32 bits, int contig)
{
    struct rb_node *node;
    struct extent_state *state;
    u64 cur_start = *start;
    u64 total_bytes = 0;
    u64 last = 0;
    int found = 0;

    if (WARN_ON(search_end <= cur_start))
        return 0;

    spin_lock(&tree->lock);
    if (cur_start == 0 && bits == EXTENT_DIRTY) {
        total_bytes = tree->dirty_bytes;
        goto out;
    }
    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search(tree, cur_start);
    if (!node)
        goto out;

    while (1) {
        state = rb_entry(node, struct extent_state, rb_node);
        if (state->start > search_end)
            break;
        if (contig && found && state->start > last + 1)
            break;
        if (state->end >= cur_start && (state->state & bits) == bits) {
            total_bytes += min(search_end, state->end) + 1 -
                           max(cur_start, state->start);
            if (total_bytes >= max_bytes)
                break;
            if (!found) {
                *start = max(cur_start, state->start);
                found = 1;
            }
            last = state->end;
        } else if (contig && found) {
            break;
        }
        node = rb_next(node);
        if (!node)
            break;
    }
out:
    spin_unlock(&tree->lock);
    return total_bytes;
}

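/*
 * Set the failure record for a given byte offset in the tree. There must
 * already be an extent state that starts exactly at @start, otherwise
 * -ENOENT is returned.
 */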
int set_state_failrec(struct extent_io_tree *tree, u64 start,
                      struct io_failure_record *failrec)
{
    struct rb_node *node;
    struct extent_state *state;
    int ret = 0;

    spin_lock(&tree->lock);
    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search(tree, start);
    if (!node) {
        ret = -ENOENT;
        goto out;
    }
    state = rb_entry(node, struct extent_state, rb_node);
    if (state->start != start) {
        ret = -ENOENT;
        goto out;
    }
    state->failrec = failrec;
out:
    spin_unlock(&tree->lock);
    return ret;
}

struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start)
{
    struct rb_node *node;
    struct extent_state *state;
    struct io_failure_record *failrec;

    spin_lock(&tree->lock);
    /*
     * This search will find all the extents that end after our range
     * starts.
     */
    node = tree_search(tree, start);
    if (!node) {
        failrec = ERR_PTR(-ENOENT);
        goto out;
    }
    state = rb_entry(node, struct extent_state, rb_node);
    if (state->start != start) {
        failrec = ERR_PTR(-ENOENT);
        goto out;
    }

    failrec = state->failrec;
out:
    spin_unlock(&tree->lock);
    return failrec;
}

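/*
 * Search a range in the state tree for a given mask. If 'filled' == 1, this
 * returns 1 only if every extent in the tree has the bits set. Otherwise, 1
 * is returned if any bit in the range is found set.
 */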
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   u32 bits, int filled, struct extent_state *cached)
{
    struct extent_state *state = NULL;
    struct rb_node *node;
    int bitset = 0;

    spin_lock(&tree->lock);
    if (cached && extent_state_in_tree(cached) && cached->start <= start &&
        cached->end > start)
        node = &cached->rb_node;
    else
        node = tree_search(tree, start);
    while (node && start <= end) {
        state = rb_entry(node, struct extent_state, rb_node);

        if (filled && state->start > start) {
            bitset = 0;
            break;
        }

        if (state->start > end)
            break;

        if (state->state & bits) {
            bitset = 1;
            if (!filled)
                break;
        } else if (filled) {
            bitset = 0;
            break;
        }

        if (state->end == (u64)-1)
            break;

        start = state->end + 1;
        if (start > end)
            break;
        node = rb_next(node);
        if (!node) {
            if (filled)
                bitset = 0;
            break;
        }
    }
    spin_unlock(&tree->lock);
    return bitset;
}

int free_io_failure(struct extent_io_tree *failure_tree,
                    struct extent_io_tree *io_tree,
                    struct io_failure_record *rec)
{
    int ret;
    int err = 0;

    set_state_failrec(failure_tree, rec->start, NULL);
    ret = clear_extent_bits(failure_tree, rec->start,
                            rec->start + rec->len - 1,
                            EXTENT_LOCKED | EXTENT_DIRTY);
    if (ret)
        err = ret;

    ret = clear_extent_bits(io_tree, rec->start,
                            rec->start + rec->len - 1,
                            EXTENT_DAMAGED);
    if (ret && !err)
        err = ret;

    kfree(rec);
    return err;
}

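/*
 * This bypasses the standard btrfs submit functions deliberately, as the
 * standard behavior is to write all copies in a raid setup. Here we only
 * want to write the one bad copy, so we do the mapping ourselves and submit
 * the bio directly.
 *
 * To avoid any synchronization issues, wait for the data after writing,
 * which actually prevents the read that triggered the error from finishing.
 */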
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
                      u64 length, u64 logical, struct page *page,
                      unsigned int pg_offset, int mirror_num)
{
    struct bio *bio;
    struct btrfs_device *dev;
    u64 map_length = 0;
    u64 sector;
    struct btrfs_bio *bbio = NULL;
    int ret;

    ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
    BUG_ON(!mirror_num);

    if (btrfs_is_zoned(fs_info))
        return btrfs_repair_one_zone(fs_info, logical);

    bio = btrfs_io_bio_alloc(1);
    bio->bi_iter.bi_size = 0;
    map_length = length;

    /*
     * Avoid races with device replace and make sure our bbio has devices
     * associated to its stripes that don't go away while we are doing the
     * read repair operation.
     */
    btrfs_bio_counter_inc_blocked(fs_info);
    if (btrfs_is_parity_mirror(fs_info, logical, length)) {
        /*
         * For parity (RAID5/6) profiles, map as a read: this returns
         * the exact stripe holding the sector, with mirror_num == 1.
         */
        ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
                              &map_length, &bbio, 0);
        if (ret) {
            btrfs_bio_counter_dec(fs_info);
            bio_put(bio);
            return -EIO;
        }
        ASSERT(bbio->mirror_num == 1);
    } else {
        ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
                              &map_length, &bbio, mirror_num);
        if (ret) {
            btrfs_bio_counter_dec(fs_info);
            bio_put(bio);
            return -EIO;
        }
        BUG_ON(mirror_num != bbio->mirror_num);
    }

    sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
    bio->bi_iter.bi_sector = sector;
    dev = bbio->stripes[bbio->mirror_num - 1].dev;
    btrfs_put_bbio(bbio);
    if (!dev || !dev->bdev ||
        !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
        btrfs_bio_counter_dec(fs_info);
        bio_put(bio);
        return -EIO;
    }
    bio_set_dev(bio, dev->bdev);
    bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
    bio_add_page(bio, page, length, pg_offset);

    if (btrfsic_submit_bio_wait(bio)) {
        /* try to remap that extent elsewhere? */
        btrfs_bio_counter_dec(fs_info);
        bio_put(bio);
        btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
        return -EIO;
    }

    btrfs_info_rl_in_rcu(fs_info,
        "read error corrected: ino %llu off %llu (dev %s sector %llu)",
        ino, start,
        rcu_str_deref(dev->name), sector);
    btrfs_bio_counter_dec(fs_info);
    bio_put(bio);
    return 0;
}

int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
{
    struct btrfs_fs_info *fs_info = eb->fs_info;
    u64 start = eb->start;
    int i, num_pages = num_extent_pages(eb);
    int ret = 0;

    if (sb_rdonly(fs_info->sb))
        return -EROFS;

    for (i = 0; i < num_pages; i++) {
        struct page *p = eb->pages[i];

        ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
                                start - page_offset(p), mirror_num);
        if (ret)
            break;
        start += PAGE_SIZE;
    }

    return ret;
}

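/*
 * Each time an IO finishes, we do a fast check in the IO failure tree to
 * see if we need to process or clean up an io_failure_record.
 */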
int clean_io_failure(struct btrfs_fs_info *fs_info,
                     struct extent_io_tree *failure_tree,
                     struct extent_io_tree *io_tree, u64 start,
                     struct page *page, u64 ino, unsigned int pg_offset)
{
    u64 private;
    struct io_failure_record *failrec;
    struct extent_state *state;
    int num_copies;
    int ret;

    private = 0;
    ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
                           EXTENT_DIRTY, 0);
    if (!ret)
        return 0;

    failrec = get_state_failrec(failure_tree, start);
    if (IS_ERR(failrec))
        return 0;

    BUG_ON(!failrec->this_mirror);

    if (sb_rdonly(fs_info->sb))
        goto out;

    spin_lock(&io_tree->lock);
    state = find_first_extent_bit_state(io_tree,
                                        failrec->start,
                                        EXTENT_LOCKED);
    spin_unlock(&io_tree->lock);

    if (state && state->start <= failrec->start &&
        state->end >= failrec->start + failrec->len - 1) {
        num_copies = btrfs_num_copies(fs_info, failrec->logical,
                                      failrec->len);
        if (num_copies > 1) {
            repair_io_failure(fs_info, ino, start, failrec->len,
                              failrec->logical, page, pg_offset,
                              failrec->failed_mirror);
        }
    }

out:
    free_io_failure(failure_tree, io_tree, failrec);

    return 0;
}

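/*
 * Can be called when
 * - hold extent lock
 * - under ordered extent
 * - the inode is freeing
 */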
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
{
    struct extent_io_tree *failure_tree = &inode->io_failure_tree;
    struct io_failure_record *failrec;
    struct extent_state *state, *next;

    if (RB_EMPTY_ROOT(&failure_tree->state))
        return;

    spin_lock(&failure_tree->lock);
    state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
    while (state) {
        if (state->start > end)
            break;

        ASSERT(state->end <= end);

        next = next_state(state);

        failrec = state->failrec;
        free_extent_state(state);
        kfree(failrec);

        state = next;
    }
    spin_unlock(&failure_tree->lock);
}

static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
                                                             u64 start)
{
    struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
    struct io_failure_record *failrec;
    struct extent_map *em;
    struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
    struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
    struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
    const u32 sectorsize = fs_info->sectorsize;
    int ret;
    u64 logical;

    failrec = get_state_failrec(failure_tree, start);
    if (!IS_ERR(failrec)) {
        btrfs_debug(fs_info,
    "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
            failrec->logical, failrec->start, failrec->len);
        /*
         * When data can be on disk more than twice, add to failrec here
         * (e.g. with a list for failed_mirror) to make
         * clean_io_failure() clean all those errors at once.
         */
        return failrec;
    }

    failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
    if (!failrec)
        return ERR_PTR(-ENOMEM);

    failrec->start = start;
    failrec->len = sectorsize;
    failrec->this_mirror = 0;
    failrec->bio_flags = 0;

    read_lock(&em_tree->lock);
    em = lookup_extent_mapping(em_tree, start, failrec->len);
    if (!em) {
        read_unlock(&em_tree->lock);
        kfree(failrec);
        return ERR_PTR(-EIO);
    }

    if (em->start > start || em->start + em->len <= start) {
        free_extent_map(em);
        em = NULL;
    }
    read_unlock(&em_tree->lock);
    if (!em) {
        kfree(failrec);
        return ERR_PTR(-EIO);
    }

    logical = start - em->start;
    logical = em->block_start + logical;
    if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
        logical = em->block_start;
        failrec->bio_flags = EXTENT_BIO_COMPRESSED;
        extent_set_compress_type(&failrec->bio_flags, em->compress_type);
    }

    btrfs_debug(fs_info,
        "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
        logical, start, failrec->len);

    failrec->logical = logical;
    free_extent_map(em);

    /* Set the bits in the private failure tree */
    ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
                          EXTENT_LOCKED | EXTENT_DIRTY);
    if (ret >= 0) {
        ret = set_state_failrec(failure_tree, start, failrec);
        /* Set the bits in the inode's tree */
        ret = set_extent_bits(tree, start, start + sectorsize - 1,
                              EXTENT_DAMAGED);
    } else if (ret < 0) {
        kfree(failrec);
        return ERR_PTR(ret);
    }

    return failrec;
}

2564static bool btrfs_check_repairable(struct inode *inode,
2565 struct io_failure_record *failrec,
2566 int failed_mirror)
2567{
2568 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2569 int num_copies;
2570
2571 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2572 if (num_copies == 1) {
 /*
  * We only have a single copy of the data, so don't bother with all
  * the retry and error correction code that follows. No matter what
  * the error is, it is very likely to persist.
  */
2578 btrfs_debug(fs_info,
2579 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2580 num_copies, failrec->this_mirror, failed_mirror);
2581 return false;
2582 }
2583
2584
2585 ASSERT(failrec->len == fs_info->sectorsize);
2586
 /*
  * There are two premises:
  * a) deliver good data to the caller
  * b) correct the bad sectors on disk
  *
  * Since we're only doing repair for one sector, we only need to get
  * a good copy of the failed sector and if we succeed, we have setup
  * everything for repair_io_failure to do the rest for us.
  */
2596 failrec->failed_mirror = failed_mirror;
2597 failrec->this_mirror++;
2598 if (failrec->this_mirror == failed_mirror)
2599 failrec->this_mirror++;
2600
2601 if (failrec->this_mirror > num_copies) {
2602 btrfs_debug(fs_info,
2603 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2604 num_copies, failrec->this_mirror, failed_mirror);
2605 return false;
2606 }
2607
2608 return true;
2609}
2610
2611int btrfs_repair_one_sector(struct inode *inode,
2612 struct bio *failed_bio, u32 bio_offset,
2613 struct page *page, unsigned int pgoff,
2614 u64 start, int failed_mirror,
2615 submit_bio_hook_t *submit_bio_hook)
2616{
2617 struct io_failure_record *failrec;
2618 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2619 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2620 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2621 struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
2622 const int icsum = bio_offset >> fs_info->sectorsize_bits;
2623 struct bio *repair_bio;
2624 struct btrfs_io_bio *repair_io_bio;
2625 blk_status_t status;
2626
2627 btrfs_debug(fs_info,
2628 "repair read error: read error at %llu", start);
2629
2630 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2631
2632 failrec = btrfs_get_io_failure_record(inode, start);
2633 if (IS_ERR(failrec))
2634 return PTR_ERR(failrec);
2635
2636
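 /* If no other mirror is left to try, the read cannot be repaired */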
2637 if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
2638 free_io_failure(failure_tree, tree, failrec);
2639 return -EIO;
2640 }
2641
2642 repair_bio = btrfs_io_bio_alloc(1);
2643 repair_io_bio = btrfs_io_bio(repair_bio);
2644 repair_bio->bi_opf = REQ_OP_READ;
2645 repair_bio->bi_end_io = failed_bio->bi_end_io;
2646 repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
2647 repair_bio->bi_private = failed_bio->bi_private;
2648
2649 if (failed_io_bio->csum) {
2650 const u32 csum_size = fs_info->csum_size;
2651
2652 repair_io_bio->csum = repair_io_bio->csum_inline;
2653 memcpy(repair_io_bio->csum,
2654 failed_io_bio->csum + csum_size * icsum, csum_size);
2655 }
2656
2657 bio_add_page(repair_bio, page, failrec->len, pgoff);
2658 repair_io_bio->logical = failrec->start;
2659 repair_io_bio->iter = repair_bio->bi_iter;
2660
2661 btrfs_debug(btrfs_sb(inode->i_sb),
2662 "repair read error: submitting new read to mirror %d",
2663 failrec->this_mirror);
2664
2665 status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
2666 failrec->bio_flags);
2667 if (status) {
2668 free_io_failure(failure_tree, tree, failrec);
2669 bio_put(repair_bio);
2670 }
2671 return blk_status_to_errno(status);
2672}
2673
2674static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
2675{
2676 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
2677
2678 ASSERT(page_offset(page) <= start &&
2679 start + len <= page_offset(page) + PAGE_SIZE);
2680
2681 if (uptodate) {
2682 if (fsverity_active(page->mapping->host) &&
2683 !PageError(page) &&
2684 !PageUptodate(page) &&
2685 start < i_size_read(page->mapping->host) &&
2686 !fsverity_verify_page(page)) {
2687 btrfs_page_set_error(fs_info, page, start, len);
2688 } else {
2689 btrfs_page_set_uptodate(fs_info, page, start, len);
2690 }
2691 } else {
2692 btrfs_page_clear_uptodate(fs_info, page, start, len);
2693 btrfs_page_set_error(fs_info, page, start, len);
2694 }
2695
2696 if (fs_info->sectorsize == PAGE_SIZE)
2697 unlock_page(page);
2698 else
2699 btrfs_subpage_end_reader(fs_info, page, start, len);
2700}
2701
2702static blk_status_t submit_read_repair(struct inode *inode,
2703 struct bio *failed_bio, u32 bio_offset,
2704 struct page *page, unsigned int pgoff,
2705 u64 start, u64 end, int failed_mirror,
2706 unsigned int error_bitmap,
2707 submit_bio_hook_t *submit_bio_hook)
2708{
2709 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2710 const u32 sectorsize = fs_info->sectorsize;
2711 const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
2712 int error = 0;
2713 int i;
2714
2715 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2716
 /* We're here because we had some read errors or csum mismatches */
2718 ASSERT(error_bitmap);
2719
 /*
  * We only get called on buffered IO, thus page must be mapped and bio
  * must not be cloned.
  */
2724 ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
2725
2726
2727 for (i = 0; i < nr_bits; i++) {
2728 const unsigned int offset = i * sectorsize;
2729 struct extent_state *cached = NULL;
2730 bool uptodate = false;
2731 int ret;
2732
2733 if (!(error_bitmap & (1U << i))) {
 /*
  * This sector has no error, just end the page read and unlock the
  * extent range.
  */
2738 uptodate = true;
2739 goto next;
2740 }
2741
2742 ret = btrfs_repair_one_sector(inode, failed_bio,
2743 bio_offset + offset,
2744 page, pgoff + offset, start + offset,
2745 failed_mirror, submit_bio_hook);
2746 if (!ret) {
 /*
  * We have submitted the read repair, the page release will be
  * handled by the endio function of the submitted repair bio.
  * Thus we don't need to do anything here.
  */
2753 continue;
2754 }
2755
 /*
  * Repair failed, just record the error but still continue, or the
  * remaining sectors will not be properly unlocked.
  */
2759 if (!error)
2760 error = ret;
2761next:
2762 end_page_read(page, uptodate, start + offset, sectorsize);
2763 if (uptodate)
2764 set_extent_uptodate(&BTRFS_I(inode)->io_tree,
2765 start + offset,
2766 start + offset + sectorsize - 1,
2767 &cached, GFP_ATOMIC);
2768 unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree,
2769 start + offset,
2770 start + offset + sectorsize - 1,
2771 &cached);
2772 }
2773 return errno_to_blk_status(error);
2774}
2775
2776
2777
2778void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2779{
2780 struct btrfs_inode *inode;
2781 const bool uptodate = (err == 0);
2782 int ret = 0;
2783
2784 ASSERT(page && page->mapping);
2785 inode = BTRFS_I(page->mapping->host);
2786 btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
2787
2788 if (!uptodate) {
2789 const struct btrfs_fs_info *fs_info = inode->root->fs_info;
2790 u32 len;
2791
2792 ASSERT(end + 1 - start <= U32_MAX);
2793 len = end + 1 - start;
2794
2795 btrfs_page_clear_uptodate(fs_info, page, start, len);
2796 btrfs_page_set_error(fs_info, page, start, len);
2797 ret = err < 0 ? err : -EIO;
2798 mapping_set_error(page->mapping, ret);
2799 }
2800}
2801
/*
 * After a writepage IO is done, we need to:
 * - clear the uptodate bits on error
 * - clear the writeback bits in the extent tree for this IO
 * - end_page_writeback() if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2811static void end_bio_extent_writepage(struct bio *bio)
2812{
2813 int error = blk_status_to_errno(bio->bi_status);
2814 struct bio_vec *bvec;
2815 u64 start;
2816 u64 end;
2817 struct bvec_iter_all iter_all;
2818 bool first_bvec = true;
2819
2820 ASSERT(!bio_flagged(bio, BIO_CLONED));
2821 bio_for_each_segment_all(bvec, bio, iter_all) {
2822 struct page *page = bvec->bv_page;
2823 struct inode *inode = page->mapping->host;
2824 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2825 const u32 sectorsize = fs_info->sectorsize;
2826
2827
2828 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
2829 btrfs_err(fs_info,
2830 "partial page write in btrfs with offset %u and length %u",
2831 bvec->bv_offset, bvec->bv_len);
2832 else if (!IS_ALIGNED(bvec->bv_len, sectorsize))
2833 btrfs_info(fs_info,
2834 "incomplete page write with offset %u and length %u",
2835 bvec->bv_offset, bvec->bv_len);
2836
2837 start = page_offset(page) + bvec->bv_offset;
2838 end = start + bvec->bv_len - 1;
2839
2840 if (first_bvec) {
2841 btrfs_record_physical_zoned(inode, start, bio);
2842 first_bvec = false;
2843 }
2844
2845 end_extent_writepage(page, error, start, end);
2846
2847 btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
2848 }
2849
2850 bio_put(bio);
2851}
2852
2853
/*
 * Record previously processed extent range.
 *
 * For endio_readpage_release_extent() to handle a full extent range,
 * reducing the extent io operations.
 */
2859struct processed_extent {
2860 struct btrfs_inode *inode;
2861
2862 u64 start;
2863
2864 u64 end;
2865 bool uptodate;
2866};
2867
2868
/*
 * Try to release the processed extent range.
 *
 * May not release the extent range right now if the current range is
 * contiguous to the processed extent.
 *
 * Will release the processed extent when any of @inode, @uptodate, or the
 * range is no longer contiguous to the processed range.
 *
 * Passing @inode == NULL will force the processed extent to be released.
 */
2879static void endio_readpage_release_extent(struct processed_extent *processed,
2880 struct btrfs_inode *inode, u64 start, u64 end,
2881 bool uptodate)
2882{
2883 struct extent_state *cached = NULL;
2884 struct extent_io_tree *tree;
2885
2886
2887 if (!processed->inode)
2888 goto update;
2889
 /*
  * The new range is contiguous with the processed range for the same
  * inode and has the same uptodate status, so just extend the
  * processed range instead of releasing and unlocking it now.
  *
  * Note that bios can be merged as long as the on-disk bytenr is
  * contiguous, so a bio may carry pages from more than one inode;
  * checking the inode here is what keeps the ranges separate.
  */
2901 if (processed->inode == inode && processed->uptodate == uptodate &&
2902 processed->end + 1 >= start && end >= processed->end) {
2903 processed->end = end;
2904 return;
2905 }
2906
2907 tree = &processed->inode->io_tree;
2908
 /*
  * The current range is not contiguous with the processed range,
  * release the processed range now.
  */
2912 if (processed->uptodate && tree->track_uptodate)
2913 set_extent_uptodate(tree, processed->start, processed->end,
2914 &cached, GFP_ATOMIC);
2915 unlock_extent_cached_atomic(tree, processed->start, processed->end,
2916 &cached);
2917
2918update:
2919
2920 processed->inode = inode;
2921 processed->start = start;
2922 processed->end = end;
2923 processed->uptodate = uptodate;
2924}
2925
2926static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
2927{
2928 ASSERT(PageLocked(page));
2929 if (fs_info->sectorsize == PAGE_SIZE)
2930 return;
2931
2932 ASSERT(PagePrivate(page));
2933 btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
2934}
2935
2936
/*
 * Find the extent buffer for a given bytenr.
 *
 * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
 * in endio context.
 */
2942static struct extent_buffer *find_extent_buffer_readpage(
2943 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
2944{
2945 struct extent_buffer *eb;
2946
 /*
  * For regular sectorsize, we can use page->private to grab the extent
  * buffer.
  */
2951 if (fs_info->sectorsize == PAGE_SIZE) {
2952 ASSERT(PagePrivate(page) && page->private);
2953 return (struct extent_buffer *)page->private;
2954 }
2955
2956
2957 rcu_read_lock();
2958 eb = radix_tree_lookup(&fs_info->buffer_radix,
2959 bytenr >> fs_info->sectorsize_bits);
2960 rcu_read_unlock();
2961 ASSERT(eb);
2962 return eb;
2963}
2964
2965
/*
 * After a readpage IO is done, we need to:
 * - clear the uptodate bits on error
 * - set the uptodate bits if things worked
 * - set the page up to date if all extents in the tree are uptodate
 * - clear the lock bit in the extent tree
 * - unlock the page if there are no other extents locked on it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2976static void end_bio_extent_readpage(struct bio *bio)
2977{
2978 struct bio_vec *bvec;
2979 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2980 struct extent_io_tree *tree, *failure_tree;
2981 struct processed_extent processed = { 0 };
2982
 /*
  * The offset to the beginning of a bio, since one bio can never be
  * larger than UINT_MAX, u32 is enough for us.
  */
2986 u32 bio_offset = 0;
2987 int mirror;
2988 int ret;
2989 struct bvec_iter_all iter_all;
2990
2991 ASSERT(!bio_flagged(bio, BIO_CLONED));
2992 bio_for_each_segment_all(bvec, bio, iter_all) {
2993 bool uptodate = !bio->bi_status;
2994 struct page *page = bvec->bv_page;
2995 struct inode *inode = page->mapping->host;
2996 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2997 const u32 sectorsize = fs_info->sectorsize;
2998 unsigned int error_bitmap = (unsigned int)-1;
2999 u64 start;
3000 u64 end;
3001 u32 len;
3002
3003 btrfs_debug(fs_info,
3004 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
3005 bio->bi_iter.bi_sector, bio->bi_status,
3006 io_bio->mirror_num);
3007 tree = &BTRFS_I(inode)->io_tree;
3008 failure_tree = &BTRFS_I(inode)->io_failure_tree;
3009
 /*
  * We always issue full-sector reads, but if some block in a page
  * fails to read, blk_update_request() will advance bv_offset and
  * adjust bv_len to compensate. Print a warning for unaligned offsets,
  * and an error if they don't add up to a full sector.
  */
3017 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
3018 btrfs_err(fs_info,
3019 "partial page read in btrfs with offset %u and length %u",
3020 bvec->bv_offset, bvec->bv_len);
3021 else if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len,
3022 sectorsize))
3023 btrfs_info(fs_info,
3024 "incomplete page read with offset %u and length %u",
3025 bvec->bv_offset, bvec->bv_len);
3026
3027 start = page_offset(page) + bvec->bv_offset;
3028 end = start + bvec->bv_len - 1;
3029 len = bvec->bv_len;
3030
3031 mirror = io_bio->mirror_num;
3032 if (likely(uptodate)) {
3033 if (is_data_inode(inode)) {
3034 error_bitmap = btrfs_verify_data_csum(io_bio,
3035 bio_offset, page, start, end);
3036 ret = error_bitmap;
3037 } else {
3038 ret = btrfs_validate_metadata_buffer(io_bio,
3039 page, start, end, mirror);
3040 }
3041 if (ret)
3042 uptodate = false;
3043 else
3044 clean_io_failure(BTRFS_I(inode)->root->fs_info,
3045 failure_tree, tree, start,
3046 page,
3047 btrfs_ino(BTRFS_I(inode)), 0);
3048 }
3049
3050 if (likely(uptodate))
3051 goto readpage_ok;
3052
3053 if (is_data_inode(inode)) {
 /*
  * submit_read_repair() will handle all the good and bad sectors, we
  * just continue to the next bvec.
  */
3058 submit_read_repair(inode, bio, bio_offset, page,
3059 start - page_offset(page), start,
3060 end, mirror, error_bitmap,
3061 btrfs_submit_data_bio);
3062
3063 ASSERT(bio_offset + len > bio_offset);
3064 bio_offset += len;
3065 continue;
3066 } else {
3067 struct extent_buffer *eb;
3068
3069 eb = find_extent_buffer_readpage(fs_info, page, start);
3070 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
3071 eb->read_mirror = mirror;
3072 atomic_dec(&eb->io_pages);
3073 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
3074 &eb->bflags))
3075 btree_readahead_hook(eb, -EIO);
3076 }
3077readpage_ok:
3078 if (likely(uptodate)) {
3079 loff_t i_size = i_size_read(inode);
3080 pgoff_t end_index = i_size >> PAGE_SHIFT;
3081
 /*
  * Zero out the remaining part if this range straddles i_size.
  *
  * Here we should only zero the range inside the bvec, not touch
  * anything else.
  *
  * NOTE: i_size is exclusive while end is inclusive.
  */
3091 if (page->index == end_index && i_size <= end) {
3092 u32 zero_start = max(offset_in_page(i_size),
3093 offset_in_page(start));
3094
3095 zero_user_segment(page, zero_start,
3096 offset_in_page(end) + 1);
3097 }
3098 }
3099 ASSERT(bio_offset + len > bio_offset);
3100 bio_offset += len;
3101
3102
3103 end_page_read(page, uptodate, start, len);
3104 endio_readpage_release_extent(&processed, BTRFS_I(inode),
3105 start, end, PageUptodate(page));
3106 }
3107
3108 endio_readpage_release_extent(&processed, NULL, 0, 0, false);
3109 btrfs_io_bio_free_csum(io_bio);
3110 bio_put(bio);
3111}
3112
3113
3114
3115
3116
3117
3118static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
3119{
3120 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
3121}
3122
3123
3124
3125
3126
3127
3128struct bio *btrfs_bio_alloc(u64 first_byte)
3129{
3130 struct bio *bio;
3131
3132 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
3133 bio->bi_iter.bi_sector = first_byte >> 9;
3134 btrfs_io_bio_init(btrfs_io_bio(bio));
3135 return bio;
3136}
3137
3138struct bio *btrfs_bio_clone(struct bio *bio)
3139{
3140 struct btrfs_io_bio *btrfs_bio;
3141 struct bio *new;
3142
3143
3144 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
3145 btrfs_bio = btrfs_io_bio(new);
3146 btrfs_io_bio_init(btrfs_bio);
3147 btrfs_bio->iter = bio->bi_iter;
3148 return new;
3149}
3150
3151struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
3152{
3153 struct bio *bio;
3154
3155
3156 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
3157 btrfs_io_bio_init(btrfs_io_bio(bio));
3158 return bio;
3159}
3160
3161struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
3162{
3163 struct bio *bio;
3164 struct btrfs_io_bio *btrfs_bio;
3165
3166 ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
3167
3168
3169 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
3170 ASSERT(bio);
3171
3172 btrfs_bio = btrfs_io_bio(bio);
3173 btrfs_io_bio_init(btrfs_bio);
3174
3175 bio_trim(bio, offset >> 9, size >> 9);
3176 btrfs_bio->iter = bio->bi_iter;
3177 return bio;
3178}
3179
/*
 * Attempt to add a page range to the bio.
 *
 * @bio_ctrl:    record both the bio, and its bio_flags
 * @page:        page to add to the bio
 * @disk_bytenr: offset of the new bio or to check whether we are adding
 *               a contiguous page to the previous one
 * @size:        portion of page that we want to write
 * @pg_offset:   starting offset in the page
 * @bio_flags:   flags of the current bio to see if we can merge them
 *
 * Attempt to add a page to bio considering stripe alignment etc.
 *
 * Return >= 0 for the number of bytes added to the bio.
 * Can return 0 if the current bio is already at stripe/zone boundary.
 * Return <0 for error.
 */
3198static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
3199 struct page *page,
3200 u64 disk_bytenr, unsigned int size,
3201 unsigned int pg_offset,
3202 unsigned long bio_flags)
3203{
3204 struct bio *bio = bio_ctrl->bio;
3205 u32 bio_size = bio->bi_iter.bi_size;
3206 u32 real_size;
3207 const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
3208 bool contig;
3209 int ret;
3210
3211 ASSERT(bio);
3212
3213 ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
3214 if (bio_ctrl->bio_flags != bio_flags)
3215 return 0;
3216
3217 if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
3218 contig = bio->bi_iter.bi_sector == sector;
3219 else
3220 contig = bio_end_sector(bio) == sector;
3221 if (!contig)
3222 return 0;
3223
3224 real_size = min(bio_ctrl->len_to_oe_boundary,
3225 bio_ctrl->len_to_stripe_boundary) - bio_size;
3226 real_size = min(real_size, size);
3227
 /*
  * If real_size is 0, never call bio_add_*_page(), as even when size is
  * 0 the bio will still execute its endio function on the page!
  */
3232 if (real_size == 0)
3233 return 0;
3234
3235 if (bio_op(bio) == REQ_OP_ZONE_APPEND)
3236 ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
3237 else
3238 ret = bio_add_page(bio, page, real_size, pg_offset);
3239
3240 return ret;
3241}
3242
3243static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
3244 struct btrfs_inode *inode, u64 file_offset)
3245{
3246 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3247 struct btrfs_io_geometry geom;
3248 struct btrfs_ordered_extent *ordered;
3249 struct extent_map *em;
3250 u64 logical = (bio_ctrl->bio->bi_iter.bi_sector << SECTOR_SHIFT);
3251 int ret;
3252
 /*
  * Pages for compressed extents are never submitted to disk directly,
  * thus they have no real boundary, just set it to U32_MAX.
  *
  * The split happens for the real compressed bio, which happens in
  * btrfs_submit_compressed_read/write().
  */
3260 if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED) {
3261 bio_ctrl->len_to_oe_boundary = U32_MAX;
3262 bio_ctrl->len_to_stripe_boundary = U32_MAX;
3263 return 0;
3264 }
3265 em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
3266 if (IS_ERR(em))
3267 return PTR_ERR(em);
3268 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio_ctrl->bio),
3269 logical, &geom);
3270 free_extent_map(em);
 if (ret < 0)
  return ret;
3274 if (geom.len > U32_MAX)
3275 bio_ctrl->len_to_stripe_boundary = U32_MAX;
3276 else
3277 bio_ctrl->len_to_stripe_boundary = (u32)geom.len;
3278
3279 if (!btrfs_is_zoned(fs_info) ||
3280 bio_op(bio_ctrl->bio) != REQ_OP_ZONE_APPEND) {
3281 bio_ctrl->len_to_oe_boundary = U32_MAX;
3282 return 0;
3283 }
3284
3285
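 /*
  * Zone append bios must not cross the boundary of the corresponding
  * ordered extent, look it up to limit the bio length.
  */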
3286 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
3287 if (!ordered) {
3288 bio_ctrl->len_to_oe_boundary = U32_MAX;
3289 return 0;
3290 }
3291
3292 bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
3293 ordered->disk_bytenr + ordered->disk_num_bytes - logical);
3294 btrfs_put_ordered_extent(ordered);
3295 return 0;
3296}
3297
3298static int alloc_new_bio(struct btrfs_inode *inode,
3299 struct btrfs_bio_ctrl *bio_ctrl,
3300 struct writeback_control *wbc,
3301 unsigned int opf,
3302 bio_end_io_t end_io_func,
3303 u64 disk_bytenr, u32 offset, u64 file_offset,
3304 unsigned long bio_flags)
3305{
3306 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3307 struct bio *bio;
3308 int ret;
3309
 /*
  * For a compressed page range, its disk_bytenr is always @disk_bytenr
  * passed in, no matter if we have added any range into the previous
  * bio.
  */
3314 if (bio_flags & EXTENT_BIO_COMPRESSED)
3315 bio = btrfs_bio_alloc(disk_bytenr);
3316 else
3317 bio = btrfs_bio_alloc(disk_bytenr + offset);
3318 bio_ctrl->bio = bio;
3319 bio_ctrl->bio_flags = bio_flags;
3320 bio->bi_end_io = end_io_func;
3321 bio->bi_private = &inode->io_tree;
3322 bio->bi_write_hint = inode->vfs_inode.i_write_hint;
3323 bio->bi_opf = opf;
3324 ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
3325 if (ret < 0)
3326 goto error;
3327 if (wbc) {
3328 struct block_device *bdev;
3329
3330 bdev = fs_info->fs_devices->latest_bdev;
3331 bio_set_dev(bio, bdev);
3332 wbc_init_bio(wbc, bio);
3333 }
3334 if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
3335 struct btrfs_device *device;
3336
3337 device = btrfs_zoned_get_device(fs_info, disk_bytenr,
3338 fs_info->sectorsize);
3339 if (IS_ERR(device)) {
3340 ret = PTR_ERR(device);
3341 goto error;
3342 }
3343
3344 btrfs_io_bio(bio)->device = device;
3345 }
3346 return 0;
3347error:
3348 bio_ctrl->bio = NULL;
3349 bio->bi_status = errno_to_blk_status(ret);
3350 bio_endio(bio);
3351 return ret;
3352}
3353
3354
/*
 * @opf:             bio REQ_OP_* and REQ_* flags as one value
 * @wbc:             optional writeback control for io accounting
 * @page:            page to add to the bio
 * @disk_bytenr:     logical bytenr where the write will be
 * @size:            portion of page that we want to write to
 * @pg_offset:       offset of the new bio or to check whether we are adding
 *                   a contiguous page to the previous one
 * @end_io_func:     end_io callback for the new bio
 * @mirror_num:      desired mirror to read/write
 * @bio_flags:       flags of the current bio to see if we can merge them
 * @force_bio_submit: submit the current bio before allocating a new one
 */
3368static int submit_extent_page(unsigned int opf,
3369 struct writeback_control *wbc,
3370 struct btrfs_bio_ctrl *bio_ctrl,
3371 struct page *page, u64 disk_bytenr,
3372 size_t size, unsigned long pg_offset,
3373 bio_end_io_t end_io_func,
3374 int mirror_num,
3375 unsigned long bio_flags,
3376 bool force_bio_submit)
3377{
3378 int ret = 0;
3379 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
3380 unsigned int cur = pg_offset;
3381
3382 ASSERT(bio_ctrl);
3383
3384 ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
3385 pg_offset + size <= PAGE_SIZE);
3386 if (force_bio_submit && bio_ctrl->bio) {
3387 ret = submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->bio_flags);
3388 bio_ctrl->bio = NULL;
3389 if (ret < 0)
3390 return ret;
3391 }
3392
3393 while (cur < pg_offset + size) {
3394 u32 offset = cur - pg_offset;
3395 int added;
3396
3397
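 /* Allocate a new bio if needed */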
3398 if (!bio_ctrl->bio) {
3399 ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
3400 end_io_func, disk_bytenr, offset,
3401 page_offset(page) + cur,
3402 bio_flags);
3403 if (ret < 0)
3404 return ret;
3405 }
3406
 /*
  * We must go through btrfs_bio_add_page() to ensure each page range
  * won't cross various boundaries.
  */
3410 if (bio_flags & EXTENT_BIO_COMPRESSED)
3411 added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
3412 size - offset, pg_offset + offset,
3413 bio_flags);
3414 else
3415 added = btrfs_bio_add_page(bio_ctrl, page,
3416 disk_bytenr + offset, size - offset,
3417 pg_offset + offset, bio_flags);
3418
3419
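 /* Metadata page range should never be split */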
3420 if (!is_data_inode(&inode->vfs_inode))
3421 ASSERT(added == 0 || added == size - offset);
3422
3423
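 /* At least we added some page, update the cgroup io accounting */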
3424 if (wbc && added)
3425 wbc_account_cgroup_owner(wbc, page, added);
3426
3427
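 /* We have reached a stripe/zone boundary, submit the bio right now */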
3428 if (added < size - offset) {
3429
3430 ASSERT(bio_ctrl->bio->bi_iter.bi_size);
3431 ret = submit_one_bio(bio_ctrl->bio, mirror_num,
3432 bio_ctrl->bio_flags);
3433 bio_ctrl->bio = NULL;
3434 if (ret < 0)
3435 return ret;
3436 }
3437 cur += added;
3438 }
3439 return 0;
3440}
3441
3442static int attach_extent_buffer_page(struct extent_buffer *eb,
3443 struct page *page,
3444 struct btrfs_subpage *prealloc)
3445{
3446 struct btrfs_fs_info *fs_info = eb->fs_info;
3447 int ret = 0;
3448
3449
3450
3451
3452
3453
3454
3455 if (page->mapping)
3456 lockdep_assert_held(&page->mapping->private_lock);
3457
3458 if (fs_info->sectorsize == PAGE_SIZE) {
3459 if (!PagePrivate(page))
3460 attach_page_private(page, eb);
3461 else
3462 WARN_ON(page->private != (unsigned long)eb);
3463 return 0;
3464 }
3465
3466
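 /* Already attached by another eb in the same page, just free the prealloc */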
3467 if (PagePrivate(page)) {
3468 btrfs_free_subpage(prealloc);
3469 return 0;
3470 }
3471
3472 if (prealloc)
3473
3474 attach_page_private(page, prealloc);
3475 else
3476
3477 ret = btrfs_attach_subpage(fs_info, page,
3478 BTRFS_SUBPAGE_METADATA);
3479 return ret;
3480}
3481
3482int set_page_extent_mapped(struct page *page)
3483{
3484 struct btrfs_fs_info *fs_info;
3485
3486 ASSERT(page->mapping);
3487
3488 if (PagePrivate(page))
3489 return 0;
3490
3491 fs_info = btrfs_sb(page->mapping->host->i_sb);
3492
3493 if (fs_info->sectorsize < PAGE_SIZE)
3494 return btrfs_attach_subpage(fs_info, page, BTRFS_SUBPAGE_DATA);
3495
3496 attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
3497 return 0;
3498}
3499
3500void clear_page_extent_mapped(struct page *page)
3501{
3502 struct btrfs_fs_info *fs_info;
3503
3504 ASSERT(page->mapping);
3505
3506 if (!PagePrivate(page))
3507 return;
3508
3509 fs_info = btrfs_sb(page->mapping->host->i_sb);
3510 if (fs_info->sectorsize < PAGE_SIZE)
3511 return btrfs_detach_subpage(fs_info, page);
3512
3513 detach_page_private(page);
3514}
3515
3516static struct extent_map *
3517__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
3518 u64 start, u64 len, struct extent_map **em_cached)
3519{
3520 struct extent_map *em;
3521
3522 if (em_cached && *em_cached) {
3523 em = *em_cached;
3524 if (extent_map_in_tree(em) && start >= em->start &&
3525 start < extent_map_end(em)) {
3526 refcount_inc(&em->refs);
3527 return em;
3528 }
3529
3530 free_extent_map(em);
3531 *em_cached = NULL;
3532 }
3533
3534 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
3535 if (em_cached && !IS_ERR_OR_NULL(em)) {
3536 BUG_ON(*em_cached);
3537 refcount_inc(&em->refs);
3538 *em_cached = em;
3539 }
3540 return em;
3541}
3542
/*
 * Basic readpage implementation. Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers).
 *
 * @bio_ctrl: submit bio structure, used to track merged bios and bio flags
 */
3549int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
3550 struct btrfs_bio_ctrl *bio_ctrl,
3551 unsigned int read_flags, u64 *prev_em_start)
3552{
3553 struct inode *inode = page->mapping->host;
3554 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3555 u64 start = page_offset(page);
3556 const u64 end = start + PAGE_SIZE - 1;
3557 u64 cur = start;
3558 u64 extent_offset;
3559 u64 last_byte = i_size_read(inode);
3560 u64 block_start;
3561 u64 cur_end;
3562 struct extent_map *em;
3563 int ret = 0;
3564 int nr = 0;
3565 size_t pg_offset = 0;
3566 size_t iosize;
3567 size_t blocksize = inode->i_sb->s_blocksize;
3568 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
3569
3570 ret = set_page_extent_mapped(page);
3571 if (ret < 0) {
3572 unlock_extent(tree, start, end);
3573 btrfs_page_set_error(fs_info, page, start, PAGE_SIZE);
3574 unlock_page(page);
3575 goto out;
3576 }
3577
3578 if (!PageUptodate(page)) {
3579 if (cleancache_get_page(page) == 0) {
3580 BUG_ON(blocksize != PAGE_SIZE);
3581 unlock_extent(tree, start, end);
3582 unlock_page(page);
3583 goto out;
3584 }
3585 }
3586
3587 if (page->index == last_byte >> PAGE_SHIFT) {
3588 size_t zero_offset = offset_in_page(last_byte);
3589
3590 if (zero_offset) {
3591 iosize = PAGE_SIZE - zero_offset;
3592 memzero_page(page, zero_offset, iosize);
3593 flush_dcache_page(page);
3594 }
3595 }
3596 begin_page_read(fs_info, page);
3597 while (cur <= end) {
3598 unsigned long this_bio_flag = 0;
3599 bool force_bio_submit = false;
3600 u64 disk_bytenr;
3601
3602 if (cur >= last_byte) {
3603 struct extent_state *cached = NULL;
3604
3605 iosize = PAGE_SIZE - pg_offset;
3606 memzero_page(page, pg_offset, iosize);
3607 flush_dcache_page(page);
3608 set_extent_uptodate(tree, cur, cur + iosize - 1,
3609 &cached, GFP_NOFS);
3610 unlock_extent_cached(tree, cur,
3611 cur + iosize - 1, &cached);
3612 end_page_read(page, true, cur, iosize);
3613 break;
3614 }
3615 em = __get_extent_map(inode, page, pg_offset, cur,
3616 end - cur + 1, em_cached);
3617 if (IS_ERR_OR_NULL(em)) {
3618 unlock_extent(tree, cur, end);
3619 end_page_read(page, false, cur, end + 1 - cur);
3620 break;
3621 }
3622 extent_offset = cur - em->start;
3623 BUG_ON(extent_map_end(em) <= cur);
3624 BUG_ON(end < cur);
3625
3626 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
3627 this_bio_flag |= EXTENT_BIO_COMPRESSED;
3628 extent_set_compress_type(&this_bio_flag,
3629 em->compress_type);
3630 }
3631
3632 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3633 cur_end = min(extent_map_end(em) - 1, end);
3634 iosize = ALIGN(iosize, blocksize);
3635 if (this_bio_flag & EXTENT_BIO_COMPRESSED)
3636 disk_bytenr = em->block_start;
3637 else
3638 disk_bytenr = em->block_start + extent_offset;
3639 block_start = em->block_start;
3640 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3641 block_start = EXTENT_MAP_HOLE;
3642
 /*
  * If we have a file range that points to a compressed extent and it's
  * followed by a consecutive file range that points to the same
  * compressed extent (possibly with a different offset and/or length),
  * we must make sure we do not submit a single bio to populate the
  * pages for both ranges, because the compressed extent read would
  * only decompress into the pages of the first range and zero out the
  * pages of the second one.
  *
  * So when the current extent is compressed and a previous extent map
  * with a different start has already been seen (tracked through
  * @prev_em_start), force submission of the current bio so the two
  * ranges end up in separate bios.
  */
3677 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3678 prev_em_start && *prev_em_start != (u64)-1 &&
3679 *prev_em_start != em->start)
3680 force_bio_submit = true;
3681
3682 if (prev_em_start)
3683 *prev_em_start = em->start;
3684
3685 free_extent_map(em);
3686 em = NULL;
3687
3688
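 /* We've found a hole, just zero and go on */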
3689 if (block_start == EXTENT_MAP_HOLE) {
3690 struct extent_state *cached = NULL;
3691
3692 memzero_page(page, pg_offset, iosize);
3693 flush_dcache_page(page);
3694
3695 set_extent_uptodate(tree, cur, cur + iosize - 1,
3696 &cached, GFP_NOFS);
3697 unlock_extent_cached(tree, cur,
3698 cur + iosize - 1, &cached);
3699 end_page_read(page, true, cur, iosize);
3700 cur = cur + iosize;
3701 pg_offset += iosize;
3702 continue;
3703 }
3704
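 /* The get_extent function already copied the data into the page */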
3705 if (test_range_bit(tree, cur, cur_end,
3706 EXTENT_UPTODATE, 1, NULL)) {
3707 unlock_extent(tree, cur, cur + iosize - 1);
3708 end_page_read(page, true, cur, iosize);
3709 cur = cur + iosize;
3710 pg_offset += iosize;
3711 continue;
3712 }
3713
3714
3715
3716 if (block_start == EXTENT_MAP_INLINE) {
3717 unlock_extent(tree, cur, cur + iosize - 1);
3718 end_page_read(page, false, cur, iosize);
3719 cur = cur + iosize;
3720 pg_offset += iosize;
3721 continue;
3722 }
3723
3724 ret = submit_extent_page(REQ_OP_READ | read_flags, NULL,
3725 bio_ctrl, page, disk_bytenr, iosize,
3726 pg_offset,
3727 end_bio_extent_readpage, 0,
3728 this_bio_flag,
3729 force_bio_submit);
3730 if (!ret) {
3731 nr++;
3732 } else {
3733 unlock_extent(tree, cur, cur + iosize - 1);
3734 end_page_read(page, false, cur, iosize);
3735 goto out;
3736 }
3737 cur = cur + iosize;
3738 pg_offset += iosize;
3739 }
3740out:
3741 return ret;
3742}
3743
3744static inline void contiguous_readpages(struct page *pages[], int nr_pages,
3745 u64 start, u64 end,
3746 struct extent_map **em_cached,
3747 struct btrfs_bio_ctrl *bio_ctrl,
3748 u64 *prev_em_start)
3749{
3750 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
3751 int index;
3752
3753 btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
3754
3755 for (index = 0; index < nr_pages; index++) {
3756 btrfs_do_readpage(pages[index], em_cached, bio_ctrl,
3757 REQ_RAHEAD, prev_em_start);
3758 put_page(pages[index]);
3759 }
3760}
3761
3762static void update_nr_written(struct writeback_control *wbc,
3763 unsigned long nr_written)
3764{
3765 wbc->nr_to_write -= nr_written;
3766}
3767
/*
 * Helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if btrfs_run_delalloc_range() did all the work required
 * to write the page (copy into inline extent). In this case the IO has
 * been started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked).
 * This returns < 0 if there were errors (page still locked).
 */
3778static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
3779 struct page *page, struct writeback_control *wbc,
3780 u64 delalloc_start, unsigned long *nr_written)
3781{
3782 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3783 bool found;
3784 u64 delalloc_to_write = 0;
3785 u64 delalloc_end = 0;
3786 int ret;
3787 int page_started = 0;
3788
3789
3790 while (delalloc_end < page_end) {
3791 found = find_lock_delalloc_range(&inode->vfs_inode, page,
3792 &delalloc_start,
3793 &delalloc_end);
3794 if (!found) {
3795 delalloc_start = delalloc_end + 1;
3796 continue;
3797 }
3798 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3799 delalloc_end, &page_started, nr_written, wbc);
3800 if (ret) {
3801 btrfs_page_set_error(inode->root->fs_info, page,
3802 page_offset(page), PAGE_SIZE);
3803 return ret;
3804 }
3805
 /*
  * delalloc_end is already one less than the total length, so
  * we don't subtract one from PAGE_SIZE.
  */
3809 delalloc_to_write += (delalloc_end - delalloc_start +
3810 PAGE_SIZE) >> PAGE_SHIFT;
3811 delalloc_start = delalloc_end + 1;
3812 }
3813 if (wbc->nr_to_write < delalloc_to_write) {
3814 int thresh = 8192;
3815
3816 if (delalloc_to_write < thresh * 2)
3817 thresh = delalloc_to_write;
3818 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3819 thresh);
3820 }
3821
 /*
  * btrfs_run_delalloc_range() may have already started the IO and
  * unlocked the page (e.g. for an inline extent), in which case we
  * must not write the page again.
  */
3825 if (page_started) {
3826
3827
3828
3829
3830
3831 wbc->nr_to_write -= *nr_written;
3832 return 1;
3833 }
3834
3835 return 0;
3836}
3837
3838
/*
 * Find the first byte we need to write.
 *
 * For subpage, one page can contain several sectors, and
 * __extent_writepage_io() will just grab all extent maps in the page
 * range and try to submit all non-inline/non-compressed extents.
 *
 * This is a big problem for subpage, we shouldn't re-submit already written
 * data at all.
 * This function will lookup the subpage dirty bitmap to find which range we
 * really need to submit.
 *
 * Return the next dirty range in [@start, @end).
 * If no dirty range is found, @start will be page_offset(page) + PAGE_SIZE.
 */
3853static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
3854 struct page *page, u64 *start, u64 *end)
3855{
3856 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
3857 u64 orig_start = *start;
3858
3859 unsigned long dirty_bitmap;
3860 unsigned long flags;
3861 int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
3862 int range_start_bit = nbits;
3863 int range_end_bit;
3864
 /*
  * For the regular sector size == page size case, since one page only
  * contains one sector, we return the page offset directly.
  */
3869 if (fs_info->sectorsize == PAGE_SIZE) {
3870 *start = page_offset(page);
3871 *end = page_offset(page) + PAGE_SIZE;
3872 return;
3873 }
3874
3875
3876 spin_lock_irqsave(&subpage->lock, flags);
3877 dirty_bitmap = subpage->dirty_bitmap;
3878 spin_unlock_irqrestore(&subpage->lock, flags);
3879
3880 bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
3881 BTRFS_SUBPAGE_BITMAP_SIZE);
3882 *start = page_offset(page) + range_start_bit * fs_info->sectorsize;
3883 *end = page_offset(page) + range_end_bit * fs_info->sectorsize;
3884}
3885
3886
/*
 * Helper for __extent_writepage. This calls the writepage start hooks,
 * and does the loop to map the page into extents and bios.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked)
 * < 0 if there were errors (page still locked)
 */
3894static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
3895 struct page *page,
3896 struct writeback_control *wbc,
3897 struct extent_page_data *epd,
3898 loff_t i_size,
3899 unsigned long nr_written,
3900 int *nr_ret)
3901{
3902 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3903 u64 cur = page_offset(page);
3904 u64 end = cur + PAGE_SIZE - 1;
3905 u64 extent_offset;
3906 u64 block_start;
3907 struct extent_map *em;
3908 int ret = 0;
3909 int nr = 0;
3910 u32 opf = REQ_OP_WRITE;
3911 const unsigned int write_flags = wbc_to_write_flags(wbc);
3912 bool compressed;
3913
3914 ret = btrfs_writepage_cow_fixup(page);
3915 if (ret) {
3916
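 /* Fixup worker will requeue the page */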
3917 redirty_page_for_writepage(wbc, page);
3918 update_nr_written(wbc, nr_written);
3919 unlock_page(page);
3920 return 1;
3921 }
3922
 /*
  * We don't want to touch the inode after unlocking the page, so we
  * update the mapping writeback index now.
  */
3927 update_nr_written(wbc, nr_written + 1);
3928
3929 while (cur <= end) {
3930 u64 disk_bytenr;
3931 u64 em_end;
3932 u64 dirty_range_start = cur;
3933 u64 dirty_range_end;
3934 u32 iosize;
3935
3936 if (cur >= i_size) {
3937 btrfs_writepage_endio_finish_ordered(inode, page, cur,
3938 end, true);
3939
 /*
  * This range is beyond i_size, thus we don't need to bother
  * writing it back.
  * But we still need to clear the dirty subpage bit, or the next
  * time the page gets dirtied we will try to writeback the sectors
  * with subpage dirty bits set, causing a writeback without an
  * ordered extent.
  */
3947 btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
3948 break;
3949 }
3950
3951 find_next_dirty_byte(fs_info, page, &dirty_range_start,
3952 &dirty_range_end);
3953 if (cur < dirty_range_start) {
3954 cur = dirty_range_start;
3955 continue;
3956 }
3957
3958 em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
3959 if (IS_ERR_OR_NULL(em)) {
3960 btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
3961 ret = PTR_ERR_OR_ZERO(em);
3962 break;
3963 }
3964
3965 extent_offset = cur - em->start;
3966 em_end = extent_map_end(em);
3967 ASSERT(cur <= em_end);
3968 ASSERT(cur < end);
3969 ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
3970 ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
3971 block_start = em->block_start;
3972 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3973 disk_bytenr = em->block_start + extent_offset;
3974
 /*
  * Note that em_end from extent_map_end() and dirty_range_end from
  * find_next_dirty_byte() are all exclusive.
  */
3979 iosize = min(min(em_end, end + 1), dirty_range_end) - cur;
3980
3981 if (btrfs_use_zone_append(inode, em->block_start))
3982 opf = REQ_OP_ZONE_APPEND;
3983
3984 free_extent_map(em);
3985 em = NULL;
3986
 /*
  * Compressed and inline extents are written through other paths in
  * the FS.
  */
3991 if (compressed || block_start == EXTENT_MAP_HOLE ||
3992 block_start == EXTENT_MAP_INLINE) {
3993 if (compressed)
3994 nr++;
3995 else
3996 btrfs_writepage_endio_finish_ordered(inode,
3997 page, cur, cur + iosize - 1, true);
3998 btrfs_page_clear_dirty(fs_info, page, cur, iosize);
3999 cur += iosize;
4000 continue;
4001 }
4002
4003 btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
4004 if (!PageWriteback(page)) {
4005 btrfs_err(inode->root->fs_info,
4006 "page %lu not writeback, cur %llu end %llu",
4007 page->index, cur, end);
4008 }
4009
 /*
  * Although the PageDirty bit is cleared before entering this
  * function, the subpage dirty bit is not. So clear the subpage
  * dirty bit here so a later writeback won't re-submit a range that
  * was already written to disk.
  */
4016 btrfs_page_clear_dirty(fs_info, page, cur, iosize);
4017
4018 ret = submit_extent_page(opf | write_flags, wbc,
4019 &epd->bio_ctrl, page,
4020 disk_bytenr, iosize,
4021 cur - page_offset(page),
4022 end_bio_extent_writepage,
4023 0, 0, false);
4024 if (ret) {
4025 btrfs_page_set_error(fs_info, page, cur, iosize);
4026 if (PageWriteback(page))
4027 btrfs_page_clear_writeback(fs_info, page, cur,
4028 iosize);
4029 }
4030
4031 cur += iosize;
4032 nr++;
4033 }
4034
 /*
  * If we finish without problem, we should not only clear the page
  * dirty bit, but also the subpage dirty bits.
  */
4038 if (!ret)
4039 btrfs_page_assert_not_dirty(fs_info, page);
4040 *nr_ret = nr;
4041 return ret;
4042}
4043
4044
/*
 * The writepage semantics are similar to regular writepage. Extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback. Then the lock bits are removed
 * and the end_io handler clears the writeback ranges.
 *
 * Return 0 if everything goes well.
 * Return <0 for error.
 */
4053static int __extent_writepage(struct page *page, struct writeback_control *wbc,
4054 struct extent_page_data *epd)
4055{
4056 struct inode *inode = page->mapping->host;
4057 u64 start = page_offset(page);
4058 u64 page_end = start + PAGE_SIZE - 1;
4059 int ret;
4060 int nr = 0;
4061 size_t pg_offset;
4062 loff_t i_size = i_size_read(inode);
4063 unsigned long end_index = i_size >> PAGE_SHIFT;
4064 unsigned long nr_written = 0;
4065
4066 trace___extent_writepage(page, inode, wbc);
4067
4068 WARN_ON(!PageLocked(page));
4069
4070 btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
4071 page_offset(page), PAGE_SIZE);
4072
4073 pg_offset = offset_in_page(i_size);
4074 if (page->index > end_index ||
4075 (page->index == end_index && !pg_offset)) {
4076 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
4077 unlock_page(page);
4078 return 0;
4079 }
4080
4081 if (page->index == end_index) {
4082 memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
4083 flush_dcache_page(page);
4084 }
4085
4086 ret = set_page_extent_mapped(page);
4087 if (ret < 0) {
4088 SetPageError(page);
4089 goto done;
4090 }
4091
4092 if (!epd->extent_locked) {
4093 ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
4094 &nr_written);
4095 if (ret == 1)
4096 return 0;
4097 if (ret)
4098 goto done;
4099 }
4100
4101 ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
4102 nr_written, &nr);
4103 if (ret == 1)
4104 return 0;
4105
4106done:
4107 if (nr == 0) {
4108
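 /* Make sure the mapping tag for page dirty gets cleared */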
4109 set_page_writeback(page);
4110 end_page_writeback(page);
4111 }
4112
 /*
  * We deliberately don't derive @ret from PageError() here. For the
  * subpage case a bio covering part of this page may already have
  * completed with an error and set PageError() while our @ret is
  * still 0, whereas for regular sectorsize the current page is only
  * added to the bio and submitted later, so @ret already carries any
  * error. To keep both cases on the same error path, just finish the
  * writeback for the range here and let the page error bit propagate
  * the failure.
  */
4143 if (PageError(page))
4144 end_extent_writepage(page, ret, start, page_end);
4145 unlock_page(page);
4146 ASSERT(ret <= 0);
4147 return ret;
4148}
4149
4150void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
4151{
4152 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
4153 TASK_UNINTERRUPTIBLE);
4154}
4155
4156static void end_extent_buffer_writeback(struct extent_buffer *eb)
4157{
4158 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
4159 smp_mb__after_atomic();
4160 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
4161}
4162
4163
/*
 * Lock the extent buffer status and pages for writeback.
 *
 * May try to flush the write bio if we can't get the lock.
 *
 * Return  0 if the extent buffer doesn't need to be submitted (e.g. it is
 * not dirty).
 * Return >0 if the extent buffer is submitted to bio.
 * Return <0 if something went wrong, no page is locked.
 */
4173static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
4174 struct extent_page_data *epd)
4175{
4176 struct btrfs_fs_info *fs_info = eb->fs_info;
4177 int i, num_pages, failed_page_nr;
4178 int flush = 0;
4179 int ret = 0;
4180
4181 if (!btrfs_try_tree_write_lock(eb)) {
4182 ret = flush_write_bio(epd);
4183 if (ret < 0)
4184 return ret;
4185 flush = 1;
4186 btrfs_tree_lock(eb);
4187 }
4188
4189 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
4190 btrfs_tree_unlock(eb);
4191 if (!epd->sync_io)
4192 return 0;
4193 if (!flush) {
4194 ret = flush_write_bio(epd);
4195 if (ret < 0)
4196 return ret;
4197 flush = 1;
4198 }
4199 while (1) {
4200 wait_on_extent_buffer_writeback(eb);
4201 btrfs_tree_lock(eb);
4202 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
4203 break;
4204 btrfs_tree_unlock(eb);
4205 }
4206 }
4207
 /*
  * We need to do this to prevent races in people who check if the eb
  * is under IO since we can end up having no IO bits set for a short
  * period of time.
  */
4213 spin_lock(&eb->refs_lock);
4214 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
4215 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
4216 spin_unlock(&eb->refs_lock);
4217 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
4218 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
4219 -eb->len,
4220 fs_info->dirty_metadata_batch);
4221 ret = 1;
4222 } else {
4223 spin_unlock(&eb->refs_lock);
4224 }
4225
4226 btrfs_tree_unlock(eb);
4227
 /*
  * Either we don't need to submit any tree block, or we're submitting
  * a subpage eb.
  * Subpage metadata doesn't use page locking at all, so we can skip
  * the page locking.
  */
4234 if (!ret || fs_info->sectorsize < PAGE_SIZE)
4235 return ret;
4236
4237 num_pages = num_extent_pages(eb);
4238 for (i = 0; i < num_pages; i++) {
4239 struct page *p = eb->pages[i];
4240
4241 if (!trylock_page(p)) {
4242 if (!flush) {
4243 int err;
4244
4245 err = flush_write_bio(epd);
4246 if (err < 0) {
4247 ret = err;
4248 failed_page_nr = i;
4249 goto err_unlock;
4250 }
4251 flush = 1;
4252 }
4253 lock_page(p);
4254 }
4255 }
4256
4257 return ret;
4258err_unlock:
4259
4260 for (i = 0; i < failed_page_nr; i++)
4261 unlock_page(eb->pages[i]);
4262
4263
4264
4265
4266
4267 btrfs_tree_lock(eb);
4268 spin_lock(&eb->refs_lock);
4269 set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
4270 end_extent_buffer_writeback(eb);
4271 spin_unlock(&eb->refs_lock);
4272 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
4273 fs_info->dirty_metadata_batch);
4274 btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
4275 btrfs_tree_unlock(eb);
4276 return ret;
4277}
4278
4279static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
4280{
4281 struct btrfs_fs_info *fs_info = eb->fs_info;
4282
4283 btrfs_page_set_error(fs_info, page, eb->start, eb->len);
4284 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
4285 return;
4286
 /*
  * If we error out, we should add back the dirty_metadata_bytes
  * to make it consistent.
  */
4291 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
4292 eb->len, fs_info->dirty_metadata_batch);
4293
4294
 /*
  * A writeback error for a btree extent buffer must not get lost: the
  * pages can be cleaned and re-dirtied before the transaction commit
  * or log sync has a chance to observe the error through
  * filemap_fdatawait_range(). So record it in fs_info->flags, where
  * the commit and log sync paths check for it.
  *
  * Which flag is set depends on where the buffer belongs: log tree
  * writes are checked at log sync time (BTRFS_FS_LOG1_ERR or
  * BTRFS_FS_LOG2_ERR, chosen by eb->log_index), everything else at
  * transaction commit time (BTRFS_FS_BTREE_ERR).
  */
4332 switch (eb->log_index) {
4333 case -1:
4334 set_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags);
4335 break;
4336 case 0:
4337 set_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
4338 break;
4339 case 1:
4340 set_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
4341 break;
4342 default:
4343 BUG();
4344 }
4345}
4346
4347
/*
 * Look up an extent buffer in the radix tree and grab a reference on it,
 * without taking any locks that would be unsafe in endio context.
 */
4351static struct extent_buffer *find_extent_buffer_nolock(
4352 struct btrfs_fs_info *fs_info, u64 start)
4353{
4354 struct extent_buffer *eb;
4355
4356 rcu_read_lock();
4357 eb = radix_tree_lookup(&fs_info->buffer_radix,
4358 start >> fs_info->sectorsize_bits);
4359 if (eb && atomic_inc_not_zero(&eb->refs)) {
4360 rcu_read_unlock();
4361 return eb;
4362 }
4363 rcu_read_unlock();
4364 return NULL;
4365}
4366
4367
4368
4369
4370
4371
4372
4373static void end_bio_subpage_eb_writepage(struct bio *bio)
4374{
4375 struct btrfs_fs_info *fs_info;
4376 struct bio_vec *bvec;
4377 struct bvec_iter_all iter_all;
4378
4379 fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
4380 ASSERT(fs_info->sectorsize < PAGE_SIZE);
4381
4382 ASSERT(!bio_flagged(bio, BIO_CLONED));
4383 bio_for_each_segment_all(bvec, bio, iter_all) {
4384 struct page *page = bvec->bv_page;
4385 u64 bvec_start = page_offset(page) + bvec->bv_offset;
4386 u64 bvec_end = bvec_start + bvec->bv_len - 1;
4387 u64 cur_bytenr = bvec_start;
4388
4389 ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
4390
4391
4392 while (cur_bytenr <= bvec_end) {
4393 struct extent_buffer *eb;
4394 int done;
4395
4396
4397
4398
4399
4400
4401 eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
4402 ASSERT(eb);
4403
4404 cur_bytenr = eb->start + eb->len;
4405
4406 ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
4407 done = atomic_dec_and_test(&eb->io_pages);
4408 ASSERT(done);
4409
4410 if (bio->bi_status ||
4411 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
4412 ClearPageUptodate(page);
4413 set_btree_ioerr(page, eb);
4414 }
4415
4416 btrfs_subpage_clear_writeback(fs_info, page, eb->start,
4417 eb->len);
4418 end_extent_buffer_writeback(eb);
4419
4420
4421
4422
4423
4424 atomic_dec(&eb->refs);
4425 }
4426 }
4427 bio_put(bio);
4428}
4429
4430static void end_bio_extent_buffer_writepage(struct bio *bio)
4431{
4432 struct bio_vec *bvec;
4433 struct extent_buffer *eb;
4434 int done;
4435 struct bvec_iter_all iter_all;
4436
4437 ASSERT(!bio_flagged(bio, BIO_CLONED));
4438 bio_for_each_segment_all(bvec, bio, iter_all) {
4439 struct page *page = bvec->bv_page;
4440
4441 eb = (struct extent_buffer *)page->private;
4442 BUG_ON(!eb);
4443 done = atomic_dec_and_test(&eb->io_pages);
4444
4445 if (bio->bi_status ||
4446 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
4447 ClearPageUptodate(page);
4448 set_btree_ioerr(page, eb);
4449 }
4450
4451 end_page_writeback(page);
4452
4453 if (!done)
4454 continue;
4455
4456 end_extent_buffer_writeback(eb);
4457 }
4458
4459 bio_put(bio);
4460}
4461
4462static void prepare_eb_write(struct extent_buffer *eb)
4463{
4464 u32 nritems;
4465 unsigned long start;
4466 unsigned long end;
4467
4468 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
4469 atomic_set(&eb->io_pages, num_extent_pages(eb));
4470
4471
4472 nritems = btrfs_header_nritems(eb);
4473 if (btrfs_header_level(eb) > 0) {
4474 end = btrfs_node_key_ptr_offset(nritems);
4475 memzero_extent_buffer(eb, end, eb->len - end);
4476 } else {
4477
4478
4479
4480
4481 start = btrfs_item_nr_offset(nritems);
4482 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
4483 memzero_extent_buffer(eb, start, end - start);
4484 }
4485}
4486
4487
4488
4489
4490
4491static int write_one_subpage_eb(struct extent_buffer *eb,
4492 struct writeback_control *wbc,
4493 struct extent_page_data *epd)
4494{
4495 struct btrfs_fs_info *fs_info = eb->fs_info;
4496 struct page *page = eb->pages[0];
4497 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
4498 bool no_dirty_ebs = false;
4499 int ret;
4500
4501 prepare_eb_write(eb);
4502
4503
4504 lock_page(page);
4505 btrfs_subpage_set_writeback(fs_info, page, eb->start, eb->len);
4506
4507
4508 no_dirty_ebs = btrfs_subpage_clear_and_test_dirty(fs_info, page,
4509 eb->start, eb->len);
4510 if (no_dirty_ebs)
4511 clear_page_dirty_for_io(page);
4512
4513 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
4514 &epd->bio_ctrl, page, eb->start, eb->len,
4515 eb->start - page_offset(page),
4516 end_bio_subpage_eb_writepage, 0, 0, false);
4517 if (ret) {
4518 btrfs_subpage_clear_writeback(fs_info, page, eb->start, eb->len);
4519 set_btree_ioerr(page, eb);
4520 unlock_page(page);
4521
4522 if (atomic_dec_and_test(&eb->io_pages))
4523 end_extent_buffer_writeback(eb);
4524 return -EIO;
4525 }
4526 unlock_page(page);
4527
4528
4529
4530
4531 if (no_dirty_ebs)
4532 update_nr_written(wbc, 1);
4533 return ret;
4534}
4535
4536static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
4537 struct writeback_control *wbc,
4538 struct extent_page_data *epd)
4539{
4540 u64 disk_bytenr = eb->start;
4541 int i, num_pages;
4542 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
4543 int ret = 0;
4544
4545 prepare_eb_write(eb);
4546
4547 num_pages = num_extent_pages(eb);
4548 for (i = 0; i < num_pages; i++) {
4549 struct page *p = eb->pages[i];
4550
4551 clear_page_dirty_for_io(p);
4552 set_page_writeback(p);
4553 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
4554 &epd->bio_ctrl, p, disk_bytenr,
4555 PAGE_SIZE, 0,
4556 end_bio_extent_buffer_writepage,
4557 0, 0, false);
4558 if (ret) {
4559 set_btree_ioerr(p, eb);
4560 if (PageWriteback(p))
4561 end_page_writeback(p);
4562 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
4563 end_extent_buffer_writeback(eb);
4564 ret = -EIO;
4565 break;
4566 }
4567 disk_bytenr += PAGE_SIZE;
4568 update_nr_written(wbc, 1);
4569 unlock_page(p);
4570 }
4571
4572 if (unlikely(ret)) {
4573 for (; i < num_pages; i++) {
4574 struct page *p = eb->pages[i];
4575 clear_page_dirty_for_io(p);
4576 unlock_page(p);
4577 }
4578 }
4579
4580 return ret;
4581}
4582
/*
 * Submit one subpage btree page.
 *
 * The main difference to submit_eb_page() is:
 * - Page locking
 *   For subpage, we don't rely on page locking at all.
 *
 * - Flush write bio
 *   We only flush the bio if we may be unable to fit the current extent
 *   buffers into the current bio.
 *
 * Return >=0 for the number of submitted extent buffers.
 * Return <0 for fatal error.
 */
4597static int submit_eb_subpage(struct page *page,
4598 struct writeback_control *wbc,
4599 struct extent_page_data *epd)
4600{
4601 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
4602 int submitted = 0;
4603 u64 page_start = page_offset(page);
4604 int bit_start = 0;
4605 const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
4606 int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
4607 int ret;
4608
4609
4610 while (bit_start < nbits) {
4611 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
4612 struct extent_buffer *eb;
4613 unsigned long flags;
4614 u64 start;
4615
4616
4617
4618
4619
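 /*
  * Take private_lock to make sure page->private (the subpage
  * structure) won't be detached while we look up the dirty bitmap.
  */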
4620 spin_lock(&page->mapping->private_lock);
4621 if (!PagePrivate(page)) {
4622 spin_unlock(&page->mapping->private_lock);
4623 break;
4624 }
4625 spin_lock_irqsave(&subpage->lock, flags);
4626 if (!((1 << bit_start) & subpage->dirty_bitmap)) {
4627 spin_unlock_irqrestore(&subpage->lock, flags);
4628 spin_unlock(&page->mapping->private_lock);
4629 bit_start++;
4630 continue;
4631 }
4632
4633 start = page_start + bit_start * fs_info->sectorsize;
4634 bit_start += sectors_per_node;
4635
4636
4637
4638
4639
4640 eb = find_extent_buffer_nolock(fs_info, start);
4641 spin_unlock_irqrestore(&subpage->lock, flags);
4642 spin_unlock(&page->mapping->private_lock);
4643
4644
4645
4646
4647
4648
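 /*
  * No extent buffer found at this bytenr (it may have been freed
  * already), nothing to submit for this range.
  */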
4649 if (!eb)
4650 continue;
4651
4652 ret = lock_extent_buffer_for_io(eb, epd);
4653 if (ret == 0) {
4654 free_extent_buffer(eb);
4655 continue;
4656 }
4657 if (ret < 0) {
4658 free_extent_buffer(eb);
4659 goto cleanup;
4660 }
4661 ret = write_one_subpage_eb(eb, wbc, epd);
4662 free_extent_buffer(eb);
4663 if (ret < 0)
4664 goto cleanup;
4665 submitted++;
4666 }
4667 return submitted;
4668
4669cleanup:
4670
4671 end_write_bio(epd, ret);
4672 return ret;
4673}
4674
4675
/*
 * Submit all page(s) of one extent buffer.
 *
 * @page:       the page of one extent buffer
 * @eb_context: to determine if we need to submit this page, if the current
 *              page belongs to this eb, we don't need to submit
 *
 * The caller should pass each page in their bytenr order, and here we use
 * @eb_context to determine if we have submitted pages of one extent buffer.
 *
 * If we have, we just skip until we hit a new page that doesn't belong to
 * the current @eb_context.
 *
 * If not, we submit all the page(s) of the extent buffer.
 *
 * Return >0 if we have submitted the extent buffer successfully.
 * Return 0 if we don't need to submit the page, as it's already submitted by
 * a previous call.
 * Return <0 for fatal error.
 */
4695static int submit_eb_page(struct page *page, struct writeback_control *wbc,
4696 struct extent_page_data *epd,
4697 struct extent_buffer **eb_context)
4698{
4699 struct address_space *mapping = page->mapping;
4700 struct btrfs_block_group *cache = NULL;
4701 struct extent_buffer *eb;
4702 int ret;
4703
4704 if (!PagePrivate(page))
4705 return 0;
4706
4707 if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
4708 return submit_eb_subpage(page, wbc, epd);
4709
4710 spin_lock(&mapping->private_lock);
4711 if (!PagePrivate(page)) {
4712 spin_unlock(&mapping->private_lock);
4713 return 0;
4714 }
4715
4716 eb = (struct extent_buffer *)page->private;
4717
4718
4719
4720
4721
4722 if (WARN_ON(!eb)) {
4723 spin_unlock(&mapping->private_lock);
4724 return 0;
4725 }
4726
4727 if (eb == *eb_context) {
4728 spin_unlock(&mapping->private_lock);
4729 return 0;
4730 }
4731 ret = atomic_inc_not_zero(&eb->refs);
4732 spin_unlock(&mapping->private_lock);
4733 if (!ret)
4734 return 0;
4735
4736 if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
4737
4738
4739
4740
4741 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
4742 ret = -EAGAIN;
4743 else
4744 ret = 0;
4745 free_extent_buffer(eb);
4746 return ret;
4747 }
4748
4749 *eb_context = eb;
4750
4751 ret = lock_extent_buffer_for_io(eb, epd);
4752 if (ret <= 0) {
4753 btrfs_revert_meta_write_pointer(cache, eb);
4754 if (cache)
4755 btrfs_put_block_group(cache);
4756 free_extent_buffer(eb);
4757 return ret;
4758 }
4759 if (cache)
4760 btrfs_put_block_group(cache);
4761 ret = write_one_eb(eb, wbc, epd);
4762 free_extent_buffer(eb);
4763 if (ret < 0)
4764 return ret;
4765 return 1;
4766}
4767
4768int btree_write_cache_pages(struct address_space *mapping,
4769 struct writeback_control *wbc)
4770{
4771 struct extent_buffer *eb_context = NULL;
4772 struct extent_page_data epd = {
4773 .bio_ctrl = { 0 },
4774 .extent_locked = 0,
4775 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4776 };
4777 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
4778 int ret = 0;
4779 int done = 0;
4780 int nr_to_write_done = 0;
4781 struct pagevec pvec;
4782 int nr_pages;
4783 pgoff_t index;
4784 pgoff_t end;
4785 int scanned = 0;
4786 xa_mark_t tag;
4787
4788 pagevec_init(&pvec);
4789 if (wbc->range_cyclic) {
4790 index = mapping->writeback_index;
4791 end = -1;
4792
4793
4794
4795
4796 scanned = (index == 0);
4797 } else {
4798 index = wbc->range_start >> PAGE_SHIFT;
4799 end = wbc->range_end >> PAGE_SHIFT;
4800 scanned = 1;
4801 }
4802 if (wbc->sync_mode == WB_SYNC_ALL)
4803 tag = PAGECACHE_TAG_TOWRITE;
4804 else
4805 tag = PAGECACHE_TAG_DIRTY;
4806 btrfs_zoned_meta_io_lock(fs_info);
4807retry:
4808 if (wbc->sync_mode == WB_SYNC_ALL)
4809 tag_pages_for_writeback(mapping, index, end);
4810 while (!done && !nr_to_write_done && (index <= end) &&
4811 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
4812 tag))) {
4813 unsigned i;
4814
4815 for (i = 0; i < nr_pages; i++) {
4816 struct page *page = pvec.pages[i];
4817
4818 ret = submit_eb_page(page, wbc, &epd, &eb_context);
4819 if (ret == 0)
4820 continue;
4821 if (ret < 0) {
4822 done = 1;
4823 break;
4824 }
4825
4826
4827
4828
4829
4830
4831 nr_to_write_done = wbc->nr_to_write <= 0;
4832 }
4833 pagevec_release(&pvec);
4834 cond_resched();
4835 }
4836 if (!scanned && !done) {
4837
4838
4839
4840
4841 scanned = 1;
4842 index = 0;
4843 goto retry;
4844 }
4845 if (ret < 0) {
4846 end_write_bio(&epd, ret);
4847 goto out;
4848 }
4849
4850
 /*
  * If something went wrong, don't allow any metadata write bio to be
  * submitted.
  *
  * On a corrupted or fuzzed image, a stale tree block can end up dirty
  * again without being tracked by any dirty extent io tree, and
  * writing such a wild eb back could further corrupt the filesystem.
  * So once the filesystem is in an error state, end the pending
  * metadata writeback with -EROFS instead of flushing the bio.
  */
4876 if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
4877 ret = flush_write_bio(&epd);
4878 } else {
4879 ret = -EROFS;
4880 end_write_bio(&epd, ret);
4881 }
4882out:
4883 btrfs_zoned_meta_io_unlock(fs_info);
4884 return ret;
4885}
4886
4887
/*
 * Walk the list of dirty pages of the given address space and write all of
 * them.
 *
 * @mapping: address space structure to write
 * @wbc:     subtract the number of written pages from *@wbc->nr_to_write
 * @epd:     holds context for the write, namely the bio
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them. If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 */
4902static int extent_write_cache_pages(struct address_space *mapping,
4903 struct writeback_control *wbc,
4904 struct extent_page_data *epd)
4905{
4906 struct inode *inode = mapping->host;
4907 int ret = 0;
4908 int done = 0;
4909 int nr_to_write_done = 0;
4910 struct pagevec pvec;
4911 int nr_pages;
4912 pgoff_t index;
4913 pgoff_t end;
4914 pgoff_t done_index;
4915 int range_whole = 0;
4916 int scanned = 0;
4917 xa_mark_t tag;
4918
 /*
  * We have to hold onto the inode so that ordered extents can do their
  * work when the IO finishes. The alternative to this is failing to
  * add an ordered extent if the igrab() fails there and that is a huge
  * pain to deal with, so instead just hold onto the inode throughout
  * the writepages operation. If it fails here we are freeing up the
  * inode anyway and we'd rather not waste our time writing out stuff
  * that is going to be truncated anyway.
  */
4928 if (!igrab(inode))
4929 return 0;
4930
4931 pagevec_init(&pvec);
4932 if (wbc->range_cyclic) {
4933 index = mapping->writeback_index;
4934 end = -1;
4935
4936
4937
4938
4939 scanned = (index == 0);
4940 } else {
4941 index = wbc->range_start >> PAGE_SHIFT;
4942 end = wbc->range_end >> PAGE_SHIFT;
4943 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4944 range_whole = 1;
4945 scanned = 1;
4946 }
4947
 /*
  * We do the tagged writepage as long as the snapshot flush bit is set
  * and we are the first one who does the filemap_flush() on this inode.
  *
  * The nr_to_write == LONG_MAX is needed to make sure other flushers do
  * not race in and drop the bit.
  */
4955 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4956 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4957 &BTRFS_I(inode)->runtime_flags))
4958 wbc->tagged_writepages = 1;
4959
4960 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4961 tag = PAGECACHE_TAG_TOWRITE;
4962 else
4963 tag = PAGECACHE_TAG_DIRTY;
4964retry:
4965 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4966 tag_pages_for_writeback(mapping, index, end);
4967 done_index = index;
4968 while (!done && !nr_to_write_done && (index <= end) &&
4969 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4970 &index, end, tag))) {
4971 unsigned i;
4972
4973 for (i = 0; i < nr_pages; i++) {
4974 struct page *page = pvec.pages[i];
4975
4976 done_index = page->index + 1;
4977
 /*
  * At this point we hold neither the i_pages lock nor the page lock:
  * the page may be truncated or invalidated (changing page->mapping to
  * NULL), or even swizzled back from swapper_space to tmpfs file
  * mapping.
  */
4984 if (!trylock_page(page)) {
4985 ret = flush_write_bio(epd);
4986 BUG_ON(ret < 0);
4987 lock_page(page);
4988 }
4989
4990 if (unlikely(page->mapping != mapping)) {
4991 unlock_page(page);
4992 continue;
4993 }
4994
4995 if (wbc->sync_mode != WB_SYNC_NONE) {
4996 if (PageWriteback(page)) {
4997 ret = flush_write_bio(epd);
4998 BUG_ON(ret < 0);
4999 }
5000 wait_on_page_writeback(page);
5001 }
5002
5003 if (PageWriteback(page) ||
5004 !clear_page_dirty_for_io(page)) {
5005 unlock_page(page);
5006 continue;
5007 }
5008
5009 ret = __extent_writepage(page, wbc, epd);
5010 if (ret < 0) {
5011 done = 1;
5012 break;
5013 }
5014
 /*
  * The filesystem may choose to bump up nr_to_write. We have to make
  * sure to honor the new nr_to_write at any time.
  */
5020 nr_to_write_done = wbc->nr_to_write <= 0;
5021 }
5022 pagevec_release(&pvec);
5023 cond_resched();
5024 }
5025 if (!scanned && !done) {
5026
5027
5028
5029
5030 scanned = 1;
5031 index = 0;
5032
5033
5034
5035
5036
5037
5038
5039 ret = flush_write_bio(epd);
5040 if (!ret)
5041 goto retry;
5042 }
5043
5044 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
5045 mapping->writeback_index = done_index;
5046
5047 btrfs_add_delayed_iput(inode);
5048 return ret;
5049}
5050
5051int extent_write_full_page(struct page *page, struct writeback_control *wbc)
5052{
5053 int ret;
5054 struct extent_page_data epd = {
5055 .bio_ctrl = { 0 },
5056 .extent_locked = 0,
5057 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
5058 };
5059
5060 ret = __extent_writepage(page, wbc, &epd);
5061 ASSERT(ret <= 0);
5062 if (ret < 0) {
5063 end_write_bio(&epd, ret);
5064 return ret;
5065 }
5066
5067 ret = flush_write_bio(&epd);
5068 ASSERT(ret <= 0);
5069 return ret;
5070}
5071
5072int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
5073 int mode)
5074{
5075 int ret = 0;
5076 struct address_space *mapping = inode->i_mapping;
5077 struct page *page;
5078 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
5079 PAGE_SHIFT;
5080
5081 struct extent_page_data epd = {
5082 .bio_ctrl = { 0 },
5083 .extent_locked = 1,
5084 .sync_io = mode == WB_SYNC_ALL,
5085 };
5086 struct writeback_control wbc_writepages = {
5087 .sync_mode = mode,
5088 .nr_to_write = nr_pages * 2,
5089 .range_start = start,
5090 .range_end = end + 1,
5091
5092 .punt_to_cgroup = 1,
5093 .no_cgroup_owner = 1,
5094 };
5095
5096 wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
5097 while (start <= end) {
5098 page = find_get_page(mapping, start >> PAGE_SHIFT);
5099 if (clear_page_dirty_for_io(page))
5100 ret = __extent_writepage(page, &wbc_writepages, &epd);
5101 else {
5102 btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
5103 page, start, start + PAGE_SIZE - 1, true);
5104 unlock_page(page);
5105 }
5106 put_page(page);
5107 start += PAGE_SIZE;
5108 }
5109
5110 ASSERT(ret <= 0);
5111 if (ret == 0)
5112 ret = flush_write_bio(&epd);
5113 else
5114 end_write_bio(&epd, ret);
5115
5116 wbc_detach_inode(&wbc_writepages);
5117 return ret;
5118}
5119
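/*
 * Address-space level writeback entry point: walk the dirty pages with
 * extent_write_cache_pages() and then either flush the accumulated bio or,
 * on error, end it with the error code.
 */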
5120int extent_writepages(struct address_space *mapping,
5121 struct writeback_control *wbc)
5122{
5123 int ret = 0;
5124 struct extent_page_data epd = {
5125 .bio_ctrl = { 0 },
5126 .extent_locked = 0,
5127 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
5128 };
5129
5130 ret = extent_write_cache_pages(mapping, wbc, &epd);
5131 ASSERT(ret <= 0);
5132 if (ret < 0) {
5133 end_write_bio(&epd, ret);
5134 return ret;
5135 }
5136 ret = flush_write_bio(&epd);
5137 return ret;
5138}
5139
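/*
 * Readahead entry point: grab batches of pages from the readahead control,
 * issue contiguous reads for them, and submit whatever bio is left over at
 * the end.  Errors are not propagated, readahead is purely opportunistic.
 */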
5140void extent_readahead(struct readahead_control *rac)
5141{
5142 struct btrfs_bio_ctrl bio_ctrl = { 0 };
5143 struct page *pagepool[16];
5144 struct extent_map *em_cached = NULL;
5145 u64 prev_em_start = (u64)-1;
5146 int nr;
5147
5148 while ((nr = readahead_page_batch(rac, pagepool))) {
5149 u64 contig_start = readahead_pos(rac);
5150 u64 contig_end = contig_start + readahead_batch_length(rac) - 1;
5151
5152 contiguous_readpages(pagepool, nr, contig_start, contig_end,
5153 &em_cached, &bio_ctrl, &prev_em_start);
5154 }
5155
5156 if (em_cached)
5157 free_extent_map(em_cached);
5158
5159 if (bio_ctrl.bio) {
5160 if (submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags))
5161 return;
5162 }
5163}
5164
/*
 * Basic invalidatepage code.  This waits on any locked or writeback ranges
 * corresponding to the page, and then clears any extent state for the range.
 */
5170int extent_invalidatepage(struct extent_io_tree *tree,
5171 struct page *page, unsigned long offset)
5172{
5173 struct extent_state *cached_state = NULL;
5174 u64 start = page_offset(page);
5175 u64 end = start + PAGE_SIZE - 1;
5176 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
5177
	/* This function is only called for the btree inode */
5179 ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
5180
5181 start += ALIGN(offset, blocksize);
5182 if (start > end)
5183 return 0;
5184
5185 lock_extent_bits(tree, start, end, &cached_state);
5186 wait_on_page_writeback(page);
5187
	/*
	 * Currently for the btree io tree, only EXTENT_LOCKED is utilized,
	 * so here we only need to unlock the extent range to free any
	 * existing extent state.
	 */
5193 unlock_extent_cached(tree, start, end, &cached_state);
5194 return 0;
5195}
5196
/*
 * A helper for releasepage, this tests for areas of the page that are locked
 * or under IO and drops the related state bits if it is safe to drop the
 * page.
 */
5202static int try_release_extent_state(struct extent_io_tree *tree,
5203 struct page *page, gfp_t mask)
5204{
5205 u64 start = page_offset(page);
5206 u64 end = start + PAGE_SIZE - 1;
5207 int ret = 1;
5208
5209 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
5210 ret = 0;
5211 } else {
		/*
		 * At this point we can safely clear everything except the
		 * locked bit, the nodatasum bit and the delalloc new bit.
		 * The delalloc new bit will be cleared by ordered extent
		 * completion.
		 */
5218 ret = __clear_extent_bit(tree, start, end,
5219 ~(EXTENT_LOCKED | EXTENT_NODATASUM | EXTENT_DELALLOC_NEW),
5220 0, 0, NULL, mask, NULL);

		/*
		 * If clear_extent_bit failed for ENOMEM reasons, we can't
		 * allow the release to continue.
		 */
5225 if (ret < 0)
5226 ret = 0;
5227 else
5228 ret = 1;
5229 }
5230 return ret;
5231}
5232
/*
 * A helper for releasepage.  As long as there are no locked extents in the
 * range corresponding to the page, both state records and extent map records
 * are removed.
 */
5238int try_release_extent_mapping(struct page *page, gfp_t mask)
5239{
5240 struct extent_map *em;
5241 u64 start = page_offset(page);
5242 u64 end = start + PAGE_SIZE - 1;
5243 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
5244 struct extent_io_tree *tree = &btrfs_inode->io_tree;
5245 struct extent_map_tree *map = &btrfs_inode->extent_tree;
5246
5247 if (gfpflags_allow_blocking(mask) &&
5248 page->mapping->host->i_size > SZ_16M) {
5249 u64 len;
5250 while (start <= end) {
5251 struct btrfs_fs_info *fs_info;
5252 u64 cur_gen;
5253
5254 len = end - start + 1;
5255 write_lock(&map->lock);
5256 em = lookup_extent_mapping(map, start, len);
5257 if (!em) {
5258 write_unlock(&map->lock);
5259 break;
5260 }
5261 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
5262 em->start != start) {
5263 write_unlock(&map->lock);
5264 free_extent_map(em);
5265 break;
5266 }
5267 if (test_range_bit(tree, em->start,
5268 extent_map_end(em) - 1,
5269 EXTENT_LOCKED, 0, NULL))
5270 goto next;
			/*
			 * If it's not in the list of modified extents, used
			 * by a fast fsync, we can remove it. If it's being
			 * logged we can safely remove it since fsync took an
			 * extra reference on the em.
			 */
5277 if (list_empty(&em->list) ||
5278 test_bit(EXTENT_FLAG_LOGGING, &em->flags))
5279 goto remove_em;

			/*
			 * If it's in the list of modified extents, remove it
			 * only if its generation is older than the current
			 * one, in which case we don't need it for a fast
			 * fsync. Otherwise don't remove it, we could be
			 * racing with an ongoing fast fsync that could miss
			 * the new extent.
			 */
5287 fs_info = btrfs_inode->root->fs_info;
5288 spin_lock(&fs_info->trans_lock);
5289 cur_gen = fs_info->generation;
5290 spin_unlock(&fs_info->trans_lock);
5291 if (em->generation >= cur_gen)
5292 goto next;
5293remove_em:
			/*
			 * We only remove extent maps that are not in the list
			 * of modified extents or that are in the list but
			 * with a generation lower than the current one, so
			 * there is no need to set the full fsync flag on the
			 * inode (it hurts the fsync performance for workloads
			 * with a data size that exceeds or is close to the
			 * system's memory).
			 */
5302 remove_extent_mapping(map, em);
5303
5304 free_extent_map(em);
5305next:
5306 start = extent_map_end(em);
5307 write_unlock(&map->lock);
5308
5309
5310 free_extent_map(em);
5311
5312 cond_resched();
5313 }
5314 }
5315 return try_release_extent_state(tree, page, mask);
5316}
5317
/*
 * Helper function for fiemap, which doesn't want to see any holes.  This maps
 * until we find something past 'last'.
 */
5322static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
5323 u64 offset, u64 last)
5324{
5325 u64 sectorsize = btrfs_inode_sectorsize(inode);
5326 struct extent_map *em;
5327 u64 len;
5328
5329 if (offset >= last)
5330 return NULL;
5331
5332 while (1) {
5333 len = last - offset;
5334 if (len == 0)
5335 break;
5336 len = ALIGN(len, sectorsize);
5337 em = btrfs_get_extent_fiemap(inode, offset, len);
5338 if (IS_ERR_OR_NULL(em))
5339 return em;
5340
5341
5342 if (em->block_start != EXTENT_MAP_HOLE)
5343 return em;
5344
5345
5346 offset = extent_map_end(em);
5347 free_extent_map(em);
5348 if (offset >= last)
5349 break;
5350 }
5351 return NULL;
5352}
5353
/*
 * To cache the previous fiemap extent.
 *
 * Will be used for merging fiemap extents.
 */
5359struct fiemap_cache {
5360 u64 offset;
5361 u64 phys;
5362 u64 len;
5363 u32 flags;
5364 bool cached;
5365};
5366
/*
 * Helper to submit a fiemap extent.
 *
 * Will try to merge the current fiemap extent specified by @offset, @phys,
 * @len and @flags with the cached one.
 * Only when we fail to merge is the cached extent submitted as a fiemap
 * extent.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
5377static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
5378 struct fiemap_cache *cache,
5379 u64 offset, u64 phys, u64 len, u32 flags)
5380{
5381 int ret = 0;
5382
5383 if (!cache->cached)
5384 goto assign;
5385
	/*
	 * Sanity check: extent_fiemap() should have ensured that the new
	 * fiemap extent won't overlap with the cached one.
	 * Not recoverable.
	 *
	 * NOTE: Physical addresses can overlap, due to compression.
	 */
5393 if (cache->offset + cache->len > offset) {
5394 WARN_ON(1);
5395 return -EINVAL;
5396 }
5397
	/*
	 * Only merge fiemap extents if:
	 * 1) Their logical addresses are continuous
	 *
	 * 2) Their physical addresses are continuous
	 *    So truly compressed (physical size smaller than logical size)
	 *    extents won't get merged with each other
	 *
	 * 3) They share the same flags except FIEMAP_EXTENT_LAST
	 *    So a regular extent won't get merged with a prealloc extent
	 */
5409 if (cache->offset + cache->len == offset &&
5410 cache->phys + cache->len == phys &&
5411 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
5412 (flags & ~FIEMAP_EXTENT_LAST)) {
5413 cache->len += len;
5414 cache->flags |= flags;
5415 goto try_submit_last;
5416 }
5417
	/* Not mergeable, need to submit the cached one */
5419 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
5420 cache->len, cache->flags);
5421 cache->cached = false;
5422 if (ret)
5423 return ret;
5424assign:
5425 cache->cached = true;
5426 cache->offset = offset;
5427 cache->phys = phys;
5428 cache->len = len;
5429 cache->flags = flags;
5430try_submit_last:
5431 if (cache->flags & FIEMAP_EXTENT_LAST) {
5432 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
5433 cache->phys, cache->len, cache->flags);
5434 cache->cached = false;
5435 }
5436 return ret;
5437}
5438
/*
 * Emit the last cached fiemap extent.
 *
 * The last fiemap cache may still be cached in the following case:
 * 0		      4k		    8k
 * |<- Fiemap range ->|
 * |<------------  First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */
5450static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
5451 struct fiemap_cache *cache)
5452{
5453 int ret;
5454
5455 if (!cache->cached)
5456 return 0;
5457
5458 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
5459 cache->len, cache->flags);
5460 cache->cached = false;
5461 if (ret > 0)
5462 ret = 0;
5463 return ret;
5464}
5465
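/*
 * Fill in fiemap extents for the range [start, start + len) of the inode.
 * The range is rounded to sector boundaries, the io tree is locked for the
 * duration, and extents are merged through the fiemap_cache above before
 * being handed to fiemap_fill_next_extent().
 */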
5466int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
5467 u64 start, u64 len)
5468{
5469 int ret = 0;
5470 u64 off;
5471 u64 max = start + len;
5472 u32 flags = 0;
5473 u32 found_type;
5474 u64 last;
5475 u64 last_for_get_extent = 0;
5476 u64 disko = 0;
5477 u64 isize = i_size_read(&inode->vfs_inode);
5478 struct btrfs_key found_key;
5479 struct extent_map *em = NULL;
5480 struct extent_state *cached_state = NULL;
5481 struct btrfs_path *path;
5482 struct btrfs_root *root = inode->root;
5483 struct fiemap_cache cache = { 0 };
5484 struct ulist *roots;
5485 struct ulist *tmp_ulist;
5486 int end = 0;
5487 u64 em_start = 0;
5488 u64 em_len = 0;
5489 u64 em_end = 0;
5490
5491 if (len == 0)
5492 return -EINVAL;
5493
5494 path = btrfs_alloc_path();
5495 if (!path)
5496 return -ENOMEM;
5497
5498 roots = ulist_alloc(GFP_KERNEL);
5499 tmp_ulist = ulist_alloc(GFP_KERNEL);
5500 if (!roots || !tmp_ulist) {
5501 ret = -ENOMEM;
5502 goto out_free_ulist;
5503 }
5504
	/*
	 * We can't initialize 'off' to 'start' as that could miss extents
	 * due to extent item merging.
	 */
5509 off = 0;
5510 start = round_down(start, btrfs_inode_sectorsize(inode));
5511 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
5512
	/*
	 * Lookup the last file extent.  We're not using i_size here because
	 * there might be preallocation past i_size.
	 */
5517 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
5518 0);
5519 if (ret < 0) {
5520 goto out_free_ulist;
5521 } else {
5522 WARN_ON(!ret);
5523 if (ret == 1)
5524 ret = 0;
5525 }
5526
5527 path->slots[0]--;
5528 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
5529 found_type = found_key.type;
5530
	/* No extents, but there might be delalloc bits */
5532 if (found_key.objectid != btrfs_ino(inode) ||
5533 found_type != BTRFS_EXTENT_DATA_KEY) {
		/* Have to trust i_size as the end */
5535 last = (u64)-1;
5536 last_for_get_extent = isize;
5537 } else {
		/*
		 * Remember the start of the last extent.  There are a bunch
		 * of different factors that can move the data in this file
		 * around, so we cannot trust isize for the end.
		 */
5543 last = found_key.offset;
5544 last_for_get_extent = last + 1;
5545 }
5546 btrfs_release_path(path);
5547
	/*
	 * We might have some extents allocated but more delalloc past those
	 * extents.  So, we trust isize unless the start of the last extent
	 * is beyond isize.
	 */
5553 if (last < isize) {
5554 last = (u64)-1;
5555 last_for_get_extent = isize;
5556 }
5557
5558 lock_extent_bits(&inode->io_tree, start, start + len - 1,
5559 &cached_state);
5560
5561 em = get_extent_skip_holes(inode, start, last_for_get_extent);
5562 if (!em)
5563 goto out;
5564 if (IS_ERR(em)) {
5565 ret = PTR_ERR(em);
5566 goto out;
5567 }
5568
5569 while (!end) {
5570 u64 offset_in_extent = 0;
5571
5572
5573 if (em->start >= max || extent_map_end(em) < off)
5574 break;
5575
		/*
		 * get_extent may return an extent that starts before our
		 * requested range.  We have to make sure the ranges we
		 * return to fiemap always move forward and don't overlap,
		 * so adjust the offsets here.
		 */
5582 em_start = max(em->start, off);
5583
		/*
		 * Record the offset from the start of the extent for
		 * adjusting the disk offset below.  Only do this if the
		 * extent isn't compressed, since our in-ram offset may be
		 * past what we have actually allocated on disk.
		 */
5590 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
5591 offset_in_extent = em_start - em->start;
5592 em_end = extent_map_end(em);
5593 em_len = em_end - em_start;
5594 flags = 0;
5595 if (em->block_start < EXTENT_MAP_LAST_BYTE)
5596 disko = em->block_start + offset_in_extent;
5597 else
5598 disko = 0;
5599
5600
5601
5602
5603 off = extent_map_end(em);
5604 if (off >= max)
5605 end = 1;
5606
5607 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
5608 end = 1;
5609 flags |= FIEMAP_EXTENT_LAST;
5610 } else if (em->block_start == EXTENT_MAP_INLINE) {
5611 flags |= (FIEMAP_EXTENT_DATA_INLINE |
5612 FIEMAP_EXTENT_NOT_ALIGNED);
5613 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
5614 flags |= (FIEMAP_EXTENT_DELALLOC |
5615 FIEMAP_EXTENT_UNKNOWN);
5616 } else if (fieinfo->fi_extents_max) {
5617 u64 bytenr = em->block_start -
5618 (em->start - em->orig_start);

			/*
			 * As btrfs supports shared space, this information
			 * can be exported to userspace tools via the flag
			 * FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0 then
			 * we're just getting a count and we can skip the
			 * lookup stuff.
			 */
5627 ret = btrfs_check_shared(root, btrfs_ino(inode),
5628 bytenr, roots, tmp_ulist);
5629 if (ret < 0)
5630 goto out_free;
5631 if (ret)
5632 flags |= FIEMAP_EXTENT_SHARED;
5633 ret = 0;
5634 }
5635 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
5636 flags |= FIEMAP_EXTENT_ENCODED;
5637 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5638 flags |= FIEMAP_EXTENT_UNWRITTEN;
5639
5640 free_extent_map(em);
5641 em = NULL;
5642 if ((em_start >= last) || em_len == (u64)-1 ||
5643 (last == (u64)-1 && isize <= em_end)) {
5644 flags |= FIEMAP_EXTENT_LAST;
5645 end = 1;
5646 }
5647
5648
5649 em = get_extent_skip_holes(inode, off, last_for_get_extent);
5650 if (IS_ERR(em)) {
5651 ret = PTR_ERR(em);
5652 goto out;
5653 }
5654 if (!em) {
5655 flags |= FIEMAP_EXTENT_LAST;
5656 end = 1;
5657 }
5658 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
5659 em_len, flags);
5660 if (ret) {
5661 if (ret == 1)
5662 ret = 0;
5663 goto out_free;
5664 }
5665 }
5666out_free:
5667 if (!ret)
5668 ret = emit_last_fiemap_cache(fieinfo, &cache);
5669 free_extent_map(em);
5670out:
5671 unlock_extent_cached(&inode->io_tree, start, start + len - 1,
5672 &cached_state);
5673
5674out_free_ulist:
5675 btrfs_free_path(path);
5676 ulist_free(roots);
5677 ulist_free(tmp_ulist);
5678 return ret;
5679}
5680
5681static void __free_extent_buffer(struct extent_buffer *eb)
5682{
5683 kmem_cache_free(extent_buffer_cache, eb);
5684}
5685
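/*
 * An extent buffer is considered under IO if it still has pending page reads
 * (io_pages), or is flagged dirty or under writeback.
 */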
5686int extent_buffer_under_io(const struct extent_buffer *eb)
5687{
5688 return (atomic_read(&eb->io_pages) ||
5689 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
5690 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
5691}
5692
5693static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
5694{
5695 struct btrfs_subpage *subpage;
5696
5697 lockdep_assert_held(&page->mapping->private_lock);
5698
5699 if (PagePrivate(page)) {
5700 subpage = (struct btrfs_subpage *)page->private;
5701 if (atomic_read(&subpage->eb_refs))
5702 return true;
5703
5704
5705
5706
5707 if (atomic_read(&subpage->readers))
5708 return true;
5709 }
5710 return false;
5711}
5712
5713static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
5714{
5715 struct btrfs_fs_info *fs_info = eb->fs_info;
5716 const bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5717
	/*
	 * For a mapped eb we're going to change the page private, which
	 * should be done under the private_lock.
	 */
5722 if (mapped)
5723 spin_lock(&page->mapping->private_lock);
5724
5725 if (!PagePrivate(page)) {
5726 if (mapped)
5727 spin_unlock(&page->mapping->private_lock);
5728 return;
5729 }
5730
5731 if (fs_info->sectorsize == PAGE_SIZE) {
		/*
		 * We do this since we'll remove the pages after we've removed
		 * the eb from the radix tree, so we could race and have this
		 * page now attached to the new eb.  So only clear
		 * page_private if it's still connected to this eb.
		 */
5739 if (PagePrivate(page) &&
5740 page->private == (unsigned long)eb) {
5741 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
5742 BUG_ON(PageDirty(page));
5743 BUG_ON(PageWriteback(page));
			/*
			 * We need to make sure we haven't been attached to a
			 * new eb.
			 */
5748 detach_page_private(page);
5749 }
5750 if (mapped)
5751 spin_unlock(&page->mapping->private_lock);
5752 return;
5753 }
5754
	/*
	 * For the subpage case the page can host multiple extent buffers.
	 * An unmapped (cloned or dummy) extent buffer is the sole owner of
	 * its page, so its subpage structure can be detached right away.
	 */
5760 if (!mapped) {
5761 btrfs_detach_subpage(fs_info, page);
5762 return;
5763 }
5764
5765 btrfs_page_dec_eb_refs(fs_info, page);
5766
	/*
	 * We can only detach the page private if there are no other ebs in
	 * the page range and no unfinished IO.
	 */
5771 if (!page_range_has_eb(fs_info, page))
5772 btrfs_detach_subpage(fs_info, page);
5773
5774 spin_unlock(&page->mapping->private_lock);
5775}
5776
5777
5778static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
5779{
5780 int i;
5781 int num_pages;
5782
5783 ASSERT(!extent_buffer_under_io(eb));
5784
5785 num_pages = num_extent_pages(eb);
5786 for (i = 0; i < num_pages; i++) {
5787 struct page *page = eb->pages[i];
5788
5789 if (!page)
5790 continue;
5791
5792 detach_extent_buffer_page(eb, page);
5793
5794
5795 put_page(page);
5796 }
5797}
5798
5799
5800
5801
5802static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
5803{
5804 btrfs_release_extent_buffer_pages(eb);
5805 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
5806 __free_extent_buffer(eb);
5807}
5808
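/*
 * Allocate the in-memory extent_buffer structure (no pages yet) for the
 * given logical range.  The allocation uses __GFP_NOFAIL, so this never
 * returns NULL.
 */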
5809static struct extent_buffer *
5810__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
5811 unsigned long len)
5812{
5813 struct extent_buffer *eb = NULL;
5814
5815 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
5816 eb->start = start;
5817 eb->len = len;
5818 eb->fs_info = fs_info;
5819 eb->bflags = 0;
5820 init_rwsem(&eb->lock);
5821
5822 btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
5823 &fs_info->allocated_ebs);
5824 INIT_LIST_HEAD(&eb->release_list);
5825
5826 spin_lock_init(&eb->refs_lock);
5827 atomic_set(&eb->refs, 1);
5828 atomic_set(&eb->io_pages, 0);
5829
5830 ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
5831
5832 return eb;
5833}
5834
5835struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
5836{
5837 int i;
5838 struct page *p;
5839 struct extent_buffer *new;
5840 int num_pages = num_extent_pages(src);
5841
5842 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
5843 if (new == NULL)
5844 return NULL;
5845
	/*
	 * Set UNMAPPED before calling btrfs_release_extent_buffer(), as
	 * btrfs_release_extent_buffer() has different behavior for
	 * UNMAPPED subpage extent buffers.
	 */
5851 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
5852
5853 for (i = 0; i < num_pages; i++) {
5854 int ret;
5855
5856 p = alloc_page(GFP_NOFS);
5857 if (!p) {
5858 btrfs_release_extent_buffer(new);
5859 return NULL;
5860 }
5861 ret = attach_extent_buffer_page(new, p, NULL);
5862 if (ret < 0) {
5863 put_page(p);
5864 btrfs_release_extent_buffer(new);
5865 return NULL;
5866 }
5867 WARN_ON(PageDirty(p));
5868 new->pages[i] = p;
5869 copy_page(page_address(p), page_address(src->pages[i]));
5870 }
5871 set_extent_buffer_uptodate(new);
5872
5873 return new;
5874}
5875
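/*
 * Allocate an extent buffer whose pages are private allocations instead of
 * coming from the btree inode's page cache, and mark it UNMAPPED and
 * uptodate.
 */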
5876struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5877 u64 start, unsigned long len)
5878{
5879 struct extent_buffer *eb;
5880 int num_pages;
5881 int i;
5882
5883 eb = __alloc_extent_buffer(fs_info, start, len);
5884 if (!eb)
5885 return NULL;
5886
5887 num_pages = num_extent_pages(eb);
5888 for (i = 0; i < num_pages; i++) {
5889 int ret;
5890
5891 eb->pages[i] = alloc_page(GFP_NOFS);
5892 if (!eb->pages[i])
5893 goto err;
5894 ret = attach_extent_buffer_page(eb, eb->pages[i], NULL);
5895 if (ret < 0)
5896 goto err;
5897 }
5898 set_extent_buffer_uptodate(eb);
5899 btrfs_set_header_nritems(eb, 0);
5900 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5901
5902 return eb;
5903err:
5904 for (; i > 0; i--) {
5905 detach_extent_buffer_page(eb, eb->pages[i - 1]);
5906 __free_page(eb->pages[i - 1]);
5907 }
5908 __free_extent_buffer(eb);
5909 return NULL;
5910}
5911
5912struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5913 u64 start)
5914{
5915 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
5916}
5917
5918static void check_buffer_tree_ref(struct extent_buffer *eb)
5919{
5920 int refs;
5921
	/*
	 * The TREE_REF bit is first set when the extent_buffer is added
	 * to the radix tree. It is also reset, if unset, when a new reference
	 * is created by find_extent_buffer.
	 *
	 * It is only cleared in two cases: freeing the last non-tree
	 * reference to the extent_buffer when its STALE bit is set or
	 * calling releasepage when the tree reference is the only reference.
	 *
	 * In both cases, care is taken to ensure that the extent_buffer's
	 * pages are not under io. However, releasepage can be concurrently
	 * called with creating new references, which is prone to race
	 * conditions between the calls to check_buffer_tree_ref in those
	 * codepaths and clearing TREE_REF in try_release_extent_buffer.
	 *
	 * The actual lifetime of the extent_buffer in the radix tree is
	 * adequately protected by the refcount, but the TREE_REF bit and
	 * its corresponding reference are not. To protect against this
	 * class of races, we call check_buffer_tree_ref from the codepaths
	 * which trigger io. Note that once io is initiated, TREE_REF can no
	 * longer be cleared, so that is the moment at which any such race is
	 * best fixed.
	 */
5944 refs = atomic_read(&eb->refs);
5945 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5946 return;
5947
5948 spin_lock(&eb->refs_lock);
5949 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5950 atomic_inc(&eb->refs);
5951 spin_unlock(&eb->refs_lock);
5952}
5953
5954static void mark_extent_buffer_accessed(struct extent_buffer *eb,
5955 struct page *accessed)
5956{
5957 int num_pages, i;
5958
5959 check_buffer_tree_ref(eb);
5960
5961 num_pages = num_extent_pages(eb);
5962 for (i = 0; i < num_pages; i++) {
5963 struct page *p = eb->pages[i];
5964
5965 if (p != accessed)
5966 mark_page_accessed(p);
5967 }
5968}
5969
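/*
 * Look up an extent buffer in the radix tree and return it with an extra
 * reference, or NULL if it is not present.  Also handles a race with
 * free_extent_buffer() on stale buffers, see the comment below.
 */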
5970struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
5971 u64 start)
5972{
5973 struct extent_buffer *eb;
5974
5975 eb = find_extent_buffer_nolock(fs_info, start);
5976 if (!eb)
5977 return NULL;
5978
	/*
	 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
	 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
	 * another task running free_extent_buffer() might have seen that flag
	 * set, eb->refs == 2, that the buffer isn't under IO (dirty and
	 * writeback flags not set) and it's still in the tree (flag
	 * EXTENT_BUFFER_TREE_REF set), therefore being in the process of
	 * decrementing the extent buffer's reference count twice.  So here we
	 * could race and increment the eb's reference count, clear its stale
	 * flag, mark it as dirty and drop our reference before the other task
	 * finishes executing free_extent_buffer, which would later result in
	 * an attempt to free an extent buffer that is dirty.
	 */
5991 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
5992 spin_lock(&eb->refs_lock);
5993 spin_unlock(&eb->refs_lock);
5994 }
5995 mark_extent_buffer_accessed(eb, NULL);
5996 return eb;
5997}
5998
5999#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
6000struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
6001 u64 start)
6002{
6003 struct extent_buffer *eb, *exists = NULL;
6004 int ret;
6005
6006 eb = find_extent_buffer(fs_info, start);
6007 if (eb)
6008 return eb;
6009 eb = alloc_dummy_extent_buffer(fs_info, start);
6010 if (!eb)
6011 return ERR_PTR(-ENOMEM);
6012 eb->fs_info = fs_info;
6013again:
6014 ret = radix_tree_preload(GFP_NOFS);
6015 if (ret) {
6016 exists = ERR_PTR(ret);
6017 goto free_eb;
6018 }
6019 spin_lock(&fs_info->buffer_lock);
6020 ret = radix_tree_insert(&fs_info->buffer_radix,
6021 start >> fs_info->sectorsize_bits, eb);
6022 spin_unlock(&fs_info->buffer_lock);
6023 radix_tree_preload_end();
6024 if (ret == -EEXIST) {
6025 exists = find_extent_buffer(fs_info, start);
6026 if (exists)
6027 goto free_eb;
6028 else
6029 goto again;
6030 }
6031 check_buffer_tree_ref(eb);
6032 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
6033
6034 return eb;
6035free_eb:
6036 btrfs_release_extent_buffer(eb);
6037 return exists;
6038}
6039#endif
6040
6041static struct extent_buffer *grab_extent_buffer(
6042 struct btrfs_fs_info *fs_info, struct page *page)
6043{
6044 struct extent_buffer *exists;
6045
	/*
	 * For the subpage case, we completely rely on the radix tree to
	 * ensure we don't try to insert two ebs for the same bytenr.  So
	 * here we always return NULL and just continue.
	 */
6051 if (fs_info->sectorsize < PAGE_SIZE)
6052 return NULL;
6053
	/* Page not yet attached to an extent buffer */
6055 if (!PagePrivate(page))
6056 return NULL;
6057
	/*
	 * We could have already allocated an eb for this page and attached
	 * one, so let's see if we can get a ref on the existing eb, and if
	 * we can we know it's good and we can just return that one, else we
	 * know we can just overwrite page->private.
	 */
6064 exists = (struct extent_buffer *)page->private;
6065 if (atomic_inc_not_zero(&exists->refs))
6066 return exists;
6067
6068 WARN_ON(PageDirty(page));
6069 detach_page_private(page);
6070 return NULL;
6071}
6072
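/*
 * Return the extent buffer covering the tree block at @start, either by
 * finding an existing one in the radix tree or by allocating a new one and
 * attaching pages from the btree inode's page cache.
 *
 * Illustrative call pattern only (the surrounding variable names are
 * assumptions, not taken from this file):
 *
 *	eb = alloc_extent_buffer(fs_info, bytenr, owner_root, level);
 *	if (IS_ERR(eb))
 *		return PTR_ERR(eb);
 *	ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num);
 */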
6073struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
6074 u64 start, u64 owner_root, int level)
6075{
6076 unsigned long len = fs_info->nodesize;
6077 int num_pages;
6078 int i;
6079 unsigned long index = start >> PAGE_SHIFT;
6080 struct extent_buffer *eb;
6081 struct extent_buffer *exists = NULL;
6082 struct page *p;
6083 struct address_space *mapping = fs_info->btree_inode->i_mapping;
6084 int uptodate = 1;
6085 int ret;
6086
6087 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
6088 btrfs_err(fs_info, "bad tree block start %llu", start);
6089 return ERR_PTR(-EINVAL);
6090 }
6091
6092#if BITS_PER_LONG == 32
6093 if (start >= MAX_LFS_FILESIZE) {
6094 btrfs_err_rl(fs_info,
6095 "extent buffer %llu is beyond 32bit page cache limit", start);
6096 btrfs_err_32bit_limit(fs_info);
6097 return ERR_PTR(-EOVERFLOW);
6098 }
6099 if (start >= BTRFS_32BIT_EARLY_WARN_THRESHOLD)
6100 btrfs_warn_32bit_limit(fs_info);
6101#endif
6102
6103 if (fs_info->sectorsize < PAGE_SIZE &&
6104 offset_in_page(start) + len > PAGE_SIZE) {
6105 btrfs_err(fs_info,
6106 "tree block crosses page boundary, start %llu nodesize %lu",
6107 start, len);
6108 return ERR_PTR(-EINVAL);
6109 }
6110
6111 eb = find_extent_buffer(fs_info, start);
6112 if (eb)
6113 return eb;
6114
6115 eb = __alloc_extent_buffer(fs_info, start, len);
6116 if (!eb)
6117 return ERR_PTR(-ENOMEM);
6118 btrfs_set_buffer_lockdep_class(owner_root, eb, level);
6119
6120 num_pages = num_extent_pages(eb);
6121 for (i = 0; i < num_pages; i++, index++) {
6122 struct btrfs_subpage *prealloc = NULL;
6123
6124 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
6125 if (!p) {
6126 exists = ERR_PTR(-ENOMEM);
6127 goto free_eb;
6128 }

		/*
		 * Preallocate page->private for the subpage case, so that we
		 * won't allocate memory with private_lock held.  The memory
		 * will be freed by attach_extent_buffer_page() or freed
		 * manually if we exit earlier.
		 *
		 * Although we have ensured one subpage eb can only have one
		 * page, it may change in the future for 16K page size
		 * support, so we still preallocate the memory in the loop.
		 */
6140 ret = btrfs_alloc_subpage(fs_info, &prealloc,
6141 BTRFS_SUBPAGE_METADATA);
6142 if (ret < 0) {
6143 unlock_page(p);
6144 put_page(p);
6145 exists = ERR_PTR(ret);
6146 goto free_eb;
6147 }
6148
6149 spin_lock(&mapping->private_lock);
6150 exists = grab_extent_buffer(fs_info, p);
6151 if (exists) {
6152 spin_unlock(&mapping->private_lock);
6153 unlock_page(p);
6154 put_page(p);
6155 mark_extent_buffer_accessed(exists, p);
6156 btrfs_free_subpage(prealloc);
6157 goto free_eb;
6158 }
6159
6160 ret = attach_extent_buffer_page(eb, p, prealloc);
6161 ASSERT(!ret);
6162
		/*
		 * To inform that we have an extra eb under allocation, so
		 * that detach_extent_buffer_page() won't release the page
		 * private when the eb hasn't been inserted into the radix
		 * tree yet.
		 *
		 * The ref will be decreased when the eb is released, in
		 * detach_extent_buffer_page().
		 */
6171 btrfs_page_inc_eb_refs(fs_info, p);
6172 spin_unlock(&mapping->private_lock);
6173
6174 WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
6175 eb->pages[i] = p;
6176 if (!PageUptodate(p))
6177 uptodate = 0;
6178
		/*
		 * We can't unlock the pages just yet since the extent buffer
		 * hasn't been properly inserted into the radix tree, this
		 * opens a race with btree_releasepage() which can free a
		 * page while we are still filling in all pages for the
		 * buffer and we could crash.
		 */
6186 }
6187 if (uptodate)
6188 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6189again:
6190 ret = radix_tree_preload(GFP_NOFS);
6191 if (ret) {
6192 exists = ERR_PTR(ret);
6193 goto free_eb;
6194 }
6195
6196 spin_lock(&fs_info->buffer_lock);
6197 ret = radix_tree_insert(&fs_info->buffer_radix,
6198 start >> fs_info->sectorsize_bits, eb);
6199 spin_unlock(&fs_info->buffer_lock);
6200 radix_tree_preload_end();
6201 if (ret == -EEXIST) {
6202 exists = find_extent_buffer(fs_info, start);
6203 if (exists)
6204 goto free_eb;
6205 else
6206 goto again;
6207 }
6208
6209 check_buffer_tree_ref(eb);
6210 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
6211
	/*
	 * Now it's safe to unlock the pages because any calls to
	 * btree_releasepage() will correctly detect that a page belongs to
	 * a live buffer and won't free it prematurely.
	 */
6217 for (i = 0; i < num_pages; i++)
6218 unlock_page(eb->pages[i]);
6219 return eb;
6220
6221free_eb:
6222 WARN_ON(!atomic_dec_and_test(&eb->refs));
6223 for (i = 0; i < num_pages; i++) {
6224 if (eb->pages[i])
6225 unlock_page(eb->pages[i]);
6226 }
6227
6228 btrfs_release_extent_buffer(eb);
6229 return exists;
6230}
6231
6232static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
6233{
6234 struct extent_buffer *eb =
6235 container_of(head, struct extent_buffer, rcu_head);
6236
6237 __free_extent_buffer(eb);
6238}
6239
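/*
 * Drop one reference from the extent buffer.  If it was the last one, remove
 * the buffer from the radix tree, release its pages and free it (via RCU for
 * mapped buffers).  Called with, and releases, eb->refs_lock.
 */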
6240static int release_extent_buffer(struct extent_buffer *eb)
6241 __releases(&eb->refs_lock)
6242{
6243 lockdep_assert_held(&eb->refs_lock);
6244
6245 WARN_ON(atomic_read(&eb->refs) == 0);
6246 if (atomic_dec_and_test(&eb->refs)) {
6247 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
6248 struct btrfs_fs_info *fs_info = eb->fs_info;
6249
6250 spin_unlock(&eb->refs_lock);
6251
6252 spin_lock(&fs_info->buffer_lock);
6253 radix_tree_delete(&fs_info->buffer_radix,
6254 eb->start >> fs_info->sectorsize_bits);
6255 spin_unlock(&fs_info->buffer_lock);
6256 } else {
6257 spin_unlock(&eb->refs_lock);
6258 }
6259
6260 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
6261
6262 btrfs_release_extent_buffer_pages(eb);
6263#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
6264 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
6265 __free_extent_buffer(eb);
6266 return 1;
6267 }
6268#endif
6269 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
6270 return 1;
6271 }
6272 spin_unlock(&eb->refs_lock);
6273
6274 return 0;
6275}
6276
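/*
 * Drop a reference on an extent buffer.  The common cases are handled with a
 * lockless cmpxchg loop; only when the buffer might have to be freed (or its
 * TREE_REF dropped for a stale buffer) do we take refs_lock and go through
 * release_extent_buffer().
 */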
6277void free_extent_buffer(struct extent_buffer *eb)
6278{
6279 int refs;
6280 int old;
6281 if (!eb)
6282 return;
6283
6284 while (1) {
6285 refs = atomic_read(&eb->refs);
6286 if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
6287 || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
6288 refs == 1))
6289 break;
6290 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
6291 if (old == refs)
6292 return;
6293 }
6294
6295 spin_lock(&eb->refs_lock);
6296 if (atomic_read(&eb->refs) == 2 &&
6297 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
6298 !extent_buffer_under_io(eb) &&
6299 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6300 atomic_dec(&eb->refs);
6301
6302
6303
6304
6305
6306 release_extent_buffer(eb);
6307}
6308
6309void free_extent_buffer_stale(struct extent_buffer *eb)
6310{
6311 if (!eb)
6312 return;
6313
6314 spin_lock(&eb->refs_lock);
6315 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
6316
6317 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
6318 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6319 atomic_dec(&eb->refs);
6320 release_extent_buffer(eb);
6321}
6322
6323static void btree_clear_page_dirty(struct page *page)
6324{
6325 ASSERT(PageDirty(page));
6326 ASSERT(PageLocked(page));
6327 clear_page_dirty_for_io(page);
6328 xa_lock_irq(&page->mapping->i_pages);
6329 if (!PageDirty(page))
6330 __xa_clear_mark(&page->mapping->i_pages,
6331 page_index(page), PAGECACHE_TAG_DIRTY);
6332 xa_unlock_irq(&page->mapping->i_pages);
6333}
6334
6335static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb)
6336{
6337 struct btrfs_fs_info *fs_info = eb->fs_info;
6338 struct page *page = eb->pages[0];
6339 bool last;
6340
6341
6342 lock_page(page);
6343 last = btrfs_subpage_clear_and_test_dirty(fs_info, page, eb->start,
6344 eb->len);
6345 if (last)
6346 btree_clear_page_dirty(page);
6347 unlock_page(page);
6348 WARN_ON(atomic_read(&eb->refs) == 0);
6349}
6350
6351void clear_extent_buffer_dirty(const struct extent_buffer *eb)
6352{
6353 int i;
6354 int num_pages;
6355 struct page *page;
6356
6357 if (eb->fs_info->sectorsize < PAGE_SIZE)
6358 return clear_subpage_extent_buffer_dirty(eb);
6359
6360 num_pages = num_extent_pages(eb);
6361
6362 for (i = 0; i < num_pages; i++) {
6363 page = eb->pages[i];
6364 if (!PageDirty(page))
6365 continue;
6366 lock_page(page);
6367 btree_clear_page_dirty(page);
6368 ClearPageError(page);
6369 unlock_page(page);
6370 }
6371 WARN_ON(atomic_read(&eb->refs) == 0);
6372}
6373
6374bool set_extent_buffer_dirty(struct extent_buffer *eb)
6375{
6376 int i;
6377 int num_pages;
6378 bool was_dirty;
6379
6380 check_buffer_tree_ref(eb);
6381
6382 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
6383
6384 num_pages = num_extent_pages(eb);
6385 WARN_ON(atomic_read(&eb->refs) == 0);
6386 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
6387
6388 if (!was_dirty) {
6389 bool subpage = eb->fs_info->sectorsize < PAGE_SIZE;
6390
		/*
		 * For the subpage case, we can have other extent buffers in
		 * the same page, and in clear_subpage_extent_buffer_dirty()
		 * the page dirty bit is cleared without the subpage lock
		 * held.  This could race with us setting the dirty bit here.
		 *
		 * Since clear_subpage_extent_buffer_dirty() always locks the
		 * page, take the page lock around setting the per-page dirty
		 * bits to serialize against it.
		 */
6402 if (subpage)
6403 lock_page(eb->pages[0]);
6404 for (i = 0; i < num_pages; i++)
6405 btrfs_page_set_dirty(eb->fs_info, eb->pages[i],
6406 eb->start, eb->len);
6407 if (subpage)
6408 unlock_page(eb->pages[0]);
6409 }
6410#ifdef CONFIG_BTRFS_DEBUG
6411 for (i = 0; i < num_pages; i++)
6412 ASSERT(PageDirty(eb->pages[i]));
6413#endif
6414
6415 return was_dirty;
6416}
6417
6418void clear_extent_buffer_uptodate(struct extent_buffer *eb)
6419{
6420 struct btrfs_fs_info *fs_info = eb->fs_info;
6421 struct page *page;
6422 int num_pages;
6423 int i;
6424
6425 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6426 num_pages = num_extent_pages(eb);
6427 for (i = 0; i < num_pages; i++) {
6428 page = eb->pages[i];
6429 if (page)
6430 btrfs_page_clear_uptodate(fs_info, page,
6431 eb->start, eb->len);
6432 }
6433}
6434
6435void set_extent_buffer_uptodate(struct extent_buffer *eb)
6436{
6437 struct btrfs_fs_info *fs_info = eb->fs_info;
6438 struct page *page;
6439 int num_pages;
6440 int i;
6441
6442 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6443 num_pages = num_extent_pages(eb);
6444 for (i = 0; i < num_pages; i++) {
6445 page = eb->pages[i];
6446 btrfs_page_set_uptodate(fs_info, page, eb->start, eb->len);
6447 }
6448}
6449
6450static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
6451 int mirror_num)
6452{
6453 struct btrfs_fs_info *fs_info = eb->fs_info;
6454 struct extent_io_tree *io_tree;
6455 struct page *page = eb->pages[0];
6456 struct btrfs_bio_ctrl bio_ctrl = { 0 };
6457 int ret = 0;
6458
6459 ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
6460 ASSERT(PagePrivate(page));
6461 io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
6462
6463 if (wait == WAIT_NONE) {
6464 if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1))
6465 return -EAGAIN;
6466 } else {
6467 ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1);
6468 if (ret < 0)
6469 return ret;
6470 }
6471
6472 ret = 0;
6473 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
6474 PageUptodate(page) ||
6475 btrfs_subpage_test_uptodate(fs_info, page, eb->start, eb->len)) {
6476 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6477 unlock_extent(io_tree, eb->start, eb->start + eb->len - 1);
6478 return ret;
6479 }
6480
6481 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
6482 eb->read_mirror = 0;
6483 atomic_set(&eb->io_pages, 1);
6484 check_buffer_tree_ref(eb);
6485 btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
6486
6487 btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
6488 ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
6489 page, eb->start, eb->len,
6490 eb->start - page_offset(page),
6491 end_bio_extent_readpage, mirror_num, 0,
6492 true);
6493 if (ret) {
6494
6495
6496
6497
6498
6499 atomic_dec(&eb->io_pages);
6500 }
6501 if (bio_ctrl.bio) {
6502 int tmp;
6503
6504 tmp = submit_one_bio(bio_ctrl.bio, mirror_num, 0);
6505 bio_ctrl.bio = NULL;
6506 if (tmp < 0)
6507 return tmp;
6508 }
6509 if (ret || wait != WAIT_COMPLETE)
6510 return ret;
6511
6512 wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1, EXTENT_LOCKED);
6513 if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
6514 ret = -EIO;
6515 return ret;
6516}
6517
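/*
 * Read the pages of an extent buffer from disk.  With WAIT_NONE the read is
 * only started (used for readahead); with WAIT_COMPLETE we wait for all
 * pages and return -EIO if any of them failed to become uptodate.
 */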
6518int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
6519{
6520 int i;
6521 struct page *page;
6522 int err;
6523 int ret = 0;
6524 int locked_pages = 0;
6525 int all_uptodate = 1;
6526 int num_pages;
6527 unsigned long num_reads = 0;
6528 struct btrfs_bio_ctrl bio_ctrl = { 0 };
6529
6530 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
6531 return 0;
6532
6533 if (eb->fs_info->sectorsize < PAGE_SIZE)
6534 return read_extent_buffer_subpage(eb, wait, mirror_num);
6535
6536 num_pages = num_extent_pages(eb);
6537 for (i = 0; i < num_pages; i++) {
6538 page = eb->pages[i];
6539 if (wait == WAIT_NONE) {
			/*
			 * WAIT_NONE is only utilized by readahead. If we
			 * can't acquire the lock atomically it means either
			 * the eb is being read out or under modification.
			 * Either way the eb will be or has been cached,
			 * so readahead can exit safely.
			 */
6547 if (!trylock_page(page))
6548 goto unlock_exit;
6549 } else {
6550 lock_page(page);
6551 }
6552 locked_pages++;
6553 }
6554
6555
6556
6557
6558
6559 for (i = 0; i < num_pages; i++) {
6560 page = eb->pages[i];
6561 if (!PageUptodate(page)) {
6562 num_reads++;
6563 all_uptodate = 0;
6564 }
6565 }
6566
6567 if (all_uptodate) {
6568 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6569 goto unlock_exit;
6570 }
6571
6572 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
6573 eb->read_mirror = 0;
6574 atomic_set(&eb->io_pages, num_reads);
6575
	/*
	 * It is possible for releasepage to clear the TREE_REF bit before we
	 * set io_pages; see check_buffer_tree_ref() for a detailed comment.
	 */
6579 check_buffer_tree_ref(eb);
6580 for (i = 0; i < num_pages; i++) {
6581 page = eb->pages[i];
6582
6583 if (!PageUptodate(page)) {
6584 if (ret) {
6585 atomic_dec(&eb->io_pages);
6586 unlock_page(page);
6587 continue;
6588 }
6589
6590 ClearPageError(page);
6591 err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
6592 &bio_ctrl, page, page_offset(page),
6593 PAGE_SIZE, 0, end_bio_extent_readpage,
6594 mirror_num, 0, false);
6595 if (err) {
6596
6597
6598
6599
6600
6601 ret = err;
6602 SetPageError(page);
6603 unlock_page(page);
6604 atomic_dec(&eb->io_pages);
6605 }
6606 } else {
6607 unlock_page(page);
6608 }
6609 }
6610
6611 if (bio_ctrl.bio) {
6612 err = submit_one_bio(bio_ctrl.bio, mirror_num, bio_ctrl.bio_flags);
6613 bio_ctrl.bio = NULL;
6614 if (err)
6615 return err;
6616 }
6617
6618 if (ret || wait != WAIT_COMPLETE)
6619 return ret;
6620
6621 for (i = 0; i < num_pages; i++) {
6622 page = eb->pages[i];
6623 wait_on_page_locked(page);
6624 if (!PageUptodate(page))
6625 ret = -EIO;
6626 }
6627
6628 return ret;
6629
6630unlock_exit:
6631 while (locked_pages > 0) {
6632 locked_pages--;
6633 page = eb->pages[locked_pages];
6634 unlock_page(page);
6635 }
6636 return ret;
6637}
6638
6639static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
6640 unsigned long len)
6641{
6642 btrfs_warn(eb->fs_info,
6643 "access to eb bytenr %llu len %lu out of range start %lu len %lu",
6644 eb->start, eb->len, start, len);
6645 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
6646
6647 return true;
6648}
6649
6650
6651
6652
6653
6654
6655
6656
6657static inline int check_eb_range(const struct extent_buffer *eb,
6658 unsigned long start, unsigned long len)
6659{
6660 unsigned long offset;
6661
6662
6663 if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len))
6664 return report_eb_range(eb, start, len);
6665
6666 return false;
6667}
6668
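/*
 * Copy @len bytes starting at offset @start inside the extent buffer into
 * the kernel buffer @dstv, handling page boundaries.
 *
 * A minimal usage sketch (purely illustrative, the names are assumptions):
 *
 *	struct btrfs_disk_key disk_key;
 *
 *	read_extent_buffer(leaf, &disk_key, key_offset, sizeof(disk_key));
 */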
6669void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
6670 unsigned long start, unsigned long len)
6671{
6672 size_t cur;
6673 size_t offset;
6674 struct page *page;
6675 char *kaddr;
6676 char *dst = (char *)dstv;
6677 unsigned long i = get_eb_page_index(start);
6678
6679 if (check_eb_range(eb, start, len))
6680 return;
6681
6682 offset = get_eb_offset_in_page(eb, start);
6683
6684 while (len > 0) {
6685 page = eb->pages[i];
6686
6687 cur = min(len, (PAGE_SIZE - offset));
6688 kaddr = page_address(page);
6689 memcpy(dst, kaddr + offset, cur);
6690
6691 dst += cur;
6692 len -= cur;
6693 offset = 0;
6694 i++;
6695 }
6696}
6697
6698int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
6699 void __user *dstv,
6700 unsigned long start, unsigned long len)
6701{
6702 size_t cur;
6703 size_t offset;
6704 struct page *page;
6705 char *kaddr;
6706 char __user *dst = (char __user *)dstv;
6707 unsigned long i = get_eb_page_index(start);
6708 int ret = 0;
6709
6710 WARN_ON(start > eb->len);
6711 WARN_ON(start + len > eb->start + eb->len);
6712
6713 offset = get_eb_offset_in_page(eb, start);
6714
6715 while (len > 0) {
6716 page = eb->pages[i];
6717
6718 cur = min(len, (PAGE_SIZE - offset));
6719 kaddr = page_address(page);
6720 if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
6721 ret = -EFAULT;
6722 break;
6723 }
6724
6725 dst += cur;
6726 len -= cur;
6727 offset = 0;
6728 i++;
6729 }
6730
6731 return ret;
6732}
6733
6734int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
6735 unsigned long start, unsigned long len)
6736{
6737 size_t cur;
6738 size_t offset;
6739 struct page *page;
6740 char *kaddr;
6741 char *ptr = (char *)ptrv;
6742 unsigned long i = get_eb_page_index(start);
6743 int ret = 0;
6744
6745 if (check_eb_range(eb, start, len))
6746 return -EINVAL;
6747
6748 offset = get_eb_offset_in_page(eb, start);
6749
6750 while (len > 0) {
6751 page = eb->pages[i];
6752
6753 cur = min(len, (PAGE_SIZE - offset));
6754
6755 kaddr = page_address(page);
6756 ret = memcmp(ptr, kaddr + offset, cur);
6757 if (ret)
6758 break;
6759
6760 ptr += cur;
6761 len -= cur;
6762 offset = 0;
6763 i++;
6764 }
6765 return ret;
6766}
6767
6768
6769
6770
6771
6772
6773
6774static void assert_eb_page_uptodate(const struct extent_buffer *eb,
6775 struct page *page)
6776{
6777 struct btrfs_fs_info *fs_info = eb->fs_info;
6778
6779 if (fs_info->sectorsize < PAGE_SIZE) {
6780 bool uptodate;
6781
6782 uptodate = btrfs_subpage_test_uptodate(fs_info, page,
6783 eb->start, eb->len);
6784 WARN_ON(!uptodate);
6785 } else {
6786 WARN_ON(!PageUptodate(page));
6787 }
6788}
6789
6790void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
6791 const void *srcv)
6792{
6793 char *kaddr;
6794
6795 assert_eb_page_uptodate(eb, eb->pages[0]);
6796 kaddr = page_address(eb->pages[0]) +
6797 get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
6798 chunk_tree_uuid));
6799 memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
6800}
6801
6802void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
6803{
6804 char *kaddr;
6805
6806 assert_eb_page_uptodate(eb, eb->pages[0]);
6807 kaddr = page_address(eb->pages[0]) +
6808 get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
6809 memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
6810}
6811
6812void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
6813 unsigned long start, unsigned long len)
6814{
6815 size_t cur;
6816 size_t offset;
6817 struct page *page;
6818 char *kaddr;
6819 char *src = (char *)srcv;
6820 unsigned long i = get_eb_page_index(start);
6821
6822 WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
6823
6824 if (check_eb_range(eb, start, len))
6825 return;
6826
6827 offset = get_eb_offset_in_page(eb, start);
6828
6829 while (len > 0) {
6830 page = eb->pages[i];
6831 assert_eb_page_uptodate(eb, page);
6832
6833 cur = min(len, PAGE_SIZE - offset);
6834 kaddr = page_address(page);
6835 memcpy(kaddr + offset, src, cur);
6836
6837 src += cur;
6838 len -= cur;
6839 offset = 0;
6840 i++;
6841 }
6842}
6843
6844void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
6845 unsigned long len)
6846{
6847 size_t cur;
6848 size_t offset;
6849 struct page *page;
6850 char *kaddr;
6851 unsigned long i = get_eb_page_index(start);
6852
6853 if (check_eb_range(eb, start, len))
6854 return;
6855
6856 offset = get_eb_offset_in_page(eb, start);
6857
6858 while (len > 0) {
6859 page = eb->pages[i];
6860 assert_eb_page_uptodate(eb, page);
6861
6862 cur = min(len, PAGE_SIZE - offset);
6863 kaddr = page_address(page);
6864 memset(kaddr + offset, 0, cur);
6865
6866 len -= cur;
6867 offset = 0;
6868 i++;
6869 }
6870}
6871
6872void copy_extent_buffer_full(const struct extent_buffer *dst,
6873 const struct extent_buffer *src)
6874{
6875 int i;
6876 int num_pages;
6877
6878 ASSERT(dst->len == src->len);
6879
6880 if (dst->fs_info->sectorsize == PAGE_SIZE) {
6881 num_pages = num_extent_pages(dst);
6882 for (i = 0; i < num_pages; i++)
6883 copy_page(page_address(dst->pages[i]),
6884 page_address(src->pages[i]));
6885 } else {
6886 size_t src_offset = get_eb_offset_in_page(src, 0);
6887 size_t dst_offset = get_eb_offset_in_page(dst, 0);
6888
6889 ASSERT(src->fs_info->sectorsize < PAGE_SIZE);
6890 memcpy(page_address(dst->pages[0]) + dst_offset,
6891 page_address(src->pages[0]) + src_offset,
6892 src->len);
6893 }
6894}
6895
6896void copy_extent_buffer(const struct extent_buffer *dst,
6897 const struct extent_buffer *src,
6898 unsigned long dst_offset, unsigned long src_offset,
6899 unsigned long len)
6900{
6901 u64 dst_len = dst->len;
6902 size_t cur;
6903 size_t offset;
6904 struct page *page;
6905 char *kaddr;
6906 unsigned long i = get_eb_page_index(dst_offset);
6907
6908 if (check_eb_range(dst, dst_offset, len) ||
6909 check_eb_range(src, src_offset, len))
6910 return;
6911
6912 WARN_ON(src->len != dst_len);
6913
6914 offset = get_eb_offset_in_page(dst, dst_offset);
6915
6916 while (len > 0) {
6917 page = dst->pages[i];
6918 assert_eb_page_uptodate(dst, page);
6919
6920 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
6921
6922 kaddr = page_address(page);
6923 read_extent_buffer(src, kaddr + offset, src_offset, cur);
6924
6925 src_offset += cur;
6926 len -= cur;
6927 offset = 0;
6928 i++;
6929 }
6930}
6931
/*
 * eb_bitmap_offset - calculate the page and offset of the byte containing
 * the given bit number
 *
 * @eb:          the extent buffer
 * @start:       offset of the bitmap item in the extent buffer
 * @nr:          bit number
 * @page_index:  return index of the page in the extent buffer that contains
 *               the given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer
 * which contains a given bit.
 */
6945static inline void eb_bitmap_offset(const struct extent_buffer *eb,
6946 unsigned long start, unsigned long nr,
6947 unsigned long *page_index,
6948 size_t *page_offset)
6949{
6950 size_t byte_offset = BIT_BYTE(nr);
6951 size_t offset;
6952
6953
6954
6955
6956
6957
6958 offset = start + offset_in_page(eb->start) + byte_offset;
6959
6960 *page_index = offset >> PAGE_SHIFT;
6961 *page_offset = offset_in_page(offset);
6962}
6963
/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb:    the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr:    bit number to test
 */
6970int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
6971 unsigned long nr)
6972{
6973 u8 *kaddr;
6974 struct page *page;
6975 unsigned long i;
6976 size_t offset;
6977
6978 eb_bitmap_offset(eb, start, nr, &i, &offset);
6979 page = eb->pages[i];
6980 assert_eb_page_uptodate(eb, page);
6981 kaddr = page_address(page);
6982 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
6983}
6984
/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb:    the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos:   bit number of the first bit
 * @len:   number of bits to set
 */
6992void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
6993 unsigned long pos, unsigned long len)
6994{
6995 u8 *kaddr;
6996 struct page *page;
6997 unsigned long i;
6998 size_t offset;
6999 const unsigned int size = pos + len;
7000 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
7001 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
7002
7003 eb_bitmap_offset(eb, start, pos, &i, &offset);
7004 page = eb->pages[i];
7005 assert_eb_page_uptodate(eb, page);
7006 kaddr = page_address(page);
7007
7008 while (len >= bits_to_set) {
7009 kaddr[offset] |= mask_to_set;
7010 len -= bits_to_set;
7011 bits_to_set = BITS_PER_BYTE;
7012 mask_to_set = ~0;
7013 if (++offset >= PAGE_SIZE && len > 0) {
7014 offset = 0;
7015 page = eb->pages[++i];
7016 assert_eb_page_uptodate(eb, page);
7017 kaddr = page_address(page);
7018 }
7019 }
7020 if (len) {
7021 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
7022 kaddr[offset] |= mask_to_set;
7023 }
7024}
7025
/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb:    the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos:   bit number of the first bit
 * @len:   number of bits to clear
 */
7034void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
7035 unsigned long start, unsigned long pos,
7036 unsigned long len)
7037{
7038 u8 *kaddr;
7039 struct page *page;
7040 unsigned long i;
7041 size_t offset;
7042 const unsigned int size = pos + len;
7043 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
7044 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
7045
7046 eb_bitmap_offset(eb, start, pos, &i, &offset);
7047 page = eb->pages[i];
7048 assert_eb_page_uptodate(eb, page);
7049 kaddr = page_address(page);
7050
7051 while (len >= bits_to_clear) {
7052 kaddr[offset] &= ~mask_to_clear;
7053 len -= bits_to_clear;
7054 bits_to_clear = BITS_PER_BYTE;
7055 mask_to_clear = ~0;
7056 if (++offset >= PAGE_SIZE && len > 0) {
7057 offset = 0;
7058 page = eb->pages[++i];
7059 assert_eb_page_uptodate(eb, page);
7060 kaddr = page_address(page);
7061 }
7062 }
7063 if (len) {
7064 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
7065 kaddr[offset] &= ~mask_to_clear;
7066 }
7067}
7068
7069static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
7070{
7071 unsigned long distance = (src > dst) ? src - dst : dst - src;
7072 return distance < len;
7073}
7074
7075static void copy_pages(struct page *dst_page, struct page *src_page,
7076 unsigned long dst_off, unsigned long src_off,
7077 unsigned long len)
7078{
7079 char *dst_kaddr = page_address(dst_page);
7080 char *src_kaddr;
7081 int must_memmove = 0;
7082
7083 if (dst_page != src_page) {
7084 src_kaddr = page_address(src_page);
7085 } else {
7086 src_kaddr = dst_kaddr;
7087 if (areas_overlap(src_off, dst_off, len))
7088 must_memmove = 1;
7089 }
7090
7091 if (must_memmove)
7092 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
7093 else
7094 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
7095}
7096
7097void memcpy_extent_buffer(const struct extent_buffer *dst,
7098 unsigned long dst_offset, unsigned long src_offset,
7099 unsigned long len)
7100{
7101 size_t cur;
7102 size_t dst_off_in_page;
7103 size_t src_off_in_page;
7104 unsigned long dst_i;
7105 unsigned long src_i;
7106
7107 if (check_eb_range(dst, dst_offset, len) ||
7108 check_eb_range(dst, src_offset, len))
7109 return;
7110
7111 while (len > 0) {
7112 dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
7113 src_off_in_page = get_eb_offset_in_page(dst, src_offset);
7114
7115 dst_i = get_eb_page_index(dst_offset);
7116 src_i = get_eb_page_index(src_offset);
7117
7118 cur = min(len, (unsigned long)(PAGE_SIZE -
7119 src_off_in_page));
7120 cur = min_t(unsigned long, cur,
7121 (unsigned long)(PAGE_SIZE - dst_off_in_page));
7122
7123 copy_pages(dst->pages[dst_i], dst->pages[src_i],
7124 dst_off_in_page, src_off_in_page, cur);
7125
7126 src_offset += cur;
7127 dst_offset += cur;
7128 len -= cur;
7129 }
7130}
7131
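/*
 * Like memcpy_extent_buffer() but safe for overlapping ranges: when the
 * destination is above the source the copy is done backwards, page by page.
 */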
7132void memmove_extent_buffer(const struct extent_buffer *dst,
7133 unsigned long dst_offset, unsigned long src_offset,
7134 unsigned long len)
7135{
7136 size_t cur;
7137 size_t dst_off_in_page;
7138 size_t src_off_in_page;
7139 unsigned long dst_end = dst_offset + len - 1;
7140 unsigned long src_end = src_offset + len - 1;
7141 unsigned long dst_i;
7142 unsigned long src_i;
7143
7144 if (check_eb_range(dst, dst_offset, len) ||
7145 check_eb_range(dst, src_offset, len))
7146 return;
7147 if (dst_offset < src_offset) {
7148 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
7149 return;
7150 }
7151 while (len > 0) {
7152 dst_i = get_eb_page_index(dst_end);
7153 src_i = get_eb_page_index(src_end);
7154
7155 dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
7156 src_off_in_page = get_eb_offset_in_page(dst, src_end);
7157
7158 cur = min_t(unsigned long, len, src_off_in_page + 1);
7159 cur = min(cur, dst_off_in_page + 1);
7160 copy_pages(dst->pages[dst_i], dst->pages[src_i],
7161 dst_off_in_page - cur + 1,
7162 src_off_in_page - cur + 1, cur);
7163
7164 dst_end -= cur;
7165 src_end -= cur;
7166 len -= cur;
7167 }
7168}
7169
7170static struct extent_buffer *get_next_extent_buffer(
7171 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
7172{
7173 struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
7174 struct extent_buffer *found = NULL;
7175 u64 page_start = page_offset(page);
7176 int ret;
7177 int i;
7178
7179 ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
7180 ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
7181 lockdep_assert_held(&fs_info->buffer_lock);
7182
7183 ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
7184 bytenr >> fs_info->sectorsize_bits,
7185 PAGE_SIZE / fs_info->nodesize);
7186 for (i = 0; i < ret; i++) {
7187
7188 if (gang[i]->start >= page_start + PAGE_SIZE)
7189 break;
7190
7191 if (gang[i]->start >= bytenr) {
7192 found = gang[i];
7193 break;
7194 }
7195 }
7196 return found;
7197}
7198
7199static int try_release_subpage_extent_buffer(struct page *page)
7200{
7201 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
7202 u64 cur = page_offset(page);
7203 const u64 end = page_offset(page) + PAGE_SIZE;
7204 int ret;
7205
7206 while (cur < end) {
7207 struct extent_buffer *eb = NULL;
7208
7209
7210
7211
7212
7213
7214
7215
7216
7217 spin_lock(&fs_info->buffer_lock);
7218 eb = get_next_extent_buffer(fs_info, page, cur);
7219 if (!eb) {
7220
7221 spin_unlock(&fs_info->buffer_lock);
7222 break;
7223 }
7224 cur = eb->start + eb->len;
7225
7226
7227
7228
7229
7230 spin_lock(&eb->refs_lock);
7231 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
7232 spin_unlock(&eb->refs_lock);
7233 spin_unlock(&fs_info->buffer_lock);
7234 break;
7235 }
7236 spin_unlock(&fs_info->buffer_lock);
7237
7238
7239
7240
7241
7242
7243 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
7244 spin_unlock(&eb->refs_lock);
7245 break;
7246 }
7247
7248
7249
7250
7251
7252
7253 release_extent_buffer(eb);
7254 }
7255
7256
7257
7258
7259 spin_lock(&page->mapping->private_lock);
7260 if (!PagePrivate(page))
7261 ret = 1;
7262 else
7263 ret = 0;
7264 spin_unlock(&page->mapping->private_lock);
7265 return ret;
7266
7267}
7268
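/*
 * Attempt to release the extent buffer attached to @page so the page itself
 * can be freed.  Returns non-zero only when the buffer had no extra
 * references and was not under IO, i.e. releasing the page is safe.
 */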
7269int try_release_extent_buffer(struct page *page)
7270{
7271 struct extent_buffer *eb;
7272
7273 if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
7274 return try_release_subpage_extent_buffer(page);
7275
7276
7277
7278
7279
7280 spin_lock(&page->mapping->private_lock);
7281 if (!PagePrivate(page)) {
7282 spin_unlock(&page->mapping->private_lock);
7283 return 1;
7284 }
7285
7286 eb = (struct extent_buffer *)page->private;
7287 BUG_ON(!eb);
7288
7289
7290
7291
7292
7293
7294 spin_lock(&eb->refs_lock);
7295 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
7296 spin_unlock(&eb->refs_lock);
7297 spin_unlock(&page->mapping->private_lock);
7298 return 0;
7299 }
7300 spin_unlock(&page->mapping->private_lock);
7301
7302
7303
7304
7305
7306 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
7307 spin_unlock(&eb->refs_lock);
7308 return 0;
7309 }
7310
7311 return release_extent_buffer(eb);
7312}
7313
/*
 * btrfs_readahead_tree_block - attempt to readahead a child block
 * @fs_info:    the fs_info
 * @bytenr:     bytenr to read
 * @owner_root: objectid of the root that owns this eb
 * @gen:        generation for the uptodate check, can be 0
 * @level:      level for the eb
 *
 * Attempt to readahead a tree block at @bytenr.  If @gen is 0 then we do a
 * normal uptodate check of the eb, without checking the generation.  If we
 * have to read the block we will not block on anything.
 */
7326void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
7327 u64 bytenr, u64 owner_root, u64 gen, int level)
7328{
7329 struct extent_buffer *eb;
7330 int ret;
7331
7332 eb = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
7333 if (IS_ERR(eb))
7334 return;
7335
7336 if (btrfs_buffer_uptodate(eb, gen, 1)) {
7337 free_extent_buffer(eb);
7338 return;
7339 }
7340
7341 ret = read_extent_buffer_pages(eb, WAIT_NONE, 0);
7342 if (ret < 0)
7343 free_extent_buffer_stale(eb);
7344 else
7345 free_extent_buffer(eb);
7346}
7347
/*
 * btrfs_readahead_node_child - readahead a node's child block
 * @node: parent node we're reading from
 * @slot: slot in the parent node for the child we want to read
 *
 * A helper for btrfs_readahead_tree_block, we just read the bytenr pointed
 * at by the slot in the node provided.
 */
7356void btrfs_readahead_node_child(struct extent_buffer *node, int slot)
7357{
7358 btrfs_readahead_tree_block(node->fs_info,
7359 btrfs_node_blockptr(node, slot),
7360 btrfs_header_owner(node),
7361 btrfs_node_ptr_generation(node, slot),
7362 btrfs_header_level(node) - 1);
7363}
7364