// SPDX-License-Identifier: GPL-2.0

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
static LIST_HEAD(states);

static DEFINE_SPINLOCK(leak_lock);

static inline
void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_add(new, head);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline
void btrfs_leak_debug_del(struct list_head *entry)
{
	unsigned long flags;

	spin_lock_irqsave(&leak_lock, flags);
	list_del(entry);
	spin_unlock_irqrestore(&leak_lock, flags);
}

static inline
void btrfs_leak_debug_check(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
		       eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
		struct extent_io_tree *tree, u64 start, u64 end)
{
	struct inode *inode = tree->private_data;
	u64 isize;

	if (!inode || !is_data_inode(inode))
		return;

	isize = i_size_read(inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
		    "%s: ino %llu isize %llu odd range [%llu,%llu]",
			caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add(new, head)	do {} while (0)
#define btrfs_leak_debug_del(entry)	do {} while (0)
#define btrfs_leak_debug_check()	do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;

	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
};

static int add_extent_changeset(struct extent_state *state, unsigned bits,
				 struct extent_changeset *changeset,
				 int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}
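
/*
 * Illustrative note (not upstream text): a changeset only records bits that
 * actually flip. Clearing EXTENT_DELALLOC on a fully-set [0, 8191] range
 * adds 8192 to changeset->bytes_changed and stores the range in the ulist;
 * clearing it again is a no-op for the changeset because the
 * "(state->state & bits) == 0" check above returns early.
 */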

static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
				       unsigned long bio_flags)
{
	blk_status_t ret = 0;
	struct extent_io_tree *tree = bio->bi_private;

	bio->bi_private = NULL;

	if (tree->ops)
		ret = tree->ops->submit_bio_hook(tree->private_data, bio,
						 mirror_num, bio_flags);
	else
		btrfsic_submit_bio(bio);

	return blk_status_to_errno(ret);
}

/* Cleanup unsubmitted bios */
static void end_write_bio(struct extent_page_data *epd, int ret)
{
	if (epd->bio) {
		epd->bio->bi_status = errno_to_blk_status(ret);
		bio_endio(epd->bio);
		epd->bio = NULL;
	}
}

/*
 * Submit bio from extent page data via submit_one_bio
 *
 * Return 0 if everything is OK.
 * Return <0 for error.
 */
static int __must_check flush_write_bio(struct extent_page_data *epd)
{
	int ret = 0;

	if (epd->bio) {
		ret = submit_one_bio(epd->bio, 0, 0);
		/*
		 * Clean up of epd->bio is handled by its endio function.
		 * And endio is either triggered by successful bio execution
		 * or the error handler of submit bio hook.
		 * So at this point, no matter what happened, we don't need
		 * to clean up epd->bio.
		 */
		epd->bio = NULL;
	}
	return ret;
}

int __init extent_io_init(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
			sizeof(struct extent_state), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
			sizeof(struct extent_buffer), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;

	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_io_bio, bio),
			BIOSET_NEED_BVECS))
		goto free_buffer_cache;

	if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
		goto free_bioset;

	return 0;

free_bioset:
	bioset_exit(&btrfs_bioset);

free_buffer_cache:
	kmem_cache_destroy(extent_buffer_cache);
	extent_buffer_cache = NULL;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	extent_state_cache = NULL;
	return -ENOMEM;
}

void __cold extent_io_exit(void)
{
	btrfs_leak_debug_check();

	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(extent_state_cache);
	kmem_cache_destroy(extent_buffer_cache);
	bioset_exit(&btrfs_bioset);
}

void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner,
			 void *private_data)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	tree->private_data = private_data;
	tree->owner = owner;
}

void extent_io_tree_release(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once extent_io_tree_release is
	 * called.
	 */
	smp_mb();
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might be not appropriate for the slab allocator,
	 * drop the unsupported bits
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->failrec = NULL;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add(&state->leak_list, &states);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del(&state->leak_list);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}
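
/*
 * Illustrative note (not upstream text): extent_state lifetimes are
 * refcounted. alloc_extent_state() hands back one reference, which the
 * tree linkage then owns; every cached pointer (see cache_state() below)
 * takes an extra one. References are always dropped with
 * free_extent_state(), which tolerates NULL:
 *
 *	free_extent_state(cached);	// fine even if cached == NULL
 */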

static struct rb_node *tree_insert(struct rb_root *root,
				   struct rb_node *search_start,
				   u64 offset,
				   struct rb_node *node,
				   struct rb_node ***p_in,
				   struct rb_node **parent_in)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	if (p_in && parent_in) {
		p = *p_in;
		parent = *parent_in;
		goto do_insert;
	}

	p = search_start ? &search_start : &root->rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

do_insert:
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

/**
 * __etree_search - searches @tree for an entry that contains @offset. Such
 * entry would have entry->start <= offset && entry->end >= offset.
 *
 * @tree - the tree to search
 * @offset - offset that should fall within an entry in @tree
 * @next_ret - pointer to the first entry whose range ends after @offset
 * @prev_ret - pointer to the first entry whose range begins before @offset
 * @p_ret - pointer where new node should be anchored (used when inserting an
 *	    entry in the tree)
 * @parent_ret - points to entry which would have been the parent of the entry,
 *               containing @offset
 *
 * This function returns a pointer to the entry that contains @offset byte
 * address. If no such entry exists, then NULL is returned and the other
 * pointer arguments to the function are filled, otherwise the found entry is
 * returned and other pointers are left untouched.
 */
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **next_ret,
				      struct rb_node **prev_ret,
				      struct rb_node ***p_ret,
				      struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **n = &root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (*n) {
		prev = *n;
		entry = rb_entry(prev, struct tree_entry, rb_node);
		prev_entry = entry;

		if (offset < entry->start)
			n = &(*n)->rb_left;
		else if (offset > entry->end)
			n = &(*n)->rb_right;
		else
			return *n;
	}

	if (p_ret)
		*p_ret = n;
	if (parent_ret)
		*parent_ret = prev;

	if (next_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
		prev = orig_prev;
	}

	if (prev_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *
tree_search_for_insert(struct extent_io_tree *tree,
		       u64 offset,
		       struct rb_node ***p_ret,
		       struct rb_node **parent_ret)
{
	struct rb_node *next = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
	if (!ret)
		return next;
	return ret;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or do sleeping operations in
 * the async helper threads instead, as we have locked the extents
 * around the pages we've already done IO on).
 */
static void merge_state(struct extent_io_tree *tree,
			struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			if (tree->private_data &&
			    is_data_inode(tree->private_data))
				btrfs_merge_delalloc_extent(tree->private_data,
							    state, other);
			state->start = other->start;
			rb_erase(&other->rb_node, &tree->state);
			RB_CLEAR_NODE(&other->rb_node);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			if (tree->private_data &&
			    is_data_inode(tree->private_data))
				btrfs_merge_delalloc_extent(tree->private_data,
							    state, other);
			state->end = other->end;
			rb_erase(&other->rb_node, &tree->state);
			RB_CLEAR_NODE(&other->rb_node);
			free_extent_state(other);
		}
	}
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state, unsigned *bits,
			   struct extent_changeset *changeset);

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			struct rb_node ***p,
			struct rb_node **parent,
			unsigned *bits, struct extent_changeset *changeset)
{
	struct rb_node *node;

	if (end < start) {
		btrfs_err(tree->fs_info,
			"insert state: end < start %llu %llu", end, start);
		WARN_ON(1);
	}
	state->start = start;
	state->end = end;

	set_state_bits(tree, state, bits, changeset);

	node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		btrfs_err(tree->fs_info,
		       "found node %llu %llu on insert of %llu %llu",
		       found->start, found->end, start, end);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_split_delalloc_extent(tree->private_data, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
			   &prealloc->rb_node, NULL, NULL);
	if (node) {
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

static struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);
	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    unsigned *bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
	int ret;

	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_clear_delalloc_extent(tree->private_data, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	struct inode *inode = tree->private_data;

	btrfs_panic(btrfs_sb(inode->i_sb), err,
	"locking error: extent tree was modified by another thread while locked");
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       unsigned bits, int wake, int delete,
		       struct extent_state **cached_state,
		       gfp_t mask, struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	u64 last_end;
	int err;
	int clear = 0;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* the state doesn't have the wanted bits, go ahead */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, &bits, wake,
						changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, &bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, &bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}

static void wait_on_state(struct extent_io_tree *tree,
			  struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    unsigned long bits)
{
	struct extent_state *state;
	struct rb_node *node;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
process_node:
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			refcount_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			node = rb_next(node);
			goto process_node;
		}
	}
out:
	spin_unlock(&tree->lock);
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   unsigned *bits, struct extent_changeset *changeset)
{
	unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_set_delalloc_extent(tree->private_data, state, bits);

	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

/*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */
static int __must_check
__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		 unsigned bits, unsigned exclusive_bits,
		 u64 *failed_start, struct extent_state **cached_state,
		 gfp_t mask, struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

again:
	if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, end,
				   &p, &parent, &bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, &bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   NULL, NULL, &bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

}

int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   unsigned bits, u64 *failed_start,
		   struct extent_state **cached_state, gfp_t mask)
{
	return __set_extent_bit(tree, start, end, bits, 0, failed_start,
				cached_state, mask, NULL);
}
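
/*
 * Illustrative sketch (not upstream text): marking a range dirty in an io
 * tree. With exclusive_bits == 0 the -EEXIST path is unreachable, so
 * failed_start may be NULL:
 *
 *	set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, NULL, GFP_NOFS);
 *
 * The exclusive behaviour (-EEXIST plus *failed_start) is only exercised
 * via __set_extent_bit(), e.g. by the EXTENT_LOCKED paths below.
 */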

/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 * 			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range.  If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits.  This is only meant to be used by things that are mergeable, ie
 * converting from say DELALLOC to DIRTY.  This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       unsigned bits, unsigned clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = insert_state(tree, prealloc, start, end,
				   &p, &parent, &bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, &bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, &clear_bits, 0,
						NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   NULL, NULL, &bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}

	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}

/* wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   unsigned bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	BUG_ON(bits & EXTENT_LOCKED);

	return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
				changeset);
}

int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
			   unsigned bits)
{
	return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
				GFP_NOWAIT, NULL);
}

int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     unsigned bits, int wake, int delete,
		     struct extent_state **cached)
{
	return __clear_extent_bit(tree, start, end, bits, wake, delete,
				  cached, GFP_NOFS, NULL);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			     unsigned bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	BUG_ON(bits & EXTENT_LOCKED);

	return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
				  changeset);
}

/*
 * either insert or lock state struct between start and end use mask to tell
 * us if waiting is desired.
 */
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		     struct extent_state **cached_state)
{
	int err;
	u64 failed_start;

	while (1) {
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       EXTENT_LOCKED, &failed_start,
				       cached_state, GFP_NOFS, NULL);
		if (err == -EEXIST) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else
			break;
		WARN_ON(start > end);
	}
	return err;
}
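
/*
 * Illustrative usage sketch (not upstream text): callers typically bracket
 * IO on a file range with the extent lock:
 *
 *	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 *	struct extent_state *cached = NULL;
 *
 *	lock_extent_bits(tree, start, end, &cached);
 *	// ... read or write the range ...
 *	unlock_extent_cached(tree, start, end, &cached);
 *
 * Handing the cached state back to unlock avoids a second tree search.
 */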

int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
			       &failed_start, NULL, GFP_NOFS, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, NULL);
		return 0;
	}
	return 1;
}
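
/*
 * Note on try_lock_extent() (illustrative, not upstream text): it returns 1
 * when the whole [start, end] range was locked and 0 when some part was
 * already locked by someone else; in the latter case any prefix it managed
 * to lock has already been unlocked again, so a caller may simply do:
 *
 *	if (!try_lock_extent(tree, start, end))
 *		return -EAGAIN;	// hypothetical caller policy
 */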

void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		BUG_ON(!page); /* Pages should be in the extent_io_tree */
		clear_page_dirty_for_io(page);
		put_page(page);
		index++;
	}
}

void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		BUG_ON(!page); /* Pages should be in the extent_io_tree */
		__set_page_dirty_nobuffers(page);
		account_page_redirty(page);
		put_page(page);
		index++;
	}
}

/*
 * find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will be returned if
 * nothing was found after 'start'.
 */
static struct extent_state *
find_first_extent_bit_state(struct extent_io_tree *tree,
			    u64 start, unsigned bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}

/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned. If found something, return 0.
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, unsigned bits,
			  struct extent_state **cached_state)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					goto got_it;
			}
			free_extent_state(*cached_state);
			*cached_state = NULL;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = 0;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}
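
/*
 * Illustrative sketch (not upstream text): walking every DIRTY range by
 * advancing start past each hit, relying on the 0 == found convention:
 *
 *	u64 start = 0, found_start, found_end;
 *
 *	while (!find_first_extent_bit(tree, start, &found_start, &found_end,
 *				      EXTENT_DIRTY, NULL)) {
 *		// ... process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 */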

/**
 * find_first_clear_extent_bit - find the first range that has @bits not set.
 * This range could start before @start.
 *
 * @tree - the tree to search
 * @start - the offset at/after which the found extent should start
 * @start_ret - records the beginning of the range
 * @end_ret - records the end of the range (inclusive)
 * @bits - the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, unsigned bits)
{
	struct extent_state *state;
	struct rb_node *node, *prev = NULL, *next;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		node = __etree_search(tree, start, &next, &prev, NULL, NULL);
		if (!node) {
			node = next;
			if (!node) {
				/*
				 * We are past the last allocated chunk,
				 * set start at the end of the last extent. The
				 * device alloc tree should never be empty so
				 * prev is always set.
				 */
				ASSERT(prev);
				state = rb_entry(prev, struct extent_state, rb_node);
				*start_ret = state->end + 1;
				*end_ret = -1;
				goto out;
			}
		}
		/*
		 * At this point 'node' either contains 'start' or start is
		 * before 'node'
		 */
		state = rb_entry(node, struct extent_state, rb_node);

		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *     |
				 *     start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as
				 * the beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                        start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 * 0   |
			 *    start
			 */
			if (prev) {
				state = rb_entry(prev, struct extent_state,
						 rb_node);
				*start_ret = state->end + 1;
			} else {
				*start_ret = 0;
			}
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
}
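
/*
 * Illustrative sketch (not upstream text): because the found range can end
 * at -1 when it runs past the last extent in the tree, a caller tracking a
 * device must trim the result itself, e.g.:
 *
 *	find_first_clear_extent_bit(tree, start, &range_start, &range_end,
 *				    CHUNK_ALLOCATED);
 *	range_end = min(range_end, device_size - 1);	// hypothetical trim
 */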

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * true is returned if we find something, false if nothing was in the tree
 */
static noinline bool find_delalloc_range(struct extent_io_tree *tree,
					 u64 *start, u64 *end, u64 max_bytes,
					 struct extent_state **cached_state)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

static int __process_pages_contig(struct address_space *mapping,
				  struct page *locked_page,
				  pgoff_t start_index, pgoff_t end_index,
				  unsigned long page_ops, pgoff_t *index_ret);

static noinline void __unlock_for_delalloc(struct inode *inode,
					   struct page *locked_page,
					   u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;

	ASSERT(locked_page);
	if (index == locked_page->index && end_index == index)
		return;

	__process_pages_contig(inode->i_mapping, locked_page, index, end_index,
			       PAGE_UNLOCK, NULL);
}

static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,
					u64 delalloc_start,
					u64 delalloc_end)
{
	unsigned long index = delalloc_start >> PAGE_SHIFT;
	unsigned long index_ret = index;
	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
	int ret;

	ASSERT(locked_page);
	if (index == locked_page->index && index == end_index)
		return 0;

	ret = __process_pages_contig(inode->i_mapping, locked_page, index,
				     end_index, PAGE_LOCK, &index_ret);
	if (ret == -EAGAIN)
		__unlock_for_delalloc(inode, locked_page, delalloc_start,
				      (u64)index_ret << PAGE_SHIFT);
	return ret;
}

/*
 * Find and lock a contiguous range of bytes in the file marked as delalloc,
 * not more than BTRFS_MAX_EXTENT_SIZE. @start and @end are used to return
 * the range.
 *
 * Return: true if we find something, false if nothing was in the tree.
 */
EXPORT_FOR_TESTS
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
				    struct page *locked_page, u64 *start,
				    u64 *end)
{
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
	u64 delalloc_start;
	u64 delalloc_end;
	bool found;
	struct extent_state *cached_state = NULL;
	int ret;
	int loops = 0;

again:
	/* step one, find a bunch of delalloc bytes starting at start */
	delalloc_start = *start;
	delalloc_end = 0;
	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
				    max_bytes, &cached_state);
	if (!found || delalloc_end <= *start) {
		*start = delalloc_start;
		*end = delalloc_end;
		free_extent_state(cached_state);
		return false;
	}

	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page
	 */
	if (delalloc_start < *start)
		delalloc_start = *start;

	/*
	 * make sure to limit the number of pages we try to lock down
	 */
	if (delalloc_end + 1 - delalloc_start > max_bytes)
		delalloc_end = delalloc_start + max_bytes - 1;

	/* step two, lock all the pages after the page that has start */
	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);
	ASSERT(!ret || ret == -EAGAIN);
	if (ret == -EAGAIN) {
		/* some of the pages are gone, lets avoid looping by
		 * shortening the size of the delalloc range we're searching
		 */
		free_extent_state(cached_state);
		cached_state = NULL;
		if (!loops) {
			max_bytes = PAGE_SIZE;
			loops = 1;
			goto again;
		} else {
			found = false;
			goto out_failed;
		}
	}

	/* step three, lock the state bits for the whole range */
	lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);

	/* then test to make sure it is all still delalloc */
	ret = test_range_bit(tree, delalloc_start, delalloc_end,
			     EXTENT_DELALLOC, 1, cached_state);
	if (!ret) {
		unlock_extent_cached(tree, delalloc_start, delalloc_end,
				     &cached_state);
		__unlock_for_delalloc(inode, locked_page,
				      delalloc_start, delalloc_end);
		cond_resched();
		goto again;
	}
	free_extent_state(cached_state);
	*start = delalloc_start;
	*end = delalloc_end;
out_failed:
	return found;
}

static int __process_pages_contig(struct address_space *mapping,
				  struct page *locked_page,
				  pgoff_t start_index, pgoff_t end_index,
				  unsigned long page_ops, pgoff_t *index_ret)
{
	unsigned long nr_pages = end_index - start_index + 1;
	unsigned long pages_locked = 0;
	pgoff_t index = start_index;
	struct page *pages[16];
	unsigned ret;
	int err = 0;
	int i;

	if (page_ops & PAGE_LOCK) {
		ASSERT(page_ops == PAGE_LOCK);
		ASSERT(index_ret && *index_ret == start_index);
	}

	if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
		mapping_set_error(mapping, -EIO);

	while (nr_pages > 0) {
		ret = find_get_pages_contig(mapping, index,
				     min_t(unsigned long,
				     nr_pages, ARRAY_SIZE(pages)), pages);
		if (ret == 0) {
			/*
			 * Only if we're going to lock these pages,
			 * can we find nothing at @index.
			 */
			ASSERT(page_ops & PAGE_LOCK);
			err = -EAGAIN;
			goto out;
		}

		for (i = 0; i < ret; i++) {
			if (page_ops & PAGE_SET_PRIVATE2)
				SetPagePrivate2(pages[i]);

			if (pages[i] == locked_page) {
				put_page(pages[i]);
				pages_locked++;
				continue;
			}
			if (page_ops & PAGE_CLEAR_DIRTY)
				clear_page_dirty_for_io(pages[i]);
			if (page_ops & PAGE_SET_WRITEBACK)
				set_page_writeback(pages[i]);
			if (page_ops & PAGE_SET_ERROR)
				SetPageError(pages[i]);
			if (page_ops & PAGE_END_WRITEBACK)
				end_page_writeback(pages[i]);
			if (page_ops & PAGE_UNLOCK)
				unlock_page(pages[i]);
			if (page_ops & PAGE_LOCK) {
				lock_page(pages[i]);
				if (!PageDirty(pages[i]) ||
				    pages[i]->mapping != mapping) {
					unlock_page(pages[i]);
					put_page(pages[i]);
					err = -EAGAIN;
					goto out;
				}
			}
			put_page(pages[i]);
			pages_locked++;
		}
		nr_pages -= ret;
		index += ret;
		cond_resched();
	}
out:
	if (err && index_ret)
		*index_ret = start_index + pages_locked - 1;
	return err;
}

void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
				  u64 delalloc_end, struct page *locked_page,
				  unsigned clear_bits,
				  unsigned long page_ops)
{
	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
			 NULL);

	__process_pages_contig(inode->i_mapping, locked_page,
			       start >> PAGE_SHIFT, end >> PAGE_SHIFT,
			       page_ops, NULL);
}

/*
 * count the number of bytes in the tree that have a given bit(s)
 * set.  This can be fairly slow, except for EXTENT_DIRTY which is
 * cached.  The total number found is returned.
 */
u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     unsigned bits, int contig)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	u64 last = 0;
	int found = 0;

	if (WARN_ON(search_end <= cur_start))
		return 0;

	spin_lock(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {
		total_bytes = tree->dirty_bytes;
		goto out;
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
		if (contig && found && state->start > last + 1)
			break;
		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = max(cur_start, state->start);
				found = 1;
			}
			last = state->end;
		} else if (contig && found) {
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return total_bytes;
}
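
/*
 * Illustrative sketch (not upstream text): counting up to SZ_1M of
 * contiguous DELALLOC bytes at or after 'offset':
 *
 *	u64 found_start = offset;
 *	u64 bytes = count_range_bits(tree, &found_start, (u64)-1, SZ_1M,
 *				     EXTENT_DELALLOC, 1);
 *
 * On return found_start points at the first matching byte and bytes is the
 * contiguous length found (0 if nothing matched).
 */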

/*
 * set the private field for a given byte offset in the tree.  If there isn't
 * an extent_state there already, this does nothing.
 */
static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
		struct io_failure_record *failrec)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->failrec = failrec;
out:
	spin_unlock(&tree->lock);
	return ret;
}

static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
		struct io_failure_record **failrec)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*failrec = state->failrec;
out:
	spin_unlock(&tree->lock);
	return ret;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   unsigned bits, int filled, struct extent_state *cached)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	spin_lock(&tree->lock);
	if (cached && extent_state_in_tree(cached) && cached->start <= start &&
	    cached->end > start)
		node = &cached->rb_node;
	else
		node = tree_search(tree, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}

		if (state->start > end)
			break;

		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}

		if (state->end == (u64)-1)
			break;

		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
		if (!node) {
			if (filled)
				bitset = 0;
			break;
		}
	}
	spin_unlock(&tree->lock);
	return bitset;
}
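
/*
 * Illustrative note (not upstream text): the 'filled' flag selects between
 * "every byte has the bits" and "any byte has the bits":
 *
 *	// 1 only if all of [start, end] is uptodate
 *	test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
 *
 *	// 1 if any part of [start, end] is locked
 *	test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL);
 */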

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
{
	u64 start = page_offset(page);
	u64 end = start + PAGE_SIZE - 1;
	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
		SetPageUptodate(page);
}

int free_io_failure(struct extent_io_tree *failure_tree,
		    struct extent_io_tree *io_tree,
		    struct io_failure_record *rec)
{
	int ret;
	int err = 0;

	set_state_failrec(failure_tree, rec->start, NULL);
	ret = clear_extent_bits(failure_tree, rec->start,
				rec->start + rec->len - 1,
				EXTENT_LOCKED | EXTENT_DIRTY);
	if (ret)
		err = ret;

	ret = clear_extent_bits(io_tree, rec->start,
				rec->start + rec->len - 1,
				EXTENT_DAMAGED);
	if (ret && !err)
		err = ret;

	kfree(rec);
	return err;
}

/*
 * this bypasses the standard btrfs submit functions deliberately, as
 * the standard behavior is to write all copies in a raid setup. here we only
 * want to write the one bad copy. so we do the mapping for ourselves and issue
 * submit_bio directly.
 * to avoid any synchronization issues, wait for the data after writing, which
 * actually prevents the read that triggered the error from finishing.
 * currently, there can be no more than two copies of every data bit. thus,
 * exactly one rewrite is required.
 */
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
		      u64 length, u64 logical, struct page *page,
		      unsigned int pg_offset, int mirror_num)
{
	struct bio *bio;
	struct btrfs_device *dev;
	u64 map_length = 0;
	u64 sector;
	struct btrfs_bio *bbio = NULL;
	int ret;

	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
	BUG_ON(!mirror_num);

	bio = btrfs_io_bio_alloc(1);
	bio->bi_iter.bi_size = 0;
	map_length = length;

	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are doing the
	 * read repair operation.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	if (btrfs_is_parity_mirror(fs_info, logical, length)) {
		/*
		 * For RAID5/6 map the block with BTRFS_MAP_READ: the write
		 * mapping would target the whole stripe, while the READ
		 * mapping resolves to the single copy we want to rewrite.
		 */
		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
				      &map_length, &bbio, 0);
		if (ret) {
			btrfs_bio_counter_dec(fs_info);
			bio_put(bio);
			return -EIO;
		}
		ASSERT(bbio->mirror_num == 1);
	} else {
		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
				      &map_length, &bbio, mirror_num);
		if (ret) {
			btrfs_bio_counter_dec(fs_info);
			bio_put(bio);
			return -EIO;
		}
		BUG_ON(mirror_num != bbio->mirror_num);
	}

	sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
	bio->bi_iter.bi_sector = sector;
	dev = bbio->stripes[bbio->mirror_num - 1].dev;
	btrfs_put_bbio(bbio);
	if (!dev || !dev->bdev ||
	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
		btrfs_bio_counter_dec(fs_info);
		bio_put(bio);
		return -EIO;
	}
	bio_set_dev(bio, dev->bdev);
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
	bio_add_page(bio, page, length, pg_offset);

	if (btrfsic_submit_bio_wait(bio)) {
		/* try to remap that extent elsewhere? */
		btrfs_bio_counter_dec(fs_info);
		bio_put(bio);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
		return -EIO;
	}

	btrfs_info_rl_in_rcu(fs_info,
		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
				  ino, start,
				  rcu_str_deref(dev->name), sector);
	btrfs_bio_counter_dec(fs_info);
	bio_put(bio);
	return 0;
}

int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	u64 start = eb->start;
	int i, num_pages = num_extent_pages(eb);
	int ret = 0;

	if (sb_rdonly(fs_info->sb))
		return -EROFS;

	for (i = 0; i < num_pages; i++) {
		struct page *p = eb->pages[i];

		ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
					start - page_offset(p), mirror_num);
		if (ret)
			break;
		start += PAGE_SIZE;
	}

	return ret;
}

/*
 * each time an IO finishes, we do a fast check in the IO failure tree
 * to see if we need to process or clean up an io_failure_record
 */
int clean_io_failure(struct btrfs_fs_info *fs_info,
		     struct extent_io_tree *failure_tree,
		     struct extent_io_tree *io_tree, u64 start,
		     struct page *page, u64 ino, unsigned int pg_offset)
{
	u64 private;
	struct io_failure_record *failrec;
	struct extent_state *state;
	int num_copies;
	int ret;

	private = 0;
	ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
			       EXTENT_DIRTY, 0);
	if (!ret)
		return 0;

	ret = get_state_failrec(failure_tree, start, &failrec);
	if (ret)
		return 0;

	BUG_ON(!failrec->this_mirror);

	if (failrec->in_validation) {
		/* there was no real error, just free the record */
		btrfs_debug(fs_info,
			"clean_io_failure: freeing dummy error at %llu",
			failrec->start);
		goto out;
	}
	if (sb_rdonly(fs_info->sb))
		goto out;

	spin_lock(&io_tree->lock);
	state = find_first_extent_bit_state(io_tree,
					    failrec->start,
					    EXTENT_LOCKED);
	spin_unlock(&io_tree->lock);

	if (state && state->start <= failrec->start &&
	    state->end >= failrec->start + failrec->len - 1) {
		num_copies = btrfs_num_copies(fs_info, failrec->logical,
					      failrec->len);
		if (num_copies > 1) {
			repair_io_failure(fs_info, ino, start, failrec->len,
					  failrec->logical, page, pg_offset,
					  failrec->failed_mirror);
		}
	}

out:
	free_io_failure(failure_tree, io_tree, failrec);

	return 0;
}

/*
 * Can be called when
 * - hold extent lock
 * - under ordered extent
 * - the inode is freeing
 */
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
{
	struct extent_io_tree *failure_tree = &inode->io_failure_tree;
	struct io_failure_record *failrec;
	struct extent_state *state, *next;

	if (RB_EMPTY_ROOT(&failure_tree->state))
		return;

	spin_lock(&failure_tree->lock);
	state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
	while (state) {
		if (state->start > end)
			break;

		ASSERT(state->end <= end);

		next = next_state(state);

		failrec = state->failrec;
		free_extent_state(state);
		kfree(failrec);

		state = next;
	}
	spin_unlock(&failure_tree->lock);
}

int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
		struct io_failure_record **failrec_ret)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct io_failure_record *failrec;
	struct extent_map *em;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	int ret;
	u64 logical;

	ret = get_state_failrec(failure_tree, start, &failrec);
	if (ret) {
		failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
		if (!failrec)
			return -ENOMEM;

		failrec->start = start;
		failrec->len = end - start + 1;
		failrec->this_mirror = 0;
		failrec->bio_flags = 0;
		failrec->in_validation = 0;

		read_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, failrec->len);
		if (!em) {
			read_unlock(&em_tree->lock);
			kfree(failrec);
			return -EIO;
		}

		if (em->start > start || em->start + em->len <= start) {
			free_extent_map(em);
			em = NULL;
		}
		read_unlock(&em_tree->lock);
		if (!em) {
			kfree(failrec);
			return -EIO;
		}

		logical = start - em->start;
		logical = em->block_start + logical;
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
			logical = em->block_start;
			failrec->bio_flags = EXTENT_BIO_COMPRESSED;
			extent_set_compress_type(&failrec->bio_flags,
						 em->compress_type);
		}

		btrfs_debug(fs_info,
			"Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
			logical, start, failrec->len);

		failrec->logical = logical;
		free_extent_map(em);

		/* set the bits in the private failure tree */
		ret = set_extent_bits(failure_tree, start, end,
					EXTENT_LOCKED | EXTENT_DIRTY);
		if (ret >= 0)
			ret = set_state_failrec(failure_tree, start, failrec);
		/* set the bits in the inode's tree */
		if (ret >= 0)
			ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
		if (ret < 0) {
			kfree(failrec);
			return ret;
		}
	} else {
		btrfs_debug(fs_info,
			"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
			failrec->logical, failrec->start, failrec->len,
			failrec->in_validation);
		/*
		 * when data can be on disk more than twice, add to failrec here
		 * (e.g. with a list for failed_mirror) to make
		 * clean_io_failure() clean all those errors at once.
		 */
	}

	*failrec_ret = failrec;

	return 0;
}

bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
			    struct io_failure_record *failrec, int failed_mirror)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int num_copies;

	num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
	if (num_copies == 1) {
		/*
		 * we only have a single copy of the data, so don't bother with
		 * all the retry and error correction code that follows. no
		 * matter what the error is, it is very likely to persist.
		 */
		btrfs_debug(fs_info,
			"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
			num_copies, failrec->this_mirror, failed_mirror);
		return false;
	}

	/*
	 * there are two premises:
	 *	a) deliver good data to the caller
	 *	b) correct the bad sectors on disk
	 */
	if (failed_bio_pages > 1) {
		/*
		 * to fulfill b), we need to know the exact failing sectors, as
		 * we don't want to rewrite any more than the failed ones. thus,
		 * we need separate read requests for the failed bio
		 *
		 * if the following BUG_ON triggers, our validation request got
		 * merged. we need separate requests for our algorithm to work.
		 */
		BUG_ON(failrec->in_validation);
		failrec->in_validation = 1;
		failrec->this_mirror = failed_mirror;
	} else {
		/*
		 * we're ready to fulfill a) and b) alongside. get a good copy
		 * of the failed sector and if we succeed, we have setup
		 * everything for repair_io_failure to do the rest for us.
		 */
		if (failrec->in_validation) {
			BUG_ON(failrec->this_mirror != failed_mirror);
			failrec->in_validation = 0;
			failrec->this_mirror = 0;
		}
		failrec->failed_mirror = failed_mirror;
		failrec->this_mirror++;
		if (failrec->this_mirror == failed_mirror)
			failrec->this_mirror++;
	}

	if (failrec->this_mirror > num_copies) {
		btrfs_debug(fs_info,
			"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
			num_copies, failrec->this_mirror, failed_mirror);
		return false;
	}

	return true;
}

struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
				    struct io_failure_record *failrec,
				    struct page *page, int pg_offset, int icsum,
				    bio_end_io_t *endio_func, void *data)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct bio *bio;
	struct btrfs_io_bio *btrfs_failed_bio;
	struct btrfs_io_bio *btrfs_bio;

	bio = btrfs_io_bio_alloc(1);
	bio->bi_end_io = endio_func;
	bio->bi_iter.bi_sector = failrec->logical >> 9;
	bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
	bio->bi_iter.bi_size = 0;
	bio->bi_private = data;

	btrfs_failed_bio = btrfs_io_bio(failed_bio);
	if (btrfs_failed_bio->csum) {
		u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

		btrfs_bio = btrfs_io_bio(bio);
		btrfs_bio->csum = btrfs_bio->csum_inline;
		icsum *= csum_size;
		memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
		       csum_size);
	}

	bio_add_page(bio, page, failrec->len, pg_offset);

	return bio;
}
2556
2557 /*
2558  * this is a generic handler for readpage errors. if other copies
2559  * exist, read those and write back good data to the failed position.
2560  * does not do any state or mirror number tracking beyond what the
2561  * io failure record keeps.
2562  */
2563static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2564 struct page *page, u64 start, u64 end,
2565 int failed_mirror)
2566{
2567 struct io_failure_record *failrec;
2568 struct inode *inode = page->mapping->host;
2569 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2570 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2571 struct bio *bio;
2572 int read_mode = 0;
2573 blk_status_t status;
2574 int ret;
2575 unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
2576
2577 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2578
2579 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2580 if (ret)
2581 return ret;
2582
2583 if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
2584 failed_mirror)) {
2585 free_io_failure(failure_tree, tree, failrec);
2586 return -EIO;
2587 }
2588
2589 if (failed_bio_pages > 1)
2590 read_mode |= REQ_FAILFAST_DEV;
2591
2592 phy_offset >>= inode->i_sb->s_blocksize_bits;
2593 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2594 start - page_offset(page),
2595 (int)phy_offset, failed_bio->bi_end_io,
2596 NULL);
2597 bio->bi_opf = REQ_OP_READ | read_mode;
2598
2599 btrfs_debug(btrfs_sb(inode->i_sb),
2600 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2601 read_mode, failrec->this_mirror, failrec->in_validation);
2602
2603 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2604 failrec->bio_flags);
2605 if (status) {
2606 free_io_failure(failure_tree, tree, failrec);
2607 bio_put(bio);
2608 ret = blk_status_to_errno(status);
2609 }
2610
2611 return ret;
2612}
2613
2614 /* lots and lots of room for performance fixes in the end_bio funcs */
2615
2616void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2617{
2618 int uptodate = (err == 0);
2619 int ret = 0;
2620
2621 btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
2622
2623 if (!uptodate) {
2624 ClearPageUptodate(page);
2625 SetPageError(page);
2626 ret = err < 0 ? err : -EIO;
2627 mapping_set_error(page->mapping, ret);
2628 }
2629}
2630
2631 /*
2632  * after a writepage IO is done, we need to:
2633  * clear the uptodate bits on error
2634  * clear the writeback bits in the extent tree for this IO
2635  * end_page_writeback if the page has no more pending IO
2636  *
2637  * Scheduling is not allowed, so the extent state tree is expected
2638  * to have one and only one object corresponding to this IO.
2639  */
2640static void end_bio_extent_writepage(struct bio *bio)
2641{
2642 int error = blk_status_to_errno(bio->bi_status);
2643 struct bio_vec *bvec;
2644 u64 start;
2645 u64 end;
2646 struct bvec_iter_all iter_all;
2647
2648 ASSERT(!bio_flagged(bio, BIO_CLONED));
2649 bio_for_each_segment_all(bvec, bio, iter_all) {
2650 struct page *page = bvec->bv_page;
2651 struct inode *inode = page->mapping->host;
2652 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2653
2654 /* We always issue full-page writes, but if some block
2655  * in a page fails to write, blk_update_request() will
2656  * advance bv_offset and adjust bv_len to compensate.
2657  * Print a warning for nonzero offsets, and an error
2658  * if they don't add up to a full page.  */
2659 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2660 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2661 btrfs_err(fs_info,
2662 "partial page write in btrfs with offset %u and length %u",
2663 bvec->bv_offset, bvec->bv_len);
2664 else
2665 btrfs_info(fs_info,
2666 "incomplete page write in btrfs with offset %u and length %u",
2667 bvec->bv_offset, bvec->bv_len);
2668 }
2669
2670 start = page_offset(page);
2671 end = start + bvec->bv_offset + bvec->bv_len - 1;
2672
2673 end_extent_writepage(page, error, start, end);
2674 end_page_writeback(page);
2675 }
2676
2677 bio_put(bio);
2678}
2679
2680static void
2681endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2682 int uptodate)
2683{
2684 struct extent_state *cached = NULL;
2685 u64 end = start + len - 1;
2686
2687 if (uptodate && tree->track_uptodate)
2688 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2689 unlock_extent_cached_atomic(tree, start, end, &cached);
2690}
2691
2692 /*
2693  * after a readpage IO is done, we need to:
2694  * clear the uptodate bits on error
2695  * set the uptodate bits if things worked
2696  * set the page up to date if all extents in the tree are uptodate
2697  * clear the lock bit in the extent tree
2698  * unlock the page if there are no other extents locked on it
2699  *
2700  * Scheduling is not allowed, so the extent state tree is expected
2701  * to have one and only one object corresponding to this IO.
2702  */
2703static void end_bio_extent_readpage(struct bio *bio)
2704{
2705 struct bio_vec *bvec;
2706 int uptodate = !bio->bi_status;
2707 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2708 struct extent_io_tree *tree, *failure_tree;
2709 u64 offset = 0;
2710 u64 start;
2711 u64 end;
2712 u64 len;
2713 u64 extent_start = 0;
2714 u64 extent_len = 0;
2715 int mirror;
2716 int ret;
2717 struct bvec_iter_all iter_all;
2718
2719 ASSERT(!bio_flagged(bio, BIO_CLONED));
2720 bio_for_each_segment_all(bvec, bio, iter_all) {
2721 struct page *page = bvec->bv_page;
2722 struct inode *inode = page->mapping->host;
2723 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2724 bool data_inode = btrfs_ino(BTRFS_I(inode))
2725 != BTRFS_BTREE_INODE_OBJECTID;
2726
2727 btrfs_debug(fs_info,
2728 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2729 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2730 io_bio->mirror_num);
2731 tree = &BTRFS_I(inode)->io_tree;
2732 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2733
2734 /* We always issue full-page reads, but if some block
2735  * in a page fails to read, blk_update_request() will
2736  * advance bv_offset and adjust bv_len to compensate.
2737  * Print a warning for nonzero offsets, and an error
2738  * if they don't add up to a full page.  */
2739 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2740 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2741 btrfs_err(fs_info,
2742 "partial page read in btrfs with offset %u and length %u",
2743 bvec->bv_offset, bvec->bv_len);
2744 else
2745 btrfs_info(fs_info,
2746 "incomplete page read in btrfs with offset %u and length %u",
2747 bvec->bv_offset, bvec->bv_len);
2748 }
2749
2750 start = page_offset(page);
2751 end = start + bvec->bv_offset + bvec->bv_len - 1;
2752 len = bvec->bv_len;
2753
2754 mirror = io_bio->mirror_num;
2755 if (likely(uptodate)) {
2756 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2757 page, start, end,
2758 mirror);
2759 if (ret)
2760 uptodate = 0;
2761 else
2762 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2763 failure_tree, tree, start,
2764 page,
2765 btrfs_ino(BTRFS_I(inode)), 0);
2766 }
2767
2768 if (likely(uptodate))
2769 goto readpage_ok;
2770
2771 if (data_inode) {
2772
2773 /*
2774  * The generic bio_readpage_error handles errors the
2775  * following way: If possible, new read requests are
2776  * created and submitted and will end up in
2777  * end_bio_extent_readpage as well (if we're lucky,
2778  * not in the !uptodate case). In that case it returns
2779  * 0 and we just go on with the next page in our bio.
2780  * If it can't handle the error it will return -EIO and
2781  * we remain responsible for that page.
2782  */
2783 ret = bio_readpage_error(bio, offset, page, start, end,
2784 mirror);
2785 if (ret == 0) {
2786 uptodate = !bio->bi_status;
2787 offset += len;
2788 continue;
2789 }
2790 } else {
2791 struct extent_buffer *eb;
2792
2793 eb = (struct extent_buffer *)page->private;
2794 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
2795 eb->read_mirror = mirror;
2796 atomic_dec(&eb->io_pages);
2797 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
2798 &eb->bflags))
2799 btree_readahead_hook(eb, -EIO);
2800 }
2801readpage_ok:
2802 if (likely(uptodate)) {
2803 loff_t i_size = i_size_read(inode);
2804 pgoff_t end_index = i_size >> PAGE_SHIFT;
2805 unsigned off;
2806
2807 /* Zero out the end if this page straddles i_size */
2808 off = offset_in_page(i_size);
2809 if (page->index == end_index && off)
2810 zero_user_segment(page, off, PAGE_SIZE);
2811 SetPageUptodate(page);
2812 } else {
2813 ClearPageUptodate(page);
2814 SetPageError(page);
2815 }
2816 unlock_page(page);
2817 offset += len;
2818
2819 if (unlikely(!uptodate)) {
2820 if (extent_len) {
2821 endio_readpage_release_extent(tree,
2822 extent_start,
2823 extent_len, 1);
2824 extent_start = 0;
2825 extent_len = 0;
2826 }
2827 endio_readpage_release_extent(tree, start,
2828 end - start + 1, 0);
2829 } else if (!extent_len) {
2830 extent_start = start;
2831 extent_len = end + 1 - start;
2832 } else if (extent_start + extent_len == start) {
2833 extent_len += end + 1 - start;
2834 } else {
2835 endio_readpage_release_extent(tree, extent_start,
2836 extent_len, uptodate);
2837 extent_start = start;
2838 extent_len = end + 1 - start;
2839 }
2840 }
2841
2842 if (extent_len)
2843 endio_readpage_release_extent(tree, extent_start, extent_len,
2844 uptodate);
2845 btrfs_io_bio_free_csum(io_bio);
2846 bio_put(bio);
2847}
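/*
 * Illustrative sketch (not part of the original file or the build): the
 * extent_start/extent_len bookkeeping above batches contiguous uptodate
 * pages so the io tree is unlocked once per run instead of once per page.
 * The function name and the page offsets below are hypothetical; a 4K page
 * size is assumed, and the third page is deliberately discontiguous.
 */
#if 0
static void example_readpage_batching(struct extent_io_tree *tree)
{
	u64 extent_start = 0, extent_len = 0;
	u64 starts[] = { 0, 4096, 16384 };	/* third page breaks the run */
	int i;

	for (i = 0; i < 3; i++) {
		u64 start = starts[i], end = start + 4095;

		if (!extent_len) {
			extent_start = start;
			extent_len = end + 1 - start;
		} else if (extent_start + extent_len == start) {
			extent_len += end + 1 - start;	/* grow the run */
		} else {
			/* flush the previous run, then start a new one */
			endio_readpage_release_extent(tree, extent_start,
						      extent_len, 1);
			extent_start = start;
			extent_len = end + 1 - start;
		}
	}
	if (extent_len)
		endio_readpage_release_extent(tree, extent_start,
					      extent_len, 1);
}
#endif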
2848
2849
2850 /*
2851  * Initialize the members up to but not including 'bio'. Use after
2852  * allocating a new btrfs_io_bio structure from a bioset.
2853  */
2854static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2855{
2856 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2857}
2858
2859
2860 /*
2861  * The following helpers allocate a bio. As it's backed by a bioset, it'll
2862  * never fail, hence no need to handle any error (-ENOMEM etc).
2863  */
2864struct bio *btrfs_bio_alloc(u64 first_byte)
2865{
2866 struct bio *bio;
2867
2868 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
2869 bio->bi_iter.bi_sector = first_byte >> 9;
2870 btrfs_io_bio_init(btrfs_io_bio(bio));
2871 return bio;
2872}
2873
2874struct bio *btrfs_bio_clone(struct bio *bio)
2875{
2876 struct btrfs_io_bio *btrfs_bio;
2877 struct bio *new;
2878
2879 /* Bio allocation backed by a bioset does not fail */
2880 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
2881 btrfs_bio = btrfs_io_bio(new);
2882 btrfs_io_bio_init(btrfs_bio);
2883 btrfs_bio->iter = bio->bi_iter;
2884 return new;
2885}
2886
2887struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2888{
2889 struct bio *bio;
2890
2891 /* Bio allocation backed by a bioset does not fail */
2892 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
2893 btrfs_io_bio_init(btrfs_io_bio(bio));
2894 return bio;
2895}
2896
2897struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2898{
2899 struct bio *bio;
2900 struct btrfs_io_bio *btrfs_bio;
2901
2902 /* Bio allocation backed by a bioset does not fail */
2903 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
2904 ASSERT(bio);
2905
2906 btrfs_bio = btrfs_io_bio(bio);
2907 btrfs_io_bio_init(btrfs_bio);
2908
2909 bio_trim(bio, offset >> 9, size >> 9);
2910 btrfs_bio->iter = bio->bi_iter;
2911 return bio;
2912}
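/*
 * Illustrative sketch (not part of the original file or the build): carving
 * the tail of a bio off at a hypothetical stripe boundary with
 * btrfs_bio_clone_partial(). Offset and size are in bytes and must be
 * sector aligned, since the clone trims in 512 byte units.
 */
#if 0
static struct bio *example_split_at(struct bio *orig, int boundary)
{
	int head = boundary;			/* bytes kept in the head */
	int tail = orig->bi_iter.bi_size - head;

	/* clone covering [head, head + tail) of the original payload */
	return btrfs_bio_clone_partial(orig, head, tail);
}
#endif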
2913
2914 /*
2915  * @opf:	bio REQ_OP_* and REQ_* flags as one value
2916  * @tree:	tree so we can call our merge_bio hook
2917  * @wbc:	optional writeback control for io accounting
2918  * @page:	page to add to the bio
2919  * @pg_offset:	offset of the new bio or to check whether we are adding
2920  *              a contiguous page to the previous one
2921  * @size:	portion of page that we want to write
2922  * @offset:	starting offset in the page
2923  * @bdev:	attach newly created bios to this bdev
2924  * @bio_ret:	must be valid pointer, newly allocated bio will be stored there
2925  * @end_io_func:     end_io callback for new bio
2926  * @mirror_num:	     desired mirror to read/write
2927  * @prev_bio_flags:  flags of previous bio to see if we can merge the current one
2928  * @bio_flags:	flags of the current bio to see if we can merge them
2929  */
2930static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2931 struct writeback_control *wbc,
2932 struct page *page, u64 offset,
2933 size_t size, unsigned long pg_offset,
2934 struct block_device *bdev,
2935 struct bio **bio_ret,
2936 bio_end_io_t end_io_func,
2937 int mirror_num,
2938 unsigned long prev_bio_flags,
2939 unsigned long bio_flags,
2940 bool force_bio_submit)
2941{
2942 int ret = 0;
2943 struct bio *bio;
2944 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2945 sector_t sector = offset >> 9;
2946
2947 ASSERT(bio_ret);
2948
2949 if (*bio_ret) {
2950 bool contig;
2951 bool can_merge = true;
2952
2953 bio = *bio_ret;
2954 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
2955 contig = bio->bi_iter.bi_sector == sector;
2956 else
2957 contig = bio_end_sector(bio) == sector;
2958
2959 ASSERT(tree->ops);
2960 if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
2961 can_merge = false;
2962
2963 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
2964 force_bio_submit ||
2965 bio_add_page(bio, page, page_size, pg_offset) < page_size) {
2966 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2967 if (ret < 0) {
2968 *bio_ret = NULL;
2969 return ret;
2970 }
2971 bio = NULL;
2972 } else {
2973 if (wbc)
2974 wbc_account_cgroup_owner(wbc, page, page_size);
2975 return 0;
2976 }
2977 }
2978
2979 bio = btrfs_bio_alloc(offset);
2980 bio_set_dev(bio, bdev);
2981 bio_add_page(bio, page, page_size, pg_offset);
2982 bio->bi_end_io = end_io_func;
2983 bio->bi_private = tree;
2984 bio->bi_write_hint = page->mapping->host->i_write_hint;
2985 bio->bi_opf = opf;
2986 if (wbc) {
2987 wbc_init_bio(wbc, bio);
2988 wbc_account_cgroup_owner(wbc, page, page_size);
2989 }
2990
2991 *bio_ret = bio;
2992
2993 return ret;
2994}
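/*
 * Illustrative sketch (not part of the original file or the build): a
 * minimal read caller of submit_extent_page(). A NULL wbc is fine for
 * reads; the bio pointer is carried across calls so contiguous pages merge
 * into one bio. The function name and parameters are hypothetical
 * stand-ins for what __do_readpage tracks.
 */
#if 0
static int example_read_one_page(struct extent_io_tree *tree,
				 struct page *page, u64 disk_bytenr,
				 struct block_device *bdev, struct bio **bio)
{
	return submit_extent_page(REQ_OP_READ, tree, NULL, page, disk_bytenr,
				  PAGE_SIZE, 0, bdev, bio,
				  end_bio_extent_readpage, 0, 0, 0, false);
}
#endif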
2995
2996static void attach_extent_buffer_page(struct extent_buffer *eb,
2997 struct page *page)
2998{
2999 if (!PagePrivate(page)) {
3000 SetPagePrivate(page);
3001 get_page(page);
3002 set_page_private(page, (unsigned long)eb);
3003 } else {
3004 WARN_ON(page->private != (unsigned long)eb);
3005 }
3006}
3007
3008void set_page_extent_mapped(struct page *page)
3009{
3010 if (!PagePrivate(page)) {
3011 SetPagePrivate(page);
3012 get_page(page);
3013 set_page_private(page, EXTENT_PAGE_PRIVATE);
3014 }
3015}
3016
3017static struct extent_map *
3018__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
3019 u64 start, u64 len, get_extent_t *get_extent,
3020 struct extent_map **em_cached)
3021{
3022 struct extent_map *em;
3023
3024 if (em_cached && *em_cached) {
3025 em = *em_cached;
3026 if (extent_map_in_tree(em) && start >= em->start &&
3027 start < extent_map_end(em)) {
3028 refcount_inc(&em->refs);
3029 return em;
3030 }
3031
3032 free_extent_map(em);
3033 *em_cached = NULL;
3034 }
3035
3036 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
3037 if (em_cached && !IS_ERR_OR_NULL(em)) {
3038 BUG_ON(*em_cached);
3039 refcount_inc(&em->refs);
3040 *em_cached = em;
3041 }
3042 return em;
3043}
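/*
 * Illustrative sketch (not part of the original file or the build): how
 * readahead reuses one extent map across consecutive pages via the
 * em_cached argument. The second lookup is served from the cache when the
 * same map still covers the offset, avoiding a tree search per page. The
 * function name is hypothetical.
 */
#if 0
static void example_em_cache(struct inode *inode, struct page *p0,
			     struct page *p1)
{
	struct extent_map *em_cached = NULL;
	struct extent_map *em;

	em = __get_extent_map(inode, p0, 0, 0, PAGE_SIZE,
			      btrfs_get_extent, &em_cached);
	if (!IS_ERR_OR_NULL(em))
		free_extent_map(em);
	/* likely a cache hit if the map spans both pages */
	em = __get_extent_map(inode, p1, 0, PAGE_SIZE, PAGE_SIZE,
			      btrfs_get_extent, &em_cached);
	if (!IS_ERR_OR_NULL(em))
		free_extent_map(em);

	if (em_cached)
		free_extent_map(em_cached);
}
#endif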
3044
3045 /*
3046  * basic readpage implementation.  Locked extent state structs are inserted
3047  * into the tree that are removed when the IO is done (by the end_io
3048  * handlers)
3049  * return 0 on success, otherwise return error
3050  */
3051static int __do_readpage(struct extent_io_tree *tree,
3052 struct page *page,
3053 get_extent_t *get_extent,
3054 struct extent_map **em_cached,
3055 struct bio **bio, int mirror_num,
3056 unsigned long *bio_flags, unsigned int read_flags,
3057 u64 *prev_em_start)
3058{
3059 struct inode *inode = page->mapping->host;
3060 u64 start = page_offset(page);
3061 const u64 end = start + PAGE_SIZE - 1;
3062 u64 cur = start;
3063 u64 extent_offset;
3064 u64 last_byte = i_size_read(inode);
3065 u64 block_start;
3066 u64 cur_end;
3067 struct extent_map *em;
3068 struct block_device *bdev;
3069 int ret = 0;
3070 int nr = 0;
3071 size_t pg_offset = 0;
3072 size_t iosize;
3073 size_t disk_io_size;
3074 size_t blocksize = inode->i_sb->s_blocksize;
3075 unsigned long this_bio_flag = 0;
3076
3077 set_page_extent_mapped(page);
3078
3079 if (!PageUptodate(page)) {
3080 if (cleancache_get_page(page) == 0) {
3081 BUG_ON(blocksize != PAGE_SIZE);
3082 unlock_extent(tree, start, end);
3083 goto out;
3084 }
3085 }
3086
3087 if (page->index == last_byte >> PAGE_SHIFT) {
3088 char *userpage;
3089 size_t zero_offset = offset_in_page(last_byte);
3090
3091 if (zero_offset) {
3092 iosize = PAGE_SIZE - zero_offset;
3093 userpage = kmap_atomic(page);
3094 memset(userpage + zero_offset, 0, iosize);
3095 flush_dcache_page(page);
3096 kunmap_atomic(userpage);
3097 }
3098 }
3099 while (cur <= end) {
3100 bool force_bio_submit = false;
3101 u64 offset;
3102
3103 if (cur >= last_byte) {
3104 char *userpage;
3105 struct extent_state *cached = NULL;
3106
3107 iosize = PAGE_SIZE - pg_offset;
3108 userpage = kmap_atomic(page);
3109 memset(userpage + pg_offset, 0, iosize);
3110 flush_dcache_page(page);
3111 kunmap_atomic(userpage);
3112 set_extent_uptodate(tree, cur, cur + iosize - 1,
3113 &cached, GFP_NOFS);
3114 unlock_extent_cached(tree, cur,
3115 cur + iosize - 1, &cached);
3116 break;
3117 }
3118 em = __get_extent_map(inode, page, pg_offset, cur,
3119 end - cur + 1, get_extent, em_cached);
3120 if (IS_ERR_OR_NULL(em)) {
3121 SetPageError(page);
3122 unlock_extent(tree, cur, end);
3123 break;
3124 }
3125 extent_offset = cur - em->start;
3126 BUG_ON(extent_map_end(em) <= cur);
3127 BUG_ON(end < cur);
3128
3129 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
3130 this_bio_flag |= EXTENT_BIO_COMPRESSED;
3131 extent_set_compress_type(&this_bio_flag,
3132 em->compress_type);
3133 }
3134
3135 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3136 cur_end = min(extent_map_end(em) - 1, end);
3137 iosize = ALIGN(iosize, blocksize);
3138 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
3139 disk_io_size = em->block_len;
3140 offset = em->block_start;
3141 } else {
3142 offset = em->block_start + extent_offset;
3143 disk_io_size = iosize;
3144 }
3145 bdev = em->bdev;
3146 block_start = em->block_start;
3147 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3148 block_start = EXTENT_MAP_HOLE;
3149
3150 /*
3151  * If we have a file range that points to a compressed extent
3152  * and it's followed by a consecutive file range that points to
3153  * the same compressed extent (possibly with a different offset
3154  * and/or length, so it either points to the whole extent or
3155  * only part of it), we must make sure we do not submit a
3156  * single bio to populate the pages for the 2 ranges because
3157  * this makes the compressed extent read zero out the pages
3158  * belonging to the 2nd range. Imagine the following scenario:
3159  *
3160  *  File layout
3161  *  [0 - 8K]                     [8K - 24K]
3162  *    |                               |
3163  *    |                               |
3164  * points to extent X,         points to extent X,
3165  * offset 4K, length of 8K     offset 0, length 16K
3166  *
3167  * [extent X, compressed length = 4K uncompressed length = 16K]
3168  *
3169  * If the bio to read the compressed extent covers both ranges,
3170  * it will decompress extent X into the pages belonging to the
3171  * first range and then it will stop, zeroing out the remaining
3172  * pages that belong to the other range that points to extent X.
3173  * So here we make sure we submit 2 bios, one for the first
3174  * range and another one for the second range. Both will target
3175  * the same physical extent from disk, but we can't currently
3176  * make the compressed bio endio callback populate the pages
3177  * for both ranges because each compressed bio is tightly
3178  * coupled with a single extent map, and each range can have
3179  * an extent map with a different offset value relative to the
3180  * uncompressed data of our extent and different lengths. This
3181  * is a corner case so we prioritize correctness over
3182  * non-optimal behavior (submitting 2 bios for the same extent).
3183  */
3184 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3185 prev_em_start && *prev_em_start != (u64)-1 &&
3186 *prev_em_start != em->start)
3187 force_bio_submit = true;
3188
3189 if (prev_em_start)
3190 *prev_em_start = em->start;
3191
3192 free_extent_map(em);
3193 em = NULL;
3194
3195 /* we've found a hole, just zero and go on */
3196 if (block_start == EXTENT_MAP_HOLE) {
3197 char *userpage;
3198 struct extent_state *cached = NULL;
3199
3200 userpage = kmap_atomic(page);
3201 memset(userpage + pg_offset, 0, iosize);
3202 flush_dcache_page(page);
3203 kunmap_atomic(userpage);
3204
3205 set_extent_uptodate(tree, cur, cur + iosize - 1,
3206 &cached, GFP_NOFS);
3207 unlock_extent_cached(tree, cur,
3208 cur + iosize - 1, &cached);
3209 cur = cur + iosize;
3210 pg_offset += iosize;
3211 continue;
3212 }
3213 /* the get_extent function already copied into the page */
3214 if (test_range_bit(tree, cur, cur_end,
3215 EXTENT_UPTODATE, 1, NULL)) {
3216 check_page_uptodate(tree, page);
3217 unlock_extent(tree, cur, cur + iosize - 1);
3218 cur = cur + iosize;
3219 pg_offset += iosize;
3220 continue;
3221 }
3222 /* we have an inline extent but it didn't get marked up
3223  * to date.  Error out
3224  */
3225 if (block_start == EXTENT_MAP_INLINE) {
3226 SetPageError(page);
3227 unlock_extent(tree, cur, cur + iosize - 1);
3228 cur = cur + iosize;
3229 pg_offset += iosize;
3230 continue;
3231 }
3232
3233 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
3234 page, offset, disk_io_size,
3235 pg_offset, bdev, bio,
3236 end_bio_extent_readpage, mirror_num,
3237 *bio_flags,
3238 this_bio_flag,
3239 force_bio_submit);
3240 if (!ret) {
3241 nr++;
3242 *bio_flags = this_bio_flag;
3243 } else {
3244 SetPageError(page);
3245 unlock_extent(tree, cur, cur + iosize - 1);
3246 goto out;
3247 }
3248 cur = cur + iosize;
3249 pg_offset += iosize;
3250 }
3251out:
3252 if (!nr) {
3253 if (!PageError(page))
3254 SetPageUptodate(page);
3255 unlock_page(page);
3256 }
3257 return ret;
3258}
3259
3260static inline void contiguous_readpages(struct extent_io_tree *tree,
3261 struct page *pages[], int nr_pages,
3262 u64 start, u64 end,
3263 struct extent_map **em_cached,
3264 struct bio **bio,
3265 unsigned long *bio_flags,
3266 u64 *prev_em_start)
3267{
3268 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
3269 int index;
3270
3271 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
3272
3273 for (index = 0; index < nr_pages; index++) {
3274 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
3275 bio, 0, bio_flags, REQ_RAHEAD, prev_em_start);
3276 put_page(pages[index]);
3277 }
3278}
3279
3280static int __extent_read_full_page(struct extent_io_tree *tree,
3281 struct page *page,
3282 get_extent_t *get_extent,
3283 struct bio **bio, int mirror_num,
3284 unsigned long *bio_flags,
3285 unsigned int read_flags)
3286{
3287 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
3288 u64 start = page_offset(page);
3289 u64 end = start + PAGE_SIZE - 1;
3290 int ret;
3291
3292 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
3293
3294 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3295 bio_flags, read_flags, NULL);
3296 return ret;
3297}
3298
3299int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3300 get_extent_t *get_extent, int mirror_num)
3301{
3302 struct bio *bio = NULL;
3303 unsigned long bio_flags = 0;
3304 int ret;
3305
3306 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3307 &bio_flags, 0);
3308 if (bio)
3309 ret = submit_one_bio(bio, mirror_num, bio_flags);
3310 return ret;
3311}
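/*
 * Illustrative sketch (not part of the original file or the build):
 * extent_read_full_page() is the single-page read entry point; a
 * hypothetical caller hands it a locked page and the inode's io tree.
 */
#if 0
static int example_readpage(struct page *page)
{
	struct extent_io_tree *tree =
		&BTRFS_I(page->mapping->host)->io_tree;

	/* mirror 0 lets the chunk layer pick any copy */
	return extent_read_full_page(tree, page, btrfs_get_extent, 0);
}
#endif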
3312
3313static void update_nr_written(struct writeback_control *wbc,
3314 unsigned long nr_written)
3315{
3316 wbc->nr_to_write -= nr_written;
3317}
3318
3319 /*
3320  * helper for __extent_writepage, doing all of the delayed allocation setup.
3321  *
3322  * This returns 1 if btrfs_run_delalloc_range function did all the work required
3323  * to write the page (copy into inline extent).  In this case the IO has
3324  * been started and the page is already unlocked.
3325  *
3326  * This returns 0 if all went well (page still locked)
3327  * This returns < 0 if there were errors (page still locked)
3328  */
3329static noinline_for_stack int writepage_delalloc(struct inode *inode,
3330 struct page *page, struct writeback_control *wbc,
3331 u64 delalloc_start, unsigned long *nr_written)
3332{
3333 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3334 bool found;
3335 u64 delalloc_to_write = 0;
3336 u64 delalloc_end = 0;
3337 int ret;
3338 int page_started = 0;
3339
3340
3341 while (delalloc_end < page_end) {
3342 found = find_lock_delalloc_range(inode, page,
3343 &delalloc_start,
3344 &delalloc_end);
3345 if (!found) {
3346 delalloc_start = delalloc_end + 1;
3347 continue;
3348 }
3349 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3350 delalloc_end, &page_started, nr_written, wbc);
3351 if (ret) {
3352 SetPageError(page);
3353 /*
3354  * btrfs_run_delalloc_range should return < 0 for error
3355  * but just to be safe, fall back to -EIO if it returned
3356  * something positive, so the page error is not silently
3357  * dropped.
3358  */
3359 ret = ret < 0 ? ret : -EIO;
3360 goto done;
3361 }
3362 /*
3363  * delalloc_end is already one less than the total length, so
3364  * we don't subtract one from PAGE_SIZE
3365  */
3366 delalloc_to_write += (delalloc_end - delalloc_start +
3367 PAGE_SIZE) >> PAGE_SHIFT;
3368 delalloc_start = delalloc_end + 1;
3369 }
3370 if (wbc->nr_to_write < delalloc_to_write) {
3371 int thresh = 8192;
3372
3373 if (delalloc_to_write < thresh * 2)
3374 thresh = delalloc_to_write;
3375 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3376 thresh);
3377 }
3378
3379 /* did the fill delalloc function already unlock and start
3380  * the IO?
3381  */
3382 if (page_started) {
3383 /*
3384  * We've unlocked the page, so we can't update the mapping's
3385  * writeback index, just update nr_to_write so the caller
3386  * knows how much progress was made.
3387  */
3388 wbc->nr_to_write -= *nr_written;
3389 return 1;
3390 }
3391
3392 ret = 0;
3393
3394done:
3395 return ret;
3396}
3397
3398 /*
3399  * helper for __extent_writepage.  This calls the writepage start hooks,
3400  * and does the loop to map the page into extents and bios.
3401  *
3402  * We return 1 if the IO is started and the page is unlocked,
3403  * 0 if all went well (page still locked)
3404  * < 0 if there were errors (page still locked)
3405  */
3406static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3407 struct page *page,
3408 struct writeback_control *wbc,
3409 struct extent_page_data *epd,
3410 loff_t i_size,
3411 unsigned long nr_written,
3412 unsigned int write_flags, int *nr_ret)
3413{
3414 struct extent_io_tree *tree = epd->tree;
3415 u64 start = page_offset(page);
3416 u64 page_end = start + PAGE_SIZE - 1;
3417 u64 end;
3418 u64 cur = start;
3419 u64 extent_offset;
3420 u64 block_start;
3421 u64 iosize;
3422 struct extent_map *em;
3423 struct block_device *bdev;
3424 size_t pg_offset = 0;
3425 size_t blocksize;
3426 int ret = 0;
3427 int nr = 0;
3428 bool compressed;
3429
3430 ret = btrfs_writepage_cow_fixup(page, start, page_end);
3431 if (ret) {
3432 /* Fixup worker will requeue */
3433 if (ret == -EBUSY)
3434 wbc->pages_skipped++;
3435 else
3436 redirty_page_for_writepage(wbc, page);
3437
3438 update_nr_written(wbc, nr_written);
3439 unlock_page(page);
3440 return 1;
3441 }
3442
3443 /*
3444  * we don't want to touch the inode after unlocking the page,
3445  * so we update the mapping writeback index now
3446  */
3447 update_nr_written(wbc, nr_written + 1);
3448
3449 end = page_end;
3450 if (i_size <= start) {
3451 btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
3452 goto done;
3453 }
3454
3455 blocksize = inode->i_sb->s_blocksize;
3456
3457 while (cur <= end) {
3458 u64 em_end;
3459 u64 offset;
3460
3461 if (cur >= i_size) {
3462 btrfs_writepage_endio_finish_ordered(page, cur,
3463 page_end, 1);
3464 break;
3465 }
3466 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
3467 end - cur + 1, 1);
3468 if (IS_ERR_OR_NULL(em)) {
3469 SetPageError(page);
3470 ret = PTR_ERR_OR_ZERO(em);
3471 break;
3472 }
3473
3474 extent_offset = cur - em->start;
3475 em_end = extent_map_end(em);
3476 BUG_ON(em_end <= cur);
3477 BUG_ON(end < cur);
3478 iosize = min(em_end - cur, end - cur + 1);
3479 iosize = ALIGN(iosize, blocksize);
3480 offset = em->block_start + extent_offset;
3481 bdev = em->bdev;
3482 block_start = em->block_start;
3483 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3484 free_extent_map(em);
3485 em = NULL;
3486
3487 /*
3488  * compressed and inline extents are written through other
3489  * paths in the FS
3490  */
3491 if (compressed || block_start == EXTENT_MAP_HOLE ||
3492 block_start == EXTENT_MAP_INLINE) {
3493 /*
3494  * end_io notification does not happen here for
3495  * compressed extents
3496  */
3497 if (!compressed)
3498 btrfs_writepage_endio_finish_ordered(page, cur,
3499 cur + iosize - 1,
3500 1);
3501 else if (compressed) {
3502 /* we don't want to end_page_writeback on
3503  * a compressed extent.  this happens
3504  * elsewhere
3505  */
3506 nr++;
3507 }
3508
3509 cur += iosize;
3510 pg_offset += iosize;
3511 continue;
3512 }
3513
3514 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
3515 if (!PageWriteback(page)) {
3516 btrfs_err(BTRFS_I(inode)->root->fs_info,
3517 "page %lu not writeback, cur %llu end %llu",
3518 page->index, cur, end);
3519 }
3520
3521 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3522 page, offset, iosize, pg_offset,
3523 bdev, &epd->bio,
3524 end_bio_extent_writepage,
3525 0, 0, 0, false);
3526 if (ret) {
3527 SetPageError(page);
3528 if (PageWriteback(page))
3529 end_page_writeback(page);
3530 }
3531
3532 cur = cur + iosize;
3533 pg_offset += iosize;
3534 nr++;
3535 }
3536done:
3537 *nr_ret = nr;
3538 return ret;
3539}
3540
3541 /*
3542  * the writepage semantics are similar to regular writepage.  extent
3543  * records are inserted to lock ranges in the tree, and as dirty areas
3544  * are found, they are marked writeback.  Then the lock bits are removed
3545  * and the end_io handler clears the writeback ranges
3546  *
3547  * Return 0 if everything goes well.
3548  * Return <0 for error.
3549  */
3550static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3551 struct extent_page_data *epd)
3552{
3553 struct inode *inode = page->mapping->host;
3554 u64 start = page_offset(page);
3555 u64 page_end = start + PAGE_SIZE - 1;
3556 int ret;
3557 int nr = 0;
3558 size_t pg_offset = 0;
3559 loff_t i_size = i_size_read(inode);
3560 unsigned long end_index = i_size >> PAGE_SHIFT;
3561 unsigned int write_flags = 0;
3562 unsigned long nr_written = 0;
3563
3564 write_flags = wbc_to_write_flags(wbc);
3565
3566 trace___extent_writepage(page, inode, wbc);
3567
3568 WARN_ON(!PageLocked(page));
3569
3570 ClearPageError(page);
3571
3572 pg_offset = offset_in_page(i_size);
3573 if (page->index > end_index ||
3574 (page->index == end_index && !pg_offset)) {
3575 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3576 unlock_page(page);
3577 return 0;
3578 }
3579
3580 if (page->index == end_index) {
3581 char *userpage;
3582
3583 userpage = kmap_atomic(page);
3584 memset(userpage + pg_offset, 0,
3585 PAGE_SIZE - pg_offset);
3586 kunmap_atomic(userpage);
3587 flush_dcache_page(page);
3588 }
3589
3590 pg_offset = 0;
3591
3592 set_page_extent_mapped(page);
3593
3594 if (!epd->extent_locked) {
3595 ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
3596 if (ret == 1)
3597 goto done_unlocked;
3598 if (ret)
3599 goto done;
3600 }
3601
3602 ret = __extent_writepage_io(inode, page, wbc, epd,
3603 i_size, nr_written, write_flags, &nr);
3604 if (ret == 1)
3605 goto done_unlocked;
3606
3607done:
3608 if (nr == 0) {
3609 /* make sure the mapping tag for page dirty gets cleared */
3610 set_page_writeback(page);
3611 end_page_writeback(page);
3612 }
3613 if (PageError(page)) {
3614 ret = ret < 0 ? ret : -EIO;
3615 end_extent_writepage(page, ret, start, page_end);
3616 }
3617 unlock_page(page);
3618 ASSERT(ret <= 0);
3619 return ret;
3620
3621done_unlocked:
3622 return 0;
3623}
3624
3625void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3626{
3627 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3628 TASK_UNINTERRUPTIBLE);
3629}
3630
3631static void end_extent_buffer_writeback(struct extent_buffer *eb)
3632{
3633 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3634 smp_mb__after_atomic();
3635 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3636}
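/*
 * Illustrative sketch (not part of the original file or the build): the
 * waiter side pairing with the wake above. wait_on_bit_io() sleeps until
 * the WRITEBACK bit is cleared; the smp_mb__after_atomic() in
 * end_extent_buffer_writeback() orders the clear before the wake so a
 * waiter cannot miss it. The function name is hypothetical.
 */
#if 0
static void example_wait_for_eb(struct extent_buffer *eb)
{
	if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
		wait_on_extent_buffer_writeback(eb);
	/* writeback on this eb is guaranteed finished here */
}
#endif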
3637
3638 /*
3639  * Lock extent buffer pages, flushing our bio first if we can't get the
3640  * tree lock without blocking.
3641  *
3642  * Return 0 if the buffer was clean and should be skipped, >0 if it was
3643  * dirty and is now locked for writeback, <0 on error (no pages locked).
3644  */
3645static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
3646 struct extent_page_data *epd)
3647{
3648 struct btrfs_fs_info *fs_info = eb->fs_info;
3649 int i, num_pages, failed_page_nr;
3650 int flush = 0;
3651 int ret = 0;
3652
3653 if (!btrfs_try_tree_write_lock(eb)) {
3654 ret = flush_write_bio(epd);
3655 if (ret < 0)
3656 return ret;
3657 flush = 1;
3658 btrfs_tree_lock(eb);
3659 }
3660
3661 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3662 btrfs_tree_unlock(eb);
3663 if (!epd->sync_io)
3664 return 0;
3665 if (!flush) {
3666 ret = flush_write_bio(epd);
3667 if (ret < 0)
3668 return ret;
3669 flush = 1;
3670 }
3671 while (1) {
3672 wait_on_extent_buffer_writeback(eb);
3673 btrfs_tree_lock(eb);
3674 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3675 break;
3676 btrfs_tree_unlock(eb);
3677 }
3678 }
3679
3680 /*
3681  * We need to do this to prevent races in people who check if the eb is
3682  * under IO since we can end up having no IO bits set for a short period
3683  * of time.
3684  */
3685 spin_lock(&eb->refs_lock);
3686 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3687 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3688 spin_unlock(&eb->refs_lock);
3689 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3690 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3691 -eb->len,
3692 fs_info->dirty_metadata_batch);
3693 ret = 1;
3694 } else {
3695 spin_unlock(&eb->refs_lock);
3696 }
3697
3698 btrfs_tree_unlock(eb);
3699
3700 if (!ret)
3701 return ret;
3702
3703 num_pages = num_extent_pages(eb);
3704 for (i = 0; i < num_pages; i++) {
3705 struct page *p = eb->pages[i];
3706
3707 if (!trylock_page(p)) {
3708 if (!flush) {
3709 int err;
3710
3711 err = flush_write_bio(epd);
3712 if (err < 0) {
3713 ret = err;
3714 failed_page_nr = i;
3715 goto err_unlock;
3716 }
3717 flush = 1;
3718 }
3719 lock_page(p);
3720 }
3721 }
3722
3723 return ret;
3724err_unlock:
3725 /* Unlock already locked pages */
3726 for (i = 0; i < failed_page_nr; i++)
3727 unlock_page(eb->pages[i]);
3728
3729 /*
3730  * Writeback never started for this eb, so undo what we did above:
3731  * restore the dirty bit and accounting and clear the written flag.
3732  */
3733 btrfs_tree_lock(eb);
3734 spin_lock(&eb->refs_lock);
3735 set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
3736 end_extent_buffer_writeback(eb);
3737 spin_unlock(&eb->refs_lock);
3738 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
3739 fs_info->dirty_metadata_batch);
3740 btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3741 btrfs_tree_unlock(eb);
3742 return ret;
3743}
3744
3745static void set_btree_ioerr(struct page *page)
3746{
3747 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3748
3749 SetPageError(page);
3750 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3751 return;
3752 /*
3753  * If writeback for a btree extent that doesn't belong to a log tree
3754  * failed, we must not let the transaction commit believe everything
3755  * went fine. While the transaction is running and before it commits
3756  * (when we call filemap_fdata[write|wait]_range against the btree
3757  * inode), the VM might call writepages() on the btree inode's mapping.
3758  * If it returns an error, or an error happens during writeback, then
3759  * by commit time we wouldn't know about it, since the pages can be no
3760  * longer dirty nor tagged for writeback (if a subsequent modification
3761  * to the extent buffer didn't happen before the transaction commit),
3762  * which makes filemap_fdata[write|wait]_range unable to find the pages
3763  * tagged with SetPageError at transaction commit time. So if this
3764  * happens we must abort the transaction, otherwise we commit a super
3765  * block with btree roots that point to btree nodes/leafs whose content
3766  * on disk is invalid - either garbage or the content of some node/leaf
3767  * from a past generation that got cowed or deleted and is no longer
3768  * valid.
3769  *
3770  * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would
3771  * not be enough - we need to distinguish between log tree extents vs
3772  * non-log tree extents, and the next filemap_fdatawait_range() call
3773  * will catch and clear such errors in the mapping - and that call
3774  * might be from a log sync and not from a transaction commit. Also,
3775  * checking for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction
3776  * commit time is not done and would not be reliable - the eb might
3777  * have been released from memory and reading it back again means that
3778  * flag would not be set (since it's a runtime flag, not persisted on
3779  * disk).
3780  *
3781  * Using the flags below in the btree inode also makes us achieve the
3782  * goal of AS_EIO/AS_ENOSPC even when writepages() returns success:
3783  * if writeback for all dirty pages had already finished with errors
3784  * before filemap_fdatawait_range() is called, that call would return
3785  * success since it could not know that writeback errors happened (the
3786  * pages were no longer tagged for writeback), while without these
3787  * flags the errors would be lost. The bits are checked and cleared at
3788  * transaction commit and log sync time, distinguishing log trees from
3789  * the other trees via eb->log_index.
3790  */
3791 switch (eb->log_index) {
3792 case -1:
3793 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3794 break;
3795 case 0:
3796 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3797 break;
3798 case 1:
3799 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3800 break;
3801 default:
3802 BUG();
3803 }
3804}
3805
3806static void end_bio_extent_buffer_writepage(struct bio *bio)
3807{
3808 struct bio_vec *bvec;
3809 struct extent_buffer *eb;
3810 int done;
3811 struct bvec_iter_all iter_all;
3812
3813 ASSERT(!bio_flagged(bio, BIO_CLONED));
3814 bio_for_each_segment_all(bvec, bio, iter_all) {
3815 struct page *page = bvec->bv_page;
3816
3817 eb = (struct extent_buffer *)page->private;
3818 BUG_ON(!eb);
3819 done = atomic_dec_and_test(&eb->io_pages);
3820
3821 if (bio->bi_status ||
3822 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3823 ClearPageUptodate(page);
3824 set_btree_ioerr(page);
3825 }
3826
3827 end_page_writeback(page);
3828
3829 if (!done)
3830 continue;
3831
3832 end_extent_buffer_writeback(eb);
3833 }
3834
3835 bio_put(bio);
3836}
3837
3838static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3839 struct writeback_control *wbc,
3840 struct extent_page_data *epd)
3841{
3842 struct btrfs_fs_info *fs_info = eb->fs_info;
3843 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3844 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3845 u64 offset = eb->start;
3846 u32 nritems;
3847 int i, num_pages;
3848 unsigned long start, end;
3849 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3850 int ret = 0;
3851
3852 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3853 num_pages = num_extent_pages(eb);
3854 atomic_set(&eb->io_pages, num_pages);
3855
3856 /* set btree blocks beyond nritems with 0 to avoid stale content. */
3857 nritems = btrfs_header_nritems(eb);
3858 if (btrfs_header_level(eb) > 0) {
3859 end = btrfs_node_key_ptr_offset(nritems);
3860
3861 memzero_extent_buffer(eb, end, eb->len - end);
3862 } else {
3863 /*
3864  * leaf:
3865  * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
3866  */
3867 start = btrfs_item_nr_offset(nritems);
3868 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
3869 memzero_extent_buffer(eb, start, end - start);
3870 }
3871
3872 for (i = 0; i < num_pages; i++) {
3873 struct page *p = eb->pages[i];
3874
3875 clear_page_dirty_for_io(p);
3876 set_page_writeback(p);
3877 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3878 p, offset, PAGE_SIZE, 0, bdev,
3879 &epd->bio,
3880 end_bio_extent_buffer_writepage,
3881 0, 0, 0, false);
3882 if (ret) {
3883 set_btree_ioerr(p);
3884 if (PageWriteback(p))
3885 end_page_writeback(p);
3886 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3887 end_extent_buffer_writeback(eb);
3888 ret = -EIO;
3889 break;
3890 }
3891 offset += PAGE_SIZE;
3892 update_nr_written(wbc, 1);
3893 unlock_page(p);
3894 }
3895
3896 if (unlikely(ret)) {
3897 for (; i < num_pages; i++) {
3898 struct page *p = eb->pages[i];
3899 clear_page_dirty_for_io(p);
3900 unlock_page(p);
3901 }
3902 }
3903
3904 return ret;
3905}
3906
3907int btree_write_cache_pages(struct address_space *mapping,
3908 struct writeback_control *wbc)
3909{
3910 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3911 struct extent_buffer *eb, *prev_eb = NULL;
3912 struct extent_page_data epd = {
3913 .bio = NULL,
3914 .tree = tree,
3915 .extent_locked = 0,
3916 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3917 };
3918 int ret = 0;
3919 int done = 0;
3920 int nr_to_write_done = 0;
3921 struct pagevec pvec;
3922 int nr_pages;
3923 pgoff_t index;
3924 pgoff_t end;
3925 int scanned = 0;
3926 xa_mark_t tag;
3927
3928 pagevec_init(&pvec);
3929 if (wbc->range_cyclic) {
3930 index = mapping->writeback_index;
3931 end = -1;
3932 } else {
3933 index = wbc->range_start >> PAGE_SHIFT;
3934 end = wbc->range_end >> PAGE_SHIFT;
3935 scanned = 1;
3936 }
3937 if (wbc->sync_mode == WB_SYNC_ALL)
3938 tag = PAGECACHE_TAG_TOWRITE;
3939 else
3940 tag = PAGECACHE_TAG_DIRTY;
3941retry:
3942 if (wbc->sync_mode == WB_SYNC_ALL)
3943 tag_pages_for_writeback(mapping, index, end);
3944 while (!done && !nr_to_write_done && (index <= end) &&
3945 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
3946 tag))) {
3947 unsigned i;
3948
3949 scanned = 1;
3950 for (i = 0; i < nr_pages; i++) {
3951 struct page *page = pvec.pages[i];
3952
3953 if (!PagePrivate(page))
3954 continue;
3955
3956 spin_lock(&mapping->private_lock);
3957 if (!PagePrivate(page)) {
3958 spin_unlock(&mapping->private_lock);
3959 continue;
3960 }
3961
3962 eb = (struct extent_buffer *)page->private;
3963
3964 /*
3965  * Shouldn't happen and normally this would be a BUG_ON
3966  * but no sense in crashing the users box for something
3967  * we can survive anyway.
3968  */
3969 if (WARN_ON(!eb)) {
3970 spin_unlock(&mapping->private_lock);
3971 continue;
3972 }
3973
3974 if (eb == prev_eb) {
3975 spin_unlock(&mapping->private_lock);
3976 continue;
3977 }
3978
3979 ret = atomic_inc_not_zero(&eb->refs);
3980 spin_unlock(&mapping->private_lock);
3981 if (!ret)
3982 continue;
3983
3984 prev_eb = eb;
3985 ret = lock_extent_buffer_for_io(eb, &epd);
3986 if (!ret) {
3987 free_extent_buffer(eb);
3988 continue;
3989 }
3990
3991 ret = write_one_eb(eb, wbc, &epd);
3992 if (ret) {
3993 done = 1;
3994 free_extent_buffer(eb);
3995 break;
3996 }
3997 free_extent_buffer(eb);
3998
3999 /*
4000  * the filesystem may choose to bump up nr_to_write.
4001  * We have to make sure to honor the new nr_to_write
4002  * at any time
4003  */
4004 nr_to_write_done = wbc->nr_to_write <= 0;
4005 }
4006 pagevec_release(&pvec);
4007 cond_resched();
4008 }
4009 if (!scanned && !done) {
4010 /*
4011  * We hit the last page and there is more work to be done: wrap
4012  * around to the start of the file
4013  */
4014 scanned = 1;
4015 index = 0;
4016 goto retry;
4017 }
4018 ASSERT(ret <= 0);
4019 if (ret < 0) {
4020 end_write_bio(&epd, ret);
4021 return ret;
4022 }
4023 ret = flush_write_bio(&epd);
4024 return ret;
4025}
4026
4027 /**
4028  * extent_write_cache_pages - walk the list of dirty pages of the given
4029  * address space and write all of them.
4030  * @mapping: address space structure to write
4031  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
4032  * @epd: holds context for the write, namely the bio
4033  *
4034  * If a page is already under I/O, we skip it, even if it's dirty.
4035  * This is desirable behaviour for memory-cleaning writeback, but it is
4036  * INCORRECT for data-integrity system calls such as fsync(): fsync()
4037  * and msync() must guarantee that all data that was dirty at the time
4038  * of the call gets new I/O started against it, so WB_SYNC_ALL mode
4039  * waits for in-flight IO to complete before writing the page.
4040  */
4041static int extent_write_cache_pages(struct address_space *mapping,
4042 struct writeback_control *wbc,
4043 struct extent_page_data *epd)
4044{
4045 struct inode *inode = mapping->host;
4046 int ret = 0;
4047 int done = 0;
4048 int nr_to_write_done = 0;
4049 struct pagevec pvec;
4050 int nr_pages;
4051 pgoff_t index;
4052 pgoff_t end;
4053 pgoff_t done_index;
4054 int range_whole = 0;
4055 int scanned = 0;
4056 xa_mark_t tag;
4057
4058 /*
4059  * We have to hold onto the inode so that ordered extents can do their
4060  * work when the IO finishes.  The alternative to this is failing to add
4061  * an ordered extent if the igrab() fails there and that is a huge pain
4062  * to deal with, so instead just hold onto the inode throughout the
4063  * writepages operation.  If it fails here we are freeing up the inode
4064  * anyway and we'd rather not waste our time writing out stuff that is
4065  * going to be truncated anyway.
4066  */
4067 if (!igrab(inode))
4068 return 0;
4069
4070 pagevec_init(&pvec);
4071 if (wbc->range_cyclic) {
4072 index = mapping->writeback_index;
4073 end = -1;
4074 } else {
4075 index = wbc->range_start >> PAGE_SHIFT;
4076 end = wbc->range_end >> PAGE_SHIFT;
4077 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4078 range_whole = 1;
4079 scanned = 1;
4080 }
4081
4082 /*
4083  * We do the tagged writepage as long as the snapshot flush bit is set
4084  * and we are the first one who do the filemap_flush() on this inode.
4085  *
4086  * The nr_to_write == LONG_MAX is needed to make sure other flushers do
4087  * not race in and drop the bit.
4088  */
4089 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4090 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4091 &BTRFS_I(inode)->runtime_flags))
4092 wbc->tagged_writepages = 1;
4093
4094 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4095 tag = PAGECACHE_TAG_TOWRITE;
4096 else
4097 tag = PAGECACHE_TAG_DIRTY;
4098retry:
4099 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
4100 tag_pages_for_writeback(mapping, index, end);
4101 done_index = index;
4102 while (!done && !nr_to_write_done && (index <= end) &&
4103 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4104 &index, end, tag))) {
4105 unsigned i;
4106
4107 scanned = 1;
4108 for (i = 0; i < nr_pages; i++) {
4109 struct page *page = pvec.pages[i];
4110
4111 done_index = page->index;
4112 /*
4113  * At this point we hold neither the i_pages lock nor
4114  * the page lock: the page may be truncated or
4115  * invalidated (changing page->mapping to NULL),
4116  * or even swizzled back from swapper_space to
4117  * tmpfs file mapping
4118  */
4119 if (!trylock_page(page)) {
4120 ret = flush_write_bio(epd);
4121 BUG_ON(ret < 0);
4122 lock_page(page);
4123 }
4124
4125 if (unlikely(page->mapping != mapping)) {
4126 unlock_page(page);
4127 continue;
4128 }
4129
4130 if (wbc->sync_mode != WB_SYNC_NONE) {
4131 if (PageWriteback(page)) {
4132 ret = flush_write_bio(epd);
4133 BUG_ON(ret < 0);
4134 }
4135 wait_on_page_writeback(page);
4136 }
4137
4138 if (PageWriteback(page) ||
4139 !clear_page_dirty_for_io(page)) {
4140 unlock_page(page);
4141 continue;
4142 }
4143
4144 ret = __extent_writepage(page, wbc, epd);
4145 if (ret < 0) {
4146 /*
4147  * done_index is set past this page,
4148  * so media errors will not choke
4149  * background writeout for the entire
4150  * file. This has consequences for
4151  * range_cyclic semantics (ie. it may
4152  * not be suitable for data integrity
4153  * writeout).
4154  */
4155 done_index = page->index + 1;
4156 done = 1;
4157 break;
4158 }
4159
4160 /*
4161  * the filesystem may choose to bump up nr_to_write.
4162  * We have to make sure to honor the new nr_to_write
4163  * at any time
4164  */
4165 nr_to_write_done = wbc->nr_to_write <= 0;
4166 }
4167 pagevec_release(&pvec);
4168 cond_resched();
4169 }
4170 if (!scanned && !done) {
4171 /*
4172  * We hit the last page and there is more work to be done: wrap
4173  * around to the start of the file
4174  */
4175 scanned = 1;
4176 index = 0;
4177 goto retry;
4178 }
4179
4180 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4181 mapping->writeback_index = done_index;
4182
4183 btrfs_add_delayed_iput(inode);
4184 return ret;
4185}
4186
4187int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4188{
4189 int ret;
4190 struct extent_page_data epd = {
4191 .bio = NULL,
4192 .tree = &BTRFS_I(page->mapping->host)->io_tree,
4193 .extent_locked = 0,
4194 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4195 };
4196
4197 ret = __extent_writepage(page, wbc, &epd);
4198 ASSERT(ret <= 0);
4199 if (ret < 0) {
4200 end_write_bio(&epd, ret);
4201 return ret;
4202 }
4203
4204 ret = flush_write_bio(&epd);
4205 ASSERT(ret <= 0);
4206 return ret;
4207}
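/*
 * Illustrative sketch (not part of the original file or the build): forcing
 * a synchronous write of one page through extent_write_full_page() by
 * building a WB_SYNC_ALL writeback_control. The function name is
 * hypothetical.
 */
#if 0
static int example_sync_one_page(struct page *page)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	/* page must be locked and dirty-cleared by the caller's path */
	return extent_write_full_page(page, &wbc);
}
#endif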
4208
4209int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4210 int mode)
4211{
4212 int ret = 0;
4213 struct address_space *mapping = inode->i_mapping;
4214 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
4215 struct page *page;
4216 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4217 PAGE_SHIFT;
4218
4219 struct extent_page_data epd = {
4220 .bio = NULL,
4221 .tree = tree,
4222 .extent_locked = 1,
4223 .sync_io = mode == WB_SYNC_ALL,
4224 };
4225 struct writeback_control wbc_writepages = {
4226 .sync_mode = mode,
4227 .nr_to_write = nr_pages * 2,
4228 .range_start = start,
4229 .range_end = end + 1,
4230 };
4231
4232 while (start <= end) {
4233 page = find_get_page(mapping, start >> PAGE_SHIFT);
4234 if (clear_page_dirty_for_io(page))
4235 ret = __extent_writepage(page, &wbc_writepages, &epd);
4236 else {
4237 btrfs_writepage_endio_finish_ordered(page, start,
4238 start + PAGE_SIZE - 1, 1);
4239 unlock_page(page);
4240 }
4241 put_page(page);
4242 start += PAGE_SIZE;
4243 }
4244
4245 ASSERT(ret <= 0);
4246 if (ret < 0) {
4247 end_write_bio(&epd, ret);
4248 return ret;
4249 }
4250 ret = flush_write_bio(&epd);
4251 return ret;
4252}
4253
4254int extent_writepages(struct address_space *mapping,
4255 struct writeback_control *wbc)
4256{
4257 int ret = 0;
4258 struct extent_page_data epd = {
4259 .bio = NULL,
4260 .tree = &BTRFS_I(mapping->host)->io_tree,
4261 .extent_locked = 0,
4262 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4263 };
4264
4265 ret = extent_write_cache_pages(mapping, wbc, &epd);
4266 ASSERT(ret <= 0);
4267 if (ret < 0) {
4268 end_write_bio(&epd, ret);
4269 return ret;
4270 }
4271 ret = flush_write_bio(&epd);
4272 return ret;
4273}
4274
4275int extent_readpages(struct address_space *mapping, struct list_head *pages,
4276 unsigned nr_pages)
4277{
4278 struct bio *bio = NULL;
4279 unsigned long bio_flags = 0;
4280 struct page *pagepool[16];
4281 struct extent_map *em_cached = NULL;
4282 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
4283 int nr = 0;
4284 u64 prev_em_start = (u64)-1;
4285
4286 while (!list_empty(pages)) {
4287 u64 contig_end = 0;
4288
4289 for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
4290 struct page *page = lru_to_page(pages);
4291
4292 prefetchw(&page->flags);
4293 list_del(&page->lru);
4294 if (add_to_page_cache_lru(page, mapping, page->index,
4295 readahead_gfp_mask(mapping))) {
4296 put_page(page);
4297 break;
4298 }
4299
4300 pagepool[nr++] = page;
4301 contig_end = page_offset(page) + PAGE_SIZE - 1;
4302 }
4303
4304 if (nr) {
4305 u64 contig_start = page_offset(pagepool[0]);
4306
4307 ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
4308
4309 contiguous_readpages(tree, pagepool, nr, contig_start,
4310 contig_end, &em_cached, &bio, &bio_flags,
4311 &prev_em_start);
4312 }
4313 }
4314
4315 if (em_cached)
4316 free_extent_map(em_cached);
4317
4318 if (bio)
4319 return submit_one_bio(bio, 0, bio_flags);
4320 return 0;
4321}
4322
4323 /*
4324  * basic invalidatepage code, this waits on any locked or writeback
4325  * ranges corresponding to the page, and then deletes any extent state
4326  * records from the tree
4327  */
4328int extent_invalidatepage(struct extent_io_tree *tree,
4329 struct page *page, unsigned long offset)
4330{
4331 struct extent_state *cached_state = NULL;
4332 u64 start = page_offset(page);
4333 u64 end = start + PAGE_SIZE - 1;
4334 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4335
4336 start += ALIGN(offset, blocksize);
4337 if (start > end)
4338 return 0;
4339
4340 lock_extent_bits(tree, start, end, &cached_state);
4341 wait_on_page_writeback(page);
4342 clear_extent_bit(tree, start, end,
4343 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4344 EXTENT_DO_ACCOUNTING,
4345 1, 1, &cached_state);
4346 return 0;
4347}
4348
4349 /*
4350  * a helper for releasepage, this tests for areas of the page that
4351  * are locked or under IO and drops the related state bits if it is safe
4352  * to drop the page
4353  */
4354static int try_release_extent_state(struct extent_io_tree *tree,
4355 struct page *page, gfp_t mask)
4356{
4357 u64 start = page_offset(page);
4358 u64 end = start + PAGE_SIZE - 1;
4359 int ret = 1;
4360
4361 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
4362 ret = 0;
4363 } else {
4364 /*
4365  * at this point we can safely clear everything except the
4366  * locked bit and the nodatasum bit
4367  */
4368 ret = __clear_extent_bit(tree, start, end,
4369 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4370 0, 0, NULL, mask, NULL);
4371
4372 /* if clear_extent_bit failed for enomem reasons,
4373  * we can't allow the release to continue.
4374  */
4375 if (ret < 0)
4376 ret = 0;
4377 else
4378 ret = 1;
4379 }
4380 return ret;
4381}
4382
4383 /*
4384  * a helper for releasepage.  As long as there are no locked extents
4385  * in the range corresponding to the page, both state records and extent
4386  * map records are removed
4387  */
4388int try_release_extent_mapping(struct page *page, gfp_t mask)
4389{
4390 struct extent_map *em;
4391 u64 start = page_offset(page);
4392 u64 end = start + PAGE_SIZE - 1;
4393 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4394 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4395 struct extent_map_tree *map = &btrfs_inode->extent_tree;
4396
4397 if (gfpflags_allow_blocking(mask) &&
4398 page->mapping->host->i_size > SZ_16M) {
4399 u64 len;
4400 while (start <= end) {
4401 len = end - start + 1;
4402 write_lock(&map->lock);
4403 em = lookup_extent_mapping(map, start, len);
4404 if (!em) {
4405 write_unlock(&map->lock);
4406 break;
4407 }
4408 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4409 em->start != start) {
4410 write_unlock(&map->lock);
4411 free_extent_map(em);
4412 break;
4413 }
4414 if (!test_range_bit(tree, em->start,
4415 extent_map_end(em) - 1,
4416 EXTENT_LOCKED, 0, NULL)) {
4417 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4418 &btrfs_inode->runtime_flags);
4419 remove_extent_mapping(map, em);
4420 /* once for the rb tree */
4421 free_extent_map(em);
4422 }
4423 start = extent_map_end(em);
4424 write_unlock(&map->lock);
4425
4426 /* once for us */
4427 free_extent_map(em);
4428 }
4429 }
4430 return try_release_extent_state(tree, page, mask);
4431}
4432
4433 /*
4434  * helper function for fiemap, which doesn't want to see any holes.
4435  * This maps until we find something past 'last'
4436  */
4437static struct extent_map *get_extent_skip_holes(struct inode *inode,
4438 u64 offset, u64 last)
4439{
4440 u64 sectorsize = btrfs_inode_sectorsize(inode);
4441 struct extent_map *em;
4442 u64 len;
4443
4444 if (offset >= last)
4445 return NULL;
4446
4447 while (1) {
4448 len = last - offset;
4449 if (len == 0)
4450 break;
4451 len = ALIGN(len, sectorsize);
4452 em = btrfs_get_extent_fiemap(BTRFS_I(inode), offset, len);
4453 if (IS_ERR_OR_NULL(em))
4454 return em;
4455
4456 /* if this isn't a hole return it */
4457 if (em->block_start != EXTENT_MAP_HOLE)
4458 return em;
4459
4460 /* this is a hole, advance to the next extent */
4461 offset = extent_map_end(em);
4462 free_extent_map(em);
4463 if (offset >= last)
4464 break;
4465 }
4466 return NULL;
4467}
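/*
 * Illustrative sketch (not part of the original file or the build): the
 * fiemap code below uses get_extent_skip_holes() as a cursor; each call
 * resumes at the end of the previous mapping, so holes never produce an
 * extent record. The function name is hypothetical.
 */
#if 0
static void example_walk_mappings(struct inode *inode, u64 last)
{
	u64 off = 0;
	struct extent_map *em;

	while ((em = get_extent_skip_holes(inode, off, last)) != NULL) {
		if (IS_ERR(em))
			break;
		off = extent_map_end(em);	/* advance past this extent */
		free_extent_map(em);
	}
}
#endif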
4468
4469 /*
4470  * To cache previous fiemap extent
4471  *
4472  * Will be used for merging fiemap extent
4473  */
4474struct fiemap_cache {
4475 u64 offset;
4476 u64 phys;
4477 u64 len;
4478 u32 flags;
4479 bool cached;
4480};
4481
4482 /*
4483  * Helper to submit fiemap extent.
4484  *
4485  * Will try to merge current fiemap extent specified by @offset, @phys,
4486  * @len and @flags with cached one.
4487  * And only when we fail to merge, cached one will be submitted as
4488  * fiemap extent.
4489  *
4490  * Return value is the same as fiemap_fill_next_extent().
4491  */
4492static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4493 struct fiemap_cache *cache,
4494 u64 offset, u64 phys, u64 len, u32 flags)
4495{
4496 int ret = 0;
4497
4498 if (!cache->cached)
4499 goto assign;
4500
4501 /*
4502  * Sanity check, extent_fiemap() should have ensured that new
4503  * fiemap extent won't overlap with cached one.
4504  * Not recoverable.
4505  *
4506  * NOTE: Physical address can overlap, due to compression
4507  */
4508 if (cache->offset + cache->len > offset) {
4509 WARN_ON(1);
4510 return -EINVAL;
4511 }
4512
4513 /*
4514  * Only merges fiemap extents if
4515  * 1) Their logical addresses are continuous
4516  *
4517  * 2) Their physical addresses are continuous
4518  *    So truly compressed (physical size smaller than logical size)
4519  *    extents won't get merged with each other
4520  *
4521  * 3) Share same flags except FIEMAP_EXTENT_LAST
4522  *    So regular extent won't get merged with prealloc extent
4523  */
4524 if (cache->offset + cache->len == offset &&
4525 cache->phys + cache->len == phys &&
4526 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4527 (flags & ~FIEMAP_EXTENT_LAST)) {
4528 cache->len += len;
4529 cache->flags |= flags;
4530 goto try_submit_last;
4531 }
4532
4533 /* Not mergeable, need to submit cached one */
4534 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4535 cache->len, cache->flags);
4536 cache->cached = false;
4537 if (ret)
4538 return ret;
4539assign:
4540 cache->cached = true;
4541 cache->offset = offset;
4542 cache->phys = phys;
4543 cache->len = len;
4544 cache->flags = flags;
4545try_submit_last:
4546 if (cache->flags & FIEMAP_EXTENT_LAST) {
4547 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4548 cache->phys, cache->len, cache->flags);
4549 cache->cached = false;
4550 }
4551 return ret;
4552}
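/*
 * Illustrative sketch (not part of the original file or the build): two
 * logically and physically contiguous 4K extents with identical flags merge
 * into a single 8K record; a third extent with different flags forces the
 * cached record out through fiemap_fill_next_extent(). All offsets and the
 * function name are hypothetical; emit_last_fiemap_cache() is defined just
 * below.
 */
#if 0
static void example_fiemap_merge(struct fiemap_extent_info *fieinfo)
{
	struct fiemap_cache cache = { 0 };

	emit_fiemap_extent(fieinfo, &cache, 0, 1048576, 4096, 0);
	/* contiguous and same flags: cache grows to offset 0, len 8192 */
	emit_fiemap_extent(fieinfo, &cache, 4096, 1052672, 4096, 0);
	/* different flags: the 8K record is submitted, this one is cached */
	emit_fiemap_extent(fieinfo, &cache, 8192, 1056768, 4096,
			   FIEMAP_EXTENT_UNWRITTEN);
	/* flush whatever is still cached */
	emit_last_fiemap_cache(fieinfo, &cache);
}
#endif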
4553
4554
4555 /*
4556  * Emit the last cached fiemap extent. It may still be cached when the
4557  * fiemap range ends inside the first extent:
4558  * 0		      4k		    8k
4559  * |<- Fiemap range ->|
4560  * |<------------  First extent ----------->|
4561  *
4562  * Here the first extent is cached but never merged or submitted, so we
4563  * must emit it before ending extent_fiemap().
4564  */
4565static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
4566 struct fiemap_cache *cache)
4567{
4568 int ret;
4569
4570 if (!cache->cached)
4571 return 0;
4572
4573 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4574 cache->len, cache->flags);
4575 cache->cached = false;
4576 if (ret > 0)
4577 ret = 0;
4578 return ret;
4579}
4580
4581int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4582 __u64 start, __u64 len)
4583{
4584 int ret = 0;
4585 u64 off = start;
4586 u64 max = start + len;
4587 u32 flags = 0;
4588 u32 found_type;
4589 u64 last;
4590 u64 last_for_get_extent = 0;
4591 u64 disko = 0;
4592 u64 isize = i_size_read(inode);
4593 struct btrfs_key found_key;
4594 struct extent_map *em = NULL;
4595 struct extent_state *cached_state = NULL;
4596 struct btrfs_path *path;
4597 struct btrfs_root *root = BTRFS_I(inode)->root;
4598 struct fiemap_cache cache = { 0 };
4599 struct ulist *roots;
4600 struct ulist *tmp_ulist;
4601 int end = 0;
4602 u64 em_start = 0;
4603 u64 em_len = 0;
4604 u64 em_end = 0;
4605
4606 if (len == 0)
4607 return -EINVAL;
4608
4609 path = btrfs_alloc_path();
4610 if (!path)
4611 return -ENOMEM;
4612 path->leave_spinning = 1;
4613
4614 roots = ulist_alloc(GFP_KERNEL);
4615 tmp_ulist = ulist_alloc(GFP_KERNEL);
4616 if (!roots || !tmp_ulist) {
4617 ret = -ENOMEM;
4618 goto out_free_ulist;
4619 }
4620
4621 start = round_down(start, btrfs_inode_sectorsize(inode));
4622 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4623
4624 /*
4625  * lookup the last file extent.  We're not using i_size here
4626  * because there might be preallocation past i_size
4627  */
4628 ret = btrfs_lookup_file_extent(NULL, root, path,
4629 btrfs_ino(BTRFS_I(inode)), -1, 0);
4630 if (ret < 0) {
4631 goto out_free_ulist;
4632 } else {
4633 WARN_ON(!ret);
4634 if (ret == 1)
4635 ret = 0;
4636 }
4637
4638 path->slots[0]--;
4639 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4640 found_type = found_key.type;
4641
4642 /* No extents, but there might be delalloc bits */
4643 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4644 found_type != BTRFS_EXTENT_DATA_KEY) {
4645 /* have to trust i_size as the end */
4646 last = (u64)-1;
4647 last_for_get_extent = isize;
4648 } else {
4649 /*
4650  * remember the start of the last extent.  There are a
4651  * bunch of different factors that go into the length of the
4652  * extent, so it's much less complex to remember where it started
4653  */
4654 last = found_key.offset;
4655 last_for_get_extent = last + 1;
4656 }
4657 btrfs_release_path(path);
4658
4659 /*
4660  * we might have some extents allocated but more delalloc past those
4661  * extents.  so, we trust isize unless the start of the last extent is
4662  * beyond isize
4663  */
4664 if (last < isize) {
4665 last = (u64)-1;
4666 last_for_get_extent = isize;
4667 }
4668
4669 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4670 &cached_state);
4671
4672 em = get_extent_skip_holes(inode, start, last_for_get_extent);
4673 if (!em)
4674 goto out;
4675 if (IS_ERR(em)) {
4676 ret = PTR_ERR(em);
4677 goto out;
4678 }
4679
4680 while (!end) {
4681 u64 offset_in_extent = 0;
4682
4683 /* break if the extent we found is outside the range */
4684 if (em->start >= max || extent_map_end(em) < off)
4685 break;
4686
4687 /*
4688  * get_extent may return an extent that starts before our
4689  * requested range.  We have to make sure the ranges
4690  * we return to fiemap always move forward and don't
4691  * overlap, so adjust the offsets here
4692  */
4693 em_start = max(em->start, off);
4694
4695 /*
4696  * record the offset from the start of the extent
4697  * for adjusting the disk offset below.  Only do this if the
4698  * extent isn't compressed since our in ram offset may be past
4699  * what we have actually allocated on disk.
4700  */
4701 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4702 offset_in_extent = em_start - em->start;
4703 em_end = extent_map_end(em);
4704 em_len = em_end - em_start;
4705 flags = 0;
4706 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4707 disko = em->block_start + offset_in_extent;
4708 else
4709 disko = 0;
4710
4711 /*
4712  * bump off for our next call to get_extent
4713  */
4714 off = extent_map_end(em);
4715 if (off >= max)
4716 end = 1;
4717
4718 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4719 end = 1;
4720 flags |= FIEMAP_EXTENT_LAST;
4721 } else if (em->block_start == EXTENT_MAP_INLINE) {
4722 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4723 FIEMAP_EXTENT_NOT_ALIGNED);
4724 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4725 flags |= (FIEMAP_EXTENT_DELALLOC |
4726 FIEMAP_EXTENT_UNKNOWN);
4727 } else if (fieinfo->fi_extents_max) {
4728 u64 bytenr = em->block_start -
4729 (em->start - em->orig_start);
4730
4731 /*
4732  * As btrfs supports shared space, this information
4733  * can be exported to userspace tools via
4734  * flag FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
4735  * then we're just getting a count and we can skip the
4736  * lookup stuff.
4737  */
4738 ret = btrfs_check_shared(root,
4739 btrfs_ino(BTRFS_I(inode)),
4740 bytenr, roots, tmp_ulist);
4741 if (ret < 0)
4742 goto out_free;
4743 if (ret)
4744 flags |= FIEMAP_EXTENT_SHARED;
4745 ret = 0;
4746 }
4747 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4748 flags |= FIEMAP_EXTENT_ENCODED;
4749 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4750 flags |= FIEMAP_EXTENT_UNWRITTEN;
4751
4752 free_extent_map(em);
4753 em = NULL;
4754 if ((em_start >= last) || em_len == (u64)-1 ||
4755 (last == (u64)-1 && isize <= em_end)) {
4756 flags |= FIEMAP_EXTENT_LAST;
4757 end = 1;
4758 }
4759
4760 /* now scan forward to see if this is really the last extent. */
4761 em = get_extent_skip_holes(inode, off, last_for_get_extent);
4762 if (IS_ERR(em)) {
4763 ret = PTR_ERR(em);
4764 goto out;
4765 }
4766 if (!em) {
4767 flags |= FIEMAP_EXTENT_LAST;
4768 end = 1;
4769 }
4770 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4771 em_len, flags);
4772 if (ret) {
4773 if (ret == 1)
4774 ret = 0;
4775 goto out_free;
4776 }
4777 }
4778out_free:
4779 if (!ret)
4780 ret = emit_last_fiemap_cache(fieinfo, &cache);
4781 free_extent_map(em);
4782out:
4783 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4784 &cached_state);
4785
4786out_free_ulist:
4787 btrfs_free_path(path);
4788 ulist_free(roots);
4789 ulist_free(tmp_ulist);
4790 return ret;
4791}
4792
4793static void __free_extent_buffer(struct extent_buffer *eb)
4794{
4795 btrfs_leak_debug_del(&eb->leak_list);
4796 kmem_cache_free(extent_buffer_cache, eb);
4797}
4798
4799int extent_buffer_under_io(struct extent_buffer *eb)
4800{
4801 return (atomic_read(&eb->io_pages) ||
4802 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4803 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4804}
4805
4806 /*
4807  * Release all pages attached to the extent buffer.
4808  */
4809static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
4810{
4811 int i;
4812 int num_pages;
4813 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4814
4815 BUG_ON(extent_buffer_under_io(eb));
4816
4817 num_pages = num_extent_pages(eb);
4818 for (i = 0; i < num_pages; i++) {
4819 struct page *page = eb->pages[i];
4820
4821 if (!page)
4822 continue;
4823 if (mapped)
4824 spin_lock(&page->mapping->private_lock);
4825 /*
4826  * We do this since we'll remove the pages after we've
4827  * removed the eb from the radix tree, so we could race
4828  * and have this page now attached to the new eb.  So
4829  * only clear page_private if it's still connected to
4830  * this eb.
4831  */
4832 if (PagePrivate(page) &&
4833 page->private == (unsigned long)eb) {
4834 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4835 BUG_ON(PageDirty(page));
4836 BUG_ON(PageWriteback(page));
4837 /*
4838  * We need to make sure we haven't been attached
4839  * to a new eb.
4840  */
4841 ClearPagePrivate(page);
4842 set_page_private(page, 0);
4843 /* One for the page private */
4844 put_page(page);
4845 }
4846
4847 if (mapped)
4848 spin_unlock(&page->mapping->private_lock);
4849
4850 /* One for when we allocated the page */
4851 put_page(page);
4852 }
4853}
4854
4855 /*
4856  * Helper for releasing the extent buffer.
4857  */
4858static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4859{
4860 btrfs_release_extent_buffer_pages(eb);
4861 __free_extent_buffer(eb);
4862}
4863
4864static struct extent_buffer *
4865__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4866 unsigned long len)
4867{
4868 struct extent_buffer *eb = NULL;
4869
4870 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4871 eb->start = start;
4872 eb->len = len;
4873 eb->fs_info = fs_info;
4874 eb->bflags = 0;
4875 rwlock_init(&eb->lock);
4876 atomic_set(&eb->blocking_readers, 0);
4877 eb->blocking_writers = 0;
4878 eb->lock_nested = false;
4879 init_waitqueue_head(&eb->write_lock_wq);
4880 init_waitqueue_head(&eb->read_lock_wq);
4881
4882 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4883
4884 spin_lock_init(&eb->refs_lock);
4885 atomic_set(&eb->refs, 1);
4886 atomic_set(&eb->io_pages, 0);
4887
4888 /*
4889  * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
4890  */
4891 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4892 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4893 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4894
4895#ifdef CONFIG_BTRFS_DEBUG
4896 eb->spinning_writers = 0;
4897 atomic_set(&eb->spinning_readers, 0);
4898 atomic_set(&eb->read_locks, 0);
4899 eb->write_locks = 0;
4900#endif
4901
4902 return eb;
4903}
4904
4905struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4906{
4907 int i;
4908 struct page *p;
4909 struct extent_buffer *new;
4910 int num_pages = num_extent_pages(src);
4911
4912 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4913 if (new == NULL)
4914 return NULL;
4915
4916 for (i = 0; i < num_pages; i++) {
4917 p = alloc_page(GFP_NOFS);
4918 if (!p) {
4919 btrfs_release_extent_buffer(new);
4920 return NULL;
4921 }
4922 attach_extent_buffer_page(new, p);
4923 WARN_ON(PageDirty(p));
4924 SetPageUptodate(p);
4925 new->pages[i] = p;
4926 copy_page(page_address(p), page_address(src->pages[i]));
4927 }
4928
4929 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4930 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
4931
4932 return new;
4933}
4934
4935struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4936 u64 start, unsigned long len)
4937{
4938 struct extent_buffer *eb;
4939 int num_pages;
4940 int i;
4941
4942 eb = __alloc_extent_buffer(fs_info, start, len);
4943 if (!eb)
4944 return NULL;
4945
4946 num_pages = num_extent_pages(eb);
4947 for (i = 0; i < num_pages; i++) {
4948 eb->pages[i] = alloc_page(GFP_NOFS);
4949 if (!eb->pages[i])
4950 goto err;
4951 }
4952 set_extent_buffer_uptodate(eb);
4953 btrfs_set_header_nritems(eb, 0);
4954 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4955
4956 return eb;
4957err:
4958 for (; i > 0; i--)
4959 __free_page(eb->pages[i - 1]);
4960 __free_extent_buffer(eb);
4961 return NULL;
4962}
4963
4964struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4965 u64 start)
4966{
4967 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
4968}
4969
4970static void check_buffer_tree_ref(struct extent_buffer *eb)
4971{
4972 int refs;
4973 /* the ref bit is tricky.  We have to make sure it is set
4974  * if we have the buffer dirty.  Otherwise the
4975  * code to free a buffer can end up dropping a dirty
4976  * page
4977  *
4978  * Once the ref bit is set, it won't go away while the
4979  * buffer is dirty or in writeback, and it also won't
4980  * go away while we have the reference count on the
4981  * eb bumped.
4982  *
4983  * We can't just set the ref bit without bumping the
4984  * ref on the eb because free_extent_buffer might
4985  * see the ref bit and try to clear it.  If this happens
4986  * free_extent_buffer might end up dropping our original
4987  * ref by mistake and freeing the page before we are able
4988  * to add one more ref.
4989  *
4990  * So bump the ref count first, then set the bit.  If someone
4991  * beat us to it, drop the ref we added.
4992  */
4993 refs = atomic_read(&eb->refs);
4994 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4995 return;
4996
4997 spin_lock(&eb->refs_lock);
4998 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4999 atomic_inc(&eb->refs);
5000 spin_unlock(&eb->refs_lock);
5001}
5002
5003static void mark_extent_buffer_accessed(struct extent_buffer *eb,
5004 struct page *accessed)
5005{
5006 int num_pages, i;
5007
5008 check_buffer_tree_ref(eb);
5009
5010 num_pages = num_extent_pages(eb);
5011 for (i = 0; i < num_pages; i++) {
5012 struct page *p = eb->pages[i];
5013
5014 if (p != accessed)
5015 mark_page_accessed(p);
5016 }
5017}
5018
5019struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
5020 u64 start)
5021{
5022 struct extent_buffer *eb;
5023
5024 rcu_read_lock();
5025 eb = radix_tree_lookup(&fs_info->buffer_radix,
5026 start >> PAGE_SHIFT);
5027 if (eb && atomic_inc_not_zero(&eb->refs)) {
5028 rcu_read_unlock();
5029 /*
5030  * Lock our eb's refs_lock to avoid races with
5031  * free_extent_buffer. When we get our eb it might be flagged
5032  * with EXTENT_BUFFER_STALE and another task running
5033  * free_extent_buffer might have seen that flag set,
5034  * eb->refs == 2, that the buffer isn't under IO (dirty and
5035  * writeback flags not set) and it's still in the tree (flag
5036  * EXTENT_BUFFER_IN_TREE set), therefore being in the process
5037  * of decrementing the extent buffer's reference count twice.
5038  * So here we could race and increment the eb's reference count,
5039  * clear its stale flag, mark it as dirty and drop our reference
5040  * before the other task finishes executing free_extent_buffer,
5041  * which would later result in an attempt to free an extent
5042  * buffer that is dirty.
5043  */
5044 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
5045 spin_lock(&eb->refs_lock);
5046 spin_unlock(&eb->refs_lock);
5047 }
5048 mark_extent_buffer_accessed(eb, NULL);
5049 return eb;
5050 }
5051 rcu_read_unlock();
5052
5053 return NULL;
5054}
5055
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
					       u64 start)
{
	struct extent_buffer *eb, *exists = NULL;
	int ret;

	eb = find_extent_buffer(fs_info, start);
	if (eb)
		return eb;
	eb = alloc_dummy_extent_buffer(fs_info, start);
	if (!eb)
		return NULL;
	eb->fs_info = fs_info;
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret)
		goto free_eb;
	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> PAGE_SHIFT, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}
	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	return eb;
free_eb:
	btrfs_release_extent_buffer(eb);
	return exists;
}
#endif

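/*
 * Return the tree block at logical address @start, either by finding the
 * cached extent buffer or by allocating a new one, backing it with pages
 * from the btree inode's mapping and inserting it into the buffer radix
 * tree. Returns an ERR_PTR on failure, never NULL.
 */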
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start)
{
	unsigned long len = fs_info->nodesize;
	int num_pages;
	int i;
	unsigned long index = start >> PAGE_SHIFT;
	struct extent_buffer *eb;
	struct extent_buffer *exists = NULL;
	struct page *p;
	struct address_space *mapping = fs_info->btree_inode->i_mapping;
	int uptodate = 1;
	int ret;

	if (!IS_ALIGNED(start, fs_info->sectorsize)) {
		btrfs_err(fs_info, "bad tree block start %llu", start);
		return ERR_PTR(-EINVAL);
	}

	eb = find_extent_buffer(fs_info, start);
	if (eb)
		return eb;

	eb = __alloc_extent_buffer(fs_info, start, len);
	if (!eb)
		return ERR_PTR(-ENOMEM);

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++, index++) {
		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
		if (!p) {
			exists = ERR_PTR(-ENOMEM);
			goto free_eb;
		}

		spin_lock(&mapping->private_lock);
		if (PagePrivate(p)) {
			/*
			 * An extent buffer was already attached to this page.
			 * If we can grab a reference on it, it is alive and
			 * we can return it; otherwise it is being freed and
			 * we may safely overwrite page->private.
			 */
			exists = (struct extent_buffer *)p->private;
			if (atomic_inc_not_zero(&exists->refs)) {
				spin_unlock(&mapping->private_lock);
				unlock_page(p);
				put_page(p);
				mark_extent_buffer_accessed(exists, p);
				goto free_eb;
			}
			exists = NULL;

			/*
			 * Clear page->private of the dead buffer so that
			 * attach_extent_buffer_page() doesn't complain, and
			 * drop the page reference the old buffer held.
			 */
			ClearPagePrivate(p);
			WARN_ON(PageDirty(p));
			put_page(p);
		}
		attach_extent_buffer_page(eb, p);
		spin_unlock(&mapping->private_lock);
		WARN_ON(PageDirty(p));
		eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;

		/*
		 * The pages stay locked here: the buffer isn't in the radix
		 * tree yet, so unlocking now would open a race with
		 * btree_releasepage(), which could free a page while we are
		 * still filling in the rest of the buffer.
		 */
	}
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		exists = ERR_PTR(ret);
		goto free_eb;
	}

	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> PAGE_SHIFT, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}

	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	/*
	 * Now it's safe to unlock the pages: btree_releasepage() will
	 * correctly detect that a page belongs to a live extent buffer and
	 * won't free it prematurely.
	 */
	for (i = 0; i < num_pages; i++)
		unlock_page(eb->pages[i]);
	return eb;

free_eb:
	WARN_ON(!atomic_dec_and_test(&eb->refs));
	for (i = 0; i < num_pages; i++) {
		if (eb->pages[i])
			unlock_page(eb->pages[i]);
	}

	btrfs_release_extent_buffer(eb);
	return exists;
}

static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
	struct extent_buffer *eb =
			container_of(head, struct extent_buffer, rcu_head);

	__free_extent_buffer(eb);
}

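/*
 * Drop one reference and, if it was the last, unhook the buffer from the
 * radix tree and free it (mapped buffers are freed after an RCU grace
 * period). Expects eb->refs_lock to be held and always releases it.
 * Returns 1 if the buffer was freed, 0 otherwise.
 */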
static int release_extent_buffer(struct extent_buffer *eb)
{
	lockdep_assert_held(&eb->refs_lock);

	WARN_ON(atomic_read(&eb->refs) == 0);
	if (atomic_dec_and_test(&eb->refs)) {
		if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
			struct btrfs_fs_info *fs_info = eb->fs_info;

			spin_unlock(&eb->refs_lock);

			spin_lock(&fs_info->buffer_lock);
			radix_tree_delete(&fs_info->buffer_radix,
					  eb->start >> PAGE_SHIFT);
			spin_unlock(&fs_info->buffer_lock);
		} else {
			spin_unlock(&eb->refs_lock);
		}

		/* Should be safe to release our pages at this point. */
		btrfs_release_extent_buffer_pages(eb);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
		if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
			__free_extent_buffer(eb);
			return 1;
		}
#endif
		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
		return 1;
	}
	spin_unlock(&eb->refs_lock);

	return 0;
}

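/*
 * Releases a reference taken via find_extent_buffer() or
 * alloc_extent_buffer(). A minimal sketch of the usual caller pattern
 * (names are illustrative, not a specific call site):
 *
 *	eb = alloc_extent_buffer(fs_info, bytenr);
 *	if (IS_ERR(eb))
 *		return PTR_ERR(eb);
 *	... read or modify the tree block ...
 *	free_extent_buffer(eb);
 */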
void free_extent_buffer(struct extent_buffer *eb)
{
	int refs;
	int old;

	if (!eb)
		return;

	/*
	 * Fast path: while the refcount is clearly above the minimum for
	 * this kind of buffer, drop our reference with a bare cmpxchg and
	 * avoid taking refs_lock. Otherwise fall through to the locked
	 * slow path, which may need to drop the tree reference as well.
	 */
	while (1) {
		refs = atomic_read(&eb->refs);
		if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
		    || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
			refs == 1))
			break;
		old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
		if (old == refs)
			return;
	}

	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) == 2 &&
	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
	    !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);

	/*
	 * release_extent_buffer() drops our reference and unlocks
	 * refs_lock; it frees the buffer if that was the last reference.
	 */
	release_extent_buffer(eb);
}

void free_extent_buffer_stale(struct extent_buffer *eb)
{
	if (!eb)
		return;

	spin_lock(&eb->refs_lock);
	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);

	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);
	release_extent_buffer(eb);
}

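/*
 * Clear the dirty state on all of the buffer's pages and drop the matching
 * PAGECACHE_TAG_DIRTY tag so writeback no longer considers them.
 */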
void clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	struct page *page;

	num_pages = num_extent_pages(eb);

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageDirty(page))
			continue;

		lock_page(page);
		WARN_ON(!PagePrivate(page));

		clear_page_dirty_for_io(page);
		xa_lock_irq(&page->mapping->i_pages);
		if (!PageDirty(page))
			__xa_clear_mark(&page->mapping->i_pages,
					page_index(page), PAGECACHE_TAG_DIRTY);
		xa_unlock_irq(&page->mapping->i_pages);
		ClearPageError(page);
		unlock_page(page);
	}
	WARN_ON(atomic_read(&eb->refs) == 0);
}

bool set_extent_buffer_dirty(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	bool was_dirty;

	check_buffer_tree_ref(eb);

	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);

	num_pages = num_extent_pages(eb);
	WARN_ON(atomic_read(&eb->refs) == 0);
	WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));

	if (!was_dirty)
		for (i = 0; i < num_pages; i++)
			set_page_dirty(eb->pages[i]);

#ifdef CONFIG_BTRFS_DEBUG
	for (i = 0; i < num_pages; i++)
		ASSERT(PageDirty(eb->pages[i]));
#endif

	return was_dirty;
}

void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (page)
			ClearPageUptodate(page);
	}
}

void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		SetPageUptodate(page);
	}
}

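/*
 * Read the buffer's pages from disk if they aren't uptodate yet. @wait is
 * WAIT_NONE for a non-blocking attempt (pages are trylocked and the read
 * completes asynchronously) or WAIT_COMPLETE to wait for every page read
 * to finish. Returns 0 on success or a negative errno on failure.
 */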
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
{
	int i;
	struct page *page;
	int err;
	int ret = 0;
	int locked_pages = 0;
	int all_uptodate = 1;
	int num_pages;
	unsigned long num_reads = 0;
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;
	struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		return 0;

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (wait == WAIT_NONE) {
			if (!trylock_page(page))
				goto unlock_exit;
		} else {
			lock_page(page);
		}
		locked_pages++;
	}

	/*
	 * Lock all the pages first and only then check their uptodate bits,
	 * so the checks can't race with a read completing or with
	 * clear_extent_buffer_uptodate().
	 */
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageUptodate(page)) {
			num_reads++;
			all_uptodate = 0;
		}
	}

	if (all_uptodate) {
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
		goto unlock_exit;
	}

	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
	eb->read_mirror = 0;
	atomic_set(&eb->io_pages, num_reads);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];

		if (!PageUptodate(page)) {
			if (ret) {
				atomic_dec(&eb->io_pages);
				unlock_page(page);
				continue;
			}

			ClearPageError(page);
			err = __extent_read_full_page(tree, page,
						      btree_get_extent, &bio,
						      mirror_num, &bio_flags,
						      REQ_META);
			if (err) {
				ret = err;
				/*
				 * On error the page failed to add itself to
				 * the bio and was unlocked, so its read will
				 * never complete: drop its io_pages count
				 * ourselves.
				 */
				atomic_dec(&eb->io_pages);
			}
		} else {
			unlock_page(page);
		}
	}

	if (bio) {
		err = submit_one_bio(bio, mirror_num, bio_flags);
		if (err)
			return err;
	}

	if (ret || wait != WAIT_COMPLETE)
		return ret;

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			ret = -EIO;
	}

	return ret;

unlock_exit:
	while (locked_pages > 0) {
		locked_pages--;
		page = eb->pages[locked_pages];
		unlock_page(page);
	}
	return ret;
}

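/*
 * Copy @len bytes starting at offset @start in the buffer into @dstv,
 * walking the backing pages one by one. An out-of-range request warns and
 * zeroes the destination instead of reading past the end of the buffer.
 */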
void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
			unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;

	if (start + len > eb->len) {
		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
		     eb->start, eb->len, start, len);
		memset(dst, 0, len);
		return;
	}

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));
		kaddr = page_address(page);
		memcpy(dst, kaddr + offset, cur);

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

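/*
 * Like read_extent_buffer(), but for a user destination: each chunk goes
 * through copy_to_user(), and a faulting copy returns -EFAULT.
 */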
int read_extent_buffer_to_user(const struct extent_buffer *eb,
			       void __user *dstv,
			       unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char __user *dst = (char __user *)dstv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));
		kaddr = page_address(page);
		if (copy_to_user(dst, kaddr + offset, cur)) {
			ret = -EFAULT;
			break;
		}

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}

	return ret;
}

/*
 * Map the requested range to a single page if possible.
 *
 * Return 0 if the range [start, start + min_len) fits within one page and
 * has been mapped, 1 if it crosses a page boundary (the caller must fall
 * back to the copying helpers), or -EINVAL if the range is out of bounds.
 */
int map_private_extent_buffer(const struct extent_buffer *eb,
			      unsigned long start, unsigned long min_len,
			      char **map, unsigned long *map_start,
			      unsigned long *map_len)
{
	size_t offset;
	char *kaddr;
	struct page *p;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
	unsigned long end_i = (start_offset + start + min_len - 1) >>
		PAGE_SHIFT;

	if (start + min_len > eb->len) {
		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
		     eb->start, eb->len, start, min_len);
		return -EINVAL;
	}

	if (i != end_i)
		return 1;

	if (i == 0) {
		offset = start_offset;
		*map_start = 0;
	} else {
		offset = 0;
		*map_start = ((u64)i << PAGE_SHIFT) - start_offset;
	}

	p = eb->pages[i];
	kaddr = page_address(p);
	*map = kaddr + offset;
	*map_len = PAGE_SIZE - offset;
	return 0;
}

int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *ptr = (char *)ptrv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));

		kaddr = page_address(page);
		ret = memcmp(ptr, kaddr + offset, cur);
		if (ret)
			break;

		ptr += cur;
		len -= cur;
		offset = 0;
		i++;
	}
	return ret;
}

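/*
 * The fsid and the chunk tree uuid both live in the btrfs_header at the
 * start of the buffer, so the two helpers below only touch page zero.
 */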
void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
					 const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]);
	memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
	       BTRFS_FSID_SIZE);
}

void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]);
	memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
	       BTRFS_FSID_SIZE);
}

void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memcpy(kaddr + offset, src, cur);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
			   unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memset(kaddr + offset, 0, cur);

		len -= cur;
		offset = 0;
		i++;
	}
}

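/*
 * Copy the full contents of @src into @dst one page at a time; both
 * buffers must have the same length.
 */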
void copy_extent_buffer_full(struct extent_buffer *dst,
			     struct extent_buffer *src)
{
	int i;
	int num_pages;

	ASSERT(dst->len == src->len);

	num_pages = num_extent_pages(dst);
	for (i = 0; i < num_pages; i++)
		copy_page(page_address(dst->pages[i]),
			  page_address(src->pages[i]));
}

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = offset_in_page(dst->start);
	unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;

	WARN_ON(src->len != dst_len);

	offset = offset_in_page(start_offset + dst_offset);

	while (len > 0) {
		page = dst->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, (unsigned long)(PAGE_SIZE - offset));

		kaddr = page_address(page);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

/*
 * eb_bitmap_offset - calculate the page and offset of the byte containing the
 * given bit number
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page in the extent buffer that contains the
 * given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer
 * which contains a given bit.
 */
static inline void eb_bitmap_offset(struct extent_buffer *eb,
				    unsigned long start, unsigned long nr,
				    unsigned long *page_index,
				    size_t *page_offset)
{
	size_t start_offset = offset_in_page(eb->start);
	size_t byte_offset = BIT_BYTE(nr);
	size_t offset;

	/*
	 * The byte we want is the offset of the extent buffer within the
	 * page, plus the offset of the bitmap item in the extent buffer,
	 * plus the offset of the byte in the bitmap item.
	 */
	offset = start_offset + start + byte_offset;

	*page_index = offset >> PAGE_SHIFT;
	*page_offset = offset_in_page(offset);
}

/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;

	eb_bitmap_offset(eb, start, nr, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);
	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
}

/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to set
 */
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	while (len >= bits_to_set) {
		kaddr[offset] |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] |= mask_to_set;
	}
}

/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to clear
 */
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
				unsigned long pos, unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	while (len >= bits_to_clear) {
		kaddr[offset] &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_BYTE;
		mask_to_clear = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] &= ~mask_to_clear;
	}
}

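/*
 * Helpers for the in-buffer memcpy/memmove below. Source and destination
 * ranges that land in the same page may overlap, in which case copy_pages()
 * has to fall back to memmove().
 */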
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = page_address(dst_page);
	char *src_kaddr;
	int must_memmove = 0;

	if (dst_page != src_page) {
		src_kaddr = page_address(src_page);
	} else {
		src_kaddr = dst_kaddr;
		if (areas_overlap(src_off, dst_off, len))
			must_memmove = 1;
	}

	if (must_memmove)
		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
	else
		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}

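/*
 * Copy @len bytes from @src_offset to @dst_offset within the same extent
 * buffer. copy_pages() only handles overlap within a single page, so this
 * forward copy is safe when the destination is below the source or the
 * ranges don't overlap; memmove_extent_buffer() handles the general case.
 */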
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	struct btrfs_fs_info *fs_info = dst->fs_info;
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	size_t start_offset = offset_in_page(dst->start);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memcpy bogus src_offset %lu move len %lu dst len %lu",
			  src_offset, len, dst->len);
		BUG();
	}
	if (dst_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memcpy bogus dst_offset %lu move len %lu dst len %lu",
			  dst_offset, len, dst->len);
		BUG();
	}

	while (len > 0) {
		dst_off_in_page = offset_in_page(start_offset + dst_offset);
		src_off_in_page = offset_in_page(start_offset + src_offset);

		dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
		src_i = (start_offset + src_offset) >> PAGE_SHIFT;

		cur = min(len, (unsigned long)(PAGE_SIZE -
					       src_off_in_page));
		cur = min_t(unsigned long, cur,
			    (unsigned long)(PAGE_SIZE - dst_off_in_page));

		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}

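/*
 * Overlap-safe move within one extent buffer: moving data down (dst below
 * src) degenerates to a forward memcpy, while moving it up walks backwards
 * from the tail so source bytes are consumed before they are overwritten.
 */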
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	struct btrfs_fs_info *fs_info = dst->fs_info;
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	size_t start_offset = offset_in_page(dst->start);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memmove bogus src_offset %lu move len %lu dst len %lu",
			  src_offset, len, dst->len);
		BUG();
	}
	if (dst_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memmove bogus dst_offset %lu move len %lu dst len %lu",
			  dst_offset, len, dst->len);
		BUG();
	}
	if (dst_offset < src_offset) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while (len > 0) {
		dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
		src_i = (start_offset + src_end) >> PAGE_SHIFT;

		dst_off_in_page = offset_in_page(start_offset + dst_end);
		src_off_in_page = offset_in_page(start_offset + src_end);

		cur = min_t(unsigned long, len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur;
		src_end -= cur;
		len -= cur;
	}
}

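/*
 * Called from the btree releasepage path: drop the tree reference if it is
 * the only one left and the buffer is idle, so the page can be freed.
 * Returns 1 if the buffer was released, 0 if it is still in use.
 */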
int try_release_extent_buffer(struct page *page)
{
	struct extent_buffer *eb;

	/*
	 * We need to make sure nobody is attaching this page to an eb right
	 * now.
	 */
	spin_lock(&page->mapping->private_lock);
	if (!PagePrivate(page)) {
		spin_unlock(&page->mapping->private_lock);
		return 1;
	}

	eb = (struct extent_buffer *)page->private;
	BUG_ON(!eb);

	/*
	 * The extent buffer is still pinned by the page. If the refcount is
	 * anything other than one (which would be the tree's own reference)
	 * or the buffer is dirty or under writeback, somebody is still
	 * using it and we can't release it.
	 */
	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
		spin_unlock(&eb->refs_lock);
		spin_unlock(&page->mapping->private_lock);
		return 0;
	}
	spin_unlock(&page->mapping->private_lock);

	/*
	 * If the tree ref isn't set then we know the ref on this eb is a
	 * real ref, so just return, this eb will likely be freed soon
	 * anyway.
	 */
	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
		spin_unlock(&eb->refs_lock);
		return 0;
	}

	return release_extent_buffer(eb);
}