1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/pagemap.h>
6#include <linux/page-flags.h>
7#include <linux/spinlock.h>
8#include <linux/blkdev.h>
9#include <linux/swap.h>
10#include <linux/writeback.h>
11#include <linux/pagevec.h>
12#include <linux/prefetch.h>
13#include <linux/cleancache.h>
14#include "extent_io.h"
15#include "extent_map.h"
16#include "ctree.h"
17#include "btrfs_inode.h"
18#include "volumes.h"
19#include "check-integrity.h"
20#include "locking.h"
21#include "rcu-string.h"
22#include "backref.h"
23#include "transaction.h"
24
25static struct kmem_cache *extent_state_cache;
26static struct kmem_cache *extent_buffer_cache;
27static struct bio_set *btrfs_bioset;
28
29static inline bool extent_state_in_tree(const struct extent_state *state)
30{
31 return !RB_EMPTY_NODE(&state->rb_node);
32}
33
34#ifdef CONFIG_BTRFS_DEBUG
35static LIST_HEAD(buffers);
36static LIST_HEAD(states);
37
38static DEFINE_SPINLOCK(leak_lock);
39
40static inline
41void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
42{
43 unsigned long flags;
44
45 spin_lock_irqsave(&leak_lock, flags);
46 list_add(new, head);
47 spin_unlock_irqrestore(&leak_lock, flags);
48}
49
50static inline
51void btrfs_leak_debug_del(struct list_head *entry)
52{
53 unsigned long flags;
54
55 spin_lock_irqsave(&leak_lock, flags);
56 list_del(entry);
57 spin_unlock_irqrestore(&leak_lock, flags);
58}
59
60static inline
61void btrfs_leak_debug_check(void)
62{
63 struct extent_state *state;
64 struct extent_buffer *eb;
65
66 while (!list_empty(&states)) {
67 state = list_entry(states.next, struct extent_state, leak_list);
68 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
69 state->start, state->end, state->state,
70 extent_state_in_tree(state),
71 refcount_read(&state->refs));
72 list_del(&state->leak_list);
73 kmem_cache_free(extent_state_cache, state);
74 }
75
76 while (!list_empty(&buffers)) {
77 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
78 pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
79 eb->start, eb->len, atomic_read(&eb->refs));
80 list_del(&eb->leak_list);
81 kmem_cache_free(extent_buffer_cache, eb);
82 }
83}
84
85#define btrfs_debug_check_extent_io_range(tree, start, end) \
86 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
87static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 struct extent_io_tree *tree, u64 start, u64 end)
89{
90 if (tree->ops && tree->ops->check_extent_io_range)
91 tree->ops->check_extent_io_range(tree->private_data, caller,
92 start, end);
93}
94#else
95#define btrfs_leak_debug_add(new, head) do {} while (0)
96#define btrfs_leak_debug_del(entry) do {} while (0)
97#define btrfs_leak_debug_check() do {} while (0)
98#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
99#endif
100
101#define BUFFER_LRU_MAX 64
102
103struct tree_entry {
104 u64 start;
105 u64 end;
106 struct rb_node rb_node;
107};
108
109struct extent_page_data {
110 struct bio *bio;
111 struct extent_io_tree *tree;
112 get_extent_t *get_extent;
113 unsigned long bio_flags;
114
	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
122};
123
124static void add_extent_changeset(struct extent_state *state, unsigned bits,
125 struct extent_changeset *changeset,
126 int set)
127{
128 int ret;
129
130 if (!changeset)
131 return;
132 if (set && (state->state & bits) == bits)
133 return;
134 if (!set && (state->state & bits) == 0)
135 return;
136 changeset->bytes_changed += state->end - state->start + 1;
137 ret = ulist_add(&changeset->range_changed, state->start, state->end,
138 GFP_ATOMIC);
	/* ENOMEM */
	BUG_ON(ret < 0);
141}
142
143static noinline void flush_write_bio(void *data);
144static inline struct btrfs_fs_info *
145tree_fs_info(struct extent_io_tree *tree)
146{
147 if (tree->ops)
148 return tree->ops->tree_fs_info(tree->private_data);
149 return NULL;
150}
151
152int __init extent_io_init(void)
153{
154 extent_state_cache = kmem_cache_create("btrfs_extent_state",
155 sizeof(struct extent_state), 0,
156 SLAB_MEM_SPREAD, NULL);
157 if (!extent_state_cache)
158 return -ENOMEM;
159
160 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
161 sizeof(struct extent_buffer), 0,
162 SLAB_MEM_SPREAD, NULL);
163 if (!extent_buffer_cache)
164 goto free_state_cache;
165
166 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
167 offsetof(struct btrfs_io_bio, bio),
168 BIOSET_NEED_BVECS);
169 if (!btrfs_bioset)
170 goto free_buffer_cache;
171
172 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
173 goto free_bioset;
174
175 return 0;
176
177free_bioset:
178 bioset_free(btrfs_bioset);
179 btrfs_bioset = NULL;
180
181free_buffer_cache:
182 kmem_cache_destroy(extent_buffer_cache);
183 extent_buffer_cache = NULL;
184
185free_state_cache:
186 kmem_cache_destroy(extent_state_cache);
187 extent_state_cache = NULL;
188 return -ENOMEM;
189}
190
191void extent_io_exit(void)
192{
193 btrfs_leak_debug_check();
194
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
200 kmem_cache_destroy(extent_state_cache);
201 kmem_cache_destroy(extent_buffer_cache);
202 if (btrfs_bioset)
203 bioset_free(btrfs_bioset);
204}
205
206void extent_io_tree_init(struct extent_io_tree *tree,
207 void *private_data)
208{
209 tree->state = RB_ROOT;
210 tree->ops = NULL;
211 tree->dirty_bytes = 0;
212 spin_lock_init(&tree->lock);
213 tree->private_data = private_data;
214}
215
216static struct extent_state *alloc_extent_state(gfp_t mask)
217{
218 struct extent_state *state;
219
	/*
	 * The given mask might be not appropriate for the slab allocator,
	 * drop the unsupported bits
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
225 state = kmem_cache_alloc(extent_state_cache, mask);
226 if (!state)
227 return state;
228 state->state = 0;
229 state->failrec = NULL;
230 RB_CLEAR_NODE(&state->rb_node);
231 btrfs_leak_debug_add(&state->leak_list, &states);
232 refcount_set(&state->refs, 1);
233 init_waitqueue_head(&state->wq);
234 trace_alloc_extent_state(state, mask, _RET_IP_);
235 return state;
236}
237
238void free_extent_state(struct extent_state *state)
239{
240 if (!state)
241 return;
242 if (refcount_dec_and_test(&state->refs)) {
243 WARN_ON(extent_state_in_tree(state));
244 btrfs_leak_debug_del(&state->leak_list);
245 trace_free_extent_state(state, _RET_IP_);
246 kmem_cache_free(extent_state_cache, state);
247 }
248}
249
250static struct rb_node *tree_insert(struct rb_root *root,
251 struct rb_node *search_start,
252 u64 offset,
253 struct rb_node *node,
254 struct rb_node ***p_in,
255 struct rb_node **parent_in)
256{
257 struct rb_node **p;
258 struct rb_node *parent = NULL;
259 struct tree_entry *entry;
260
261 if (p_in && parent_in) {
262 p = *p_in;
263 parent = *parent_in;
264 goto do_insert;
265 }
266
267 p = search_start ? &search_start : &root->rb_node;
268 while (*p) {
269 parent = *p;
270 entry = rb_entry(parent, struct tree_entry, rb_node);
271
272 if (offset < entry->start)
273 p = &(*p)->rb_left;
274 else if (offset > entry->end)
275 p = &(*p)->rb_right;
276 else
277 return parent;
278 }
279
280do_insert:
281 rb_link_node(node, parent, p);
282 rb_insert_color(node, root);
283 return NULL;
284}
285
286static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
287 struct rb_node **prev_ret,
288 struct rb_node **next_ret,
289 struct rb_node ***p_ret,
290 struct rb_node **parent_ret)
291{
292 struct rb_root *root = &tree->state;
293 struct rb_node **n = &root->rb_node;
294 struct rb_node *prev = NULL;
295 struct rb_node *orig_prev = NULL;
296 struct tree_entry *entry;
297 struct tree_entry *prev_entry = NULL;
298
299 while (*n) {
300 prev = *n;
301 entry = rb_entry(prev, struct tree_entry, rb_node);
302 prev_entry = entry;
303
304 if (offset < entry->start)
305 n = &(*n)->rb_left;
306 else if (offset > entry->end)
307 n = &(*n)->rb_right;
308 else
309 return *n;
310 }
311
312 if (p_ret)
313 *p_ret = n;
314 if (parent_ret)
315 *parent_ret = prev;
316
317 if (prev_ret) {
318 orig_prev = prev;
319 while (prev && offset > prev_entry->end) {
320 prev = rb_next(prev);
321 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
322 }
323 *prev_ret = prev;
324 prev = orig_prev;
325 }
326
327 if (next_ret) {
328 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
329 while (prev && offset < prev_entry->start) {
330 prev = rb_prev(prev);
331 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
332 }
333 *next_ret = prev;
334 }
335 return NULL;
336}
337
338static inline struct rb_node *
339tree_search_for_insert(struct extent_io_tree *tree,
340 u64 offset,
341 struct rb_node ***p_ret,
342 struct rb_node **parent_ret)
343{
344 struct rb_node *prev = NULL;
345 struct rb_node *ret;
346
347 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
348 if (!ret)
349 return prev;
350 return ret;
351}
352
353static inline struct rb_node *tree_search(struct extent_io_tree *tree,
354 u64 offset)
355{
356 return tree_search_for_insert(tree, offset, NULL, NULL);
357}
358
359static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
360 struct extent_state *other)
361{
362 if (tree->ops && tree->ops->merge_extent_hook)
363 tree->ops->merge_extent_hook(tree->private_data, new, other);
364}
365
/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS set are not merged
 * because the end_io handlers need to be able to do operations on them
 * without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
375static void merge_state(struct extent_io_tree *tree,
376 struct extent_state *state)
377{
378 struct extent_state *other;
379 struct rb_node *other_node;
380
381 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
382 return;
383
384 other_node = rb_prev(&state->rb_node);
385 if (other_node) {
386 other = rb_entry(other_node, struct extent_state, rb_node);
387 if (other->end == state->start - 1 &&
388 other->state == state->state) {
389 merge_cb(tree, state, other);
390 state->start = other->start;
391 rb_erase(&other->rb_node, &tree->state);
392 RB_CLEAR_NODE(&other->rb_node);
393 free_extent_state(other);
394 }
395 }
396 other_node = rb_next(&state->rb_node);
397 if (other_node) {
398 other = rb_entry(other_node, struct extent_state, rb_node);
399 if (other->start == state->end + 1 &&
400 other->state == state->state) {
401 merge_cb(tree, state, other);
402 state->end = other->end;
403 rb_erase(&other->rb_node, &tree->state);
404 RB_CLEAR_NODE(&other->rb_node);
405 free_extent_state(other);
406 }
407 }
408}
409
410static void set_state_cb(struct extent_io_tree *tree,
411 struct extent_state *state, unsigned *bits)
412{
413 if (tree->ops && tree->ops->set_bit_hook)
414 tree->ops->set_bit_hook(tree->private_data, state, bits);
415}
416
417static void clear_state_cb(struct extent_io_tree *tree,
418 struct extent_state *state, unsigned *bits)
419{
420 if (tree->ops && tree->ops->clear_bit_hook)
421 tree->ops->clear_bit_hook(tree->private_data, state, bits);
422}
423
424static void set_state_bits(struct extent_io_tree *tree,
425 struct extent_state *state, unsigned *bits,
426 struct extent_changeset *changeset);
427
/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
438static int insert_state(struct extent_io_tree *tree,
439 struct extent_state *state, u64 start, u64 end,
440 struct rb_node ***p,
441 struct rb_node **parent,
442 unsigned *bits, struct extent_changeset *changeset)
443{
444 struct rb_node *node;
445
446 if (end < start)
447 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
448 end, start);
449 state->start = start;
450 state->end = end;
451
452 set_state_bits(tree, state, bits, changeset);
453
454 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
455 if (node) {
456 struct extent_state *found;
457 found = rb_entry(node, struct extent_state, rb_node);
458 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
459 found->start, found->end, start, end);
460 return -EEXIST;
461 }
462 merge_state(tree, state);
463 return 0;
464}
465
466static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
467 u64 split)
468{
469 if (tree->ops && tree->ops->split_extent_hook)
470 tree->ops->split_extent_hook(tree->private_data, orig, split);
471}
472
/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
487static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
488 struct extent_state *prealloc, u64 split)
489{
490 struct rb_node *node;
491
492 split_cb(tree, orig, split);
493
494 prealloc->start = orig->start;
495 prealloc->end = split - 1;
496 prealloc->state = orig->state;
497 orig->start = split;
498
499 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
500 &prealloc->rb_node, NULL, NULL);
501 if (node) {
502 free_extent_state(prealloc);
503 return -EEXIST;
504 }
505 return 0;
506}
507
508static struct extent_state *next_state(struct extent_state *state)
509{
510 struct rb_node *next = rb_next(&state->rb_node);
511 if (next)
512 return rb_entry(next, struct extent_state, rb_node);
513 else
514 return NULL;
515}
516
/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
524static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
525 struct extent_state *state,
526 unsigned *bits, int wake,
527 struct extent_changeset *changeset)
528{
529 struct extent_state *next;
530 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
531
532 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
533 u64 range = state->end - state->start + 1;
534 WARN_ON(range > tree->dirty_bytes);
535 tree->dirty_bytes -= range;
536 }
537 clear_state_cb(tree, state, bits);
538 add_extent_changeset(state, bits_to_clear, changeset, 0);
539 state->state &= ~bits_to_clear;
540 if (wake)
541 wake_up(&state->wq);
542 if (state->state == 0) {
543 next = next_state(state);
544 if (extent_state_in_tree(state)) {
545 rb_erase(&state->rb_node, &tree->state);
546 RB_CLEAR_NODE(&state->rb_node);
547 free_extent_state(state);
548 } else {
549 WARN_ON(1);
550 }
551 } else {
552 merge_state(tree, state);
553 next = next_state(state);
554 }
555 return next;
556}
557
558static struct extent_state *
559alloc_extent_state_atomic(struct extent_state *prealloc)
560{
561 if (!prealloc)
562 prealloc = alloc_extent_state(GFP_ATOMIC);
563
564 return prealloc;
565}
566
567static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
568{
569 btrfs_panic(tree_fs_info(tree), err,
570 "Locking error: Extent tree was modified by another thread while locked.");
571}
572
/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock.
 */
585static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
586 unsigned bits, int wake, int delete,
587 struct extent_state **cached_state,
588 gfp_t mask, struct extent_changeset *changeset)
589{
590 struct extent_state *state;
591 struct extent_state *cached;
592 struct extent_state *prealloc = NULL;
593 struct rb_node *node;
594 u64 last_end;
595 int err;
596 int clear = 0;
597
598 btrfs_debug_check_extent_io_range(tree, start, end);
599
600 if (bits & EXTENT_DELALLOC)
601 bits |= EXTENT_NORESERVE;
602
603 if (delete)
604 bits |= ~EXTENT_CTLBITS;
605 bits |= EXTENT_FIRST_DELALLOC;
606
607 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
608 clear = 1;
609again:
610 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
618 prealloc = alloc_extent_state(mask);
619 }
620
621 spin_lock(&tree->lock);
622 if (cached_state) {
623 cached = *cached_state;
624
625 if (clear) {
626 *cached_state = NULL;
627 cached_state = NULL;
628 }
629
630 if (cached && extent_state_in_tree(cached) &&
631 cached->start <= start && cached->end > start) {
632 if (clear)
633 refcount_dec(&cached->refs);
634 state = cached;
635 goto hit_next;
636 }
637 if (clear)
638 free_extent_state(cached);
639 }
640
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
644 node = tree_search(tree, start);
645 if (!node)
646 goto out;
647 state = rb_entry(node, struct extent_state, rb_node);
648hit_next:
649 if (state->start > end)
650 goto out;
651 WARN_ON(state->end < start);
652 last_end = state->end;

	/* the state doesn't have the bits we want, go ahead */
655 if (!(state->state & bits)) {
656 state = next_state(state);
657 goto next;
658 }
659
	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
676 if (state->start < start) {
677 prealloc = alloc_extent_state_atomic(prealloc);
678 BUG_ON(!prealloc);
679 err = split_state(tree, state, prealloc, start);
680 if (err)
681 extent_io_tree_panic(tree, err);
682
683 prealloc = NULL;
684 if (err)
685 goto out;
686 if (state->end <= end) {
687 state = clear_state_bit(tree, state, &bits, wake,
688 changeset);
689 goto next;
690 }
691 goto search_again;
692 }
693
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
699 if (state->start <= end && state->end > end) {
700 prealloc = alloc_extent_state_atomic(prealloc);
701 BUG_ON(!prealloc);
702 err = split_state(tree, state, prealloc, end + 1);
703 if (err)
704 extent_io_tree_panic(tree, err);
705
706 if (wake)
707 wake_up(&state->wq);
708
709 clear_state_bit(tree, prealloc, &bits, wake, changeset);
710
711 prealloc = NULL;
712 goto out;
713 }
714
715 state = clear_state_bit(tree, state, &bits, wake, changeset);
716next:
717 if (last_end == (u64)-1)
718 goto out;
719 start = last_end + 1;
720 if (start <= end && state && !need_resched())
721 goto hit_next;
722
723search_again:
724 if (start > end)
725 goto out;
726 spin_unlock(&tree->lock);
727 if (gfpflags_allow_blocking(mask))
728 cond_resched();
729 goto again;
730
731out:
732 spin_unlock(&tree->lock);
733 if (prealloc)
734 free_extent_state(prealloc);
735
736 return 0;
737
738}
739
740static void wait_on_state(struct extent_io_tree *tree,
741 struct extent_state *state)
742 __releases(tree->lock)
743 __acquires(tree->lock)
744{
745 DEFINE_WAIT(wait);
746 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
747 spin_unlock(&tree->lock);
748 schedule();
749 spin_lock(&tree->lock);
750 finish_wait(&state->wq, &wait);
751}
752
/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
758static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
759 unsigned long bits)
760{
761 struct extent_state *state;
762 struct rb_node *node;
763
764 btrfs_debug_check_extent_io_range(tree, start, end);
765
766 spin_lock(&tree->lock);
767again:
768 while (1) {
		/*
		 * this search will find all the extents
		 * that end after our range starts
		 */
773 node = tree_search(tree, start);
774process_node:
775 if (!node)
776 break;
777
778 state = rb_entry(node, struct extent_state, rb_node);
779
780 if (state->start > end)
781 goto out;
782
783 if (state->state & bits) {
784 start = state->start;
785 refcount_inc(&state->refs);
786 wait_on_state(tree, state);
787 free_extent_state(state);
788 goto again;
789 }
790 start = state->end + 1;
791
792 if (start > end)
793 break;
794
795 if (!cond_resched_lock(&tree->lock)) {
796 node = rb_next(node);
797 goto process_node;
798 }
799 }
800out:
801 spin_unlock(&tree->lock);
802}
803
804static void set_state_bits(struct extent_io_tree *tree,
805 struct extent_state *state,
806 unsigned *bits, struct extent_changeset *changeset)
807{
808 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
809
810 set_state_cb(tree, state, bits);
811 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
812 u64 range = state->end - state->start + 1;
813 tree->dirty_bytes += range;
814 }
815 add_extent_changeset(state, bits_to_set, changeset, 1);
816 state->state |= bits_to_set;
817}
818
819static void cache_state_if_flags(struct extent_state *state,
820 struct extent_state **cached_ptr,
821 unsigned flags)
822{
823 if (cached_ptr && !(*cached_ptr)) {
824 if (!flags || (state->state & flags)) {
825 *cached_ptr = state;
826 refcount_inc(&state->refs);
827 }
828 }
829}
830
831static void cache_state(struct extent_state *state,
832 struct extent_state **cached_ptr)
833{
834 return cache_state_if_flags(state, cached_ptr,
835 EXTENT_IOBITS | EXTENT_BOUNDARY);
836}
837
/*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive This takes the tree lock.
 */
849static int __must_check
850__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
851 unsigned bits, unsigned exclusive_bits,
852 u64 *failed_start, struct extent_state **cached_state,
853 gfp_t mask, struct extent_changeset *changeset)
854{
855 struct extent_state *state;
856 struct extent_state *prealloc = NULL;
857 struct rb_node *node;
858 struct rb_node **p;
859 struct rb_node *parent;
860 int err = 0;
861 u64 last_start;
862 u64 last_end;
863
864 btrfs_debug_check_extent_io_range(tree, start, end);
865
866 bits |= EXTENT_FIRST_DELALLOC;
867again:
868 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
876 prealloc = alloc_extent_state(mask);
877 }
878
879 spin_lock(&tree->lock);
880 if (cached_state && *cached_state) {
881 state = *cached_state;
882 if (state->start <= start && state->end > start &&
883 extent_state_in_tree(state)) {
884 node = &state->rb_node;
885 goto hit_next;
886 }
887 }
888
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
892 node = tree_search_for_insert(tree, start, &p, &parent);
893 if (!node) {
894 prealloc = alloc_extent_state_atomic(prealloc);
895 BUG_ON(!prealloc);
896 err = insert_state(tree, prealloc, start, end,
897 &p, &parent, &bits, changeset);
898 if (err)
899 extent_io_tree_panic(tree, err);
900
901 cache_state(prealloc, cached_state);
902 prealloc = NULL;
903 goto out;
904 }
905 state = rb_entry(node, struct extent_state, rb_node);
906hit_next:
907 last_start = state->start;
908 last_end = state->end;
909
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
916 if (state->start == start && state->end <= end) {
917 if (state->state & exclusive_bits) {
918 *failed_start = state->start;
919 err = -EEXIST;
920 goto out;
921 }
922
923 set_state_bits(tree, state, &bits, changeset);
924 cache_state(state, cached_state);
925 merge_state(tree, state);
926 if (last_end == (u64)-1)
927 goto out;
928 start = last_end + 1;
929 state = next_state(state);
930 if (start < end && state && state->start == start &&
931 !need_resched())
932 goto hit_next;
933 goto search_again;
934 }
935
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
952 if (state->start < start) {
953 if (state->state & exclusive_bits) {
954 *failed_start = start;
955 err = -EEXIST;
956 goto out;
957 }
958
959 prealloc = alloc_extent_state_atomic(prealloc);
960 BUG_ON(!prealloc);
961 err = split_state(tree, state, prealloc, start);
962 if (err)
963 extent_io_tree_panic(tree, err);
964
965 prealloc = NULL;
966 if (err)
967 goto out;
968 if (state->end <= end) {
969 set_state_bits(tree, state, &bits, changeset);
970 cache_state(state, cached_state);
971 merge_state(tree, state);
972 if (last_end == (u64)-1)
973 goto out;
974 start = last_end + 1;
975 state = next_state(state);
976 if (start < end && state && state->start == start &&
977 !need_resched())
978 goto hit_next;
979 }
980 goto search_again;
981 }
982
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
989 if (state->start > start) {
990 u64 this_end;
991 if (end < last_start)
992 this_end = end;
993 else
994 this_end = last_start - 1;
995
996 prealloc = alloc_extent_state_atomic(prealloc);
997 BUG_ON(!prealloc);

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
1003 err = insert_state(tree, prealloc, start, this_end,
1004 NULL, NULL, &bits, changeset);
1005 if (err)
1006 extent_io_tree_panic(tree, err);
1007
1008 cache_state(prealloc, cached_state);
1009 prealloc = NULL;
1010 start = this_end + 1;
1011 goto search_again;
1012 }
1013
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
1019 if (state->start <= end && state->end > end) {
1020 if (state->state & exclusive_bits) {
1021 *failed_start = start;
1022 err = -EEXIST;
1023 goto out;
1024 }
1025
1026 prealloc = alloc_extent_state_atomic(prealloc);
1027 BUG_ON(!prealloc);
1028 err = split_state(tree, state, prealloc, end + 1);
1029 if (err)
1030 extent_io_tree_panic(tree, err);
1031
1032 set_state_bits(tree, prealloc, &bits, changeset);
1033 cache_state(prealloc, cached_state);
1034 merge_state(tree, prealloc);
1035 prealloc = NULL;
1036 goto out;
1037 }
1038
1039search_again:
1040 if (start > end)
1041 goto out;
1042 spin_unlock(&tree->lock);
1043 if (gfpflags_allow_blocking(mask))
1044 cond_resched();
1045 goto again;
1046
1047out:
1048 spin_unlock(&tree->lock);
1049 if (prealloc)
1050 free_extent_state(prealloc);
1051
1052 return err;
1053
1054}
1055
1056int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   unsigned bits, u64 *failed_start,
1058 struct extent_state **cached_state, gfp_t mask)
1059{
1060 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1061 cached_state, mask, NULL);
1062}
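
/*
 * Usage sketch (illustrative only, not taken from a caller in this file):
 * set_extent_bit() is normally paired with clear_extent_bit() on the same
 * inclusive byte range, e.g. to tag a range dirty and later drop the bit:
 *
 *	set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, NULL, GFP_NOFS);
 *	...
 *	clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, NULL, GFP_NOFS);
 *
 * Both take the tree lock internally, splitting or merging extent states
 * as needed.
 */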
1063
/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 * 			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range.  If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits.  This is only meant to be used by things that are mergeable, ie
 * converting from say DELALLOC to DIRTY.  This is not meant to be used with
 * ranges where it is allowed to set bits such as EXTENT_LOCKED for locking
 * ranges.
 */
1083int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1084 unsigned bits, unsigned clear_bits,
1085 struct extent_state **cached_state)
1086{
1087 struct extent_state *state;
1088 struct extent_state *prealloc = NULL;
1089 struct rb_node *node;
1090 struct rb_node **p;
1091 struct rb_node *parent;
1092 int err = 0;
1093 u64 last_start;
1094 u64 last_end;
1095 bool first_iteration = true;
1096
1097 btrfs_debug_check_extent_io_range(tree, start, end);
1098
1099again:
1100 if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
1108 prealloc = alloc_extent_state(GFP_NOFS);
1109 if (!prealloc && !first_iteration)
1110 return -ENOMEM;
1111 }
1112
1113 spin_lock(&tree->lock);
1114 if (cached_state && *cached_state) {
1115 state = *cached_state;
1116 if (state->start <= start && state->end > start &&
1117 extent_state_in_tree(state)) {
1118 node = &state->rb_node;
1119 goto hit_next;
1120 }
1121 }
1122
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
1127 node = tree_search_for_insert(tree, start, &p, &parent);
1128 if (!node) {
1129 prealloc = alloc_extent_state_atomic(prealloc);
1130 if (!prealloc) {
1131 err = -ENOMEM;
1132 goto out;
1133 }
1134 err = insert_state(tree, prealloc, start, end,
1135 &p, &parent, &bits, NULL);
1136 if (err)
1137 extent_io_tree_panic(tree, err);
1138 cache_state(prealloc, cached_state);
1139 prealloc = NULL;
1140 goto out;
1141 }
1142 state = rb_entry(node, struct extent_state, rb_node);
1143hit_next:
1144 last_start = state->start;
1145 last_end = state->end;
1146
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
1153 if (state->start == start && state->end <= end) {
1154 set_state_bits(tree, state, &bits, NULL);
1155 cache_state(state, cached_state);
1156 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1157 if (last_end == (u64)-1)
1158 goto out;
1159 start = last_end + 1;
1160 if (start < end && state && state->start == start &&
1161 !need_resched())
1162 goto hit_next;
1163 goto search_again;
1164 }
1165
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
1182 if (state->start < start) {
1183 prealloc = alloc_extent_state_atomic(prealloc);
1184 if (!prealloc) {
1185 err = -ENOMEM;
1186 goto out;
1187 }
1188 err = split_state(tree, state, prealloc, start);
1189 if (err)
1190 extent_io_tree_panic(tree, err);
1191 prealloc = NULL;
1192 if (err)
1193 goto out;
1194 if (state->end <= end) {
1195 set_state_bits(tree, state, &bits, NULL);
1196 cache_state(state, cached_state);
1197 state = clear_state_bit(tree, state, &clear_bits, 0,
1198 NULL);
1199 if (last_end == (u64)-1)
1200 goto out;
1201 start = last_end + 1;
1202 if (start < end && state && state->start == start &&
1203 !need_resched())
1204 goto hit_next;
1205 }
1206 goto search_again;
1207 }
1208
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
1215 if (state->start > start) {
1216 u64 this_end;
1217 if (end < last_start)
1218 this_end = end;
1219 else
1220 this_end = last_start - 1;
1221
1222 prealloc = alloc_extent_state_atomic(prealloc);
1223 if (!prealloc) {
1224 err = -ENOMEM;
1225 goto out;
1226 }

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
1232 err = insert_state(tree, prealloc, start, this_end,
1233 NULL, NULL, &bits, NULL);
1234 if (err)
1235 extent_io_tree_panic(tree, err);
1236 cache_state(prealloc, cached_state);
1237 prealloc = NULL;
1238 start = this_end + 1;
1239 goto search_again;
1240 }
1241
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bits
	 * on the first half
	 */
1247 if (state->start <= end && state->end > end) {
1248 prealloc = alloc_extent_state_atomic(prealloc);
1249 if (!prealloc) {
1250 err = -ENOMEM;
1251 goto out;
1252 }
1253
1254 err = split_state(tree, state, prealloc, end + 1);
1255 if (err)
1256 extent_io_tree_panic(tree, err);
1257
1258 set_state_bits(tree, prealloc, &bits, NULL);
1259 cache_state(prealloc, cached_state);
1260 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1261 prealloc = NULL;
1262 goto out;
1263 }
1264
1265search_again:
1266 if (start > end)
1267 goto out;
1268 spin_unlock(&tree->lock);
1269 cond_resched();
1270 first_iteration = false;
1271 goto again;
1272
1273out:
1274 spin_unlock(&tree->lock);
1275 if (prealloc)
1276 free_extent_state(prealloc);
1277
1278 return err;
1279}
1280
1281
1282int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1283 unsigned bits, struct extent_changeset *changeset)
1284{
	/*
	 * We don't support EXTENT_LOCKED yet, because current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will either
	 * fail with -EEXIST or changeset will record the whole range.
	 */
	BUG_ON(bits & EXTENT_LOCKED);
1292
1293 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1294 changeset);
1295}
1296
1297int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1298 unsigned bits, int wake, int delete,
1299 struct extent_state **cached, gfp_t mask)
1300{
1301 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1302 cached, mask, NULL);
1303}
1304
1305int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1306 unsigned bits, struct extent_changeset *changeset)
1307{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	BUG_ON(bits & EXTENT_LOCKED);
1313
1314 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1315 changeset);
1316}
1317
/*
 * either insert or lock state struct between start and end use mask to tell
 * us if waiting is desired.
 */
1322int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1323 struct extent_state **cached_state)
1324{
1325 int err;
1326 u64 failed_start;
1327
1328 while (1) {
1329 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1330 EXTENT_LOCKED, &failed_start,
1331 cached_state, GFP_NOFS, NULL);
1332 if (err == -EEXIST) {
1333 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1334 start = failed_start;
1335 } else
1336 break;
1337 WARN_ON(start > end);
1338 }
1339 return err;
1340}
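
/*
 * Usage sketch (illustrative, not from this file): the usual pattern is to
 * lock a range around an I/O operation and unlock it through the cached
 * state so the final tree lookup is cheap:
 *
 *	struct extent_state *cached = NULL;
 *
 *	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, &cached);
 *	... operate on [start, end] ...
 *	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end,
 *			     &cached, GFP_NOFS);
 */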
1341
1342int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1343{
1344 int err;
1345 u64 failed_start;
1346
1347 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1348 &failed_start, NULL, GFP_NOFS, NULL);
1349 if (err == -EEXIST) {
1350 if (failed_start > start)
1351 clear_extent_bit(tree, start, failed_start - 1,
1352 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1353 return 0;
1354 }
1355 return 1;
1356}
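
/*
 * Note: try_lock_extent() returns 1 if the whole range was locked and 0 if
 * any part of it was already locked (in which case the partially locked
 * prefix is unlocked again before returning), so a caller would typically
 * fall back to lock_extent_bits() or retry later on a 0 return.
 */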
1357
1358void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1359{
1360 unsigned long index = start >> PAGE_SHIFT;
1361 unsigned long end_index = end >> PAGE_SHIFT;
1362 struct page *page;
1363
1364 while (index <= end_index) {
1365 page = find_get_page(inode->i_mapping, index);
1366 BUG_ON(!page);
1367 clear_page_dirty_for_io(page);
1368 put_page(page);
1369 index++;
1370 }
1371}
1372
1373void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1374{
1375 unsigned long index = start >> PAGE_SHIFT;
1376 unsigned long end_index = end >> PAGE_SHIFT;
1377 struct page *page;
1378
1379 while (index <= end_index) {
1380 page = find_get_page(inode->i_mapping, index);
1381 BUG_ON(!page);
1382 __set_page_dirty_nobuffers(page);
1383 account_page_redirty(page);
1384 put_page(page);
1385 index++;
1386 }
1387}
1388
/*
 * helper function to set both pages and extents in the tree writeback
 */
1392static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1393{
1394 tree->ops->set_range_writeback(tree->private_data, start, end);
1395}
1396
/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will be returned if
 * nothing was found after 'start'
 */
1401static struct extent_state *
1402find_first_extent_bit_state(struct extent_io_tree *tree,
1403 u64 start, unsigned bits)
1404{
1405 struct rb_node *node;
1406 struct extent_state *state;
1407
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
1412 node = tree_search(tree, start);
1413 if (!node)
1414 goto out;
1415
1416 while (1) {
1417 state = rb_entry(node, struct extent_state, rb_node);
1418 if (state->end >= start && (state->state & bits))
1419 return state;
1420
1421 node = rb_next(node);
1422 if (!node)
1423 break;
1424 }
1425out:
1426 return NULL;
1427}
1428
/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned. If found something, return 0.
 */
1436int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1437 u64 *start_ret, u64 *end_ret, unsigned bits,
1438 struct extent_state **cached_state)
1439{
1440 struct extent_state *state;
1441 struct rb_node *n;
1442 int ret = 1;
1443
1444 spin_lock(&tree->lock);
1445 if (cached_state && *cached_state) {
1446 state = *cached_state;
1447 if (state->end == start - 1 && extent_state_in_tree(state)) {
1448 n = rb_next(&state->rb_node);
1449 while (n) {
1450 state = rb_entry(n, struct extent_state,
1451 rb_node);
1452 if (state->state & bits)
1453 goto got_it;
1454 n = rb_next(n);
1455 }
1456 free_extent_state(*cached_state);
1457 *cached_state = NULL;
1458 goto out;
1459 }
1460 free_extent_state(*cached_state);
1461 *cached_state = NULL;
1462 }
1463
1464 state = find_first_extent_bit_state(tree, start, bits);
1465got_it:
1466 if (state) {
1467 cache_state_if_flags(state, cached_state, 0);
1468 *start_ret = state->start;
1469 *end_ret = state->end;
1470 ret = 0;
1471 }
1472out:
1473 spin_unlock(&tree->lock);
1474 return ret;
1475}
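
/*
 * Iteration sketch (illustrative only): find_first_extent_bit() can be used
 * to walk every range with a given bit set, advancing past each hit:
 *
 *	u64 start = 0, found_start, found_end;
 *
 *	while (!find_first_extent_bit(tree, start, &found_start, &found_end,
 *				      EXTENT_DIRTY, NULL)) {
 *		... process [found_start, found_end] ...
 *		start = found_end + 1;
 *	}
 */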
1476
/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * non-zero is returned if we find something, 0 if nothing was in the tree
 */
1483static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1484 u64 *start, u64 *end, u64 max_bytes,
1485 struct extent_state **cached_state)
1486{
1487 struct rb_node *node;
1488 struct extent_state *state;
1489 u64 cur_start = *start;
1490 u64 found = 0;
1491 u64 total_bytes = 0;
1492
1493 spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
1499 node = tree_search(tree, cur_start);
1500 if (!node) {
1501 if (!found)
1502 *end = (u64)-1;
1503 goto out;
1504 }
1505
1506 while (1) {
1507 state = rb_entry(node, struct extent_state, rb_node);
1508 if (found && (state->start != cur_start ||
1509 (state->state & EXTENT_BOUNDARY))) {
1510 goto out;
1511 }
1512 if (!(state->state & EXTENT_DELALLOC)) {
1513 if (!found)
1514 *end = state->end;
1515 goto out;
1516 }
1517 if (!found) {
1518 *start = state->start;
1519 *cached_state = state;
1520 refcount_inc(&state->refs);
1521 }
1522 found++;
1523 *end = state->end;
1524 cur_start = state->end + 1;
1525 node = rb_next(node);
1526 total_bytes += state->end - state->start + 1;
1527 if (total_bytes >= max_bytes)
1528 break;
1529 if (!node)
1530 break;
1531 }
1532out:
1533 spin_unlock(&tree->lock);
1534 return found;
1535}
1536
1537static int __process_pages_contig(struct address_space *mapping,
1538 struct page *locked_page,
1539 pgoff_t start_index, pgoff_t end_index,
1540 unsigned long page_ops, pgoff_t *index_ret);
1541
1542static noinline void __unlock_for_delalloc(struct inode *inode,
1543 struct page *locked_page,
1544 u64 start, u64 end)
1545{
1546 unsigned long index = start >> PAGE_SHIFT;
1547 unsigned long end_index = end >> PAGE_SHIFT;
1548
1549 ASSERT(locked_page);
1550 if (index == locked_page->index && end_index == index)
1551 return;
1552
1553 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1554 PAGE_UNLOCK, NULL);
1555}
1556
1557static noinline int lock_delalloc_pages(struct inode *inode,
1558 struct page *locked_page,
1559 u64 delalloc_start,
1560 u64 delalloc_end)
1561{
1562 unsigned long index = delalloc_start >> PAGE_SHIFT;
1563 unsigned long index_ret = index;
1564 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1565 int ret;
1566
1567 ASSERT(locked_page);
1568 if (index == locked_page->index && index == end_index)
1569 return 0;
1570
1571 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1572 end_index, PAGE_LOCK, &index_ret);
1573 if (ret == -EAGAIN)
1574 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1575 (u64)index_ret << PAGE_SHIFT);
1576 return ret;
1577}
1578
/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 * and both the pages and the extent state bits for that range end up locked.
 *
 * non-zero is returned if we find something, 0 if nothing was in the tree
 */
1585STATIC u64 find_lock_delalloc_range(struct inode *inode,
1586 struct extent_io_tree *tree,
1587 struct page *locked_page, u64 *start,
1588 u64 *end, u64 max_bytes)
1589{
1590 u64 delalloc_start;
1591 u64 delalloc_end;
1592 u64 found;
1593 struct extent_state *cached_state = NULL;
1594 int ret;
1595 int loops = 0;
1596
1597again:
	/* step one, find a bunch of delalloc bytes starting at start */
1599 delalloc_start = *start;
1600 delalloc_end = 0;
1601 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1602 max_bytes, &cached_state);
1603 if (!found || delalloc_end <= *start) {
1604 *start = delalloc_start;
1605 *end = delalloc_end;
1606 free_extent_state(cached_state);
1607 return 0;
1608 }
1609
	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page
	 */
1615 if (delalloc_start < *start)
1616 delalloc_start = *start;
1617
	/*
	 * make sure to limit the number of pages we try to lock down
	 */
1621 if (delalloc_end + 1 - delalloc_start > max_bytes)
1622 delalloc_end = delalloc_start + max_bytes - 1;
1623
	/* step two, lock all the pages after the page that has start */
1625 ret = lock_delalloc_pages(inode, locked_page,
1626 delalloc_start, delalloc_end);
1627 if (ret == -EAGAIN) {
		/* some of the pages are gone, lets avoid looping by
		 * shortening the size of the delalloc range we're searching
		 */
1631 free_extent_state(cached_state);
1632 cached_state = NULL;
1633 if (!loops) {
1634 max_bytes = PAGE_SIZE;
1635 loops = 1;
1636 goto again;
1637 } else {
1638 found = 0;
1639 goto out_failed;
1640 }
1641 }
1642 BUG_ON(ret);
1643
	/* step three, lock the state bits for the whole range */
1645 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1646
	/* then test to make sure it is all still delalloc */
1648 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1649 EXTENT_DELALLOC, 1, cached_state);
1650 if (!ret) {
1651 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1652 &cached_state, GFP_NOFS);
1653 __unlock_for_delalloc(inode, locked_page,
1654 delalloc_start, delalloc_end);
1655 cond_resched();
1656 goto again;
1657 }
1658 free_extent_state(cached_state);
1659 *start = delalloc_start;
1660 *end = delalloc_end;
1661out_failed:
1662 return found;
1663}
1664
1665static int __process_pages_contig(struct address_space *mapping,
1666 struct page *locked_page,
1667 pgoff_t start_index, pgoff_t end_index,
1668 unsigned long page_ops, pgoff_t *index_ret)
1669{
1670 unsigned long nr_pages = end_index - start_index + 1;
1671 unsigned long pages_locked = 0;
1672 pgoff_t index = start_index;
1673 struct page *pages[16];
1674 unsigned ret;
1675 int err = 0;
1676 int i;
1677
1678 if (page_ops & PAGE_LOCK) {
1679 ASSERT(page_ops == PAGE_LOCK);
1680 ASSERT(index_ret && *index_ret == start_index);
1681 }
1682
1683 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1684 mapping_set_error(mapping, -EIO);
1685
1686 while (nr_pages > 0) {
1687 ret = find_get_pages_contig(mapping, index,
1688 min_t(unsigned long,
1689 nr_pages, ARRAY_SIZE(pages)), pages);
1690 if (ret == 0) {
			/*
			 * Only if we're going to lock these pages,
			 * can we find nothing at @index.
			 */
1695 ASSERT(page_ops & PAGE_LOCK);
1696 err = -EAGAIN;
1697 goto out;
1698 }
1699
1700 for (i = 0; i < ret; i++) {
1701 if (page_ops & PAGE_SET_PRIVATE2)
1702 SetPagePrivate2(pages[i]);
1703
1704 if (pages[i] == locked_page) {
1705 put_page(pages[i]);
1706 pages_locked++;
1707 continue;
1708 }
1709 if (page_ops & PAGE_CLEAR_DIRTY)
1710 clear_page_dirty_for_io(pages[i]);
1711 if (page_ops & PAGE_SET_WRITEBACK)
1712 set_page_writeback(pages[i]);
1713 if (page_ops & PAGE_SET_ERROR)
1714 SetPageError(pages[i]);
1715 if (page_ops & PAGE_END_WRITEBACK)
1716 end_page_writeback(pages[i]);
1717 if (page_ops & PAGE_UNLOCK)
1718 unlock_page(pages[i]);
1719 if (page_ops & PAGE_LOCK) {
1720 lock_page(pages[i]);
1721 if (!PageDirty(pages[i]) ||
1722 pages[i]->mapping != mapping) {
1723 unlock_page(pages[i]);
1724 put_page(pages[i]);
1725 err = -EAGAIN;
1726 goto out;
1727 }
1728 }
1729 put_page(pages[i]);
1730 pages_locked++;
1731 }
1732 nr_pages -= ret;
1733 index += ret;
1734 cond_resched();
1735 }
1736out:
1737 if (err && index_ret)
1738 *index_ret = start_index + pages_locked - 1;
1739 return err;
1740}
1741
1742void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1743 u64 delalloc_end, struct page *locked_page,
1744 unsigned clear_bits,
1745 unsigned long page_ops)
1746{
1747 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
1748 NULL, GFP_NOFS);
1749
1750 __process_pages_contig(inode->i_mapping, locked_page,
1751 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
1752 page_ops, NULL);
1753}
1754
/*
 * count the number of bytes in the tree that have a given bit(s)
 * set.  This can be fairly slow, except for EXTENT_DIRTY which is
 * cached.  The total number found is returned.
 */
1760u64 count_range_bits(struct extent_io_tree *tree,
1761 u64 *start, u64 search_end, u64 max_bytes,
1762 unsigned bits, int contig)
1763{
1764 struct rb_node *node;
1765 struct extent_state *state;
1766 u64 cur_start = *start;
1767 u64 total_bytes = 0;
1768 u64 last = 0;
1769 int found = 0;
1770
1771 if (WARN_ON(search_end <= cur_start))
1772 return 0;
1773
1774 spin_lock(&tree->lock);
1775 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1776 total_bytes = tree->dirty_bytes;
1777 goto out;
1778 }
1779
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
1783 node = tree_search(tree, cur_start);
1784 if (!node)
1785 goto out;
1786
1787 while (1) {
1788 state = rb_entry(node, struct extent_state, rb_node);
1789 if (state->start > search_end)
1790 break;
1791 if (contig && found && state->start > last + 1)
1792 break;
1793 if (state->end >= cur_start && (state->state & bits) == bits) {
1794 total_bytes += min(search_end, state->end) + 1 -
1795 max(cur_start, state->start);
1796 if (total_bytes >= max_bytes)
1797 break;
1798 if (!found) {
1799 *start = max(cur_start, state->start);
1800 found = 1;
1801 }
1802 last = state->end;
1803 } else if (contig && found) {
1804 break;
1805 }
1806 node = rb_next(node);
1807 if (!node)
1808 break;
1809 }
1810out:
1811 spin_unlock(&tree->lock);
1812 return total_bytes;
1813}
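
/*
 * Example (illustrative only): counting the delalloc bytes in the first
 * 1MiB of an io tree, starting the search at offset 0:
 *
 *	u64 start = 0;
 *	u64 bytes;
 *
 *	bytes = count_range_bits(tree, &start, SZ_1M - 1, (u64)-1,
 *				 EXTENT_DELALLOC, 0);
 *
 * If anything was found, 'start' is moved forward to the first byte that
 * actually had the bit set.
 */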
1814
/*
 * set the failrec field for a given byte offset in the tree.  If there isn't
 * an extent_state starting exactly at 'start', -ENOENT is returned.
 */
1819static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1820 struct io_failure_record *failrec)
1821{
1822 struct rb_node *node;
1823 struct extent_state *state;
1824 int ret = 0;
1825
1826 spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
1831 node = tree_search(tree, start);
1832 if (!node) {
1833 ret = -ENOENT;
1834 goto out;
1835 }
1836 state = rb_entry(node, struct extent_state, rb_node);
1837 if (state->start != start) {
1838 ret = -ENOENT;
1839 goto out;
1840 }
1841 state->failrec = failrec;
1842out:
1843 spin_unlock(&tree->lock);
1844 return ret;
1845}
1846
1847static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1848 struct io_failure_record **failrec)
1849{
1850 struct rb_node *node;
1851 struct extent_state *state;
1852 int ret = 0;
1853
1854 spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
1859 node = tree_search(tree, start);
1860 if (!node) {
1861 ret = -ENOENT;
1862 goto out;
1863 }
1864 state = rb_entry(node, struct extent_state, rb_node);
1865 if (state->start != start) {
1866 ret = -ENOENT;
1867 goto out;
1868 }
1869 *failrec = state->failrec;
1870out:
1871 spin_unlock(&tree->lock);
1872 return ret;
1873}
1874
/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
1881int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1882 unsigned bits, int filled, struct extent_state *cached)
1883{
1884 struct extent_state *state = NULL;
1885 struct rb_node *node;
1886 int bitset = 0;
1887
1888 spin_lock(&tree->lock);
1889 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1890 cached->end > start)
1891 node = &cached->rb_node;
1892 else
1893 node = tree_search(tree, start);
1894 while (node && start <= end) {
1895 state = rb_entry(node, struct extent_state, rb_node);
1896
1897 if (filled && state->start > start) {
1898 bitset = 0;
1899 break;
1900 }
1901
1902 if (state->start > end)
1903 break;
1904
1905 if (state->state & bits) {
1906 bitset = 1;
1907 if (!filled)
1908 break;
1909 } else if (filled) {
1910 bitset = 0;
1911 break;
1912 }
1913
1914 if (state->end == (u64)-1)
1915 break;
1916
1917 start = state->end + 1;
1918 if (start > end)
1919 break;
1920 node = rb_next(node);
1921 if (!node) {
1922 if (filled)
1923 bitset = 0;
1924 break;
1925 }
1926 }
1927 spin_unlock(&tree->lock);
1928 return bitset;
1929}
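
/*
 * Example (illustrative only): checking whether every byte of a page-sized
 * range is marked uptodate, the way check_page_uptodate() below does:
 *
 *	if (test_range_bit(tree, start, start + PAGE_SIZE - 1,
 *			   EXTENT_UPTODATE, 1, NULL))
 *		... the whole range is uptodate ...
 */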
1930
/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
1935static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1936{
1937 u64 start = page_offset(page);
1938 u64 end = start + PAGE_SIZE - 1;
1939 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1940 SetPageUptodate(page);
1941}
1942
1943int free_io_failure(struct extent_io_tree *failure_tree,
1944 struct extent_io_tree *io_tree,
1945 struct io_failure_record *rec)
1946{
1947 int ret;
1948 int err = 0;
1949
1950 set_state_failrec(failure_tree, rec->start, NULL);
1951 ret = clear_extent_bits(failure_tree, rec->start,
1952 rec->start + rec->len - 1,
1953 EXTENT_LOCKED | EXTENT_DIRTY);
1954 if (ret)
1955 err = ret;
1956
1957 ret = clear_extent_bits(io_tree, rec->start,
1958 rec->start + rec->len - 1,
1959 EXTENT_DAMAGED);
1960 if (ret && !err)
1961 err = ret;
1962
1963 kfree(rec);
1964 return err;
1965}
1966
/*
 * this bypasses the standard btrfs submit functions deliberately, as
 * the standard behavior is to write all copies in a raid setup. here we only
 * want to write the one bad copy. so we do the mapping for ourselves and issue
 * submit_bio directly.
 * to avoid any synchronization issues, wait for the data after writing, which
 * actually prevents the read that triggered the error from finishing.
 * currently, there can be no more than two copies of every data bit. thus,
 * exactly one rewrite is required.
 */
1977int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
1978 u64 length, u64 logical, struct page *page,
1979 unsigned int pg_offset, int mirror_num)
1980{
1981 struct bio *bio;
1982 struct btrfs_device *dev;
1983 u64 map_length = 0;
1984 u64 sector;
1985 struct btrfs_bio *bbio = NULL;
1986 int ret;
1987
1988 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
1989 BUG_ON(!mirror_num);
1990
1991 bio = btrfs_io_bio_alloc(1);
1992 bio->bi_iter.bi_size = 0;
1993 map_length = length;
1994
	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are doing the
	 * read repair operation.
	 */
2000 btrfs_bio_counter_inc_blocked(fs_info);
2001 if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num)) {
		/*
		 * For parity based profiles (raid5/6) map the logical address
		 * for reading instead of writing: that returns the single
		 * data stripe that holds this copy (mirror_num == 1), and the
		 * write below then targets just that stripe.
		 */
2008 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2009 &map_length, &bbio, 0);
2010 if (ret) {
2011 btrfs_bio_counter_dec(fs_info);
2012 bio_put(bio);
2013 return -EIO;
2014 }
2015 ASSERT(bbio->mirror_num == 1);
2016 } else {
2017 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2018 &map_length, &bbio, mirror_num);
2019 if (ret) {
2020 btrfs_bio_counter_dec(fs_info);
2021 bio_put(bio);
2022 return -EIO;
2023 }
2024 BUG_ON(mirror_num != bbio->mirror_num);
2025 }
2026
2027 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
2028 bio->bi_iter.bi_sector = sector;
2029 dev = bbio->stripes[bbio->mirror_num - 1].dev;
2030 btrfs_put_bbio(bbio);
2031 if (!dev || !dev->bdev || !dev->writeable) {
2032 btrfs_bio_counter_dec(fs_info);
2033 bio_put(bio);
2034 return -EIO;
2035 }
2036 bio->bi_bdev = dev->bdev;
2037 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2038 bio_add_page(bio, page, length, pg_offset);
2039
2040 if (btrfsic_submit_bio_wait(bio)) {
		/* try to remap that extent elsewhere? */
2042 btrfs_bio_counter_dec(fs_info);
2043 bio_put(bio);
2044 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2045 return -EIO;
2046 }
2047
2048 btrfs_info_rl_in_rcu(fs_info,
2049 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2050 ino, start,
2051 rcu_str_deref(dev->name), sector);
2052 btrfs_bio_counter_dec(fs_info);
2053 bio_put(bio);
2054 return 0;
2055}
2056
2057int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
2058 struct extent_buffer *eb, int mirror_num)
2059{
2060 u64 start = eb->start;
2061 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2062 int ret = 0;
2063
2064 if (fs_info->sb->s_flags & MS_RDONLY)
2065 return -EROFS;
2066
2067 for (i = 0; i < num_pages; i++) {
2068 struct page *p = eb->pages[i];
2069
2070 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2071 start - page_offset(p), mirror_num);
2072 if (ret)
2073 break;
2074 start += PAGE_SIZE;
2075 }
2076
2077 return ret;
2078}
2079
/*
 * each time an IO finishes, we do a fast check in the IO failure tree
 * to see if we need to process or clean up an io_failure_record
 */
2084int clean_io_failure(struct btrfs_fs_info *fs_info,
2085 struct extent_io_tree *failure_tree,
2086 struct extent_io_tree *io_tree, u64 start,
2087 struct page *page, u64 ino, unsigned int pg_offset)
2088{
2089 u64 private;
2090 struct io_failure_record *failrec;
2091 struct extent_state *state;
2092 int num_copies;
2093 int ret;
2094
2095 private = 0;
2096 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2097 EXTENT_DIRTY, 0);
2098 if (!ret)
2099 return 0;
2100
2101 ret = get_state_failrec(failure_tree, start, &failrec);
2102 if (ret)
2103 return 0;
2104
2105 BUG_ON(!failrec->this_mirror);
2106
2107 if (failrec->in_validation) {
		/* there was no real error, just free the record */
2109 btrfs_debug(fs_info,
2110 "clean_io_failure: freeing dummy error at %llu",
2111 failrec->start);
2112 goto out;
2113 }
2114 if (fs_info->sb->s_flags & MS_RDONLY)
2115 goto out;
2116
2117 spin_lock(&io_tree->lock);
2118 state = find_first_extent_bit_state(io_tree,
2119 failrec->start,
2120 EXTENT_LOCKED);
2121 spin_unlock(&io_tree->lock);
2122
2123 if (state && state->start <= failrec->start &&
2124 state->end >= failrec->start + failrec->len - 1) {
2125 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2126 failrec->len);
2127 if (num_copies > 1) {
2128 repair_io_failure(fs_info, ino, start, failrec->len,
2129 failrec->logical, page, pg_offset,
2130 failrec->failed_mirror);
2131 }
2132 }
2133
2134out:
2135 free_io_failure(failure_tree, io_tree, failrec);
2136
2137 return 0;
2138}
2139
/*
 * Can be called when
 * - hold extent lock
 * - under ordered extent
 * - the inode is freeing
 */
2146void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2147{
2148 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2149 struct io_failure_record *failrec;
2150 struct extent_state *state, *next;
2151
2152 if (RB_EMPTY_ROOT(&failure_tree->state))
2153 return;
2154
2155 spin_lock(&failure_tree->lock);
2156 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2157 while (state) {
2158 if (state->start > end)
2159 break;
2160
2161 ASSERT(state->end <= end);
2162
2163 next = next_state(state);
2164
2165 failrec = state->failrec;
2166 free_extent_state(state);
2167 kfree(failrec);
2168
2169 state = next;
2170 }
2171 spin_unlock(&failure_tree->lock);
2172}
2173
2174int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2175 struct io_failure_record **failrec_ret)
2176{
2177 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2178 struct io_failure_record *failrec;
2179 struct extent_map *em;
2180 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2181 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2182 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2183 int ret;
2184 u64 logical;
2185
2186 ret = get_state_failrec(failure_tree, start, &failrec);
2187 if (ret) {
2188 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2189 if (!failrec)
2190 return -ENOMEM;
2191
2192 failrec->start = start;
2193 failrec->len = end - start + 1;
2194 failrec->this_mirror = 0;
2195 failrec->bio_flags = 0;
2196 failrec->in_validation = 0;
2197
2198 read_lock(&em_tree->lock);
2199 em = lookup_extent_mapping(em_tree, start, failrec->len);
2200 if (!em) {
2201 read_unlock(&em_tree->lock);
2202 kfree(failrec);
2203 return -EIO;
2204 }
2205
2206 if (em->start > start || em->start + em->len <= start) {
2207 free_extent_map(em);
2208 em = NULL;
2209 }
2210 read_unlock(&em_tree->lock);
2211 if (!em) {
2212 kfree(failrec);
2213 return -EIO;
2214 }
2215
2216 logical = start - em->start;
2217 logical = em->block_start + logical;
2218 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2219 logical = em->block_start;
2220 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2221 extent_set_compress_type(&failrec->bio_flags,
2222 em->compress_type);
2223 }
2224
2225 btrfs_debug(fs_info,
2226 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2227 logical, start, failrec->len);
2228
2229 failrec->logical = logical;
2230 free_extent_map(em);
2231
		/* set the bits in the private failure tree */
2233 ret = set_extent_bits(failure_tree, start, end,
2234 EXTENT_LOCKED | EXTENT_DIRTY);
2235 if (ret >= 0)
2236 ret = set_state_failrec(failure_tree, start, failrec);
2237
2238 if (ret >= 0)
2239 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2240 if (ret < 0) {
2241 kfree(failrec);
2242 return ret;
2243 }
2244 } else {
2245 btrfs_debug(fs_info,
2246 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2247 failrec->logical, failrec->start, failrec->len,
2248 failrec->in_validation);
2249
		/*
		 * when data can be on disk more than twice, add to failrec here
		 * (e.g. with a list for failed_mirror) to make
		 * clean_io_failure() clean all those errors at once.
		 */
2254 }
2255
2256 *failrec_ret = failrec;
2257
2258 return 0;
2259}
2260
2261bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2262 struct io_failure_record *failrec, int failed_mirror)
2263{
2264 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2265 int num_copies;
2266
2267 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2268 if (num_copies == 1) {
		/*
		 * we only have a single copy of the data, so don't bother with
		 * all the retry and error correction code that follows. no
		 * matter what the error is, it is very likely to persist.
		 */
2274 btrfs_debug(fs_info,
2275 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2276 num_copies, failrec->this_mirror, failed_mirror);
2277 return false;
2278 }
2279
	/*
	 * there are two premises:
	 *	a) deliver good data to the caller
	 *	b) correct the bad sectors on disk
	 */
2285 if (failed_bio->bi_vcnt > 1) {
		/*
		 * to fulfill b), we need to know the exact failing sectors, as
		 * we don't want to rewrite any more than the failed ones. thus,
		 * we need separate read requests for the failed bio
		 *
		 * if the following BUG_ON triggers, our validation request got
		 * merged. we need separate requests for our algorithm to work.
		 */
2294 BUG_ON(failrec->in_validation);
2295 failrec->in_validation = 1;
2296 failrec->this_mirror = failed_mirror;
2297 } else {
		/*
		 * we're ready to fulfill a) and b) alongside. get a good copy
		 * of the failed sector and if we succeed, we have setup
		 * everything for repair_io_failure to do the rest for us.
		 */
2303 if (failrec->in_validation) {
2304 BUG_ON(failrec->this_mirror != failed_mirror);
2305 failrec->in_validation = 0;
2306 failrec->this_mirror = 0;
2307 }
2308 failrec->failed_mirror = failed_mirror;
2309 failrec->this_mirror++;
2310 if (failrec->this_mirror == failed_mirror)
2311 failrec->this_mirror++;
2312 }
2313
2314 if (failrec->this_mirror > num_copies) {
2315 btrfs_debug(fs_info,
2316 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2317 num_copies, failrec->this_mirror, failed_mirror);
2318 return false;
2319 }
2320
2321 return true;
2322}
2323
2324
2325struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2326 struct io_failure_record *failrec,
2327 struct page *page, int pg_offset, int icsum,
2328 bio_end_io_t *endio_func, void *data)
2329{
2330 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2331 struct bio *bio;
2332 struct btrfs_io_bio *btrfs_failed_bio;
2333 struct btrfs_io_bio *btrfs_bio;
2334
2335 bio = btrfs_io_bio_alloc(1);
2336 bio->bi_end_io = endio_func;
2337 bio->bi_iter.bi_sector = failrec->logical >> 9;
2338 bio->bi_bdev = fs_info->fs_devices->latest_bdev;
2339 bio->bi_iter.bi_size = 0;
2340 bio->bi_private = data;
2341
2342 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2343 if (btrfs_failed_bio->csum) {
2344 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2345
2346 btrfs_bio = btrfs_io_bio(bio);
2347 btrfs_bio->csum = btrfs_bio->csum_inline;
2348 icsum *= csum_size;
2349 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2350 csum_size);
2351 }
2352
2353 bio_add_page(bio, page, failrec->len, pg_offset);
2354
2355 return bio;
2356}
2357
/*
 * this is a generic handler for readpage errors (default
 * readpage_io_failed_hook). if other copies exist, read those and write back
 * good data to the failed position. does not investigate in remapping the
 * failed extent elsewhere, hoping the device will be smart enough to do this
 * as needed
 */
2366static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2367 struct page *page, u64 start, u64 end,
2368 int failed_mirror)
2369{
2370 struct io_failure_record *failrec;
2371 struct inode *inode = page->mapping->host;
2372 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2373 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2374 struct bio *bio;
2375 int read_mode = 0;
2376 blk_status_t status;
2377 int ret;
2378
2379 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2380
2381 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2382 if (ret)
2383 return ret;
2384
2385 if (!btrfs_check_repairable(inode, failed_bio, failrec,
2386 failed_mirror)) {
2387 free_io_failure(failure_tree, tree, failrec);
2388 return -EIO;
2389 }
2390
2391 if (failed_bio->bi_vcnt > 1)
2392 read_mode |= REQ_FAILFAST_DEV;
2393
2394 phy_offset >>= inode->i_sb->s_blocksize_bits;
2395 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2396 start - page_offset(page),
2397 (int)phy_offset, failed_bio->bi_end_io,
2398 NULL);
2399 bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
2400
2401 btrfs_debug(btrfs_sb(inode->i_sb),
2402 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2403 read_mode, failrec->this_mirror, failrec->in_validation);
2404
2405 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2406 failrec->bio_flags, 0);
2407 if (status) {
2408 free_io_failure(failure_tree, tree, failrec);
2409 bio_put(bio);
2410 ret = blk_status_to_errno(status);
2411 }
2412
2413 return ret;
2414}
2415
2416
/* lots and lots of room for performance fixes in the end_bio funcs */
2418void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2419{
2420 int uptodate = (err == 0);
2421 struct extent_io_tree *tree;
2422 int ret = 0;
2423
2424 tree = &BTRFS_I(page->mapping->host)->io_tree;
2425
2426 if (tree->ops && tree->ops->writepage_end_io_hook)
2427 tree->ops->writepage_end_io_hook(page, start, end, NULL,
2428 uptodate);
2429
2430 if (!uptodate) {
2431 ClearPageUptodate(page);
2432 SetPageError(page);
2433 ret = err < 0 ? err : -EIO;
2434 mapping_set_error(page->mapping, ret);
2435 }
2436}
2437
/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2447static void end_bio_extent_writepage(struct bio *bio)
2448{
2449 int error = blk_status_to_errno(bio->bi_status);
2450 struct bio_vec *bvec;
2451 u64 start;
2452 u64 end;
2453 int i;
2454
2455 ASSERT(!bio_flagged(bio, BIO_CLONED));
2456 bio_for_each_segment_all(bvec, bio, i) {
2457 struct page *page = bvec->bv_page;
2458 struct inode *inode = page->mapping->host;
2459 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2460
		/* We always issue full-page writes, but if some block
		 * in a page fails, blk_update_request() will advance
		 * bv_offset and adjust bv_len to compensate.  Print a
		 * warning for nonzero offsets, and an error if they
		 * don't add up to a full page.
		 */
2466 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2467 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2468 btrfs_err(fs_info,
2469 "partial page write in btrfs with offset %u and length %u",
2470 bvec->bv_offset, bvec->bv_len);
2471 else
2472 btrfs_info(fs_info,
2473 "incomplete page write in btrfs with offset %u and length %u",
2474 bvec->bv_offset, bvec->bv_len);
2475 }
2476
2477 start = page_offset(page);
2478 end = start + bvec->bv_offset + bvec->bv_len - 1;
2479
2480 end_extent_writepage(page, error, start, end);
2481 end_page_writeback(page);
2482 }
2483
2484 bio_put(bio);
2485}
2486
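/*
 * Set the uptodate bit on the just-completed range (when the tree tracks
 * uptodate state) and unlock it, reusing one cached extent_state for both
 * operations.
 */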
2487static void
2488endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2489 int uptodate)
2490{
2491 struct extent_state *cached = NULL;
2492 u64 end = start + len - 1;
2493
2494 if (uptodate && tree->track_uptodate)
2495 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2496 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2497}
2498
/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2510static void end_bio_extent_readpage(struct bio *bio)
2511{
2512 struct bio_vec *bvec;
2513 int uptodate = !bio->bi_status;
2514 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2515 struct extent_io_tree *tree, *failure_tree;
2516 u64 offset = 0;
2517 u64 start;
2518 u64 end;
2519 u64 len;
2520 u64 extent_start = 0;
2521 u64 extent_len = 0;
2522 int mirror;
2523 int ret;
2524 int i;
2525
2526 ASSERT(!bio_flagged(bio, BIO_CLONED));
2527 bio_for_each_segment_all(bvec, bio, i) {
2528 struct page *page = bvec->bv_page;
2529 struct inode *inode = page->mapping->host;
2530 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2531
2532 btrfs_debug(fs_info,
2533 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2534 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2535 io_bio->mirror_num);
2536 tree = &BTRFS_I(inode)->io_tree;
2537 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2538
		/*
		 * We always issue full-page reads, but if some block in a
		 * page fails to read, blk_update_request() will advance
		 * bv_offset and adjust bv_len to compensate.  Print a warning
		 * for unexpected offsets, and an error if they don't add up
		 * to a full page.
		 */
2544 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2545 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2546 btrfs_err(fs_info,
2547 "partial page read in btrfs with offset %u and length %u",
2548 bvec->bv_offset, bvec->bv_len);
2549 else
2550 btrfs_info(fs_info,
2551 "incomplete page read in btrfs with offset %u and length %u",
2552 bvec->bv_offset, bvec->bv_len);
2553 }
2554
2555 start = page_offset(page);
2556 end = start + bvec->bv_offset + bvec->bv_len - 1;
2557 len = bvec->bv_len;
2558
2559 mirror = io_bio->mirror_num;
2560 if (likely(uptodate && tree->ops)) {
2561 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2562 page, start, end,
2563 mirror);
2564 if (ret)
2565 uptodate = 0;
2566 else
2567 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2568 failure_tree, tree, start,
2569 page,
2570 btrfs_ino(BTRFS_I(inode)), 0);
2571 }
2572
2573 if (likely(uptodate))
2574 goto readpage_ok;
2575
2576 if (tree->ops) {
2577 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2578 if (ret == -EAGAIN) {
				/*
				 * The generic bio_readpage_error handles
				 * errors the following way: if possible, new
				 * read requests are created and submitted and
				 * will end up in end_bio_extent_readpage as
				 * well (if we're lucky, not in the !uptodate
				 * case).  In that case it returns 0 and we
				 * just go on with the next page in our bio.
				 * If it can't handle the error it will return
				 * -EIO and we remain responsible for that
				 * page.
				 */
2593 ret = bio_readpage_error(bio, offset, page,
2594 start, end, mirror);
2595 if (ret == 0) {
2596 uptodate = !bio->bi_status;
2597 offset += len;
2598 continue;
2599 }
2600 }
2601
			/*
			 * Only -EIO is expected at this point: either the
			 * failed hook could not fix the error, or the repair
			 * submission above itself failed.
			 */
2607 ASSERT(ret == -EIO);
2608 }
2609readpage_ok:
2610 if (likely(uptodate)) {
2611 loff_t i_size = i_size_read(inode);
2612 pgoff_t end_index = i_size >> PAGE_SHIFT;
2613 unsigned off;

			/* Zero out the end if this page straddles i_size */
2616 off = i_size & (PAGE_SIZE-1);
2617 if (page->index == end_index && off)
2618 zero_user_segment(page, off, PAGE_SIZE);
2619 SetPageUptodate(page);
2620 } else {
2621 ClearPageUptodate(page);
2622 SetPageError(page);
2623 }
2624 unlock_page(page);
2625 offset += len;
2626
2627 if (unlikely(!uptodate)) {
2628 if (extent_len) {
2629 endio_readpage_release_extent(tree,
2630 extent_start,
2631 extent_len, 1);
2632 extent_start = 0;
2633 extent_len = 0;
2634 }
2635 endio_readpage_release_extent(tree, start,
2636 end - start + 1, 0);
2637 } else if (!extent_len) {
2638 extent_start = start;
2639 extent_len = end + 1 - start;
2640 } else if (extent_start + extent_len == start) {
2641 extent_len += end + 1 - start;
2642 } else {
2643 endio_readpage_release_extent(tree, extent_start,
2644 extent_len, uptodate);
2645 extent_start = start;
2646 extent_len = end + 1 - start;
2647 }
2648 }
2649
2650 if (extent_len)
2651 endio_readpage_release_extent(tree, extent_start, extent_len,
2652 uptodate);
2653 if (io_bio->end_io)
2654 io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
2655 bio_put(bio);
2656}
2657
/*
 * Initialize the members up to but not including 'bio'.  Use after allocating
 * a new bio by bio_alloc_bioset.
 */
2663static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2664{
2665 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2666}
2667
/*
 * The following helpers allocate a bio.  As it's backed by a bioset, it'll
 * never fail.
 */
2673struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
2674{
2675 struct bio *bio;
2676
2677 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
2678 bio->bi_bdev = bdev;
2679 bio->bi_iter.bi_sector = first_byte >> 9;
2680 btrfs_io_bio_init(btrfs_io_bio(bio));
2681 return bio;
2682}
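
/*
 * Typical use of the allocation helper above, a rough sketch of what
 * submit_extent_page() does for real further down in this file:
 *
 *	bio = btrfs_bio_alloc(bdev, sector << 9);
 *	bio_add_page(bio, page, page_size, offset);
 *	bio->bi_end_io = end_io_func;
 *	bio->bi_private = tree;
 *
 * followed by submit_one_bio() once the bio is full or the run of
 * contiguous pages ends.
 */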
2683
2684struct bio *btrfs_bio_clone(struct bio *bio)
2685{
2686 struct btrfs_io_bio *btrfs_bio;
2687 struct bio *new;
2688
2689
2690 new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
2691 btrfs_bio = btrfs_io_bio(new);
2692 btrfs_io_bio_init(btrfs_bio);
2693 btrfs_bio->iter = bio->bi_iter;
2694 return new;
2695}
2696
2697struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2698{
2699 struct bio *bio;
2700
2701
2702 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
2703 btrfs_io_bio_init(btrfs_io_bio(bio));
2704 return bio;
2705}
2706
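/*
 * Clone a section of @orig.  @offset and @size are in bytes and are converted
 * to 512-byte sectors for bio_trim(), so both are expected to be sector
 * aligned.
 */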
2707struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2708{
2709 struct bio *bio;
2710 struct btrfs_io_bio *btrfs_bio;
2711
2712
2713 bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
2714 ASSERT(bio);
2715
2716 btrfs_bio = btrfs_io_bio(bio);
2717 btrfs_io_bio_init(btrfs_bio);
2718
2719 bio_trim(bio, offset >> 9, size >> 9);
2720 btrfs_bio->iter = bio->bi_iter;
2721 return bio;
2722}
2723
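/*
 * Hand a completed bio to the filesystem: trees with ops route it through
 * their submit_bio_hook (checksumming, mirror selection), otherwise it goes
 * straight to btrfsic_submit_bio().  Returns a negative errno on failure.
 */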
2724static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2725 unsigned long bio_flags)
2726{
2727 blk_status_t ret = 0;
2728 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2729 struct page *page = bvec->bv_page;
2730 struct extent_io_tree *tree = bio->bi_private;
2731 u64 start;
2732
2733 start = page_offset(page) + bvec->bv_offset;
2734
2735 bio->bi_private = NULL;
2736 bio_get(bio);
2737
2738 if (tree->ops)
2739 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
2740 mirror_num, bio_flags, start);
2741 else
2742 btrfsic_submit_bio(bio);
2743
2744 bio_put(bio);
2745 return blk_status_to_errno(ret);
2746}
2747
2748static int merge_bio(struct extent_io_tree *tree, struct page *page,
2749 unsigned long offset, size_t size, struct bio *bio,
2750 unsigned long bio_flags)
2751{
2752 int ret = 0;
2753 if (tree->ops)
2754 ret = tree->ops->merge_bio_hook(page, offset, size, bio,
2755 bio_flags);
2756 return ret;
2757
2758}
2759
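/*
 * Add @page to the bio in *bio_ret, submitting the old bio first when the new
 * page is not contiguous with it, when the bio flags differ, when the merge
 * hook refuses the page, or when @force_bio_submit is set.  A rough sketch of
 * the contract with callers such as __do_readpage():
 *
 *	struct bio *bio = NULL;
 *
 *	for each block:
 *		submit_extent_page(REQ_OP_READ, 0, tree, NULL, page, sector,
 *				   len, pg_offset, bdev, &bio, end_io, ...);
 *	if (bio)
 *		submit_one_bio(bio, mirror_num, bio_flags);
 *
 * On success the open bio is left in *bio_ret for the next call; submitting
 * the final bio is the caller's responsibility.
 */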
2760static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
2761 struct writeback_control *wbc,
2762 struct page *page, sector_t sector,
2763 size_t size, unsigned long offset,
2764 struct block_device *bdev,
2765 struct bio **bio_ret,
2766 bio_end_io_t end_io_func,
2767 int mirror_num,
2768 unsigned long prev_bio_flags,
2769 unsigned long bio_flags,
2770 bool force_bio_submit)
2771{
2772 int ret = 0;
2773 struct bio *bio;
2774 int contig = 0;
2775 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
2776 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2777
2778 if (bio_ret && *bio_ret) {
2779 bio = *bio_ret;
2780 if (old_compressed)
2781 contig = bio->bi_iter.bi_sector == sector;
2782 else
2783 contig = bio_end_sector(bio) == sector;
2784
2785 if (prev_bio_flags != bio_flags || !contig ||
2786 force_bio_submit ||
2787 merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
2788 bio_add_page(bio, page, page_size, offset) < page_size) {
2789 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2790 if (ret < 0) {
2791 *bio_ret = NULL;
2792 return ret;
2793 }
2794 bio = NULL;
2795 } else {
2796 if (wbc)
2797 wbc_account_io(wbc, page, page_size);
2798 return 0;
2799 }
2800 }
2801
2802 bio = btrfs_bio_alloc(bdev, sector << 9);
2803 bio_add_page(bio, page, page_size, offset);
2804 bio->bi_end_io = end_io_func;
2805 bio->bi_private = tree;
2806 bio->bi_write_hint = page->mapping->host->i_write_hint;
2807 bio_set_op_attrs(bio, op, op_flags);
2808 if (wbc) {
2809 wbc_init_bio(wbc, bio);
2810 wbc_account_io(wbc, page, page_size);
2811 }
2812
2813 if (bio_ret)
2814 *bio_ret = bio;
2815 else
2816 ret = submit_one_bio(bio, mirror_num, bio_flags);
2817
2818 return ret;
2819}
2820
2821static void attach_extent_buffer_page(struct extent_buffer *eb,
2822 struct page *page)
2823{
2824 if (!PagePrivate(page)) {
2825 SetPagePrivate(page);
2826 get_page(page);
2827 set_page_private(page, (unsigned long)eb);
2828 } else {
2829 WARN_ON(page->private != (unsigned long)eb);
2830 }
2831}
2832
2833void set_page_extent_mapped(struct page *page)
2834{
2835 if (!PagePrivate(page)) {
2836 SetPagePrivate(page);
2837 get_page(page);
2838 set_page_private(page, EXTENT_PAGE_PRIVATE);
2839 }
2840}
2841
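/*
 * Look up the extent map covering @start, reusing *em_cached when it still
 * covers the range; otherwise drop the cached reference and cache the newly
 * looked up map.  The caller owns a reference on the returned extent map.
 */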
2842static struct extent_map *
2843__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2844 u64 start, u64 len, get_extent_t *get_extent,
2845 struct extent_map **em_cached)
2846{
2847 struct extent_map *em;
2848
2849 if (em_cached && *em_cached) {
2850 em = *em_cached;
2851 if (extent_map_in_tree(em) && start >= em->start &&
2852 start < extent_map_end(em)) {
2853 refcount_inc(&em->refs);
2854 return em;
2855 }
2856
2857 free_extent_map(em);
2858 *em_cached = NULL;
2859 }
2860
2861 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
2862 if (em_cached && !IS_ERR_OR_NULL(em)) {
2863 BUG_ON(*em_cached);
2864 refcount_inc(&em->refs);
2865 *em_cached = em;
2866 }
2867 return em;
2868}
2869

/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree and are removed when the IO is done (by the end_io handlers).
 *
 * Returns 0 on success, otherwise returns an error.
 */
2876static int __do_readpage(struct extent_io_tree *tree,
2877 struct page *page,
2878 get_extent_t *get_extent,
2879 struct extent_map **em_cached,
2880 struct bio **bio, int mirror_num,
2881 unsigned long *bio_flags, int read_flags,
2882 u64 *prev_em_start)
2883{
2884 struct inode *inode = page->mapping->host;
2885 u64 start = page_offset(page);
2886 u64 page_end = start + PAGE_SIZE - 1;
2887 u64 end;
2888 u64 cur = start;
2889 u64 extent_offset;
2890 u64 last_byte = i_size_read(inode);
2891 u64 block_start;
2892 u64 cur_end;
2893 sector_t sector;
2894 struct extent_map *em;
2895 struct block_device *bdev;
2896 int ret = 0;
2897 int nr = 0;
2898 size_t pg_offset = 0;
2899 size_t iosize;
2900 size_t disk_io_size;
2901 size_t blocksize = inode->i_sb->s_blocksize;
2902 unsigned long this_bio_flag = 0;
2903
2904 set_page_extent_mapped(page);
2905
2906 end = page_end;
2907 if (!PageUptodate(page)) {
2908 if (cleancache_get_page(page) == 0) {
2909 BUG_ON(blocksize != PAGE_SIZE);
2910 unlock_extent(tree, start, end);
2911 goto out;
2912 }
2913 }
2914
2915 if (page->index == last_byte >> PAGE_SHIFT) {
2916 char *userpage;
2917 size_t zero_offset = last_byte & (PAGE_SIZE - 1);
2918
2919 if (zero_offset) {
2920 iosize = PAGE_SIZE - zero_offset;
2921 userpage = kmap_atomic(page);
2922 memset(userpage + zero_offset, 0, iosize);
2923 flush_dcache_page(page);
2924 kunmap_atomic(userpage);
2925 }
2926 }
2927 while (cur <= end) {
2928 bool force_bio_submit = false;
2929
2930 if (cur >= last_byte) {
2931 char *userpage;
2932 struct extent_state *cached = NULL;
2933
2934 iosize = PAGE_SIZE - pg_offset;
2935 userpage = kmap_atomic(page);
2936 memset(userpage + pg_offset, 0, iosize);
2937 flush_dcache_page(page);
2938 kunmap_atomic(userpage);
2939 set_extent_uptodate(tree, cur, cur + iosize - 1,
2940 &cached, GFP_NOFS);
2941 unlock_extent_cached(tree, cur,
2942 cur + iosize - 1,
2943 &cached, GFP_NOFS);
2944 break;
2945 }
2946 em = __get_extent_map(inode, page, pg_offset, cur,
2947 end - cur + 1, get_extent, em_cached);
2948 if (IS_ERR_OR_NULL(em)) {
2949 SetPageError(page);
2950 unlock_extent(tree, cur, end);
2951 break;
2952 }
2953 extent_offset = cur - em->start;
2954 BUG_ON(extent_map_end(em) <= cur);
2955 BUG_ON(end < cur);
2956
2957 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2958 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2959 extent_set_compress_type(&this_bio_flag,
2960 em->compress_type);
2961 }
2962
2963 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2964 cur_end = min(extent_map_end(em) - 1, end);
2965 iosize = ALIGN(iosize, blocksize);
2966 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2967 disk_io_size = em->block_len;
2968 sector = em->block_start >> 9;
2969 } else {
2970 sector = (em->block_start + extent_offset) >> 9;
2971 disk_io_size = iosize;
2972 }
2973 bdev = em->bdev;
2974 block_start = em->block_start;
2975 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2976 block_start = EXTENT_MAP_HOLE;
2977
		/*
		 * If this page maps to a compressed extent and the previous
		 * page was covered by a different extent map (different
		 * orig_start), force submission of the current bio.  Two file
		 * ranges that point to the same compressed extent on disk
		 * (e.g. via clone/reflink) would otherwise pass the
		 * contiguity check in submit_extent_page() - both target the
		 * compressed extent's disk start - and share a bio, and the
		 * compressed read completion would then copy decompressed
		 * data into the second range's pages using the first range's
		 * file offset, returning wrong data.  Remember this extent's
		 * orig_start so the next iteration can do the same check.
		 */
3012 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3013 prev_em_start && *prev_em_start != (u64)-1 &&
3014 *prev_em_start != em->orig_start)
3015 force_bio_submit = true;
3016
3017 if (prev_em_start)
3018 *prev_em_start = em->orig_start;
3019
3020 free_extent_map(em);
3021 em = NULL;

		/* we've found a hole, just zero and go on */
3024 if (block_start == EXTENT_MAP_HOLE) {
3025 char *userpage;
3026 struct extent_state *cached = NULL;
3027
3028 userpage = kmap_atomic(page);
3029 memset(userpage + pg_offset, 0, iosize);
3030 flush_dcache_page(page);
3031 kunmap_atomic(userpage);
3032
3033 set_extent_uptodate(tree, cur, cur + iosize - 1,
3034 &cached, GFP_NOFS);
3035 unlock_extent_cached(tree, cur,
3036 cur + iosize - 1,
3037 &cached, GFP_NOFS);
3038 cur = cur + iosize;
3039 pg_offset += iosize;
3040 continue;
3041 }
3042
3043 if (test_range_bit(tree, cur, cur_end,
3044 EXTENT_UPTODATE, 1, NULL)) {
3045 check_page_uptodate(tree, page);
3046 unlock_extent(tree, cur, cur + iosize - 1);
3047 cur = cur + iosize;
3048 pg_offset += iosize;
3049 continue;
3050 }
3051
		/*
		 * we have an inline extent but it didn't get marked up
		 * to date.  Error and just be done with it.
		 */
3054 if (block_start == EXTENT_MAP_INLINE) {
3055 SetPageError(page);
3056 unlock_extent(tree, cur, cur + iosize - 1);
3057 cur = cur + iosize;
3058 pg_offset += iosize;
3059 continue;
3060 }
3061
3062 ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
3063 page, sector, disk_io_size, pg_offset,
3064 bdev, bio,
3065 end_bio_extent_readpage, mirror_num,
3066 *bio_flags,
3067 this_bio_flag,
3068 force_bio_submit);
3069 if (!ret) {
3070 nr++;
3071 *bio_flags = this_bio_flag;
3072 } else {
3073 SetPageError(page);
3074 unlock_extent(tree, cur, cur + iosize - 1);
3075 goto out;
3076 }
3077 cur = cur + iosize;
3078 pg_offset += iosize;
3079 }
3080out:
3081 if (!nr) {
3082 if (!PageError(page))
3083 SetPageUptodate(page);
3084 unlock_page(page);
3085 }
3086 return ret;
3087}
3088
3089static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3090 struct page *pages[], int nr_pages,
3091 u64 start, u64 end,
3092 get_extent_t *get_extent,
3093 struct extent_map **em_cached,
3094 struct bio **bio, int mirror_num,
3095 unsigned long *bio_flags,
3096 u64 *prev_em_start)
3097{
3098 struct inode *inode;
3099 struct btrfs_ordered_extent *ordered;
3100 int index;
3101
3102 inode = pages[0]->mapping->host;
3103 while (1) {
3104 lock_extent(tree, start, end);
3105 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3106 end - start + 1);
3107 if (!ordered)
3108 break;
3109 unlock_extent(tree, start, end);
3110 btrfs_start_ordered_extent(inode, ordered, 1);
3111 btrfs_put_ordered_extent(ordered);
3112 }
3113
3114 for (index = 0; index < nr_pages; index++) {
3115 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3116 mirror_num, bio_flags, 0, prev_em_start);
3117 put_page(pages[index]);
3118 }
3119}
3120
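/*
 * Group the pages into runs that are contiguous in the file and hand each run
 * to __do_contiguous_readpages().  For example (hypothetical offsets), pages
 * at file offsets 0, 4K, 8K and 64K produce two calls: one covering
 * [0, 12K - 1] and one covering [64K, 68K - 1].
 */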
3121static void __extent_readpages(struct extent_io_tree *tree,
3122 struct page *pages[],
3123 int nr_pages, get_extent_t *get_extent,
3124 struct extent_map **em_cached,
3125 struct bio **bio, int mirror_num,
3126 unsigned long *bio_flags,
3127 u64 *prev_em_start)
3128{
3129 u64 start = 0;
3130 u64 end = 0;
3131 u64 page_start;
3132 int index;
3133 int first_index = 0;
3134
3135 for (index = 0; index < nr_pages; index++) {
3136 page_start = page_offset(pages[index]);
3137 if (!end) {
3138 start = page_start;
3139 end = start + PAGE_SIZE - 1;
3140 first_index = index;
3141 } else if (end + 1 == page_start) {
3142 end += PAGE_SIZE;
3143 } else {
3144 __do_contiguous_readpages(tree, &pages[first_index],
3145 index - first_index, start,
3146 end, get_extent, em_cached,
3147 bio, mirror_num, bio_flags,
3148 prev_em_start);
3149 start = page_start;
3150 end = start + PAGE_SIZE - 1;
3151 first_index = index;
3152 }
3153 }
3154
3155 if (end)
3156 __do_contiguous_readpages(tree, &pages[first_index],
3157 index - first_index, start,
3158 end, get_extent, em_cached, bio,
3159 mirror_num, bio_flags,
3160 prev_em_start);
3161}
3162
3163static int __extent_read_full_page(struct extent_io_tree *tree,
3164 struct page *page,
3165 get_extent_t *get_extent,
3166 struct bio **bio, int mirror_num,
3167 unsigned long *bio_flags, int read_flags)
3168{
3169 struct inode *inode = page->mapping->host;
3170 struct btrfs_ordered_extent *ordered;
3171 u64 start = page_offset(page);
3172 u64 end = start + PAGE_SIZE - 1;
3173 int ret;
3174
3175 while (1) {
3176 lock_extent(tree, start, end);
3177 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3178 PAGE_SIZE);
3179 if (!ordered)
3180 break;
3181 unlock_extent(tree, start, end);
3182 btrfs_start_ordered_extent(inode, ordered, 1);
3183 btrfs_put_ordered_extent(ordered);
3184 }
3185
3186 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3187 bio_flags, read_flags, NULL);
3188 return ret;
3189}
3190
3191int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3192 get_extent_t *get_extent, int mirror_num)
3193{
3194 struct bio *bio = NULL;
3195 unsigned long bio_flags = 0;
3196 int ret;
3197
3198 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3199 &bio_flags, 0);
3200 if (bio)
3201 ret = submit_one_bio(bio, mirror_num, bio_flags);
3202 return ret;
3203}
3204
3205static void update_nr_written(struct writeback_control *wbc,
3206 unsigned long nr_written)
3207{
3208 wbc->nr_to_write -= nr_written;
3209}
3210
/*
 * helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if our fill_delalloc function did all the work required
 * to write the page (copy into inline extent).  In this case the IO has
 * been started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked)
 * This returns < 0 if there were errors (page still locked)
 */
3221static noinline_for_stack int writepage_delalloc(struct inode *inode,
3222 struct page *page, struct writeback_control *wbc,
3223 struct extent_page_data *epd,
3224 u64 delalloc_start,
3225 unsigned long *nr_written)
3226{
3227 struct extent_io_tree *tree = epd->tree;
3228 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3229 u64 nr_delalloc;
3230 u64 delalloc_to_write = 0;
3231 u64 delalloc_end = 0;
3232 int ret;
3233 int page_started = 0;
3234
3235 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3236 return 0;
3237
3238 while (delalloc_end < page_end) {
3239 nr_delalloc = find_lock_delalloc_range(inode, tree,
3240 page,
3241 &delalloc_start,
3242 &delalloc_end,
3243 BTRFS_MAX_EXTENT_SIZE);
3244 if (nr_delalloc == 0) {
3245 delalloc_start = delalloc_end + 1;
3246 continue;
3247 }
3248 ret = tree->ops->fill_delalloc(inode, page,
3249 delalloc_start,
3250 delalloc_end,
3251 &page_started,
3252 nr_written);
3253
3254 if (ret) {
3255 SetPageError(page);
			/*
			 * fill_delalloc should return < 0 for error but just
			 * in case, we use > 0 here meaning the IO is started,
			 * so we don't want to return > 0 unless things are
			 * going well.
			 */
3261 ret = ret < 0 ? ret : -EIO;
3262 goto done;
3263 }
3264
		/*
		 * delalloc_end is already one less than the total length, so
		 * we don't subtract one from PAGE_SIZE
		 */
3268 delalloc_to_write += (delalloc_end - delalloc_start +
3269 PAGE_SIZE) >> PAGE_SHIFT;
3270 delalloc_start = delalloc_end + 1;
3271 }
3272 if (wbc->nr_to_write < delalloc_to_write) {
3273 int thresh = 8192;
3274
3275 if (delalloc_to_write < thresh * 2)
3276 thresh = delalloc_to_write;
3277 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3278 thresh);
3279 }
3280
3281
3282
3283
3284 if (page_started) {
		/*
		 * We've unlocked the page, so we can't update the mapping's
		 * writeback index, just update nr_to_write.
		 */
3290 wbc->nr_to_write -= *nr_written;
3291 return 1;
3292 }
3293
3294 ret = 0;
3295
3296done:
3297 return ret;
3298}
3299
/*
 * helper for __extent_writepage.  This calls the writepage start hooks,
 * and does the loop to map the page into extents and bios.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked)
 * < 0 if there were errors (page still locked)
 */
3308static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3309 struct page *page,
3310 struct writeback_control *wbc,
3311 struct extent_page_data *epd,
3312 loff_t i_size,
3313 unsigned long nr_written,
3314 int write_flags, int *nr_ret)
3315{
3316 struct extent_io_tree *tree = epd->tree;
3317 u64 start = page_offset(page);
3318 u64 page_end = start + PAGE_SIZE - 1;
3319 u64 end;
3320 u64 cur = start;
3321 u64 extent_offset;
3322 u64 block_start;
3323 u64 iosize;
3324 sector_t sector;
3325 struct extent_map *em;
3326 struct block_device *bdev;
3327 size_t pg_offset = 0;
3328 size_t blocksize;
3329 int ret = 0;
3330 int nr = 0;
3331 bool compressed;
3332
3333 if (tree->ops && tree->ops->writepage_start_hook) {
3334 ret = tree->ops->writepage_start_hook(page, start,
3335 page_end);
3336 if (ret) {
3337
3338 if (ret == -EBUSY)
3339 wbc->pages_skipped++;
3340 else
3341 redirty_page_for_writepage(wbc, page);
3342
3343 update_nr_written(wbc, nr_written);
3344 unlock_page(page);
3345 return 1;
3346 }
3347 }
3348
	/*
	 * we don't want to touch the inode after unlocking the page,
	 * so we update the mapping writeback index now
	 */
3353 update_nr_written(wbc, nr_written + 1);
3354
3355 end = page_end;
3356 if (i_size <= start) {
3357 if (tree->ops && tree->ops->writepage_end_io_hook)
3358 tree->ops->writepage_end_io_hook(page, start,
3359 page_end, NULL, 1);
3360 goto done;
3361 }
3362
3363 blocksize = inode->i_sb->s_blocksize;
3364
3365 while (cur <= end) {
3366 u64 em_end;
3367
3368 if (cur >= i_size) {
3369 if (tree->ops && tree->ops->writepage_end_io_hook)
3370 tree->ops->writepage_end_io_hook(page, cur,
3371 page_end, NULL, 1);
3372 break;
3373 }
3374 em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur,
3375 end - cur + 1, 1);
3376 if (IS_ERR_OR_NULL(em)) {
3377 SetPageError(page);
3378 ret = PTR_ERR_OR_ZERO(em);
3379 break;
3380 }
3381
3382 extent_offset = cur - em->start;
3383 em_end = extent_map_end(em);
3384 BUG_ON(em_end <= cur);
3385 BUG_ON(end < cur);
3386 iosize = min(em_end - cur, end - cur + 1);
3387 iosize = ALIGN(iosize, blocksize);
3388 sector = (em->block_start + extent_offset) >> 9;
3389 bdev = em->bdev;
3390 block_start = em->block_start;
3391 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3392 free_extent_map(em);
3393 em = NULL;
3394
		/*
		 * compressed and inline extents are written through other
		 * paths in the FS
		 */
3399 if (compressed || block_start == EXTENT_MAP_HOLE ||
3400 block_start == EXTENT_MAP_INLINE) {
			/*
			 * end_io notification does not happen here for
			 * compressed extents
			 */
3405 if (!compressed && tree->ops &&
3406 tree->ops->writepage_end_io_hook)
3407 tree->ops->writepage_end_io_hook(page, cur,
3408 cur + iosize - 1,
3409 NULL, 1);
3410 else if (compressed) {
				/*
				 * we don't want to end_page_writeback on
				 * a compressed extent.  this happens
				 * elsewhere
				 */
3415 nr++;
3416 }
3417
3418 cur += iosize;
3419 pg_offset += iosize;
3420 continue;
3421 }
3422
3423 set_range_writeback(tree, cur, cur + iosize - 1);
3424 if (!PageWriteback(page)) {
3425 btrfs_err(BTRFS_I(inode)->root->fs_info,
3426 "page %lu not writeback, cur %llu end %llu",
3427 page->index, cur, end);
3428 }
3429
3430 ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
3431 page, sector, iosize, pg_offset,
3432 bdev, &epd->bio,
3433 end_bio_extent_writepage,
3434 0, 0, 0, false);
3435 if (ret) {
3436 SetPageError(page);
3437 if (PageWriteback(page))
3438 end_page_writeback(page);
3439 }
3440
3441 cur = cur + iosize;
3442 pg_offset += iosize;
3443 nr++;
3444 }
3445done:
3446 *nr_ret = nr;
3447 return ret;
3448}
3449

/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
3456static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3457 void *data)
3458{
3459 struct inode *inode = page->mapping->host;
3460 struct extent_page_data *epd = data;
3461 u64 start = page_offset(page);
3462 u64 page_end = start + PAGE_SIZE - 1;
3463 int ret;
3464 int nr = 0;
3465 size_t pg_offset = 0;
3466 loff_t i_size = i_size_read(inode);
3467 unsigned long end_index = i_size >> PAGE_SHIFT;
3468 int write_flags = 0;
3469 unsigned long nr_written = 0;
3470
3471 if (wbc->sync_mode == WB_SYNC_ALL)
3472 write_flags = REQ_SYNC;
3473
3474 trace___extent_writepage(page, inode, wbc);
3475
3476 WARN_ON(!PageLocked(page));
3477
3478 ClearPageError(page);
3479
3480 pg_offset = i_size & (PAGE_SIZE - 1);
3481 if (page->index > end_index ||
3482 (page->index == end_index && !pg_offset)) {
3483 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3484 unlock_page(page);
3485 return 0;
3486 }
3487
3488 if (page->index == end_index) {
3489 char *userpage;
3490
3491 userpage = kmap_atomic(page);
3492 memset(userpage + pg_offset, 0,
3493 PAGE_SIZE - pg_offset);
3494 kunmap_atomic(userpage);
3495 flush_dcache_page(page);
3496 }
3497
3498 pg_offset = 0;
3499
3500 set_page_extent_mapped(page);
3501
3502 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3503 if (ret == 1)
3504 goto done_unlocked;
3505 if (ret)
3506 goto done;
3507
3508 ret = __extent_writepage_io(inode, page, wbc, epd,
3509 i_size, nr_written, write_flags, &nr);
3510 if (ret == 1)
3511 goto done_unlocked;
3512
3513done:
3514 if (nr == 0) {
3515
3516 set_page_writeback(page);
3517 end_page_writeback(page);
3518 }
3519 if (PageError(page)) {
3520 ret = ret < 0 ? ret : -EIO;
3521 end_extent_writepage(page, ret, start, page_end);
3522 }
3523 unlock_page(page);
3524 return ret;
3525
3526done_unlocked:
3527 return 0;
3528}
3529
3530void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3531{
3532 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3533 TASK_UNINTERRUPTIBLE);
3534}
3535
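/*
 * Lock @eb for writeback.  Returns 1 if the buffer was dirty and has been
 * transitioned to writeback (its pages are locked and must be submitted by
 * the caller), 0 if there was nothing to write.  May flush the bio pending in
 * @epd and, for sync IO, wait for a previous writeback of @eb to finish.
 */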
3536static noinline_for_stack int
3537lock_extent_buffer_for_io(struct extent_buffer *eb,
3538 struct btrfs_fs_info *fs_info,
3539 struct extent_page_data *epd)
3540{
3541 unsigned long i, num_pages;
3542 int flush = 0;
3543 int ret = 0;
3544
3545 if (!btrfs_try_tree_write_lock(eb)) {
3546 flush = 1;
3547 flush_write_bio(epd);
3548 btrfs_tree_lock(eb);
3549 }
3550
3551 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3552 btrfs_tree_unlock(eb);
3553 if (!epd->sync_io)
3554 return 0;
3555 if (!flush) {
3556 flush_write_bio(epd);
3557 flush = 1;
3558 }
3559 while (1) {
3560 wait_on_extent_buffer_writeback(eb);
3561 btrfs_tree_lock(eb);
3562 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3563 break;
3564 btrfs_tree_unlock(eb);
3565 }
3566 }
3567
	/*
	 * We need to do this to prevent races in people who check if the eb
	 * is under IO since we can end up having no IO bits set for a short
	 * period of time.
	 */
3573 spin_lock(&eb->refs_lock);
3574 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3575 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3576 spin_unlock(&eb->refs_lock);
3577 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3578 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3579 -eb->len,
3580 fs_info->dirty_metadata_batch);
3581 ret = 1;
3582 } else {
3583 spin_unlock(&eb->refs_lock);
3584 }
3585
3586 btrfs_tree_unlock(eb);
3587
3588 if (!ret)
3589 return ret;
3590
3591 num_pages = num_extent_pages(eb->start, eb->len);
3592 for (i = 0; i < num_pages; i++) {
3593 struct page *p = eb->pages[i];
3594
3595 if (!trylock_page(p)) {
3596 if (!flush) {
3597 flush_write_bio(epd);
3598 flush = 1;
3599 }
3600 lock_page(p);
3601 }
3602 }
3603
3604 return ret;
3605}
3606
3607static void end_extent_buffer_writeback(struct extent_buffer *eb)
3608{
3609 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3610 smp_mb__after_atomic();
3611 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3612}
3613
3614static void set_btree_ioerr(struct page *page)
3615{
3616 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3617
3618 SetPageError(page);
3619 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3620 return;
3621
	/*
	 * Writeback of this eb can be triggered by the VM (memory pressure)
	 * long before the transaction commit or log sync does its own
	 * filemap_fdatawrite/fdatawait against the btree inode.  If such an
	 * earlier writeback fails, the pages of this eb may have been
	 * released and reused (and their error bits cleared) by the time the
	 * commit or log sync looks for errors, so the failure would go
	 * unnoticed and we could end up committing a transaction or log that
	 * references metadata that never made it to disk.
	 *
	 * Therefore record the error in dedicated fs_info flags instead of
	 * relying on page or mapping error bits: BTRFS_FS_BTREE_ERR for
	 * regular trees (eb->log_index == -1) and BTRFS_FS_LOG1_ERR /
	 * BTRFS_FS_LOG2_ERR for the two log transactions that can be in
	 * flight (eb->log_index 0 or 1).  These flags are checked at
	 * transaction commit and log sync time.
	 */
3660 switch (eb->log_index) {
3661 case -1:
3662 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3663 break;
3664 case 0:
3665 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3666 break;
3667 case 1:
3668 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3669 break;
3670 default:
3671 BUG();
3672 }
3673}
3674
3675static void end_bio_extent_buffer_writepage(struct bio *bio)
3676{
3677 struct bio_vec *bvec;
3678 struct extent_buffer *eb;
3679 int i, done;
3680
3681 ASSERT(!bio_flagged(bio, BIO_CLONED));
3682 bio_for_each_segment_all(bvec, bio, i) {
3683 struct page *page = bvec->bv_page;
3684
3685 eb = (struct extent_buffer *)page->private;
3686 BUG_ON(!eb);
3687 done = atomic_dec_and_test(&eb->io_pages);
3688
3689 if (bio->bi_status ||
3690 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3691 ClearPageUptodate(page);
3692 set_btree_ioerr(page);
3693 }
3694
3695 end_page_writeback(page);
3696
3697 if (!done)
3698 continue;
3699
3700 end_extent_buffer_writeback(eb);
3701 }
3702
3703 bio_put(bio);
3704}
3705
3706static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3707 struct btrfs_fs_info *fs_info,
3708 struct writeback_control *wbc,
3709 struct extent_page_data *epd)
3710{
3711 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3712 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3713 u64 offset = eb->start;
3714 u32 nritems;
3715 unsigned long i, num_pages;
3716 unsigned long bio_flags = 0;
3717 unsigned long start, end;
3718 int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
3719 int ret = 0;
3720
3721 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3722 num_pages = num_extent_pages(eb->start, eb->len);
3723 atomic_set(&eb->io_pages, num_pages);
3724 if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
3725 bio_flags = EXTENT_BIO_TREE_LOG;
3726
3727
3728 nritems = btrfs_header_nritems(eb);
3729 if (btrfs_header_level(eb) > 0) {
3730 end = btrfs_node_key_ptr_offset(nritems);
3731
3732 memzero_extent_buffer(eb, end, eb->len - end);
3733 } else {
3734
3735
3736
3737
3738 start = btrfs_item_nr_offset(nritems);
3739 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
3740 memzero_extent_buffer(eb, start, end - start);
3741 }
3742
3743 for (i = 0; i < num_pages; i++) {
3744 struct page *p = eb->pages[i];
3745
3746 clear_page_dirty_for_io(p);
3747 set_page_writeback(p);
3748 ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
3749 p, offset >> 9, PAGE_SIZE, 0, bdev,
3750 &epd->bio,
3751 end_bio_extent_buffer_writepage,
3752 0, epd->bio_flags, bio_flags, false);
3753 epd->bio_flags = bio_flags;
3754 if (ret) {
3755 set_btree_ioerr(p);
3756 if (PageWriteback(p))
3757 end_page_writeback(p);
3758 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3759 end_extent_buffer_writeback(eb);
3760 ret = -EIO;
3761 break;
3762 }
3763 offset += PAGE_SIZE;
3764 update_nr_written(wbc, 1);
3765 unlock_page(p);
3766 }
3767
3768 if (unlikely(ret)) {
3769 for (; i < num_pages; i++) {
3770 struct page *p = eb->pages[i];
3771 clear_page_dirty_for_io(p);
3772 unlock_page(p);
3773 }
3774 }
3775
3776 return ret;
3777}
3778
3779int btree_write_cache_pages(struct address_space *mapping,
3780 struct writeback_control *wbc)
3781{
3782 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3783 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3784 struct extent_buffer *eb, *prev_eb = NULL;
3785 struct extent_page_data epd = {
3786 .bio = NULL,
3787 .tree = tree,
3788 .extent_locked = 0,
3789 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3790 .bio_flags = 0,
3791 };
3792 int ret = 0;
3793 int done = 0;
3794 int nr_to_write_done = 0;
3795 struct pagevec pvec;
3796 int nr_pages;
3797 pgoff_t index;
3798 pgoff_t end;
3799 int scanned = 0;
3800 int tag;
3801
3802 pagevec_init(&pvec, 0);
3803 if (wbc->range_cyclic) {
3804 index = mapping->writeback_index;
3805 end = -1;
3806 } else {
3807 index = wbc->range_start >> PAGE_SHIFT;
3808 end = wbc->range_end >> PAGE_SHIFT;
3809 scanned = 1;
3810 }
3811 if (wbc->sync_mode == WB_SYNC_ALL)
3812 tag = PAGECACHE_TAG_TOWRITE;
3813 else
3814 tag = PAGECACHE_TAG_DIRTY;
3815retry:
3816 if (wbc->sync_mode == WB_SYNC_ALL)
3817 tag_pages_for_writeback(mapping, index, end);
3818 while (!done && !nr_to_write_done && (index <= end) &&
3819 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3820 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3821 unsigned i;
3822
3823 scanned = 1;
3824 for (i = 0; i < nr_pages; i++) {
3825 struct page *page = pvec.pages[i];
3826
3827 if (!PagePrivate(page))
3828 continue;
3829
3830 if (!wbc->range_cyclic && page->index > end) {
3831 done = 1;
3832 break;
3833 }
3834
3835 spin_lock(&mapping->private_lock);
3836 if (!PagePrivate(page)) {
3837 spin_unlock(&mapping->private_lock);
3838 continue;
3839 }
3840
3841 eb = (struct extent_buffer *)page->private;
3842
			/*
			 * Shouldn't happen and normally this would be a
			 * BUG_ON but no sense in crashing the users box for
			 * something we can survive anyway.
			 */
3848 if (WARN_ON(!eb)) {
3849 spin_unlock(&mapping->private_lock);
3850 continue;
3851 }
3852
3853 if (eb == prev_eb) {
3854 spin_unlock(&mapping->private_lock);
3855 continue;
3856 }
3857
3858 ret = atomic_inc_not_zero(&eb->refs);
3859 spin_unlock(&mapping->private_lock);
3860 if (!ret)
3861 continue;
3862
3863 prev_eb = eb;
3864 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3865 if (!ret) {
3866 free_extent_buffer(eb);
3867 continue;
3868 }
3869
3870 ret = write_one_eb(eb, fs_info, wbc, &epd);
3871 if (ret) {
3872 done = 1;
3873 free_extent_buffer(eb);
3874 break;
3875 }
3876 free_extent_buffer(eb);

			/*
			 * the filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time
			 */
3883 nr_to_write_done = wbc->nr_to_write <= 0;
3884 }
3885 pagevec_release(&pvec);
3886 cond_resched();
3887 }
3888 if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done:
		 * wrap around back to the start of the file
		 */
3893 scanned = 1;
3894 index = 0;
3895 goto retry;
3896 }
3897 flush_write_bio(&epd);
3898 return ret;
3899}
3900
/**
 * extent_write_cache_pages - walk the list of dirty pages of the given
 * address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to the writepage function
 * @flush_fn: function used to flush any accumulated bio before sleeping
 *
 * If a page is already under I/O, extent_write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync.  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 */
3916static int extent_write_cache_pages(struct address_space *mapping,
3917 struct writeback_control *wbc,
3918 writepage_t writepage, void *data,
3919 void (*flush_fn)(void *))
3920{
3921 struct inode *inode = mapping->host;
3922 int ret = 0;
3923 int done = 0;
3924 int nr_to_write_done = 0;
3925 struct pagevec pvec;
3926 int nr_pages;
3927 pgoff_t index;
3928 pgoff_t end;
3929 pgoff_t done_index;
3930 int range_whole = 0;
3931 int scanned = 0;
3932 int tag;
3933
	/*
	 * We have to hold onto the inode so that ordered extents can do their
	 * work when the IO finishes.  The alternative to this is failing to
	 * add an ordered extent if the igrab() fails there and that is a huge
	 * pain to deal with, so instead just hold onto the inode throughout
	 * the writepages operation.  If it fails here we are freeing up the
	 * inode anyway and we'd rather not waste our time writing out stuff
	 * that is going to be truncated anyway.
	 */
3943 if (!igrab(inode))
3944 return 0;
3945
3946 pagevec_init(&pvec, 0);
3947 if (wbc->range_cyclic) {
3948 index = mapping->writeback_index;
3949 end = -1;
3950 } else {
3951 index = wbc->range_start >> PAGE_SHIFT;
3952 end = wbc->range_end >> PAGE_SHIFT;
3953 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3954 range_whole = 1;
3955 scanned = 1;
3956 }
3957 if (wbc->sync_mode == WB_SYNC_ALL)
3958 tag = PAGECACHE_TAG_TOWRITE;
3959 else
3960 tag = PAGECACHE_TAG_DIRTY;
3961retry:
3962 if (wbc->sync_mode == WB_SYNC_ALL)
3963 tag_pages_for_writeback(mapping, index, end);
3964 done_index = index;
3965 while (!done && !nr_to_write_done && (index <= end) &&
3966 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3967 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3968 unsigned i;
3969
3970 scanned = 1;
3971 for (i = 0; i < nr_pages; i++) {
3972 struct page *page = pvec.pages[i];
3973
3974 done_index = page->index;
			/*
			 * At this point we hold neither mapping->tree_lock
			 * nor the lock on the page itself: the page may be
			 * truncated or invalidated (changing page->mapping to
			 * NULL), or even swizzled back from swapper_space to
			 * tmpfs file mapping
			 */
3982 if (!trylock_page(page)) {
3983 flush_fn(data);
3984 lock_page(page);
3985 }
3986
3987 if (unlikely(page->mapping != mapping)) {
3988 unlock_page(page);
3989 continue;
3990 }
3991
3992 if (!wbc->range_cyclic && page->index > end) {
3993 done = 1;
3994 unlock_page(page);
3995 continue;
3996 }
3997
3998 if (wbc->sync_mode != WB_SYNC_NONE) {
3999 if (PageWriteback(page))
4000 flush_fn(data);
4001 wait_on_page_writeback(page);
4002 }
4003
4004 if (PageWriteback(page) ||
4005 !clear_page_dirty_for_io(page)) {
4006 unlock_page(page);
4007 continue;
4008 }
4009
4010 ret = (*writepage)(page, wbc, data);
4011
4012 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
4013 unlock_page(page);
4014 ret = 0;
4015 }
4016 if (ret < 0) {
				/*
				 * done_index is set past this page,
				 * so media errors will not choke
				 * background writeout for the entire
				 * file. This has consequences for
				 * range_cyclic semantics (ie. cycles
				 * will return to beginning of file).
				 */
4026 done_index = page->index + 1;
4027 done = 1;
4028 break;
4029 }

			/*
			 * the filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time
			 */
4036 nr_to_write_done = wbc->nr_to_write <= 0;
4037 }
4038 pagevec_release(&pvec);
4039 cond_resched();
4040 }
4041 if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done:
		 * wrap around back to the start of the file
		 */
4046 scanned = 1;
4047 index = 0;
4048 goto retry;
4049 }
4050
4051 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4052 mapping->writeback_index = done_index;
4053
4054 btrfs_add_delayed_iput(inode);
4055 return ret;
4056}
4057
4058static void flush_epd_write_bio(struct extent_page_data *epd)
4059{
4060 if (epd->bio) {
4061 int ret;
4062
4063 bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
4064 epd->sync_io ? REQ_SYNC : 0);
4065
4066 ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
4067 BUG_ON(ret < 0);
4068 epd->bio = NULL;
4069 }
4070}
4071
4072static noinline void flush_write_bio(void *data)
4073{
4074 struct extent_page_data *epd = data;
4075 flush_epd_write_bio(epd);
4076}
4077
4078int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
4079 get_extent_t *get_extent,
4080 struct writeback_control *wbc)
4081{
4082 int ret;
4083 struct extent_page_data epd = {
4084 .bio = NULL,
4085 .tree = tree,
4086 .get_extent = get_extent,
4087 .extent_locked = 0,
4088 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4089 .bio_flags = 0,
4090 };
4091
4092 ret = __extent_writepage(page, wbc, &epd);
4093
4094 flush_epd_write_bio(&epd);
4095 return ret;
4096}
4097
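/*
 * Write out an already-locked range of pages.  @start and @end delimit the
 * range; the extent range and its pages are expected to be locked by the
 * caller (epd.extent_locked is set, so the delalloc phase of
 * __extent_writepage is skipped).  Pages that aren't dirty still get their
 * writepage_end_io_hook called so ordered extent accounting stays balanced.
 */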
4098int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
4099 u64 start, u64 end, get_extent_t *get_extent,
4100 int mode)
4101{
4102 int ret = 0;
4103 struct address_space *mapping = inode->i_mapping;
4104 struct page *page;
4105 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4106 PAGE_SHIFT;
4107
4108 struct extent_page_data epd = {
4109 .bio = NULL,
4110 .tree = tree,
4111 .get_extent = get_extent,
4112 .extent_locked = 1,
4113 .sync_io = mode == WB_SYNC_ALL,
4114 .bio_flags = 0,
4115 };
4116 struct writeback_control wbc_writepages = {
4117 .sync_mode = mode,
4118 .nr_to_write = nr_pages * 2,
4119 .range_start = start,
4120 .range_end = end + 1,
4121 };
4122
4123 while (start <= end) {
4124 page = find_get_page(mapping, start >> PAGE_SHIFT);
4125 if (clear_page_dirty_for_io(page))
4126 ret = __extent_writepage(page, &wbc_writepages, &epd);
4127 else {
4128 if (tree->ops && tree->ops->writepage_end_io_hook)
4129 tree->ops->writepage_end_io_hook(page, start,
4130 start + PAGE_SIZE - 1,
4131 NULL, 1);
4132 unlock_page(page);
4133 }
4134 put_page(page);
4135 start += PAGE_SIZE;
4136 }
4137
4138 flush_epd_write_bio(&epd);
4139 return ret;
4140}
4141
4142int extent_writepages(struct extent_io_tree *tree,
4143 struct address_space *mapping,
4144 get_extent_t *get_extent,
4145 struct writeback_control *wbc)
4146{
4147 int ret = 0;
4148 struct extent_page_data epd = {
4149 .bio = NULL,
4150 .tree = tree,
4151 .get_extent = get_extent,
4152 .extent_locked = 0,
4153 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4154 .bio_flags = 0,
4155 };
4156
4157 ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
4158 flush_write_bio);
4159 flush_epd_write_bio(&epd);
4160 return ret;
4161}
4162
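/*
 * Readahead entry point: take the readahead list from the VM, add the pages
 * to the page cache and read them in batches of up to 16 pages via
 * __extent_readpages(), which splits each batch into contiguous runs.  Any
 * bio still open at the end is submitted here.
 */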
4163int extent_readpages(struct extent_io_tree *tree,
4164 struct address_space *mapping,
4165 struct list_head *pages, unsigned nr_pages,
4166 get_extent_t get_extent)
4167{
4168 struct bio *bio = NULL;
4169 unsigned page_idx;
4170 unsigned long bio_flags = 0;
4171 struct page *pagepool[16];
4172 struct page *page;
4173 struct extent_map *em_cached = NULL;
4174 int nr = 0;
4175 u64 prev_em_start = (u64)-1;
4176
4177 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4178 page = list_entry(pages->prev, struct page, lru);
4179
4180 prefetchw(&page->flags);
4181 list_del(&page->lru);
4182 if (add_to_page_cache_lru(page, mapping,
4183 page->index,
4184 readahead_gfp_mask(mapping))) {
4185 put_page(page);
4186 continue;
4187 }
4188
4189 pagepool[nr++] = page;
4190 if (nr < ARRAY_SIZE(pagepool))
4191 continue;
4192 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4193 &bio, 0, &bio_flags, &prev_em_start);
4194 nr = 0;
4195 }
4196 if (nr)
4197 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4198 &bio, 0, &bio_flags, &prev_em_start);
4199
4200 if (em_cached)
4201 free_extent_map(em_cached);
4202
4203 BUG_ON(!list_empty(pages));
4204 if (bio)
4205 return submit_one_bio(bio, 0, bio_flags);
4206 return 0;
4207}
4208
/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
4214int extent_invalidatepage(struct extent_io_tree *tree,
4215 struct page *page, unsigned long offset)
4216{
4217 struct extent_state *cached_state = NULL;
4218 u64 start = page_offset(page);
4219 u64 end = start + PAGE_SIZE - 1;
4220 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4221
4222 start += ALIGN(offset, blocksize);
4223 if (start > end)
4224 return 0;
4225
4226 lock_extent_bits(tree, start, end, &cached_state);
4227 wait_on_page_writeback(page);
4228 clear_extent_bit(tree, start, end,
4229 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4230 EXTENT_DO_ACCOUNTING,
4231 1, 1, &cached_state, GFP_NOFS);
4232 return 0;
4233}
4234
/*
 * a helper for releasepage, this tests for areas of the page that
 * are locked or under IO and drops the related state bits if it is safe
 * to drop the page.
 */
4240static int try_release_extent_state(struct extent_map_tree *map,
4241 struct extent_io_tree *tree,
4242 struct page *page, gfp_t mask)
4243{
4244 u64 start = page_offset(page);
4245 u64 end = start + PAGE_SIZE - 1;
4246 int ret = 1;
4247
4248 if (test_range_bit(tree, start, end,
4249 EXTENT_IOBITS, 0, NULL))
4250 ret = 0;
4251 else {
		/*
		 * at this point we can safely clear everything except the
		 * locked bit and the nodatasum bit
		 */
4256 ret = clear_extent_bit(tree, start, end,
4257 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4258 0, 0, NULL, mask);

		/*
		 * if clear_extent_bit failed for enomem reasons,
		 * we can't allow the release to continue.
		 */
4263 if (ret < 0)
4264 ret = 0;
4265 else
4266 ret = 1;
4267 }
4268 return ret;
4269}
4270
/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
4276int try_release_extent_mapping(struct extent_map_tree *map,
4277 struct extent_io_tree *tree, struct page *page,
4278 gfp_t mask)
4279{
4280 struct extent_map *em;
4281 u64 start = page_offset(page);
4282 u64 end = start + PAGE_SIZE - 1;
4283
4284 if (gfpflags_allow_blocking(mask) &&
4285 page->mapping->host->i_size > SZ_16M) {
4286 u64 len;
4287 while (start <= end) {
4288 len = end - start + 1;
4289 write_lock(&map->lock);
4290 em = lookup_extent_mapping(map, start, len);
4291 if (!em) {
4292 write_unlock(&map->lock);
4293 break;
4294 }
4295 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4296 em->start != start) {
4297 write_unlock(&map->lock);
4298 free_extent_map(em);
4299 break;
4300 }
4301 if (!test_range_bit(tree, em->start,
4302 extent_map_end(em) - 1,
4303 EXTENT_LOCKED | EXTENT_WRITEBACK,
4304 0, NULL)) {
4305 remove_extent_mapping(map, em);
4306
4307 free_extent_map(em);
4308 }
4309 start = extent_map_end(em);
4310 write_unlock(&map->lock);
4311
4312
4313 free_extent_map(em);
4314 }
4315 }
4316 return try_release_extent_state(map, tree, page, mask);
4317}
4318
/*
 * helper function for fiemap, which doesn't want to see any holes.
 * This maps until we find something past 'last'
 */
4323static struct extent_map *get_extent_skip_holes(struct inode *inode,
4324 u64 offset,
4325 u64 last,
4326 get_extent_t *get_extent)
4327{
4328 u64 sectorsize = btrfs_inode_sectorsize(inode);
4329 struct extent_map *em;
4330 u64 len;
4331
4332 if (offset >= last)
4333 return NULL;
4334
4335 while (1) {
4336 len = last - offset;
4337 if (len == 0)
4338 break;
4339 len = ALIGN(len, sectorsize);
4340 em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0);
4341 if (IS_ERR_OR_NULL(em))
4342 return em;
4343
4344
4345 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
4346 em->block_start != EXTENT_MAP_HOLE) {
4347 return em;
4348 }
4349
4350
4351 offset = extent_map_end(em);
4352 free_extent_map(em);
4353 if (offset >= last)
4354 break;
4355 }
4356 return NULL;
4357}
4358
/*
 * To cache previous fiemap extent
 *
 * Will be used for merging fiemap extent
 */
4364struct fiemap_cache {
4365 u64 offset;
4366 u64 phys;
4367 u64 len;
4368 u32 flags;
4369 bool cached;
4370};
4371
/*
 * Helper to submit fiemap extent.
 *
 * Will try to merge current fiemap extent specified by @offset, @phys,
 * @len and @flags with cached one.
 * And only when we fail to merge, cached one will be submitted as
 * fiemap extent.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
4382static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4383 struct fiemap_cache *cache,
4384 u64 offset, u64 phys, u64 len, u32 flags)
4385{
4386 int ret = 0;
4387
4388 if (!cache->cached)
4389 goto assign;
4390
	/*
	 * Sanity check, extent_fiemap() should have ensured that new
	 * fiemap extent won't overlap with cached one.
	 * Not recoverable.
	 *
	 * NOTE: Physical address can overlap, due to compression
	 */
4398 if (cache->offset + cache->len > offset) {
4399 WARN_ON(1);
4400 return -EINVAL;
4401 }
4402
	/*
	 * Only merges fiemap extents if
	 * 1) Their logical addresses are continuous
	 *
	 * 2) Their physical addresses are continuous
	 *    So truly compressed (physical size smaller than logical size)
	 *    extents won't get merged with each other
	 *
	 * 3) Share same flags except FIEMAP_EXTENT_LAST
	 *    So regular extent won't get merged with prealloc extent
	 */
4414 if (cache->offset + cache->len == offset &&
4415 cache->phys + cache->len == phys &&
4416 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4417 (flags & ~FIEMAP_EXTENT_LAST)) {
4418 cache->len += len;
4419 cache->flags |= flags;
4420 goto try_submit_last;
4421 }
4422
4423
4424 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4425 cache->len, cache->flags);
4426 cache->cached = false;
4427 if (ret)
4428 return ret;
4429assign:
4430 cache->cached = true;
4431 cache->offset = offset;
4432 cache->phys = phys;
4433 cache->len = len;
4434 cache->flags = flags;
4435try_submit_last:
4436 if (cache->flags & FIEMAP_EXTENT_LAST) {
4437 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4438 cache->phys, cache->len, cache->flags);
4439 cache->cached = false;
4440 }
4441 return ret;
4442}
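
/*
 * A worked example of the merging above (hypothetical values): a cached
 * extent (offset=0, phys=X, len=4K, flags=0) followed by a call with
 * (offset=4K, phys=X+4K, len=8K, flags=0) is merged into a single fiemap
 * extent (offset=0, phys=X, len=12K).  If instead phys were not contiguous
 * (e.g. a second compressed extent), the cached extent is emitted via
 * fiemap_fill_next_extent() and the new one becomes the cached entry.
 */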
4443
/*
 * Emit last fiemap cache
 *
 * The last fiemap cache may still be cached in the following case:
 * 0		      4k		    8k
 * |<- Fiemap range ->|
 * |<------------  First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */
4455static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
4456 struct fiemap_extent_info *fieinfo,
4457 struct fiemap_cache *cache)
4458{
4459 int ret;
4460
4461 if (!cache->cached)
4462 return 0;
4463
4464 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4465 cache->len, cache->flags);
4466 cache->cached = false;
4467 if (ret > 0)
4468 ret = 0;
4469 return ret;
4470}
4471
4472int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4473 __u64 start, __u64 len, get_extent_t *get_extent)
4474{
4475 int ret = 0;
4476 u64 off = start;
4477 u64 max = start + len;
4478 u32 flags = 0;
4479 u32 found_type;
4480 u64 last;
4481 u64 last_for_get_extent = 0;
4482 u64 disko = 0;
4483 u64 isize = i_size_read(inode);
4484 struct btrfs_key found_key;
4485 struct extent_map *em = NULL;
4486 struct extent_state *cached_state = NULL;
4487 struct btrfs_path *path;
4488 struct btrfs_root *root = BTRFS_I(inode)->root;
4489 struct fiemap_cache cache = { 0 };
4490 int end = 0;
4491 u64 em_start = 0;
4492 u64 em_len = 0;
4493 u64 em_end = 0;
4494
4495 if (len == 0)
4496 return -EINVAL;
4497
4498 path = btrfs_alloc_path();
4499 if (!path)
4500 return -ENOMEM;
4501 path->leave_spinning = 1;
4502
4503 start = round_down(start, btrfs_inode_sectorsize(inode));
4504 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4505
	/*
	 * lookup the last file extent.  We're not using i_size here
	 * because there might be preallocation past i_size
	 */
4510 ret = btrfs_lookup_file_extent(NULL, root, path,
4511 btrfs_ino(BTRFS_I(inode)), -1, 0);
4512 if (ret < 0) {
4513 btrfs_free_path(path);
4514 return ret;
4515 } else {
4516 WARN_ON(!ret);
4517 if (ret == 1)
4518 ret = 0;
4519 }
4520
4521 path->slots[0]--;
4522 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4523 found_type = found_key.type;
4524
4525
4526 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4527 found_type != BTRFS_EXTENT_DATA_KEY) {
4528
4529 last = (u64)-1;
4530 last_for_get_extent = isize;
4531 } else {
4532
4533
4534
4535
4536
4537 last = found_key.offset;
4538 last_for_get_extent = last + 1;
4539 }
4540 btrfs_release_path(path);
4541
	/*
	 * we might have some extents allocated but more delalloc past those
	 * extents.  so, we trust isize unless the start of the last extent
	 * is beyond isize
	 */
4547 if (last < isize) {
4548 last = (u64)-1;
4549 last_for_get_extent = isize;
4550 }
4551
4552 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4553 &cached_state);
4554
4555 em = get_extent_skip_holes(inode, start, last_for_get_extent,
4556 get_extent);
4557 if (!em)
4558 goto out;
4559 if (IS_ERR(em)) {
4560 ret = PTR_ERR(em);
4561 goto out;
4562 }
4563
4564 while (!end) {
4565 u64 offset_in_extent = 0;

		/* break if the extent we found is outside the range */
4568 if (em->start >= max || extent_map_end(em) < off)
4569 break;
4570
		/*
		 * get_extent may return an extent that starts before our
		 * requested range.  We have to make sure the ranges
		 * we return to fiemap always move forward and don't
		 * overlap, so adjust the offsets here
		 */
4577 em_start = max(em->start, off);
4578
4579
4580
4581
4582
4583
4584
4585 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4586 offset_in_extent = em_start - em->start;
4587 em_end = extent_map_end(em);
4588 em_len = em_end - em_start;
4589 disko = 0;
4590 flags = 0;
4591
4592
4593
4594
4595 off = extent_map_end(em);
4596 if (off >= max)
4597 end = 1;
4598
4599 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4600 end = 1;
4601 flags |= FIEMAP_EXTENT_LAST;
4602 } else if (em->block_start == EXTENT_MAP_INLINE) {
4603 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4604 FIEMAP_EXTENT_NOT_ALIGNED);
4605 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4606 flags |= (FIEMAP_EXTENT_DELALLOC |
4607 FIEMAP_EXTENT_UNKNOWN);
4608 } else if (fieinfo->fi_extents_max) {
4609 struct btrfs_trans_handle *trans;
4610
4611 u64 bytenr = em->block_start -
4612 (em->start - em->orig_start);
4613
4614 disko = em->block_start + offset_in_extent;
4615
4616
4617
4618
4619 trans = btrfs_join_transaction(root);
4620
4621
4622
4623
4624 if (IS_ERR(trans))
4625 trans = NULL;
4626
4627
4628
4629
4630
4631
4632
4633
4634 ret = btrfs_check_shared(trans, root->fs_info,
4635 root->objectid,
4636 btrfs_ino(BTRFS_I(inode)), bytenr);
4637 if (trans)
4638 btrfs_end_transaction(trans);
4639 if (ret < 0)
4640 goto out_free;
4641 if (ret)
4642 flags |= FIEMAP_EXTENT_SHARED;
4643 ret = 0;
4644 }
4645 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4646 flags |= FIEMAP_EXTENT_ENCODED;
4647 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4648 flags |= FIEMAP_EXTENT_UNWRITTEN;
4649
4650 free_extent_map(em);
4651 em = NULL;
4652 if ((em_start >= last) || em_len == (u64)-1 ||
4653 (last == (u64)-1 && isize <= em_end)) {
4654 flags |= FIEMAP_EXTENT_LAST;
4655 end = 1;
4656 }

		/* now scan forward to see if this is really the last extent. */
4659 em = get_extent_skip_holes(inode, off, last_for_get_extent,
4660 get_extent);
4661 if (IS_ERR(em)) {
4662 ret = PTR_ERR(em);
4663 goto out;
4664 }
4665 if (!em) {
4666 flags |= FIEMAP_EXTENT_LAST;
4667 end = 1;
4668 }
4669 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4670 em_len, flags);
4671 if (ret) {
4672 if (ret == 1)
4673 ret = 0;
4674 goto out_free;
4675 }
4676 }
4677out_free:
4678 if (!ret)
4679 ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
4680 free_extent_map(em);
4681out:
4682 btrfs_free_path(path);
4683 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4684 &cached_state, GFP_NOFS);
4685 return ret;
4686}
4687
4688static void __free_extent_buffer(struct extent_buffer *eb)
4689{
4690 btrfs_leak_debug_del(&eb->leak_list);
4691 kmem_cache_free(extent_buffer_cache, eb);
4692}
4693
4694int extent_buffer_under_io(struct extent_buffer *eb)
4695{
4696 return (atomic_read(&eb->io_pages) ||
4697 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4698 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4699}
4700
/*
 * Release all pages attached to the extent buffer.
 */
4704static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4705{
4706 unsigned long index;
4707 struct page *page;
4708 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4709
4710 BUG_ON(extent_buffer_under_io(eb));
4711
4712 index = num_extent_pages(eb->start, eb->len);
4713 if (index == 0)
4714 return;
4715
4716 do {
4717 index--;
4718 page = eb->pages[index];
4719 if (!page)
4720 continue;
4721 if (mapped)
4722 spin_lock(&page->mapping->private_lock);

		/*
		 * We do this since we'll remove the pages after we've
		 * removed the eb from the radix tree, so we could race
		 * and have this page now attached to the new eb.  So
		 * only clear page_private if it's still connected to
		 * this eb.
		 */
4730 if (PagePrivate(page) &&
4731 page->private == (unsigned long)eb) {
4732 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4733 BUG_ON(PageDirty(page));
4734 BUG_ON(PageWriteback(page));
4735
4736
4737
4738
4739 ClearPagePrivate(page);
4740 set_page_private(page, 0);
4741
4742 put_page(page);
4743 }
4744
4745 if (mapped)
4746 spin_unlock(&page->mapping->private_lock);
4747
4748
4749 put_page(page);
4750 } while (index != 0);
4751}
4752
4753
4754
4755
4756static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4757{
4758 btrfs_release_extent_buffer_page(eb);
4759 __free_extent_buffer(eb);
4760}
4761
4762static struct extent_buffer *
4763__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4764 unsigned long len)
4765{
4766 struct extent_buffer *eb = NULL;
4767
4768 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4769 eb->start = start;
4770 eb->len = len;
4771 eb->fs_info = fs_info;
4772 eb->bflags = 0;
4773 rwlock_init(&eb->lock);
4774 atomic_set(&eb->write_locks, 0);
4775 atomic_set(&eb->read_locks, 0);
4776 atomic_set(&eb->blocking_readers, 0);
4777 atomic_set(&eb->blocking_writers, 0);
4778 atomic_set(&eb->spinning_readers, 0);
4779 atomic_set(&eb->spinning_writers, 0);
4780 eb->lock_nested = 0;
4781 init_waitqueue_head(&eb->write_lock_wq);
4782 init_waitqueue_head(&eb->read_lock_wq);
4783
4784 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4785
4786 spin_lock_init(&eb->refs_lock);
4787 atomic_set(&eb->refs, 1);
4788 atomic_set(&eb->io_pages, 0);

	/*
	 * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
	 */
4793 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4794 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4795 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4796
4797 return eb;
4798}
4799
4800struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4801{
4802 unsigned long i;
4803 struct page *p;
4804 struct extent_buffer *new;
4805 unsigned long num_pages = num_extent_pages(src->start, src->len);
4806
4807 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4808 if (new == NULL)
4809 return NULL;
4810
4811 for (i = 0; i < num_pages; i++) {
4812 p = alloc_page(GFP_NOFS);
4813 if (!p) {
4814 btrfs_release_extent_buffer(new);
4815 return NULL;
4816 }
4817 attach_extent_buffer_page(new, p);
4818 WARN_ON(PageDirty(p));
4819 SetPageUptodate(p);
4820 new->pages[i] = p;
4821 copy_page(page_address(p), page_address(src->pages[i]));
4822 }
4823
4824 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4825 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4826
4827 return new;
4828}
4829
4830struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4831 u64 start, unsigned long len)
4832{
4833 struct extent_buffer *eb;
4834 unsigned long num_pages;
4835 unsigned long i;
4836
4837 num_pages = num_extent_pages(start, len);
4838
4839 eb = __alloc_extent_buffer(fs_info, start, len);
4840 if (!eb)
4841 return NULL;
4842
4843 for (i = 0; i < num_pages; i++) {
4844 eb->pages[i] = alloc_page(GFP_NOFS);
4845 if (!eb->pages[i])
4846 goto err;
4847 }
4848 set_extent_buffer_uptodate(eb);
4849 btrfs_set_header_nritems(eb, 0);
4850 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4851
4852 return eb;
4853err:
4854 for (; i > 0; i--)
4855 __free_page(eb->pages[i - 1]);
4856 __free_extent_buffer(eb);
4857 return NULL;
4858}
4859
4860struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4861 u64 start)
4862{
4863 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
4864}
4865
4866static void check_buffer_tree_ref(struct extent_buffer *eb)
4867{
4868 int refs;
4869
	/*
	 * The TREE_REF bit is tricky.  We have to make sure it is set if we
	 * have the buffer dirty, otherwise the code that frees a buffer can
	 * end up dropping a dirty page.
	 *
	 * Once the ref bit is set it won't go away while the buffer is dirty
	 * or under writeback, and it also won't go away while we hold a
	 * reference on the eb.
	 *
	 * We can't just set the bit without taking eb->refs_lock, because
	 * release_extent_buffer() checks the bit under that lock when
	 * deciding whether the last reference is being dropped; setting the
	 * bit and bumping the reference under refs_lock keeps the two in
	 * sync.
	 */
4889 refs = atomic_read(&eb->refs);
4890 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4891 return;
4892
4893 spin_lock(&eb->refs_lock);
4894 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4895 atomic_inc(&eb->refs);
4896 spin_unlock(&eb->refs_lock);
4897}
4898
4899static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4900 struct page *accessed)
4901{
4902 unsigned long num_pages, i;
4903
4904 check_buffer_tree_ref(eb);
4905
4906 num_pages = num_extent_pages(eb->start, eb->len);
4907 for (i = 0; i < num_pages; i++) {
4908 struct page *p = eb->pages[i];
4909
4910 if (p != accessed)
4911 mark_page_accessed(p);
4912 }
4913}
4914
4915struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4916 u64 start)
4917{
4918 struct extent_buffer *eb;
4919
4920 rcu_read_lock();
4921 eb = radix_tree_lookup(&fs_info->buffer_radix,
4922 start >> PAGE_SHIFT);
4923 if (eb && atomic_inc_not_zero(&eb->refs)) {
4924 rcu_read_unlock();

		/*
		 * Lock our eb's refs_lock to avoid races with
		 * release_extent_buffer().  When we get our eb it might be
		 * flagged with EXTENT_BUFFER_STALE and another task running
		 * free_extent_buffer() might have seen that flag set,
		 * eb->refs == 2, that the buffer isn't under IO (dirty and
		 * writeback flags not set) and that it's still in the tree
		 * (EXTENT_BUFFER_TREE_REF set), and therefore be in the
		 * process of dropping the extent buffer's reference count
		 * twice.  So here we could race, increment the eb's reference
		 * count, clear its stale flag, mark it dirty and drop our
		 * reference before the other task finishes, which would later
		 * result in an attempt to free an extent buffer that is
		 * dirty.
		 */
4940 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4941 spin_lock(&eb->refs_lock);
4942 spin_unlock(&eb->refs_lock);
4943 }
4944 mark_extent_buffer_accessed(eb, NULL);
4945 return eb;
4946 }
4947 rcu_read_unlock();
4948
4949 return NULL;
4950}
4951
4952#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4953struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4954 u64 start)
4955{
4956 struct extent_buffer *eb, *exists = NULL;
4957 int ret;
4958
4959 eb = find_extent_buffer(fs_info, start);
4960 if (eb)
4961 return eb;
4962 eb = alloc_dummy_extent_buffer(fs_info, start);
4963 if (!eb)
4964 return NULL;
4965 eb->fs_info = fs_info;
4966again:
4967 ret = radix_tree_preload(GFP_NOFS);
4968 if (ret)
4969 goto free_eb;
4970 spin_lock(&fs_info->buffer_lock);
4971 ret = radix_tree_insert(&fs_info->buffer_radix,
4972 start >> PAGE_SHIFT, eb);
4973 spin_unlock(&fs_info->buffer_lock);
4974 radix_tree_preload_end();
4975 if (ret == -EEXIST) {
4976 exists = find_extent_buffer(fs_info, start);
4977 if (exists)
4978 goto free_eb;
4979 else
4980 goto again;
4981 }
4982 check_buffer_tree_ref(eb);
4983 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
	/*
	 * A dummy extent buffer is freed as soon as free_extent_buffer()
	 * sees a reference count of 2, but the tests want these buffers to
	 * stay around until they are done with them, so take an extra
	 * reference here.
	 */
4991 atomic_inc(&eb->refs);
4992 return eb;
4993free_eb:
4994 btrfs_release_extent_buffer(eb);
4995 return exists;
4996}
4997#endif
4998
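/*
 * Return the extent buffer covering the tree block at @start, allocating a
 * new one if none exists yet.  Pages are taken from the btree inode's page
 * cache and the buffer is inserted into fs_info->buffer_radix; if another
 * task wins the insertion race we drop our copy and return theirs.  Returns
 * an ERR_PTR on a misaligned @start, allocation failure or radix tree errors.
 */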
4999struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
5000 u64 start)
5001{
5002 unsigned long len = fs_info->nodesize;
5003 unsigned long num_pages = num_extent_pages(start, len);
5004 unsigned long i;
5005 unsigned long index = start >> PAGE_SHIFT;
5006 struct extent_buffer *eb;
5007 struct extent_buffer *exists = NULL;
5008 struct page *p;
5009 struct address_space *mapping = fs_info->btree_inode->i_mapping;
5010 int uptodate = 1;
5011 int ret;
5012
5013 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
5014 btrfs_err(fs_info, "bad tree block start %llu", start);
5015 return ERR_PTR(-EINVAL);
5016 }
5017
5018 eb = find_extent_buffer(fs_info, start);
5019 if (eb)
5020 return eb;
5021
5022 eb = __alloc_extent_buffer(fs_info, start, len);
5023 if (!eb)
5024 return ERR_PTR(-ENOMEM);
5025
5026 for (i = 0; i < num_pages; i++, index++) {
5027 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
5028 if (!p) {
5029 exists = ERR_PTR(-ENOMEM);
5030 goto free_eb;
5031 }
5032
5033 spin_lock(&mapping->private_lock);
5034 if (PagePrivate(p)) {
			/*
			 * The page already has an extent buffer attached to
			 * it.  If we can grab a reference on that buffer it
			 * is still live and we can just return it; otherwise
			 * it is being freed and we can safely reuse
			 * page->private for our new buffer.
			 */
5042 exists = (struct extent_buffer *)p->private;
5043 if (atomic_inc_not_zero(&exists->refs)) {
5044 spin_unlock(&mapping->private_lock);
5045 unlock_page(p);
5046 put_page(p);
5047 mark_extent_buffer_accessed(exists, p);
5048 goto free_eb;
5049 }
5050 exists = NULL;
			/*
			 * The old buffer is being freed: clear page private
			 * so attach_extent_buffer_page() doesn't complain and
			 * drop the page reference the old buffer took.
			 */
5056 ClearPagePrivate(p);
5057 WARN_ON(PageDirty(p));
5058 put_page(p);
5059 }
5060 attach_extent_buffer_page(eb, p);
5061 spin_unlock(&mapping->private_lock);
5062 WARN_ON(PageDirty(p));
5063 eb->pages[i] = p;
5064 if (!PageUptodate(p))
5065 uptodate = 0;
		/*
		 * We can't unlock the pages just yet since the extent buffer
		 * hasn't been inserted into the radix tree; the releasepage
		 * path could free a page while we are still filling in the
		 * rest of the buffer's pages, and we would crash.
		 */
5071 }
5072 if (uptodate)
5073 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5074again:
5075 ret = radix_tree_preload(GFP_NOFS);
5076 if (ret) {
5077 exists = ERR_PTR(ret);
5078 goto free_eb;
5079 }
5080
5081 spin_lock(&fs_info->buffer_lock);
5082 ret = radix_tree_insert(&fs_info->buffer_radix,
5083 start >> PAGE_SHIFT, eb);
5084 spin_unlock(&fs_info->buffer_lock);
5085 radix_tree_preload_end();
5086 if (ret == -EEXIST) {
5087 exists = find_extent_buffer(fs_info, start);
5088 if (exists)
5089 goto free_eb;
5090 else
5091 goto again;
5092 }
5093
5094 check_buffer_tree_ref(eb);
5095 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
	/*
	 * There is a race where the releasepage path may have tried to find
	 * this extent buffer in the radix tree and failed; in that case it
	 * tells the VM the page is safe to reclaim and clears the page
	 * private bit.  Now that the buffer is inserted in the radix tree,
	 * make sure the page state is set up properly so the association
	 * with the buffer does not get lost, then unlock all the pages.
	 */
5106 SetPageChecked(eb->pages[0]);
5107 for (i = 1; i < num_pages; i++) {
5108 p = eb->pages[i];
5109 ClearPageChecked(p);
5110 unlock_page(p);
5111 }
5112 unlock_page(eb->pages[0]);
5113 return eb;
5114
5115free_eb:
5116 WARN_ON(!atomic_dec_and_test(&eb->refs));
5117 for (i = 0; i < num_pages; i++) {
5118 if (eb->pages[i])
5119 unlock_page(eb->pages[i]);
5120 }
5121
5122 btrfs_release_extent_buffer(eb);
5123 return exists;
5124}
5125
5126static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5127{
5128 struct extent_buffer *eb =
5129 container_of(head, struct extent_buffer, rcu_head);
5130
5131 __free_extent_buffer(eb);
5132}
5133
/* Expects eb->refs_lock to already be held by the caller. */
5135static int release_extent_buffer(struct extent_buffer *eb)
5136{
5137 WARN_ON(atomic_read(&eb->refs) == 0);
5138 if (atomic_dec_and_test(&eb->refs)) {
5139 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5140 struct btrfs_fs_info *fs_info = eb->fs_info;
5141
5142 spin_unlock(&eb->refs_lock);
5143
5144 spin_lock(&fs_info->buffer_lock);
5145 radix_tree_delete(&fs_info->buffer_radix,
5146 eb->start >> PAGE_SHIFT);
5147 spin_unlock(&fs_info->buffer_lock);
5148 } else {
5149 spin_unlock(&eb->refs_lock);
5150 }

		/* It should now be safe to release the buffer's pages. */
5153 btrfs_release_extent_buffer_page(eb);
5154#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5155 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
5156 __free_extent_buffer(eb);
5157 return 1;
5158 }
5159#endif
5160 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5161 return 1;
5162 }
5163 spin_unlock(&eb->refs_lock);
5164
5165 return 0;
5166}
5167
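/*
 * Drop a reference on an extent buffer.  While more than three references
 * remain the count is dropped locklessly with a cmpxchg loop; below that we
 * take eb->refs_lock, drop the reference held for dummy buffers or for stale
 * buffers still carrying a tree ref, and let release_extent_buffer() perform
 * the final check and teardown.
 */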
5168void free_extent_buffer(struct extent_buffer *eb)
5169{
5170 int refs;
5171 int old;
5172 if (!eb)
5173 return;
5174
5175 while (1) {
5176 refs = atomic_read(&eb->refs);
5177 if (refs <= 3)
5178 break;
5179 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5180 if (old == refs)
5181 return;
5182 }
5183
5184 spin_lock(&eb->refs_lock);
5185 if (atomic_read(&eb->refs) == 2 &&
5186 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5187 atomic_dec(&eb->refs);
5188
5189 if (atomic_read(&eb->refs) == 2 &&
5190 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5191 !extent_buffer_under_io(eb) &&
5192 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5193 atomic_dec(&eb->refs);
5194
	/*
	 * Drop the caller's reference.  release_extent_buffer() unlocks
	 * eb->refs_lock and, if this was the last reference, removes the
	 * buffer from the radix tree and frees it.
	 */
5199 release_extent_buffer(eb);
5200}
5201
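/*
 * Mark an extent buffer stale and drop a reference.  If the only remaining
 * references are ours and the tree's, and no IO is in flight, the tree
 * reference is dropped here as well so release_extent_buffer() can free it.
 */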
5202void free_extent_buffer_stale(struct extent_buffer *eb)
5203{
5204 if (!eb)
5205 return;
5206
5207 spin_lock(&eb->refs_lock);
5208 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5209
5210 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5211 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5212 atomic_dec(&eb->refs);
5213 release_extent_buffer(eb);
5214}
5215
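/*
 * Clear the dirty state of every page backing the extent buffer: each dirty
 * page is locked, cleared for IO, has its PAGECACHE_TAG_DIRTY tag removed so
 * writeback no longer picks it up, and has its error flag cleared.
 */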
5216void clear_extent_buffer_dirty(struct extent_buffer *eb)
5217{
5218 unsigned long i;
5219 unsigned long num_pages;
5220 struct page *page;
5221
5222 num_pages = num_extent_pages(eb->start, eb->len);
5223
5224 for (i = 0; i < num_pages; i++) {
5225 page = eb->pages[i];
5226 if (!PageDirty(page))
5227 continue;
5228
5229 lock_page(page);
5230 WARN_ON(!PagePrivate(page));
5231
5232 clear_page_dirty_for_io(page);
5233 spin_lock_irq(&page->mapping->tree_lock);
5234 if (!PageDirty(page)) {
5235 radix_tree_tag_clear(&page->mapping->page_tree,
5236 page_index(page),
5237 PAGECACHE_TAG_DIRTY);
5238 }
5239 spin_unlock_irq(&page->mapping->tree_lock);
5240 ClearPageError(page);
5241 unlock_page(page);
5242 }
5243 WARN_ON(atomic_read(&eb->refs) == 0);
5244}
5245
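/*
 * Mark the extent buffer and all of its pages dirty.  Returns the previous
 * value of EXTENT_BUFFER_DIRTY so callers can tell whether the buffer was
 * already dirty.
 */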
5246int set_extent_buffer_dirty(struct extent_buffer *eb)
5247{
5248 unsigned long i;
5249 unsigned long num_pages;
5250 int was_dirty = 0;
5251
5252 check_buffer_tree_ref(eb);
5253
5254 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5255
5256 num_pages = num_extent_pages(eb->start, eb->len);
5257 WARN_ON(atomic_read(&eb->refs) == 0);
5258 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5259
5260 for (i = 0; i < num_pages; i++)
5261 set_page_dirty(eb->pages[i]);
5262 return was_dirty;
5263}
5264
5265void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5266{
5267 unsigned long i;
5268 struct page *page;
5269 unsigned long num_pages;
5270
5271 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5272 num_pages = num_extent_pages(eb->start, eb->len);
5273 for (i = 0; i < num_pages; i++) {
5274 page = eb->pages[i];
5275 if (page)
5276 ClearPageUptodate(page);
5277 }
5278}
5279
5280void set_extent_buffer_uptodate(struct extent_buffer *eb)
5281{
5282 unsigned long i;
5283 struct page *page;
5284 unsigned long num_pages;
5285
5286 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5287 num_pages = num_extent_pages(eb->start, eb->len);
5288 for (i = 0; i < num_pages; i++) {
5289 page = eb->pages[i];
5290 SetPageUptodate(page);
5291 }
5292}
5293
5294int extent_buffer_uptodate(struct extent_buffer *eb)
5295{
5296 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5297}
5298
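/*
 * Read the pages backing an extent buffer from disk.  All pages are locked
 * first (with WAIT_NONE only trylocks are used); pages that are already
 * uptodate are skipped and the rest are submitted through
 * __extent_read_full_page().  With WAIT_COMPLETE we wait for every page and
 * return -EIO if any of them failed to become uptodate.
 */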
5299int read_extent_buffer_pages(struct extent_io_tree *tree,
5300 struct extent_buffer *eb, int wait,
5301 get_extent_t *get_extent, int mirror_num)
5302{
5303 unsigned long i;
5304 struct page *page;
5305 int err;
5306 int ret = 0;
5307 int locked_pages = 0;
5308 int all_uptodate = 1;
5309 unsigned long num_pages;
5310 unsigned long num_reads = 0;
5311 struct bio *bio = NULL;
5312 unsigned long bio_flags = 0;
5313
5314 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5315 return 0;
5316
5317 num_pages = num_extent_pages(eb->start, eb->len);
5318 for (i = 0; i < num_pages; i++) {
5319 page = eb->pages[i];
5320 if (wait == WAIT_NONE) {
5321 if (!trylock_page(page))
5322 goto unlock_exit;
5323 } else {
5324 lock_page(page);
5325 }
5326 locked_pages++;
5327 }
5328
	/*
	 * All pages were locked above so that clear_extent_buffer_uptodate()
	 * cannot clear the uptodate bit of our pages while we check them.
	 */
5333 for (i = 0; i < num_pages; i++) {
5334 page = eb->pages[i];
5335 if (!PageUptodate(page)) {
5336 num_reads++;
5337 all_uptodate = 0;
5338 }
5339 }
5340
5341 if (all_uptodate) {
5342 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5343 goto unlock_exit;
5344 }
5345
5346 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5347 eb->read_mirror = 0;
5348 atomic_set(&eb->io_pages, num_reads);
5349 for (i = 0; i < num_pages; i++) {
5350 page = eb->pages[i];
5351
5352 if (!PageUptodate(page)) {
5353 if (ret) {
5354 atomic_dec(&eb->io_pages);
5355 unlock_page(page);
5356 continue;
5357 }
5358
5359 ClearPageError(page);
5360 err = __extent_read_full_page(tree, page,
5361 get_extent, &bio,
5362 mirror_num, &bio_flags,
5363 REQ_META);
5364 if (err) {
5365 ret = err;
				/*
				 * __extent_read_full_page() was passed &bio,
				 * so on error the current page failed to be
				 * added to the bio and has already been
				 * unlocked.  It will never be read, so drop
				 * its contribution to io_pages ourselves.
				 */
5374 atomic_dec(&eb->io_pages);
5375 }
5376 } else {
5377 unlock_page(page);
5378 }
5379 }
5380
5381 if (bio) {
5382 err = submit_one_bio(bio, mirror_num, bio_flags);
5383 if (err)
5384 return err;
5385 }
5386
5387 if (ret || wait != WAIT_COMPLETE)
5388 return ret;
5389
5390 for (i = 0; i < num_pages; i++) {
5391 page = eb->pages[i];
5392 wait_on_page_locked(page);
5393 if (!PageUptodate(page))
5394 ret = -EIO;
5395 }
5396
5397 return ret;
5398
5399unlock_exit:
5400 while (locked_pages > 0) {
5401 locked_pages--;
5402 page = eb->pages[locked_pages];
5403 unlock_page(page);
5404 }
5405 return ret;
5406}
5407
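/*
 * Copy @len bytes starting at offset @start inside the extent buffer into
 * the kernel buffer @dstv, walking the backing pages one at a time.
 */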
5408void read_extent_buffer(struct extent_buffer *eb, void *dstv,
5409 unsigned long start,
5410 unsigned long len)
5411{
5412 size_t cur;
5413 size_t offset;
5414 struct page *page;
5415 char *kaddr;
5416 char *dst = (char *)dstv;
5417 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5418 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5419
5420 WARN_ON(start > eb->len);
5421 WARN_ON(start + len > eb->start + eb->len);
5422
5423 offset = (start_offset + start) & (PAGE_SIZE - 1);
5424
5425 while (len > 0) {
5426 page = eb->pages[i];
5427
5428 cur = min(len, (PAGE_SIZE - offset));
5429 kaddr = page_address(page);
5430 memcpy(dst, kaddr + offset, cur);
5431
5432 dst += cur;
5433 len -= cur;
5434 offset = 0;
5435 i++;
5436 }
5437}
5438
5439int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
5440 unsigned long start,
5441 unsigned long len)
5442{
5443 size_t cur;
5444 size_t offset;
5445 struct page *page;
5446 char *kaddr;
5447 char __user *dst = (char __user *)dstv;
5448 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5449 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5450 int ret = 0;
5451
5452 WARN_ON(start > eb->len);
5453 WARN_ON(start + len > eb->start + eb->len);
5454
5455 offset = (start_offset + start) & (PAGE_SIZE - 1);
5456
5457 while (len > 0) {
5458 page = eb->pages[i];
5459
5460 cur = min(len, (PAGE_SIZE - offset));
5461 kaddr = page_address(page);
5462 if (copy_to_user(dst, kaddr + offset, cur)) {
5463 ret = -EFAULT;
5464 break;
5465 }
5466
5467 dst += cur;
5468 len -= cur;
5469 offset = 0;
5470 i++;
5471 }
5472
5473 return ret;
5474}
5475
/*
 * Return 0 if the requested range [start, start + min_len) fits within a
 * single page and set *map, *map_start and *map_len accordingly, 1 if the
 * range spans two pages, or -EINVAL if it runs past the end of the buffer.
 */
5481int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
5482 unsigned long min_len, char **map,
5483 unsigned long *map_start,
5484 unsigned long *map_len)
5485{
5486 size_t offset = start & (PAGE_SIZE - 1);
5487 char *kaddr;
5488 struct page *p;
5489 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5490 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5491 unsigned long end_i = (start_offset + start + min_len - 1) >>
5492 PAGE_SHIFT;
5493
5494 if (i != end_i)
5495 return 1;
5496
5497 if (i == 0) {
5498 offset = start_offset;
5499 *map_start = 0;
5500 } else {
5501 offset = 0;
5502 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
5503 }
5504
5505 if (start + min_len > eb->len) {
5506 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5507 eb->start, eb->len, start, min_len);
5508 return -EINVAL;
5509 }
5510
5511 p = eb->pages[i];
5512 kaddr = page_address(p);
5513 *map = kaddr + offset;
5514 *map_len = PAGE_SIZE - offset;
5515 return 0;
5516}
5517
5518int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
5519 unsigned long start,
5520 unsigned long len)
5521{
5522 size_t cur;
5523 size_t offset;
5524 struct page *page;
5525 char *kaddr;
5526 char *ptr = (char *)ptrv;
5527 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5528 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5529 int ret = 0;
5530
5531 WARN_ON(start > eb->len);
5532 WARN_ON(start + len > eb->start + eb->len);
5533
5534 offset = (start_offset + start) & (PAGE_SIZE - 1);
5535
5536 while (len > 0) {
5537 page = eb->pages[i];
5538
5539 cur = min(len, (PAGE_SIZE - offset));
5540
5541 kaddr = page_address(page);
5542 ret = memcmp(ptr, kaddr + offset, cur);
5543 if (ret)
5544 break;
5545
5546 ptr += cur;
5547 len -= cur;
5548 offset = 0;
5549 i++;
5550 }
5551 return ret;
5552}
5553
5554void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
5555 const void *srcv)
5556{
5557 char *kaddr;
5558
5559 WARN_ON(!PageUptodate(eb->pages[0]));
5560 kaddr = page_address(eb->pages[0]);
5561 memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
5562 BTRFS_FSID_SIZE);
5563}
5564
5565void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
5566{
5567 char *kaddr;
5568
5569 WARN_ON(!PageUptodate(eb->pages[0]));
5570 kaddr = page_address(eb->pages[0]);
5571 memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
5572 BTRFS_FSID_SIZE);
5573}
5574
5575void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5576 unsigned long start, unsigned long len)
5577{
5578 size_t cur;
5579 size_t offset;
5580 struct page *page;
5581 char *kaddr;
5582 char *src = (char *)srcv;
5583 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5584 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5585
5586 WARN_ON(start > eb->len);
5587 WARN_ON(start + len > eb->start + eb->len);
5588
5589 offset = (start_offset + start) & (PAGE_SIZE - 1);
5590
5591 while (len > 0) {
5592 page = eb->pages[i];
5593 WARN_ON(!PageUptodate(page));
5594
5595 cur = min(len, PAGE_SIZE - offset);
5596 kaddr = page_address(page);
5597 memcpy(kaddr + offset, src, cur);
5598
5599 src += cur;
5600 len -= cur;
5601 offset = 0;
5602 i++;
5603 }
5604}
5605
5606void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
5607 unsigned long len)
5608{
5609 size_t cur;
5610 size_t offset;
5611 struct page *page;
5612 char *kaddr;
5613 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5614 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5615
5616 WARN_ON(start > eb->len);
5617 WARN_ON(start + len > eb->start + eb->len);
5618
5619 offset = (start_offset + start) & (PAGE_SIZE - 1);
5620
5621 while (len > 0) {
5622 page = eb->pages[i];
5623 WARN_ON(!PageUptodate(page));
5624
5625 cur = min(len, PAGE_SIZE - offset);
5626 kaddr = page_address(page);
5627 memset(kaddr + offset, 0, cur);
5628
5629 len -= cur;
5630 offset = 0;
5631 i++;
5632 }
5633}
5634
5635void copy_extent_buffer_full(struct extent_buffer *dst,
5636 struct extent_buffer *src)
5637{
5638 int i;
5639 unsigned num_pages;
5640
5641 ASSERT(dst->len == src->len);
5642
5643 num_pages = num_extent_pages(dst->start, dst->len);
5644 for (i = 0; i < num_pages; i++)
5645 copy_page(page_address(dst->pages[i]),
5646 page_address(src->pages[i]));
5647}
5648
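/*
 * Copy @len bytes from offset @src_offset in @src to offset @dst_offset in
 * @dst, one destination page at a time, using read_extent_buffer() to pull
 * the data out of the source buffer.
 */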
5649void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5650 unsigned long dst_offset, unsigned long src_offset,
5651 unsigned long len)
5652{
5653 u64 dst_len = dst->len;
5654 size_t cur;
5655 size_t offset;
5656 struct page *page;
5657 char *kaddr;
5658 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5659 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
5660
5661 WARN_ON(src->len != dst_len);
5662
5663 offset = (start_offset + dst_offset) &
5664 (PAGE_SIZE - 1);
5665
5666 while (len > 0) {
5667 page = dst->pages[i];
5668 WARN_ON(!PageUptodate(page));
5669
5670 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
5671
5672 kaddr = page_address(page);
5673 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5674
5675 src_offset += cur;
5676 len -= cur;
5677 offset = 0;
5678 i++;
5679 }
5680}
5681
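/*
 * Set @len bits starting at bit @start in the byte-addressed bitmap @map,
 * handling the partial first byte, any whole bytes in the middle and the
 * partial last byte.
 */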
5682void le_bitmap_set(u8 *map, unsigned int start, int len)
5683{
5684 u8 *p = map + BIT_BYTE(start);
5685 const unsigned int size = start + len;
5686 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
5687 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
5688
5689 while (len - bits_to_set >= 0) {
5690 *p |= mask_to_set;
5691 len -= bits_to_set;
5692 bits_to_set = BITS_PER_BYTE;
5693 mask_to_set = ~0;
5694 p++;
5695 }
5696 if (len) {
5697 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5698 *p |= mask_to_set;
5699 }
5700}
5701
5702void le_bitmap_clear(u8 *map, unsigned int start, int len)
5703{
5704 u8 *p = map + BIT_BYTE(start);
5705 const unsigned int size = start + len;
5706 int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
5707 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
5708
5709 while (len - bits_to_clear >= 0) {
5710 *p &= ~mask_to_clear;
5711 len -= bits_to_clear;
5712 bits_to_clear = BITS_PER_BYTE;
5713 mask_to_clear = ~0;
5714 p++;
5715 }
5716 if (len) {
5717 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5718 *p &= ~mask_to_clear;
5719 }
5720}
5721
/*
 * eb_bitmap_offset - calculate the page and offset of the byte containing
 *                    the given bit number
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page in the extent buffer that contains
 *              the given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer
 * which contains a given bit.
 */
5735static inline void eb_bitmap_offset(struct extent_buffer *eb,
5736 unsigned long start, unsigned long nr,
5737 unsigned long *page_index,
5738 size_t *page_offset)
5739{
5740 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5741 size_t byte_offset = BIT_BYTE(nr);
5742 size_t offset;
5743
	/*
	 * The byte we want is the offset of the extent buffer + the offset of
	 * the bitmap item in the extent buffer + the offset of the byte in
	 * the bitmap item.
	 */
5749 offset = start_offset + start + byte_offset;
5750
5751 *page_index = offset >> PAGE_SHIFT;
5752 *page_offset = offset & (PAGE_SIZE - 1);
5753}
5754
/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */
5761int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5762 unsigned long nr)
5763{
5764 u8 *kaddr;
5765 struct page *page;
5766 unsigned long i;
5767 size_t offset;
5768
5769 eb_bitmap_offset(eb, start, nr, &i, &offset);
5770 page = eb->pages[i];
5771 WARN_ON(!PageUptodate(page));
5772 kaddr = page_address(page);
5773 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5774}
5775
/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to set
 * @len: number of bits to set
 */
5783void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5784 unsigned long pos, unsigned long len)
5785{
5786 u8 *kaddr;
5787 struct page *page;
5788 unsigned long i;
5789 size_t offset;
5790 const unsigned int size = pos + len;
5791 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5792 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5793
5794 eb_bitmap_offset(eb, start, pos, &i, &offset);
5795 page = eb->pages[i];
5796 WARN_ON(!PageUptodate(page));
5797 kaddr = page_address(page);
5798
5799 while (len >= bits_to_set) {
5800 kaddr[offset] |= mask_to_set;
5801 len -= bits_to_set;
5802 bits_to_set = BITS_PER_BYTE;
5803 mask_to_set = ~0;
5804 if (++offset >= PAGE_SIZE && len > 0) {
5805 offset = 0;
5806 page = eb->pages[++i];
5807 WARN_ON(!PageUptodate(page));
5808 kaddr = page_address(page);
5809 }
5810 }
5811 if (len) {
5812 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5813 kaddr[offset] |= mask_to_set;
5814 }
5815}
5816
/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to clear
 * @len: number of bits to clear
 */
5825void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5826 unsigned long pos, unsigned long len)
5827{
5828 u8 *kaddr;
5829 struct page *page;
5830 unsigned long i;
5831 size_t offset;
5832 const unsigned int size = pos + len;
5833 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5834 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5835
5836 eb_bitmap_offset(eb, start, pos, &i, &offset);
5837 page = eb->pages[i];
5838 WARN_ON(!PageUptodate(page));
5839 kaddr = page_address(page);
5840
5841 while (len >= bits_to_clear) {
5842 kaddr[offset] &= ~mask_to_clear;
5843 len -= bits_to_clear;
5844 bits_to_clear = BITS_PER_BYTE;
5845 mask_to_clear = ~0;
5846 if (++offset >= PAGE_SIZE && len > 0) {
5847 offset = 0;
5848 page = eb->pages[++i];
5849 WARN_ON(!PageUptodate(page));
5850 kaddr = page_address(page);
5851 }
5852 }
5853 if (len) {
5854 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5855 kaddr[offset] &= ~mask_to_clear;
5856 }
5857}
5858
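/* Return true if the ranges [src, src + len) and [dst, dst + len) overlap. */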
5859static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5860{
5861 unsigned long distance = (src > dst) ? src - dst : dst - src;
5862 return distance < len;
5863}
5864
5865static void copy_pages(struct page *dst_page, struct page *src_page,
5866 unsigned long dst_off, unsigned long src_off,
5867 unsigned long len)
5868{
5869 char *dst_kaddr = page_address(dst_page);
5870 char *src_kaddr;
5871 int must_memmove = 0;
5872
5873 if (dst_page != src_page) {
5874 src_kaddr = page_address(src_page);
5875 } else {
5876 src_kaddr = dst_kaddr;
5877 if (areas_overlap(src_off, dst_off, len))
5878 must_memmove = 1;
5879 }
5880
5881 if (must_memmove)
5882 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5883 else
5884 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5885}
5886
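/*
 * memcpy() equivalent for extent buffers: copy @len bytes within @dst from
 * @src_offset to @dst_offset, one page chunk at a time.  copy_pages() falls
 * back to memmove() when source and destination land in the same page and
 * overlap.
 */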
5887void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5888 unsigned long src_offset, unsigned long len)
5889{
5890 struct btrfs_fs_info *fs_info = dst->fs_info;
5891 size_t cur;
5892 size_t dst_off_in_page;
5893 size_t src_off_in_page;
5894 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5895 unsigned long dst_i;
5896 unsigned long src_i;
5897
5898 if (src_offset + len > dst->len) {
5899 btrfs_err(fs_info,
5900 "memmove bogus src_offset %lu move len %lu dst len %lu",
5901 src_offset, len, dst->len);
5902 BUG_ON(1);
5903 }
5904 if (dst_offset + len > dst->len) {
5905 btrfs_err(fs_info,
5906 "memmove bogus dst_offset %lu move len %lu dst len %lu",
5907 dst_offset, len, dst->len);
5908 BUG_ON(1);
5909 }
5910
5911 while (len > 0) {
5912 dst_off_in_page = (start_offset + dst_offset) &
5913 (PAGE_SIZE - 1);
5914 src_off_in_page = (start_offset + src_offset) &
5915 (PAGE_SIZE - 1);
5916
5917 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
5918 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
5919
5920 cur = min(len, (unsigned long)(PAGE_SIZE -
5921 src_off_in_page));
5922 cur = min_t(unsigned long, cur,
5923 (unsigned long)(PAGE_SIZE - dst_off_in_page));
5924
5925 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5926 dst_off_in_page, src_off_in_page, cur);
5927
5928 src_offset += cur;
5929 dst_offset += cur;
5930 len -= cur;
5931 }
5932}
5933
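/*
 * memmove() equivalent for extent buffers: copy @len bytes within @dst from
 * @src_offset to @dst_offset even when the ranges overlap.  When the
 * destination is below the source it simply defers to memcpy_extent_buffer();
 * otherwise it copies backwards, page chunk by page chunk.
 */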
5934void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5935 unsigned long src_offset, unsigned long len)
5936{
5937 struct btrfs_fs_info *fs_info = dst->fs_info;
5938 size_t cur;
5939 size_t dst_off_in_page;
5940 size_t src_off_in_page;
5941 unsigned long dst_end = dst_offset + len - 1;
5942 unsigned long src_end = src_offset + len - 1;
5943 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5944 unsigned long dst_i;
5945 unsigned long src_i;
5946
5947 if (src_offset + len > dst->len) {
5948 btrfs_err(fs_info,
5949 "memmove bogus src_offset %lu move len %lu len %lu",
5950 src_offset, len, dst->len);
5951 BUG_ON(1);
5952 }
5953 if (dst_offset + len > dst->len) {
5954 btrfs_err(fs_info,
5955 "memmove bogus dst_offset %lu move len %lu len %lu",
5956 dst_offset, len, dst->len);
5957 BUG_ON(1);
5958 }
5959 if (dst_offset < src_offset) {
5960 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5961 return;
5962 }
5963 while (len > 0) {
5964 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
5965 src_i = (start_offset + src_end) >> PAGE_SHIFT;
5966
5967 dst_off_in_page = (start_offset + dst_end) &
5968 (PAGE_SIZE - 1);
5969 src_off_in_page = (start_offset + src_end) &
5970 (PAGE_SIZE - 1);
5971
5972 cur = min_t(unsigned long, len, src_off_in_page + 1);
5973 cur = min(cur, dst_off_in_page + 1);
5974 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5975 dst_off_in_page - cur + 1,
5976 src_off_in_page - cur + 1, cur);
5977
5978 dst_end -= cur;
5979 src_end -= cur;
5980 len -= cur;
5981 }
5982}
5983
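/*
 * Try to detach and free the extent buffer attached to @page (used by the
 * releasepage path).  Returns 1 if the page has no buffer attached or the
 * buffer could be released, 0 if the buffer is still referenced or under IO
 * and the page must be kept.
 */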
5984int try_release_extent_buffer(struct page *page)
5985{
5986 struct extent_buffer *eb;
5987
	/*
	 * Make sure nobody is in the middle of attaching this page to an
	 * extent buffer while we inspect page->private below.
	 */
5992 spin_lock(&page->mapping->private_lock);
5993 if (!PagePrivate(page)) {
5994 spin_unlock(&page->mapping->private_lock);
5995 return 1;
5996 }
5997
5998 eb = (struct extent_buffer *)page->private;
5999 BUG_ON(!eb);
6000
	/*
	 * This is a little awkward: we skip the pages that actually belong
	 * to the buffer and instead take eb->refs_lock, so the buffer cannot
	 * disappear while we check its reference count and IO state below.
	 */
6006 spin_lock(&eb->refs_lock);
6007 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
6008 spin_unlock(&eb->refs_lock);
6009 spin_unlock(&page->mapping->private_lock);
6010 return 0;
6011 }
6012 spin_unlock(&page->mapping->private_lock);
6013
	/*
	 * If the TREE_REF bit isn't set then we know the ref on this eb is a
	 * real ref, so just return; this page will likely be freed soon
	 * anyway.
	 */
6018 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
6019 spin_unlock(&eb->refs_lock);
6020 return 0;
6021 }
6022
6023 return release_extent_buffer(eb);
6024}
6025