#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
24
25static struct kmem_cache *extent_state_cache;
26static struct kmem_cache *extent_buffer_cache;
27static struct bio_set *btrfs_bioset;
28
29static inline bool extent_state_in_tree(const struct extent_state *state)
30{
31 return !RB_EMPTY_NODE(&state->rb_node);
32}
33
34#ifdef CONFIG_BTRFS_DEBUG
35static LIST_HEAD(buffers);
36static LIST_HEAD(states);
37
38static DEFINE_SPINLOCK(leak_lock);
39
40static inline
41void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
42{
43 unsigned long flags;
44
45 spin_lock_irqsave(&leak_lock, flags);
46 list_add(new, head);
47 spin_unlock_irqrestore(&leak_lock, flags);
48}
49
50static inline
51void btrfs_leak_debug_del(struct list_head *entry)
52{
53 unsigned long flags;
54
55 spin_lock_irqsave(&leak_lock, flags);
56 list_del(entry);
57 spin_unlock_irqrestore(&leak_lock, flags);
58}
59
60static inline
61void btrfs_leak_debug_check(void)
62{
63 struct extent_state *state;
64 struct extent_buffer *eb;
65
66 while (!list_empty(&states)) {
67 state = list_entry(states.next, struct extent_state, leak_list);
68 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
69 state->start, state->end, state->state,
70 extent_state_in_tree(state),
71 refcount_read(&state->refs));
72 list_del(&state->leak_list);
73 kmem_cache_free(extent_state_cache, state);
74 }
75
76 while (!list_empty(&buffers)) {
77 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
78 pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
79 eb->start, eb->len, atomic_read(&eb->refs));
80 list_del(&eb->leak_list);
81 kmem_cache_free(extent_buffer_cache, eb);
82 }
83}
84
85#define btrfs_debug_check_extent_io_range(tree, start, end) \
86 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
87static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 struct extent_io_tree *tree, u64 start, u64 end)
89{
90 if (tree->ops && tree->ops->check_extent_io_range)
91 tree->ops->check_extent_io_range(tree->private_data, caller,
92 start, end);
93}
94#else
95#define btrfs_leak_debug_add(new, head) do {} while (0)
96#define btrfs_leak_debug_del(entry) do {} while (0)
97#define btrfs_leak_debug_check() do {} while (0)
98#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
99#endif
100
101#define BUFFER_LRU_MAX 64
102
103struct tree_entry {
104 u64 start;
105 u64 end;
106 struct rb_node rb_node;
107};
108
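/*
 * Context carried between pages while building bios for extent I/O: the bio
 * being assembled, the tree it targets, the get_extent callback and flags
 * that control locking and sync submission.
 */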
109struct extent_page_data {
110 struct bio *bio;
111 struct extent_io_tree *tree;
112 get_extent_t *get_extent;
113 unsigned long bio_flags;
	/*
	 * Tell writepage not to lock the state bits for this range; it still
	 * does the unlocking.
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
};
123
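/*
 * Record a bit change in the changeset: account the bytes affected and
 * remember the range, but only when the requested bits actually change.
 */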
124static void add_extent_changeset(struct extent_state *state, unsigned bits,
125 struct extent_changeset *changeset,
126 int set)
127{
128 int ret;
129
130 if (!changeset)
131 return;
132 if (set && (state->state & bits) == bits)
133 return;
134 if (!set && (state->state & bits) == 0)
135 return;
136 changeset->bytes_changed += state->end - state->start + 1;
137 ret = ulist_add(&changeset->range_changed, state->start, state->end,
138 GFP_ATOMIC);
139
140 BUG_ON(ret < 0);
141}
142
143static noinline void flush_write_bio(void *data);
144static inline struct btrfs_fs_info *
145tree_fs_info(struct extent_io_tree *tree)
146{
147 if (tree->ops)
148 return tree->ops->tree_fs_info(tree->private_data);
149 return NULL;
150}
151
152int __init extent_io_init(void)
153{
154 extent_state_cache = kmem_cache_create("btrfs_extent_state",
155 sizeof(struct extent_state), 0,
156 SLAB_MEM_SPREAD, NULL);
157 if (!extent_state_cache)
158 return -ENOMEM;
159
160 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
161 sizeof(struct extent_buffer), 0,
162 SLAB_MEM_SPREAD, NULL);
163 if (!extent_buffer_cache)
164 goto free_state_cache;
165
166 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
167 offsetof(struct btrfs_io_bio, bio),
168 BIOSET_NEED_BVECS);
169 if (!btrfs_bioset)
170 goto free_buffer_cache;
171
172 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
173 goto free_bioset;
174
175 return 0;
176
177free_bioset:
178 bioset_free(btrfs_bioset);
179 btrfs_bioset = NULL;
180
181free_buffer_cache:
182 kmem_cache_destroy(extent_buffer_cache);
183 extent_buffer_cache = NULL;
184
185free_state_cache:
186 kmem_cache_destroy(extent_state_cache);
187 extent_state_cache = NULL;
188 return -ENOMEM;
189}
190
191void extent_io_exit(void)
192{
193 btrfs_leak_debug_check();
194
195
196
197
198
199 rcu_barrier();
200 kmem_cache_destroy(extent_state_cache);
201 kmem_cache_destroy(extent_buffer_cache);
202 if (btrfs_bioset)
203 bioset_free(btrfs_bioset);
204}
205
206void extent_io_tree_init(struct extent_io_tree *tree,
207 void *private_data)
208{
209 tree->state = RB_ROOT;
210 tree->ops = NULL;
211 tree->dirty_bytes = 0;
212 spin_lock_init(&tree->lock);
213 tree->private_data = private_data;
214}
215
216static struct extent_state *alloc_extent_state(gfp_t mask)
217{
218 struct extent_state *state;
219
	/*
	 * The given mask might contain zone modifiers that the slab
	 * allocator does not support, drop them.
	 */
224 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
225 state = kmem_cache_alloc(extent_state_cache, mask);
226 if (!state)
227 return state;
228 state->state = 0;
229 state->failrec = NULL;
230 RB_CLEAR_NODE(&state->rb_node);
231 btrfs_leak_debug_add(&state->leak_list, &states);
232 refcount_set(&state->refs, 1);
233 init_waitqueue_head(&state->wq);
234 trace_alloc_extent_state(state, mask, _RET_IP_);
235 return state;
236}
237
238void free_extent_state(struct extent_state *state)
239{
240 if (!state)
241 return;
242 if (refcount_dec_and_test(&state->refs)) {
243 WARN_ON(extent_state_in_tree(state));
244 btrfs_leak_debug_del(&state->leak_list);
245 trace_free_extent_state(state, _RET_IP_);
246 kmem_cache_free(extent_state_cache, state);
247 }
248}
249
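/*
 * Link 'node' into the rb-tree keyed by the [start, end] ranges.  When
 * 'p_in'/'parent_in' are supplied (from a previous search) the tree walk is
 * skipped and the node is linked at that slot directly.  Returns NULL on
 * success, or the existing node whose range already contains 'offset'.
 */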
250static struct rb_node *tree_insert(struct rb_root *root,
251 struct rb_node *search_start,
252 u64 offset,
253 struct rb_node *node,
254 struct rb_node ***p_in,
255 struct rb_node **parent_in)
256{
257 struct rb_node **p;
258 struct rb_node *parent = NULL;
259 struct tree_entry *entry;
260
261 if (p_in && parent_in) {
262 p = *p_in;
263 parent = *parent_in;
264 goto do_insert;
265 }
266
267 p = search_start ? &search_start : &root->rb_node;
268 while (*p) {
269 parent = *p;
270 entry = rb_entry(parent, struct tree_entry, rb_node);
271
272 if (offset < entry->start)
273 p = &(*p)->rb_left;
274 else if (offset > entry->end)
275 p = &(*p)->rb_right;
276 else
277 return parent;
278 }
279
280do_insert:
281 rb_link_node(node, parent, p);
282 rb_insert_color(node, root);
283 return NULL;
284}
285
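/*
 * Search the tree for an entry that contains 'offset'.  If there is no exact
 * match, '*prev_ret' is set to the next entry ending at or after 'offset',
 * '*next_ret' to the closest entry starting at or before 'offset', and
 * '*p_ret'/'*parent_ret' record where a new node would be linked so the
 * caller can insert without repeating the walk.
 */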
286static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
287 struct rb_node **prev_ret,
288 struct rb_node **next_ret,
289 struct rb_node ***p_ret,
290 struct rb_node **parent_ret)
291{
292 struct rb_root *root = &tree->state;
293 struct rb_node **n = &root->rb_node;
294 struct rb_node *prev = NULL;
295 struct rb_node *orig_prev = NULL;
296 struct tree_entry *entry;
297 struct tree_entry *prev_entry = NULL;
298
299 while (*n) {
300 prev = *n;
301 entry = rb_entry(prev, struct tree_entry, rb_node);
302 prev_entry = entry;
303
304 if (offset < entry->start)
305 n = &(*n)->rb_left;
306 else if (offset > entry->end)
307 n = &(*n)->rb_right;
308 else
309 return *n;
310 }
311
312 if (p_ret)
313 *p_ret = n;
314 if (parent_ret)
315 *parent_ret = prev;
316
317 if (prev_ret) {
318 orig_prev = prev;
319 while (prev && offset > prev_entry->end) {
320 prev = rb_next(prev);
321 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
322 }
323 *prev_ret = prev;
324 prev = orig_prev;
325 }
326
327 if (next_ret) {
328 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
329 while (prev && offset < prev_entry->start) {
330 prev = rb_prev(prev);
331 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
332 }
333 *next_ret = prev;
334 }
335 return NULL;
336}
337
338static inline struct rb_node *
339tree_search_for_insert(struct extent_io_tree *tree,
340 u64 offset,
341 struct rb_node ***p_ret,
342 struct rb_node **parent_ret)
343{
344 struct rb_node *prev = NULL;
345 struct rb_node *ret;
346
347 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
348 if (!ret)
349 return prev;
350 return ret;
351}
352
353static inline struct rb_node *tree_search(struct extent_io_tree *tree,
354 u64 offset)
355{
356 return tree_search_for_insert(tree, offset, NULL, NULL);
357}
358
359static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
360 struct extent_state *other)
361{
362 if (tree->ops && tree->ops->merge_extent_hook)
363 tree->ops->merge_extent_hook(tree->private_data, new, other);
364}
365
/*
 * Utility function to look for merge candidates adjacent to a given state.
 * Any neighbouring extents with matching state are merged together into a
 * single extent in the tree.  Extents with EXTENT_IOBITS or EXTENT_BOUNDARY
 * in their state are not merged because the end_io handlers need to be able
 * to do operations on them without sleeping.
 *
 * This should be called with the tree lock held.
 */
375static void merge_state(struct extent_io_tree *tree,
376 struct extent_state *state)
377{
378 struct extent_state *other;
379 struct rb_node *other_node;
380
381 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
382 return;
383
384 other_node = rb_prev(&state->rb_node);
385 if (other_node) {
386 other = rb_entry(other_node, struct extent_state, rb_node);
387 if (other->end == state->start - 1 &&
388 other->state == state->state) {
389 merge_cb(tree, state, other);
390 state->start = other->start;
391 rb_erase(&other->rb_node, &tree->state);
392 RB_CLEAR_NODE(&other->rb_node);
393 free_extent_state(other);
394 }
395 }
396 other_node = rb_next(&state->rb_node);
397 if (other_node) {
398 other = rb_entry(other_node, struct extent_state, rb_node);
399 if (other->start == state->end + 1 &&
400 other->state == state->state) {
401 merge_cb(tree, state, other);
402 state->end = other->end;
403 rb_erase(&other->rb_node, &tree->state);
404 RB_CLEAR_NODE(&other->rb_node);
405 free_extent_state(other);
406 }
407 }
408}
409
410static void set_state_cb(struct extent_io_tree *tree,
411 struct extent_state *state, unsigned *bits)
412{
413 if (tree->ops && tree->ops->set_bit_hook)
414 tree->ops->set_bit_hook(tree->private_data, state, bits);
415}
416
417static void clear_state_cb(struct extent_io_tree *tree,
418 struct extent_state *state, unsigned *bits)
419{
420 if (tree->ops && tree->ops->clear_bit_hook)
421 tree->ops->clear_bit_hook(tree->private_data, state, bits);
422}
423
424static void set_state_bits(struct extent_io_tree *tree,
425 struct extent_state *state, unsigned *bits,
426 struct extent_changeset *changeset);
427
/*
 * Insert an extent_state struct into the tree.  'bits' are set on the struct
 * before it is inserted.
 *
 * This may return -EEXIST if the range is already present in the tree.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
438static int insert_state(struct extent_io_tree *tree,
439 struct extent_state *state, u64 start, u64 end,
440 struct rb_node ***p,
441 struct rb_node **parent,
442 unsigned *bits, struct extent_changeset *changeset)
443{
444 struct rb_node *node;
445
446 if (end < start)
447 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
448 end, start);
449 state->start = start;
450 state->end = end;
451
452 set_state_bits(tree, state, bits, changeset);
453
454 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
455 if (node) {
456 struct extent_state *found;
457 found = rb_entry(node, struct extent_state, rb_node);
458 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
459 found->start, found->end, start, end);
460 return -EEXIST;
461 }
462 merge_state(tree, state);
463 return 0;
464}
465
466static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
467 u64 split)
468{
469 if (tree->ops && tree->ops->split_extent_hook)
470 tree->ops->split_extent_hook(tree->private_data, orig, split);
471}
472
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the new lower half.  'split' indicates an offset
 * inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].  After
 * calling, there are two extent state structs in the tree:
 *	prealloc: [orig->start, split - 1]
 *	orig: [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held by
 * the caller.
 */
487static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
488 struct extent_state *prealloc, u64 split)
489{
490 struct rb_node *node;
491
492 split_cb(tree, orig, split);
493
494 prealloc->start = orig->start;
495 prealloc->end = split - 1;
496 prealloc->state = orig->state;
497 orig->start = split;
498
499 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
500 &prealloc->rb_node, NULL, NULL);
501 if (node) {
502 free_extent_state(prealloc);
503 return -EEXIST;
504 }
505 return 0;
506}
507
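/* Return the extent_state that follows 'state' in the tree, or NULL. */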
508static struct extent_state *next_state(struct extent_state *state)
509{
510 struct rb_node *next = rb_next(&state->rb_node);
511 if (next)
512 return rb_entry(next, struct extent_state, rb_node);
513 else
514 return NULL;
515}
516
/*
 * Utility function to clear some bits in an extent state struct.  It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the struct
 * is freed and removed from the tree.
 */
524static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
525 struct extent_state *state,
526 unsigned *bits, int wake,
527 struct extent_changeset *changeset)
528{
529 struct extent_state *next;
530 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
531
532 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
533 u64 range = state->end - state->start + 1;
534 WARN_ON(range > tree->dirty_bytes);
535 tree->dirty_bytes -= range;
536 }
537 clear_state_cb(tree, state, bits);
538 add_extent_changeset(state, bits_to_clear, changeset, 0);
539 state->state &= ~bits_to_clear;
540 if (wake)
541 wake_up(&state->wq);
542 if (state->state == 0) {
543 next = next_state(state);
544 if (extent_state_in_tree(state)) {
545 rb_erase(&state->rb_node, &tree->state);
546 RB_CLEAR_NODE(&state->rb_node);
547 free_extent_state(state);
548 } else {
549 WARN_ON(1);
550 }
551 } else {
552 merge_state(tree, state);
553 next = next_state(state);
554 }
555 return next;
556}
557
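/*
 * Make sure we have a preallocated extent_state; fall back to a GFP_ATOMIC
 * allocation if the caller could not preallocate one while it was still
 * allowed to block.  May return NULL if the atomic allocation fails.
 */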
558static struct extent_state *
559alloc_extent_state_atomic(struct extent_state *prealloc)
560{
561 if (!prealloc)
562 prealloc = alloc_extent_state(GFP_ATOMIC);
563
564 return prealloc;
565}
566
567static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
568{
569 btrfs_panic(tree_fs_info(tree), err,
570 "Locking error: Extent tree was modified by another thread while locked.");
571}
572
/*
 * Clear some bits on a range in the tree.  This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
 * given range from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock and returns 0.
 */
585static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
586 unsigned bits, int wake, int delete,
587 struct extent_state **cached_state,
588 gfp_t mask, struct extent_changeset *changeset)
589{
590 struct extent_state *state;
591 struct extent_state *cached;
592 struct extent_state *prealloc = NULL;
593 struct rb_node *node;
594 u64 last_end;
595 int err;
596 int clear = 0;
597
598 btrfs_debug_check_extent_io_range(tree, start, end);
599
600 if (bits & EXTENT_DELALLOC)
601 bits |= EXTENT_NORESERVE;
602
603 if (delete)
604 bits |= ~EXTENT_CTLBITS;
605 bits |= EXTENT_FIRST_DELALLOC;
606
607 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
608 clear = 1;
609again:
610 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.  If
		 * we end up needing a new extent state we allocate it later.
		 */
618 prealloc = alloc_extent_state(mask);
619 }
620
621 spin_lock(&tree->lock);
622 if (cached_state) {
623 cached = *cached_state;
624
625 if (clear) {
626 *cached_state = NULL;
627 cached_state = NULL;
628 }
629
630 if (cached && extent_state_in_tree(cached) &&
631 cached->start <= start && cached->end > start) {
632 if (clear)
633 refcount_dec(&cached->refs);
634 state = cached;
635 goto hit_next;
636 }
637 if (clear)
638 free_extent_state(cached);
639 }
640
	/*
	 * This search will find the extents that end after our range starts.
	 */
644 node = tree_search(tree, start);
645 if (!node)
646 goto out;
647 state = rb_entry(node, struct extent_state, rb_node);
648hit_next:
649 if (state->start > end)
650 goto out;
651 WARN_ON(state->end < start);
652 last_end = state->end;
653
654
655 if (!(state->state & bits)) {
656 state = next_state(state);
657 goto next;
658 }
659
	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired
	 * bit on it.
	 */
676 if (state->start < start) {
677 prealloc = alloc_extent_state_atomic(prealloc);
678 BUG_ON(!prealloc);
679 err = split_state(tree, state, prealloc, start);
680 if (err)
681 extent_io_tree_panic(tree, err);
682
683 prealloc = NULL;
684 if (err)
685 goto out;
686 if (state->end <= end) {
687 state = clear_state_bit(tree, state, &bits, wake,
688 changeset);
689 goto next;
690 }
691 goto search_again;
692 }
693
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit on the first half.
	 */
699 if (state->start <= end && state->end > end) {
700 prealloc = alloc_extent_state_atomic(prealloc);
701 BUG_ON(!prealloc);
702 err = split_state(tree, state, prealloc, end + 1);
703 if (err)
704 extent_io_tree_panic(tree, err);
705
706 if (wake)
707 wake_up(&state->wq);
708
709 clear_state_bit(tree, prealloc, &bits, wake, changeset);
710
711 prealloc = NULL;
712 goto out;
713 }
714
715 state = clear_state_bit(tree, state, &bits, wake, changeset);
716next:
717 if (last_end == (u64)-1)
718 goto out;
719 start = last_end + 1;
720 if (start <= end && state && !need_resched())
721 goto hit_next;
722
723search_again:
724 if (start > end)
725 goto out;
726 spin_unlock(&tree->lock);
727 if (gfpflags_allow_blocking(mask))
728 cond_resched();
729 goto again;
730
731out:
732 spin_unlock(&tree->lock);
733 if (prealloc)
734 free_extent_state(prealloc);
735
736 return 0;
737
738}
739
740static void wait_on_state(struct extent_io_tree *tree,
741 struct extent_state *state)
742 __releases(tree->lock)
743 __acquires(tree->lock)
744{
745 DEFINE_WAIT(wait);
746 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
747 spin_unlock(&tree->lock);
748 schedule();
749 spin_lock(&tree->lock);
750 finish_wait(&state->wq, &wait);
751}
752
/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
758static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
759 unsigned long bits)
760{
761 struct extent_state *state;
762 struct rb_node *node;
763
764 btrfs_debug_check_extent_io_range(tree, start, end);
765
766 spin_lock(&tree->lock);
767again:
768 while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
773 node = tree_search(tree, start);
774process_node:
775 if (!node)
776 break;
777
778 state = rb_entry(node, struct extent_state, rb_node);
779
780 if (state->start > end)
781 goto out;
782
783 if (state->state & bits) {
784 start = state->start;
785 refcount_inc(&state->refs);
786 wait_on_state(tree, state);
787 free_extent_state(state);
788 goto again;
789 }
790 start = state->end + 1;
791
792 if (start > end)
793 break;
794
795 if (!cond_resched_lock(&tree->lock)) {
796 node = rb_next(node);
797 goto process_node;
798 }
799 }
800out:
801 spin_unlock(&tree->lock);
802}
803
804static void set_state_bits(struct extent_io_tree *tree,
805 struct extent_state *state,
806 unsigned *bits, struct extent_changeset *changeset)
807{
808 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
809
810 set_state_cb(tree, state, bits);
811 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
812 u64 range = state->end - state->start + 1;
813 tree->dirty_bytes += range;
814 }
815 add_extent_changeset(state, bits_to_set, changeset, 1);
816 state->state |= bits_to_set;
817}
818
819static void cache_state_if_flags(struct extent_state *state,
820 struct extent_state **cached_ptr,
821 unsigned flags)
822{
823 if (cached_ptr && !(*cached_ptr)) {
824 if (!flags || (state->state & flags)) {
825 *cached_ptr = state;
826 refcount_inc(&state->refs);
827 }
828 }
829}
830
831static void cache_state(struct extent_state *state,
832 struct extent_state **cached_ptr)
833{
834 return cache_state_if_flags(state, cached_ptr,
835 EXTENT_IOBITS | EXTENT_BOUNDARY);
836}
837
/*
 * Set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are already set on part of the range, this
 * fails with -EEXIST and the start of the existing range is returned in
 * *failed_start.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */
849static int __must_check
850__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
851 unsigned bits, unsigned exclusive_bits,
852 u64 *failed_start, struct extent_state **cached_state,
853 gfp_t mask, struct extent_changeset *changeset)
854{
855 struct extent_state *state;
856 struct extent_state *prealloc = NULL;
857 struct rb_node *node;
858 struct rb_node **p;
859 struct rb_node *parent;
860 int err = 0;
861 u64 last_start;
862 u64 last_end;
863
864 btrfs_debug_check_extent_io_range(tree, start, end);
865
866 bits |= EXTENT_FIRST_DELALLOC;
867again:
868 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.  If
		 * we end up needing a new extent state we allocate it later.
		 */
876 prealloc = alloc_extent_state(mask);
877 }
878
879 spin_lock(&tree->lock);
880 if (cached_state && *cached_state) {
881 state = *cached_state;
882 if (state->start <= start && state->end > start &&
883 extent_state_in_tree(state)) {
884 node = &state->rb_node;
885 goto hit_next;
886 }
887 }
888
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
892 node = tree_search_for_insert(tree, start, &p, &parent);
893 if (!node) {
894 prealloc = alloc_extent_state_atomic(prealloc);
895 BUG_ON(!prealloc);
896 err = insert_state(tree, prealloc, start, end,
897 &p, &parent, &bits, changeset);
898 if (err)
899 extent_io_tree_panic(tree, err);
900
901 cache_state(prealloc, cached_state);
902 prealloc = NULL;
903 goto out;
904 }
905 state = rb_entry(node, struct extent_state, rb_node);
906hit_next:
907 last_start = state->start;
908 last_end = state->end;
909
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
916 if (state->start == start && state->end <= end) {
917 if (state->state & exclusive_bits) {
918 *failed_start = state->start;
919 err = -EEXIST;
920 goto out;
921 }
922
923 set_state_bits(tree, state, &bits, changeset);
924 cache_state(state, cached_state);
925 merge_state(tree, state);
926 if (last_end == (u64)-1)
927 goto out;
928 start = last_end + 1;
929 state = next_state(state);
930 if (start < end && state && state->start == start &&
931 !need_resched())
932 goto hit_next;
933 goto search_again;
934 }
935
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
952 if (state->start < start) {
953 if (state->state & exclusive_bits) {
954 *failed_start = start;
955 err = -EEXIST;
956 goto out;
957 }
958
959 prealloc = alloc_extent_state_atomic(prealloc);
960 BUG_ON(!prealloc);
961 err = split_state(tree, state, prealloc, start);
962 if (err)
963 extent_io_tree_panic(tree, err);
964
965 prealloc = NULL;
966 if (err)
967 goto out;
968 if (state->end <= end) {
969 set_state_bits(tree, state, &bits, changeset);
970 cache_state(state, cached_state);
971 merge_state(tree, state);
972 if (last_end == (u64)-1)
973 goto out;
974 start = last_end + 1;
975 state = next_state(state);
976 if (start < end && state && state->start == start &&
977 !need_resched())
978 goto hit_next;
979 }
980 goto search_again;
981 }
982
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
989 if (state->start > start) {
990 u64 this_end;
991 if (end < last_start)
992 this_end = end;
993 else
994 this_end = last_start - 1;
995
996 prealloc = alloc_extent_state_atomic(prealloc);
997 BUG_ON(!prealloc);
		/*
		 * Insert the preallocated state to fill the hole; once it is
		 * in the tree it may be merged with the following extent, so
		 * don't free it.
		 */
1003 err = insert_state(tree, prealloc, start, this_end,
1004 NULL, NULL, &bits, changeset);
1005 if (err)
1006 extent_io_tree_panic(tree, err);
1007
1008 cache_state(prealloc, cached_state);
1009 prealloc = NULL;
1010 start = this_end + 1;
1011 goto search_again;
1012 }
1013
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
1019 if (state->start <= end && state->end > end) {
1020 if (state->state & exclusive_bits) {
1021 *failed_start = start;
1022 err = -EEXIST;
1023 goto out;
1024 }
1025
1026 prealloc = alloc_extent_state_atomic(prealloc);
1027 BUG_ON(!prealloc);
1028 err = split_state(tree, state, prealloc, end + 1);
1029 if (err)
1030 extent_io_tree_panic(tree, err);
1031
1032 set_state_bits(tree, prealloc, &bits, changeset);
1033 cache_state(prealloc, cached_state);
1034 merge_state(tree, prealloc);
1035 prealloc = NULL;
1036 goto out;
1037 }
1038
1039search_again:
1040 if (start > end)
1041 goto out;
1042 spin_unlock(&tree->lock);
1043 if (gfpflags_allow_blocking(mask))
1044 cond_resched();
1045 goto again;
1046
1047out:
1048 spin_unlock(&tree->lock);
1049 if (prealloc)
1050 free_extent_state(prealloc);
1051
1052 return err;
1053
1054}
1055
1056int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1057 unsigned bits, u64 * failed_start,
1058 struct extent_state **cached_state, gfp_t mask)
1059{
1060 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1061 cached_state, mask, NULL);
1062}
1063
/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 *			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range.  If any states
 * exist already in this range they are set with the given bit and cleared of
 * the clear_bits.  This is only meant to be used by things that are
 * mergeable, ie. converting from say DELALLOC to DIRTY.  This is not meant
 * to be used with the exclusive locking bits.
 */
1083int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1084 unsigned bits, unsigned clear_bits,
1085 struct extent_state **cached_state)
1086{
1087 struct extent_state *state;
1088 struct extent_state *prealloc = NULL;
1089 struct rb_node *node;
1090 struct rb_node **p;
1091 struct rb_node *parent;
1092 int err = 0;
1093 u64 last_start;
1094 u64 last_end;
1095 bool first_iteration = true;
1096
1097 btrfs_debug_check_extent_io_range(tree, start, end);
1098
1099again:
1100 if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration.  We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed.  We'll only know this
		 * after locking the tree.
		 */
1108 prealloc = alloc_extent_state(GFP_NOFS);
1109 if (!prealloc && !first_iteration)
1110 return -ENOMEM;
1111 }
1112
1113 spin_lock(&tree->lock);
1114 if (cached_state && *cached_state) {
1115 state = *cached_state;
1116 if (state->start <= start && state->end > start &&
1117 extent_state_in_tree(state)) {
1118 node = &state->rb_node;
1119 goto hit_next;
1120 }
1121 }
1122
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
1127 node = tree_search_for_insert(tree, start, &p, &parent);
1128 if (!node) {
1129 prealloc = alloc_extent_state_atomic(prealloc);
1130 if (!prealloc) {
1131 err = -ENOMEM;
1132 goto out;
1133 }
1134 err = insert_state(tree, prealloc, start, end,
1135 &p, &parent, &bits, NULL);
1136 if (err)
1137 extent_io_tree_panic(tree, err);
1138 cache_state(prealloc, cached_state);
1139 prealloc = NULL;
1140 goto out;
1141 }
1142 state = rb_entry(node, struct extent_state, rb_node);
1143hit_next:
1144 last_start = state->start;
1145 last_end = state->end;
1146
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
1153 if (state->start == start && state->end <= end) {
1154 set_state_bits(tree, state, &bits, NULL);
1155 cache_state(state, cached_state);
1156 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1157 if (last_end == (u64)-1)
1158 goto out;
1159 start = last_end + 1;
1160 if (start < end && state && state->start == start &&
1161 !need_resched())
1162 goto hit_next;
1163 goto search_again;
1164 }
1165
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired bit
	 * on it.
	 */
1182 if (state->start < start) {
1183 prealloc = alloc_extent_state_atomic(prealloc);
1184 if (!prealloc) {
1185 err = -ENOMEM;
1186 goto out;
1187 }
1188 err = split_state(tree, state, prealloc, start);
1189 if (err)
1190 extent_io_tree_panic(tree, err);
1191 prealloc = NULL;
1192 if (err)
1193 goto out;
1194 if (state->end <= end) {
1195 set_state_bits(tree, state, &bits, NULL);
1196 cache_state(state, cached_state);
1197 state = clear_state_bit(tree, state, &clear_bits, 0,
1198 NULL);
1199 if (last_end == (u64)-1)
1200 goto out;
1201 start = last_end + 1;
1202 if (start < end && state && state->start == start &&
1203 !need_resched())
1204 goto hit_next;
1205 }
1206 goto search_again;
1207 }
1208
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
1215 if (state->start > start) {
1216 u64 this_end;
1217 if (end < last_start)
1218 this_end = end;
1219 else
1220 this_end = last_start - 1;
1221
1222 prealloc = alloc_extent_state_atomic(prealloc);
1223 if (!prealloc) {
1224 err = -ENOMEM;
1225 goto out;
1226 }
		/*
		 * Insert the preallocated state to fill the hole; once it is
		 * in the tree it may be merged with the following extent, so
		 * don't free it.
		 */
1232 err = insert_state(tree, prealloc, start, this_end,
1233 NULL, NULL, &bits, NULL);
1234 if (err)
1235 extent_io_tree_panic(tree, err);
1236 cache_state(prealloc, cached_state);
1237 prealloc = NULL;
1238 start = this_end + 1;
1239 goto search_again;
1240 }
1241
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
1247 if (state->start <= end && state->end > end) {
1248 prealloc = alloc_extent_state_atomic(prealloc);
1249 if (!prealloc) {
1250 err = -ENOMEM;
1251 goto out;
1252 }
1253
1254 err = split_state(tree, state, prealloc, end + 1);
1255 if (err)
1256 extent_io_tree_panic(tree, err);
1257
1258 set_state_bits(tree, prealloc, &bits, NULL);
1259 cache_state(prealloc, cached_state);
1260 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1261 prealloc = NULL;
1262 goto out;
1263 }
1264
1265search_again:
1266 if (start > end)
1267 goto out;
1268 spin_unlock(&tree->lock);
1269 cond_resched();
1270 first_iteration = false;
1271 goto again;
1272
1273out:
1274 spin_unlock(&tree->lock);
1275 if (prealloc)
1276 free_extent_state(prealloc);
1277
1278 return err;
1279}
1280
1281
1282int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1283 unsigned bits, struct extent_changeset *changeset)
1284{
	/*
	 * We don't support EXTENT_LOCKED yet, because the current changeset
	 * records any bits changed, so for the EXTENT_LOCKED case it would
	 * either fail with -EEXIST or the changeset would record the whole
	 * range.
	 */
1291 BUG_ON(bits & EXTENT_LOCKED);
1292
1293 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1294 changeset);
1295}
1296
1297int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1298 unsigned bits, int wake, int delete,
1299 struct extent_state **cached, gfp_t mask)
1300{
1301 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1302 cached, mask, NULL);
1303}
1304
1305int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1306 unsigned bits, struct extent_changeset *changeset)
1307{
	/*
	 * We don't support the EXTENT_LOCKED case here, for the same reason
	 * as in set_record_extent_bits().
	 */
1312 BUG_ON(bits & EXTENT_LOCKED);
1313
1314 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1315 changeset);
1316}
/*
 * Either insert or lock the extent state covering [start, end].  If part of
 * the range is already locked, wait for it to be unlocked and retry until
 * the whole range is locked.
 */
1322int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1323 struct extent_state **cached_state)
1324{
1325 int err;
1326 u64 failed_start;
1327
1328 while (1) {
1329 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1330 EXTENT_LOCKED, &failed_start,
1331 cached_state, GFP_NOFS, NULL);
1332 if (err == -EEXIST) {
1333 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1334 start = failed_start;
1335 } else
1336 break;
1337 WARN_ON(start > end);
1338 }
1339 return err;
1340}
1341
1342int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1343{
1344 int err;
1345 u64 failed_start;
1346
1347 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1348 &failed_start, NULL, GFP_NOFS, NULL);
1349 if (err == -EEXIST) {
1350 if (failed_start > start)
1351 clear_extent_bit(tree, start, failed_start - 1,
1352 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1353 return 0;
1354 }
1355 return 1;
1356}
1357
1358void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1359{
1360 unsigned long index = start >> PAGE_SHIFT;
1361 unsigned long end_index = end >> PAGE_SHIFT;
1362 struct page *page;
1363
1364 while (index <= end_index) {
1365 page = find_get_page(inode->i_mapping, index);
1366 BUG_ON(!page);
1367 clear_page_dirty_for_io(page);
1368 put_page(page);
1369 index++;
1370 }
1371}
1372
1373void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1374{
1375 unsigned long index = start >> PAGE_SHIFT;
1376 unsigned long end_index = end >> PAGE_SHIFT;
1377 struct page *page;
1378
1379 while (index <= end_index) {
1380 page = find_get_page(inode->i_mapping, index);
1381 BUG_ON(!page);
1382 __set_page_dirty_nobuffers(page);
1383 account_page_redirty(page);
1384 put_page(page);
1385 index++;
1386 }
1387}
1388
/* helper to mark the pages covering [start, end] as under writeback */
1392static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1393{
1394 tree->ops->set_range_writeback(tree->private_data, start, end);
1395}
1396
/*
 * Find the first state struct with 'bits' set after 'start' and return it.
 * tree->lock must be held.  NULL is returned if nothing was found after
 * 'start'.
 */
1401static struct extent_state *
1402find_first_extent_bit_state(struct extent_io_tree *tree,
1403 u64 start, unsigned bits)
1404{
1405 struct rb_node *node;
1406 struct extent_state *state;
1407
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
1412 node = tree_search(tree, start);
1413 if (!node)
1414 goto out;
1415
1416 while (1) {
1417 state = rb_entry(node, struct extent_state, rb_node);
1418 if (state->end >= start && (state->state & bits))
1419 return state;
1420
1421 node = rb_next(node);
1422 if (!node)
1423 break;
1424 }
1425out:
1426 return NULL;
1427}
1428
/*
 * Find the first offset in the io tree with 'bits' set.  Zero is returned if
 * something was found, and *start_ret and *end_ret are set to reflect the
 * state struct that was found.
 *
 * If nothing was found, 1 is returned.
 */
1436int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1437 u64 *start_ret, u64 *end_ret, unsigned bits,
1438 struct extent_state **cached_state)
1439{
1440 struct extent_state *state;
1441 struct rb_node *n;
1442 int ret = 1;
1443
1444 spin_lock(&tree->lock);
1445 if (cached_state && *cached_state) {
1446 state = *cached_state;
1447 if (state->end == start - 1 && extent_state_in_tree(state)) {
1448 n = rb_next(&state->rb_node);
1449 while (n) {
1450 state = rb_entry(n, struct extent_state,
1451 rb_node);
1452 if (state->state & bits)
1453 goto got_it;
1454 n = rb_next(n);
1455 }
1456 free_extent_state(*cached_state);
1457 *cached_state = NULL;
1458 goto out;
1459 }
1460 free_extent_state(*cached_state);
1461 *cached_state = NULL;
1462 }
1463
1464 state = find_first_extent_bit_state(tree, start, bits);
1465got_it:
1466 if (state) {
1467 cache_state_if_flags(state, cached_state, 0);
1468 *start_ret = state->start;
1469 *end_ret = state->end;
1470 ret = 0;
1471 }
1472out:
1473 spin_unlock(&tree->lock);
1474 return ret;
1475}
1476
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'.  *start and *end are used to return the range found.
 *
 * The number of delalloc extents found is returned, 0 if nothing was in the
 * tree.
 */
1483static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1484 u64 *start, u64 *end, u64 max_bytes,
1485 struct extent_state **cached_state)
1486{
1487 struct rb_node *node;
1488 struct extent_state *state;
1489 u64 cur_start = *start;
1490 u64 found = 0;
1491 u64 total_bytes = 0;
1492
1493 spin_lock(&tree->lock);
1494
1495
1496
1497
1498
1499 node = tree_search(tree, cur_start);
1500 if (!node) {
1501 if (!found)
1502 *end = (u64)-1;
1503 goto out;
1504 }
1505
1506 while (1) {
1507 state = rb_entry(node, struct extent_state, rb_node);
1508 if (found && (state->start != cur_start ||
1509 (state->state & EXTENT_BOUNDARY))) {
1510 goto out;
1511 }
1512 if (!(state->state & EXTENT_DELALLOC)) {
1513 if (!found)
1514 *end = state->end;
1515 goto out;
1516 }
1517 if (!found) {
1518 *start = state->start;
1519 *cached_state = state;
1520 refcount_inc(&state->refs);
1521 }
1522 found++;
1523 *end = state->end;
1524 cur_start = state->end + 1;
1525 node = rb_next(node);
1526 total_bytes += state->end - state->start + 1;
1527 if (total_bytes >= max_bytes)
1528 break;
1529 if (!node)
1530 break;
1531 }
1532out:
1533 spin_unlock(&tree->lock);
1534 return found;
1535}
1536
1537static int __process_pages_contig(struct address_space *mapping,
1538 struct page *locked_page,
1539 pgoff_t start_index, pgoff_t end_index,
1540 unsigned long page_ops, pgoff_t *index_ret);
1541
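/*
 * Unlock every page in [start, end] that was locked for delalloc, except
 * locked_page, which the caller is still responsible for.
 */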
1542static noinline void __unlock_for_delalloc(struct inode *inode,
1543 struct page *locked_page,
1544 u64 start, u64 end)
1545{
1546 unsigned long index = start >> PAGE_SHIFT;
1547 unsigned long end_index = end >> PAGE_SHIFT;
1548
1549 ASSERT(locked_page);
1550 if (index == locked_page->index && end_index == index)
1551 return;
1552
1553 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1554 PAGE_UNLOCK, NULL);
1555}
1556
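/*
 * Lock all pages covering the delalloc range, skipping locked_page which the
 * caller already holds.  If some pages have gone away, the pages locked so
 * far are released again and -EAGAIN is returned.
 */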
1557static noinline int lock_delalloc_pages(struct inode *inode,
1558 struct page *locked_page,
1559 u64 delalloc_start,
1560 u64 delalloc_end)
1561{
1562 unsigned long index = delalloc_start >> PAGE_SHIFT;
1563 unsigned long index_ret = index;
1564 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1565 int ret;
1566
1567 ASSERT(locked_page);
1568 if (index == locked_page->index && index == end_index)
1569 return 0;
1570
1571 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1572 end_index, PAGE_LOCK, &index_ret);
1573 if (ret == -EAGAIN)
1574 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1575 (u64)index_ret << PAGE_SHIFT);
1576 return ret;
1577}
1578
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes', and lock the pages covering it.  *start and *end return
 * the range; nonzero is returned if delalloc was found and locked, 0
 * otherwise.
 */
1585STATIC u64 find_lock_delalloc_range(struct inode *inode,
1586 struct extent_io_tree *tree,
1587 struct page *locked_page, u64 *start,
1588 u64 *end, u64 max_bytes)
1589{
1590 u64 delalloc_start;
1591 u64 delalloc_end;
1592 u64 found;
1593 struct extent_state *cached_state = NULL;
1594 int ret;
1595 int loops = 0;
1596
1597again:
1598
1599 delalloc_start = *start;
1600 delalloc_end = 0;
1601 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1602 max_bytes, &cached_state);
1603 if (!found || delalloc_end <= *start) {
1604 *start = delalloc_start;
1605 *end = delalloc_end;
1606 free_extent_state(cached_state);
1607 return 0;
1608 }
1609
	/*
	 * start comes from the offset of locked_page.  We have to lock pages
	 * in order, so we can't process delalloc bytes before locked_page.
	 */
1615 if (delalloc_start < *start)
1616 delalloc_start = *start;
1617
1618
1619
1620
1621 if (delalloc_end + 1 - delalloc_start > max_bytes)
1622 delalloc_end = delalloc_start + max_bytes - 1;
1623
1624
1625 ret = lock_delalloc_pages(inode, locked_page,
1626 delalloc_start, delalloc_end);
1627 if (ret == -EAGAIN) {
		/*
		 * Some of the pages are gone, avoid looping by shortening
		 * the size of the delalloc range we're searching.
		 */
1631 free_extent_state(cached_state);
1632 cached_state = NULL;
1633 if (!loops) {
1634 max_bytes = PAGE_SIZE;
1635 loops = 1;
1636 goto again;
1637 } else {
1638 found = 0;
1639 goto out_failed;
1640 }
1641 }
1642 BUG_ON(ret);
1643
1644
1645 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1646
1647
1648 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1649 EXTENT_DELALLOC, 1, cached_state);
1650 if (!ret) {
1651 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1652 &cached_state, GFP_NOFS);
1653 __unlock_for_delalloc(inode, locked_page,
1654 delalloc_start, delalloc_end);
1655 cond_resched();
1656 goto again;
1657 }
1658 free_extent_state(cached_state);
1659 *start = delalloc_start;
1660 *end = delalloc_end;
1661out_failed:
1662 return found;
1663}
1664
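/*
 * Walk the pages in [start_index, end_index] and apply the requested
 * page_ops (private2, dirty, writeback, error, lock/unlock) to each one,
 * skipping locked_page.  For PAGE_LOCK, *index_ret is updated so the caller
 * knows how far we got before a failure.
 */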
1665static int __process_pages_contig(struct address_space *mapping,
1666 struct page *locked_page,
1667 pgoff_t start_index, pgoff_t end_index,
1668 unsigned long page_ops, pgoff_t *index_ret)
1669{
1670 unsigned long nr_pages = end_index - start_index + 1;
1671 unsigned long pages_locked = 0;
1672 pgoff_t index = start_index;
1673 struct page *pages[16];
1674 unsigned ret;
1675 int err = 0;
1676 int i;
1677
1678 if (page_ops & PAGE_LOCK) {
1679 ASSERT(page_ops == PAGE_LOCK);
1680 ASSERT(index_ret && *index_ret == start_index);
1681 }
1682
1683 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1684 mapping_set_error(mapping, -EIO);
1685
1686 while (nr_pages > 0) {
1687 ret = find_get_pages_contig(mapping, index,
1688 min_t(unsigned long,
1689 nr_pages, ARRAY_SIZE(pages)), pages);
1690 if (ret == 0) {
1691
1692
1693
1694
1695 ASSERT(page_ops & PAGE_LOCK);
1696 err = -EAGAIN;
1697 goto out;
1698 }
1699
1700 for (i = 0; i < ret; i++) {
1701 if (page_ops & PAGE_SET_PRIVATE2)
1702 SetPagePrivate2(pages[i]);
1703
1704 if (pages[i] == locked_page) {
1705 put_page(pages[i]);
1706 pages_locked++;
1707 continue;
1708 }
1709 if (page_ops & PAGE_CLEAR_DIRTY)
1710 clear_page_dirty_for_io(pages[i]);
1711 if (page_ops & PAGE_SET_WRITEBACK)
1712 set_page_writeback(pages[i]);
1713 if (page_ops & PAGE_SET_ERROR)
1714 SetPageError(pages[i]);
1715 if (page_ops & PAGE_END_WRITEBACK)
1716 end_page_writeback(pages[i]);
1717 if (page_ops & PAGE_UNLOCK)
1718 unlock_page(pages[i]);
1719 if (page_ops & PAGE_LOCK) {
1720 lock_page(pages[i]);
1721 if (!PageDirty(pages[i]) ||
1722 pages[i]->mapping != mapping) {
1723 unlock_page(pages[i]);
1724 put_page(pages[i]);
1725 err = -EAGAIN;
1726 goto out;
1727 }
1728 }
1729 put_page(pages[i]);
1730 pages_locked++;
1731 }
1732 nr_pages -= ret;
1733 index += ret;
1734 cond_resched();
1735 }
1736out:
1737 if (err && index_ret)
1738 *index_ret = start_index + pages_locked - 1;
1739 return err;
1740}
1741
1742void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1743 u64 delalloc_end, struct page *locked_page,
1744 unsigned clear_bits,
1745 unsigned long page_ops)
1746{
1747 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
1748 NULL, GFP_NOFS);
1749
1750 __process_pages_contig(inode->i_mapping, locked_page,
1751 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
1752 page_ops, NULL);
1753}
1754
/*
 * Count the number of bytes in the tree that have a given bit(s) set.  This
 * can be fairly slow, except for EXTENT_DIRTY which is cached.  The total
 * number of bytes found is returned.
 */
1760u64 count_range_bits(struct extent_io_tree *tree,
1761 u64 *start, u64 search_end, u64 max_bytes,
1762 unsigned bits, int contig)
1763{
1764 struct rb_node *node;
1765 struct extent_state *state;
1766 u64 cur_start = *start;
1767 u64 total_bytes = 0;
1768 u64 last = 0;
1769 int found = 0;
1770
1771 if (WARN_ON(search_end <= cur_start))
1772 return 0;
1773
1774 spin_lock(&tree->lock);
1775 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1776 total_bytes = tree->dirty_bytes;
1777 goto out;
1778 }
1779
1780
1781
1782
1783 node = tree_search(tree, cur_start);
1784 if (!node)
1785 goto out;
1786
1787 while (1) {
1788 state = rb_entry(node, struct extent_state, rb_node);
1789 if (state->start > search_end)
1790 break;
1791 if (contig && found && state->start > last + 1)
1792 break;
1793 if (state->end >= cur_start && (state->state & bits) == bits) {
1794 total_bytes += min(search_end, state->end) + 1 -
1795 max(cur_start, state->start);
1796 if (total_bytes >= max_bytes)
1797 break;
1798 if (!found) {
1799 *start = max(cur_start, state->start);
1800 found = 1;
1801 }
1802 last = state->end;
1803 } else if (contig && found) {
1804 break;
1805 }
1806 node = rb_next(node);
1807 if (!node)
1808 break;
1809 }
1810out:
1811 spin_unlock(&tree->lock);
1812 return total_bytes;
1813}
1814
/*
 * Set the failure record for the extent state that starts exactly at the
 * given byte offset.  Returns -ENOENT if no such state exists in the tree.
 */
1819static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1820 struct io_failure_record *failrec)
1821{
1822 struct rb_node *node;
1823 struct extent_state *state;
1824 int ret = 0;
1825
1826 spin_lock(&tree->lock);
1827
1828
1829
1830
1831 node = tree_search(tree, start);
1832 if (!node) {
1833 ret = -ENOENT;
1834 goto out;
1835 }
1836 state = rb_entry(node, struct extent_state, rb_node);
1837 if (state->start != start) {
1838 ret = -ENOENT;
1839 goto out;
1840 }
1841 state->failrec = failrec;
1842out:
1843 spin_unlock(&tree->lock);
1844 return ret;
1845}
1846
1847static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1848 struct io_failure_record **failrec)
1849{
1850 struct rb_node *node;
1851 struct extent_state *state;
1852 int ret = 0;
1853
1854 spin_lock(&tree->lock);
1855
1856
1857
1858
1859 node = tree_search(tree, start);
1860 if (!node) {
1861 ret = -ENOENT;
1862 goto out;
1863 }
1864 state = rb_entry(node, struct extent_state, rb_node);
1865 if (state->start != start) {
1866 ret = -ENOENT;
1867 goto out;
1868 }
1869 *failrec = state->failrec;
1870out:
1871 spin_unlock(&tree->lock);
1872 return ret;
1873}
1874
/*
 * Search a range in the state tree for a given mask.  If 'filled' == 1, this
 * returns 1 only if every extent in the range has the bits set.  Otherwise,
 * 1 is returned if any bit in the range is found set.
 */
1881int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1882 unsigned bits, int filled, struct extent_state *cached)
1883{
1884 struct extent_state *state = NULL;
1885 struct rb_node *node;
1886 int bitset = 0;
1887
1888 spin_lock(&tree->lock);
1889 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1890 cached->end > start)
1891 node = &cached->rb_node;
1892 else
1893 node = tree_search(tree, start);
1894 while (node && start <= end) {
1895 state = rb_entry(node, struct extent_state, rb_node);
1896
1897 if (filled && state->start > start) {
1898 bitset = 0;
1899 break;
1900 }
1901
1902 if (state->start > end)
1903 break;
1904
1905 if (state->state & bits) {
1906 bitset = 1;
1907 if (!filled)
1908 break;
1909 } else if (filled) {
1910 bitset = 0;
1911 break;
1912 }
1913
1914 if (state->end == (u64)-1)
1915 break;
1916
1917 start = state->end + 1;
1918 if (start > end)
1919 break;
1920 node = rb_next(node);
1921 if (!node) {
1922 if (filled)
1923 bitset = 0;
1924 break;
1925 }
1926 }
1927 spin_unlock(&tree->lock);
1928 return bitset;
1929}
1930
/*
 * Helper to set a page up to date if all the extents in the tree covering it
 * are up to date.
 */
1935static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1936{
1937 u64 start = page_offset(page);
1938 u64 end = start + PAGE_SIZE - 1;
1939 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1940 SetPageUptodate(page);
1941}
1942
1943int free_io_failure(struct extent_io_tree *failure_tree,
1944 struct extent_io_tree *io_tree,
1945 struct io_failure_record *rec)
1946{
1947 int ret;
1948 int err = 0;
1949
1950 set_state_failrec(failure_tree, rec->start, NULL);
1951 ret = clear_extent_bits(failure_tree, rec->start,
1952 rec->start + rec->len - 1,
1953 EXTENT_LOCKED | EXTENT_DIRTY);
1954 if (ret)
1955 err = ret;
1956
1957 ret = clear_extent_bits(io_tree, rec->start,
1958 rec->start + rec->len - 1,
1959 EXTENT_DAMAGED);
1960 if (ret && !err)
1961 err = ret;
1962
1963 kfree(rec);
1964 return err;
1965}
1966
/*
 * This bypasses the standard btrfs submit functions deliberately, as the
 * standard behavior is to write all copies in a raid setup.  Here we only
 * want to write the one bad copy, so we do the mapping ourselves and submit
 * the bio directly.
 *
 * To avoid synchronization issues, wait for the data after writing, which
 * also prevents the read that triggered the error from finishing before the
 * repair is on disk.
 */
1977int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
1978 u64 length, u64 logical, struct page *page,
1979 unsigned int pg_offset, int mirror_num)
1980{
1981 struct bio *bio;
1982 struct btrfs_device *dev;
1983 u64 map_length = 0;
1984 u64 sector;
1985 struct btrfs_bio *bbio = NULL;
1986 int ret;
1987
1988 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
1989 BUG_ON(!mirror_num);
1990
1991 bio = btrfs_io_bio_alloc(1);
1992 bio->bi_iter.bi_size = 0;
1993 map_length = length;
1994
1995
1996
1997
1998
1999
2000 btrfs_bio_counter_inc_blocked(fs_info);
2001 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
2002
2003
2004
2005
2006
2007
2008 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2009 &map_length, &bbio, 0);
2010 if (ret) {
2011 btrfs_bio_counter_dec(fs_info);
2012 bio_put(bio);
2013 return -EIO;
2014 }
2015 ASSERT(bbio->mirror_num == 1);
2016 } else {
2017 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2018 &map_length, &bbio, mirror_num);
2019 if (ret) {
2020 btrfs_bio_counter_dec(fs_info);
2021 bio_put(bio);
2022 return -EIO;
2023 }
2024 BUG_ON(mirror_num != bbio->mirror_num);
2025 }
2026
2027 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
2028 bio->bi_iter.bi_sector = sector;
2029 dev = bbio->stripes[bbio->mirror_num - 1].dev;
2030 btrfs_put_bbio(bbio);
2031 if (!dev || !dev->bdev || !dev->writeable) {
2032 btrfs_bio_counter_dec(fs_info);
2033 bio_put(bio);
2034 return -EIO;
2035 }
2036 bio_set_dev(bio, dev->bdev);
2037 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2038 bio_add_page(bio, page, length, pg_offset);
2039
2040 if (btrfsic_submit_bio_wait(bio)) {
2041
2042 btrfs_bio_counter_dec(fs_info);
2043 bio_put(bio);
2044 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2045 return -EIO;
2046 }
2047
2048 btrfs_info_rl_in_rcu(fs_info,
2049 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2050 ino, start,
2051 rcu_str_deref(dev->name), sector);
2052 btrfs_bio_counter_dec(fs_info);
2053 bio_put(bio);
2054 return 0;
2055}
2056
2057int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
2058 struct extent_buffer *eb, int mirror_num)
2059{
2060 u64 start = eb->start;
2061 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2062 int ret = 0;
2063
2064 if (sb_rdonly(fs_info->sb))
2065 return -EROFS;
2066
2067 for (i = 0; i < num_pages; i++) {
2068 struct page *p = eb->pages[i];
2069
2070 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2071 start - page_offset(p), mirror_num);
2072 if (ret)
2073 break;
2074 start += PAGE_SIZE;
2075 }
2076
2077 return ret;
2078}
2079
/*
 * Each time an IO finishes, we do a fast check in the IO failure tree to see
 * if we need to process or clean up an io_failure_record.
 */
2084int clean_io_failure(struct btrfs_fs_info *fs_info,
2085 struct extent_io_tree *failure_tree,
2086 struct extent_io_tree *io_tree, u64 start,
2087 struct page *page, u64 ino, unsigned int pg_offset)
2088{
2089 u64 private;
2090 struct io_failure_record *failrec;
2091 struct extent_state *state;
2092 int num_copies;
2093 int ret;
2094
2095 private = 0;
2096 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2097 EXTENT_DIRTY, 0);
2098 if (!ret)
2099 return 0;
2100
2101 ret = get_state_failrec(failure_tree, start, &failrec);
2102 if (ret)
2103 return 0;
2104
2105 BUG_ON(!failrec->this_mirror);
2106
2107 if (failrec->in_validation) {
2108
2109 btrfs_debug(fs_info,
2110 "clean_io_failure: freeing dummy error at %llu",
2111 failrec->start);
2112 goto out;
2113 }
2114 if (sb_rdonly(fs_info->sb))
2115 goto out;
2116
2117 spin_lock(&io_tree->lock);
2118 state = find_first_extent_bit_state(io_tree,
2119 failrec->start,
2120 EXTENT_LOCKED);
2121 spin_unlock(&io_tree->lock);
2122
2123 if (state && state->start <= failrec->start &&
2124 state->end >= failrec->start + failrec->len - 1) {
2125 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2126 failrec->len);
2127 if (num_copies > 1) {
2128 repair_io_failure(fs_info, ino, start, failrec->len,
2129 failrec->logical, page, pg_offset,
2130 failrec->failed_mirror);
2131 }
2132 }
2133
2134out:
2135 free_io_failure(failure_tree, io_tree, failrec);
2136
2137 return 0;
2138}
2139
/*
 * Helper to drop all io_failure_records cached for the given range.  Can be
 * called when
 * - the extent lock is held
 * - under an ordered extent
 * - while the inode is being freed
 */
2146void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2147{
2148 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2149 struct io_failure_record *failrec;
2150 struct extent_state *state, *next;
2151
2152 if (RB_EMPTY_ROOT(&failure_tree->state))
2153 return;
2154
2155 spin_lock(&failure_tree->lock);
2156 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2157 while (state) {
2158 if (state->start > end)
2159 break;
2160
2161 ASSERT(state->end <= end);
2162
2163 next = next_state(state);
2164
2165 failrec = state->failrec;
2166 free_extent_state(state);
2167 kfree(failrec);
2168
2169 state = next;
2170 }
2171 spin_unlock(&failure_tree->lock);
2172}
2173
2174int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2175 struct io_failure_record **failrec_ret)
2176{
2177 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2178 struct io_failure_record *failrec;
2179 struct extent_map *em;
2180 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2181 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2182 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2183 int ret;
2184 u64 logical;
2185
2186 ret = get_state_failrec(failure_tree, start, &failrec);
2187 if (ret) {
2188 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2189 if (!failrec)
2190 return -ENOMEM;
2191
2192 failrec->start = start;
2193 failrec->len = end - start + 1;
2194 failrec->this_mirror = 0;
2195 failrec->bio_flags = 0;
2196 failrec->in_validation = 0;
2197
2198 read_lock(&em_tree->lock);
2199 em = lookup_extent_mapping(em_tree, start, failrec->len);
2200 if (!em) {
2201 read_unlock(&em_tree->lock);
2202 kfree(failrec);
2203 return -EIO;
2204 }
2205
2206 if (em->start > start || em->start + em->len <= start) {
2207 free_extent_map(em);
2208 em = NULL;
2209 }
2210 read_unlock(&em_tree->lock);
2211 if (!em) {
2212 kfree(failrec);
2213 return -EIO;
2214 }
2215
2216 logical = start - em->start;
2217 logical = em->block_start + logical;
2218 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2219 logical = em->block_start;
2220 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2221 extent_set_compress_type(&failrec->bio_flags,
2222 em->compress_type);
2223 }
2224
2225 btrfs_debug(fs_info,
2226 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2227 logical, start, failrec->len);
2228
2229 failrec->logical = logical;
2230 free_extent_map(em);
2231
2232
2233 ret = set_extent_bits(failure_tree, start, end,
2234 EXTENT_LOCKED | EXTENT_DIRTY);
2235 if (ret >= 0)
2236 ret = set_state_failrec(failure_tree, start, failrec);
2237
2238 if (ret >= 0)
2239 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2240 if (ret < 0) {
2241 kfree(failrec);
2242 return ret;
2243 }
2244 } else {
2245 btrfs_debug(fs_info,
2246 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2247 failrec->logical, failrec->start, failrec->len,
2248 failrec->in_validation);
2249
2250
2251
2252
2253
2254 }
2255
2256 *failrec_ret = failrec;
2257
2258 return 0;
2259}
2260
2261bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2262 struct io_failure_record *failrec, int failed_mirror)
2263{
2264 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2265 int num_copies;
2266
2267 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2268 if (num_copies == 1) {
2269
2270
2271
2272
2273
2274 btrfs_debug(fs_info,
2275 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2276 num_copies, failrec->this_mirror, failed_mirror);
2277 return false;
2278 }
2279
2280
2281
2282
2283
2284
2285 if (failed_bio->bi_vcnt > 1) {
2286
2287
2288
2289
2290
2291
2292
2293
2294 BUG_ON(failrec->in_validation);
2295 failrec->in_validation = 1;
2296 failrec->this_mirror = failed_mirror;
2297 } else {
2298
2299
2300
2301
2302
2303 if (failrec->in_validation) {
2304 BUG_ON(failrec->this_mirror != failed_mirror);
2305 failrec->in_validation = 0;
2306 failrec->this_mirror = 0;
2307 }
2308 failrec->failed_mirror = failed_mirror;
2309 failrec->this_mirror++;
2310 if (failrec->this_mirror == failed_mirror)
2311 failrec->this_mirror++;
2312 }
2313
2314 if (failrec->this_mirror > num_copies) {
2315 btrfs_debug(fs_info,
2316 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2317 num_copies, failrec->this_mirror, failed_mirror);
2318 return false;
2319 }
2320
2321 return true;
2322}
2323
2324
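/*
 * Build a repair bio for the failed range: it targets the logical address
 * recorded in the failure record and, if the original bio carried checksums,
 * copies the checksum for this block so the completion handler can verify
 * the re-read data.
 */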
2325struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2326 struct io_failure_record *failrec,
2327 struct page *page, int pg_offset, int icsum,
2328 bio_end_io_t *endio_func, void *data)
2329{
2330 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2331 struct bio *bio;
2332 struct btrfs_io_bio *btrfs_failed_bio;
2333 struct btrfs_io_bio *btrfs_bio;
2334
2335 bio = btrfs_io_bio_alloc(1);
2336 bio->bi_end_io = endio_func;
2337 bio->bi_iter.bi_sector = failrec->logical >> 9;
2338 bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
2339 bio->bi_iter.bi_size = 0;
2340 bio->bi_private = data;
2341
2342 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2343 if (btrfs_failed_bio->csum) {
2344 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2345
2346 btrfs_bio = btrfs_io_bio(bio);
2347 btrfs_bio->csum = btrfs_bio->csum_inline;
2348 icsum *= csum_size;
2349 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2350 csum_size);
2351 }
2352
2353 bio_add_page(bio, page, failrec->len, pg_offset);
2354
2355 return bio;
2356}
2357
/*
 * This is a generic handler for readpage errors.  If other copies exist,
 * read those and write back good data to the failed position.  It does not
 * try to remap the failed extent elsewhere, hoping the device will be smart
 * enough to do that as needed.
 */
2366static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2367 struct page *page, u64 start, u64 end,
2368 int failed_mirror)
2369{
2370 struct io_failure_record *failrec;
2371 struct inode *inode = page->mapping->host;
2372 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2373 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2374 struct bio *bio;
2375 int read_mode = 0;
2376 blk_status_t status;
2377 int ret;
2378
2379 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2380
2381 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2382 if (ret)
2383 return ret;
2384
2385 if (!btrfs_check_repairable(inode, failed_bio, failrec,
2386 failed_mirror)) {
2387 free_io_failure(failure_tree, tree, failrec);
2388 return -EIO;
2389 }
2390
2391 if (failed_bio->bi_vcnt > 1)
2392 read_mode |= REQ_FAILFAST_DEV;
2393
2394 phy_offset >>= inode->i_sb->s_blocksize_bits;
2395 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2396 start - page_offset(page),
2397 (int)phy_offset, failed_bio->bi_end_io,
2398 NULL);
2399 bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
2400
2401 btrfs_debug(btrfs_sb(inode->i_sb),
2402 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2403 read_mode, failrec->this_mirror, failrec->in_validation);
2404
2405 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2406 failrec->bio_flags, 0);
2407 if (status) {
2408 free_io_failure(failure_tree, tree, failrec);
2409 bio_put(bio);
2410 ret = blk_status_to_errno(status);
2411 }
2412
2413 return ret;
2414}
2415
2416
2417
2418void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2419{
2420 int uptodate = (err == 0);
2421 struct extent_io_tree *tree;
2422 int ret = 0;
2423
2424 tree = &BTRFS_I(page->mapping->host)->io_tree;
2425
2426 if (tree->ops && tree->ops->writepage_end_io_hook)
2427 tree->ops->writepage_end_io_hook(page, start, end, NULL,
2428 uptodate);
2429
2430 if (!uptodate) {
2431 ClearPageUptodate(page);
2432 SetPageError(page);
2433 ret = err < 0 ? err : -EIO;
2434 mapping_set_error(page->mapping, ret);
2435 }
2436}
2437
/*
 * After a writepage IO is done, we need to:
 * - clear the uptodate bits on error
 * - clear the writeback bits in the extent tree for this IO
 * - end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected to have
 * one and only one object corresponding to this IO.
 */
2447static void end_bio_extent_writepage(struct bio *bio)
2448{
2449 int error = blk_status_to_errno(bio->bi_status);
2450 struct bio_vec *bvec;
2451 u64 start;
2452 u64 end;
2453 int i;
2454
2455 ASSERT(!bio_flagged(bio, BIO_CLONED));
2456 bio_for_each_segment_all(bvec, bio, i) {
2457 struct page *page = bvec->bv_page;
2458 struct inode *inode = page->mapping->host;
2459 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2460
2461
2462
2463
2464
2465
2466 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2467 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2468 btrfs_err(fs_info,
2469 "partial page write in btrfs with offset %u and length %u",
2470 bvec->bv_offset, bvec->bv_len);
2471 else
2472 btrfs_info(fs_info,
2473 "incomplete page write in btrfs with offset %u and length %u",
2474 bvec->bv_offset, bvec->bv_len);
2475 }
2476
2477 start = page_offset(page);
2478 end = start + bvec->bv_offset + bvec->bv_len - 1;
2479
2480 end_extent_writepage(page, error, start, end);
2481 end_page_writeback(page);
2482 }
2483
2484 bio_put(bio);
2485}
2486
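/*
 * Helper for the read end_io handler: mark the range uptodate (when the read
 * succeeded and the tree tracks uptodate state) and unlock it in the io tree.
 */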
2487static void
2488endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2489 int uptodate)
2490{
2491 struct extent_state *cached = NULL;
2492 u64 end = start + len - 1;
2493
2494 if (uptodate && tree->track_uptodate)
2495 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2496 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2497}
2498
/*
 * After a data read IO is done, we need to:
 * - clear the uptodate bits on error
 * - set the uptodate bits if things worked
 * - set the page up to date if all extents in the tree are uptodate
 * - clear the lock bit in the extent tree
 * - unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2510static void end_bio_extent_readpage(struct bio *bio)
2511{
2512 struct bio_vec *bvec;
2513 int uptodate = !bio->bi_status;
2514 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2515 struct extent_io_tree *tree, *failure_tree;
2516 u64 offset = 0;
2517 u64 start;
2518 u64 end;
2519 u64 len;
2520 u64 extent_start = 0;
2521 u64 extent_len = 0;
2522 int mirror;
2523 int ret;
2524 int i;
2525
2526 ASSERT(!bio_flagged(bio, BIO_CLONED));
2527 bio_for_each_segment_all(bvec, bio, i) {
2528 struct page *page = bvec->bv_page;
2529 struct inode *inode = page->mapping->host;
2530 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2531
2532 btrfs_debug(fs_info,
2533 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2534 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2535 io_bio->mirror_num);
2536 tree = &BTRFS_I(inode)->io_tree;
2537 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2538
		/*
		 * We always issue full-page reads, but if some block in a
		 * page fails to read, blk_update_request() will advance
		 * bv_offset and adjust bv_len to compensate.  Print a
		 * warning for nonzero offsets, and an error if they don't
		 * add up to a full page.
		 */
2544 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2545 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2546 btrfs_err(fs_info,
2547 "partial page read in btrfs with offset %u and length %u",
2548 bvec->bv_offset, bvec->bv_len);
2549 else
2550 btrfs_info(fs_info,
2551 "incomplete page read in btrfs with offset %u and length %u",
2552 bvec->bv_offset, bvec->bv_len);
2553 }
2554
2555 start = page_offset(page);
2556 end = start + bvec->bv_offset + bvec->bv_len - 1;
2557 len = bvec->bv_len;
2558
2559 mirror = io_bio->mirror_num;
2560 if (likely(uptodate && tree->ops)) {
2561 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2562 page, start, end,
2563 mirror);
2564 if (ret)
2565 uptodate = 0;
2566 else
2567 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2568 failure_tree, tree, start,
2569 page,
2570 btrfs_ino(BTRFS_I(inode)), 0);
2571 }
2572
2573 if (likely(uptodate))
2574 goto readpage_ok;
2575
2576 if (tree->ops) {
2577 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2578 if (ret == -EAGAIN) {
				/*
				 * Data inode's readpage_io_failed_hook()
				 * always returns -EAGAIN.
				 *
				 * The generic bio_readpage_error handles
				 * errors the following way: if possible, new
				 * read requests are created and submitted
				 * and will end up in end_bio_extent_readpage
				 * as well (if we're lucky, not in the
				 * !uptodate case).  In that case it returns 0
				 * and we just go on with the next page in our
				 * bio.  If it can't handle the error it will
				 * return -EIO and we remain responsible for
				 * that page.
				 */
2593 ret = bio_readpage_error(bio, offset, page,
2594 start, end, mirror);
2595 if (ret == 0) {
2596 uptodate = !bio->bi_status;
2597 offset += len;
2598 continue;
2599 }
2600 }
2601
			/*
			 * metadata's readpage_io_failed_hook() always returns
			 * -EIO and fixes nothing.  -EIO is also returned if
			 * the data inode error could not be fixed.
			 */
2607 ASSERT(ret == -EIO);
2608 }
2609readpage_ok:
2610 if (likely(uptodate)) {
2611 loff_t i_size = i_size_read(inode);
2612 pgoff_t end_index = i_size >> PAGE_SHIFT;
2613 unsigned off;
2614
2615
2616 off = i_size & (PAGE_SIZE-1);
2617 if (page->index == end_index && off)
2618 zero_user_segment(page, off, PAGE_SIZE);
2619 SetPageUptodate(page);
2620 } else {
2621 ClearPageUptodate(page);
2622 SetPageError(page);
2623 }
2624 unlock_page(page);
2625 offset += len;
2626
2627 if (unlikely(!uptodate)) {
2628 if (extent_len) {
2629 endio_readpage_release_extent(tree,
2630 extent_start,
2631 extent_len, 1);
2632 extent_start = 0;
2633 extent_len = 0;
2634 }
2635 endio_readpage_release_extent(tree, start,
2636 end - start + 1, 0);
2637 } else if (!extent_len) {
2638 extent_start = start;
2639 extent_len = end + 1 - start;
2640 } else if (extent_start + extent_len == start) {
2641 extent_len += end + 1 - start;
2642 } else {
2643 endio_readpage_release_extent(tree, extent_start,
2644 extent_len, uptodate);
2645 extent_start = start;
2646 extent_len = end + 1 - start;
2647 }
2648 }
2649
2650 if (extent_len)
2651 endio_readpage_release_extent(tree, extent_start, extent_len,
2652 uptodate);
2653 if (io_bio->end_io)
2654 io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
2655 bio_put(bio);
2656}
2657
/*
 * Initialize the members up to but not including 'bio'. Use after allocating
 * a new bio by bio_alloc_bioset as it does not initialize the bytes outside
 * of 'bio'.
 */
2663static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2664{
2665 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2666}
2667
/*
 * The following helpers allocate a bio. As it's backed by a bioset, it'll
 * never fail. The returned bio can be passed to btrfs_io_bio() to get at the
 * btrfs-specific members.
 */
2673struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
2674{
2675 struct bio *bio;
2676
2677 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
2678 bio_set_dev(bio, bdev);
2679 bio->bi_iter.bi_sector = first_byte >> 9;
2680 btrfs_io_bio_init(btrfs_io_bio(bio));
2681 return bio;
2682}
2683
2684struct bio *btrfs_bio_clone(struct bio *bio)
2685{
2686 struct btrfs_io_bio *btrfs_bio;
2687 struct bio *new;
2688
	/* Bio allocation backed by a bioset does not fail */
2690 new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
2691 btrfs_bio = btrfs_io_bio(new);
2692 btrfs_io_bio_init(btrfs_bio);
2693 btrfs_bio->iter = bio->bi_iter;
2694 return new;
2695}
2696
2697struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2698{
2699 struct bio *bio;
2700
	/* Bio allocation backed by a bioset does not fail */
2702 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
2703 btrfs_io_bio_init(btrfs_io_bio(bio));
2704 return bio;
2705}
2706
2707struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2708{
2709 struct bio *bio;
2710 struct btrfs_io_bio *btrfs_bio;
2711
	/* This will never fail when it's backed by a bioset */
2713 bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
2714 ASSERT(bio);
2715
2716 btrfs_bio = btrfs_io_bio(bio);
2717 btrfs_io_bio_init(btrfs_bio);
2718
2719 bio_trim(bio, offset >> 9, size >> 9);
2720 btrfs_bio->iter = bio->bi_iter;
2721 return bio;
2722}
2723
2724static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2725 unsigned long bio_flags)
2726{
2727 blk_status_t ret = 0;
2728 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2729 struct page *page = bvec->bv_page;
2730 struct extent_io_tree *tree = bio->bi_private;
2731 u64 start;
2732
2733 start = page_offset(page) + bvec->bv_offset;
2734
2735 bio->bi_private = NULL;
2736 bio_get(bio);
2737
2738 if (tree->ops)
2739 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
2740 mirror_num, bio_flags, start);
2741 else
2742 btrfsic_submit_bio(bio);
2743
2744 bio_put(bio);
2745 return blk_status_to_errno(ret);
2746}
2747
2748static int merge_bio(struct extent_io_tree *tree, struct page *page,
2749 unsigned long offset, size_t size, struct bio *bio,
2750 unsigned long bio_flags)
2751{
2752 int ret = 0;
2753 if (tree->ops)
2754 ret = tree->ops->merge_bio_hook(page, offset, size, bio,
2755 bio_flags);
2756 return ret;
2757
2758}
2759
/*
 * @opf:	bio REQ_OP_* and REQ_* flags as one value
 *
 * Tries to add the page to the bio passed in via @bio_ret; if that is not
 * possible (different bio flags, non-contiguous sector, the merge hook
 * refuses, or @force_bio_submit is set), the old bio is submitted and a new
 * one is started for this page.
 */
2763static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2764 struct writeback_control *wbc,
2765 struct page *page, sector_t sector,
2766 size_t size, unsigned long offset,
2767 struct block_device *bdev,
2768 struct bio **bio_ret,
2769 bio_end_io_t end_io_func,
2770 int mirror_num,
2771 unsigned long prev_bio_flags,
2772 unsigned long bio_flags,
2773 bool force_bio_submit)
2774{
2775 int ret = 0;
2776 struct bio *bio;
2777 int contig = 0;
2778 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
2779 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2780
2781 if (bio_ret && *bio_ret) {
2782 bio = *bio_ret;
2783 if (old_compressed)
2784 contig = bio->bi_iter.bi_sector == sector;
2785 else
2786 contig = bio_end_sector(bio) == sector;
2787
2788 if (prev_bio_flags != bio_flags || !contig ||
2789 force_bio_submit ||
2790 merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
2791 bio_add_page(bio, page, page_size, offset) < page_size) {
2792 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2793 if (ret < 0) {
2794 *bio_ret = NULL;
2795 return ret;
2796 }
2797 bio = NULL;
2798 } else {
2799 if (wbc)
2800 wbc_account_io(wbc, page, page_size);
2801 return 0;
2802 }
2803 }
2804
2805 bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
2806 bio_add_page(bio, page, page_size, offset);
2807 bio->bi_end_io = end_io_func;
2808 bio->bi_private = tree;
2809 bio->bi_write_hint = page->mapping->host->i_write_hint;
2810 bio->bi_opf = opf;
2811 if (wbc) {
2812 wbc_init_bio(wbc, bio);
2813 wbc_account_io(wbc, page, page_size);
2814 }
2815
2816 if (bio_ret)
2817 *bio_ret = bio;
2818 else
2819 ret = submit_one_bio(bio, mirror_num, bio_flags);
2820
2821 return ret;
2822}
2823
2824static void attach_extent_buffer_page(struct extent_buffer *eb,
2825 struct page *page)
2826{
2827 if (!PagePrivate(page)) {
2828 SetPagePrivate(page);
2829 get_page(page);
2830 set_page_private(page, (unsigned long)eb);
2831 } else {
2832 WARN_ON(page->private != (unsigned long)eb);
2833 }
2834}
2835
2836void set_page_extent_mapped(struct page *page)
2837{
2838 if (!PagePrivate(page)) {
2839 SetPagePrivate(page);
2840 get_page(page);
2841 set_page_private(page, EXTENT_PAGE_PRIVATE);
2842 }
2843}
2844
2845static struct extent_map *
2846__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2847 u64 start, u64 len, get_extent_t *get_extent,
2848 struct extent_map **em_cached)
2849{
2850 struct extent_map *em;
2851
2852 if (em_cached && *em_cached) {
2853 em = *em_cached;
2854 if (extent_map_in_tree(em) && start >= em->start &&
2855 start < extent_map_end(em)) {
2856 refcount_inc(&em->refs);
2857 return em;
2858 }
2859
2860 free_extent_map(em);
2861 *em_cached = NULL;
2862 }
2863
2864 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
2865 if (em_cached && !IS_ERR_OR_NULL(em)) {
2866 BUG_ON(*em_cached);
2867 refcount_inc(&em->refs);
2868 *em_cached = em;
2869 }
2870 return em;
2871}
2872
/*
 * Basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers).
 *
 * Return 0 on success, otherwise return error.
 */
2879static int __do_readpage(struct extent_io_tree *tree,
2880 struct page *page,
2881 get_extent_t *get_extent,
2882 struct extent_map **em_cached,
2883 struct bio **bio, int mirror_num,
2884 unsigned long *bio_flags, unsigned int read_flags,
2885 u64 *prev_em_start)
2886{
2887 struct inode *inode = page->mapping->host;
2888 u64 start = page_offset(page);
2889 u64 page_end = start + PAGE_SIZE - 1;
2890 u64 end;
2891 u64 cur = start;
2892 u64 extent_offset;
2893 u64 last_byte = i_size_read(inode);
2894 u64 block_start;
2895 u64 cur_end;
2896 sector_t sector;
2897 struct extent_map *em;
2898 struct block_device *bdev;
2899 int ret = 0;
2900 int nr = 0;
2901 size_t pg_offset = 0;
2902 size_t iosize;
2903 size_t disk_io_size;
2904 size_t blocksize = inode->i_sb->s_blocksize;
2905 unsigned long this_bio_flag = 0;
2906
2907 set_page_extent_mapped(page);
2908
2909 end = page_end;
2910 if (!PageUptodate(page)) {
2911 if (cleancache_get_page(page) == 0) {
2912 BUG_ON(blocksize != PAGE_SIZE);
2913 unlock_extent(tree, start, end);
2914 goto out;
2915 }
2916 }
2917
2918 if (page->index == last_byte >> PAGE_SHIFT) {
2919 char *userpage;
2920 size_t zero_offset = last_byte & (PAGE_SIZE - 1);
2921
2922 if (zero_offset) {
2923 iosize = PAGE_SIZE - zero_offset;
2924 userpage = kmap_atomic(page);
2925 memset(userpage + zero_offset, 0, iosize);
2926 flush_dcache_page(page);
2927 kunmap_atomic(userpage);
2928 }
2929 }
2930 while (cur <= end) {
2931 bool force_bio_submit = false;
2932
2933 if (cur >= last_byte) {
2934 char *userpage;
2935 struct extent_state *cached = NULL;
2936
2937 iosize = PAGE_SIZE - pg_offset;
2938 userpage = kmap_atomic(page);
2939 memset(userpage + pg_offset, 0, iosize);
2940 flush_dcache_page(page);
2941 kunmap_atomic(userpage);
2942 set_extent_uptodate(tree, cur, cur + iosize - 1,
2943 &cached, GFP_NOFS);
2944 unlock_extent_cached(tree, cur,
2945 cur + iosize - 1,
2946 &cached, GFP_NOFS);
2947 break;
2948 }
2949 em = __get_extent_map(inode, page, pg_offset, cur,
2950 end - cur + 1, get_extent, em_cached);
2951 if (IS_ERR_OR_NULL(em)) {
2952 SetPageError(page);
2953 unlock_extent(tree, cur, end);
2954 break;
2955 }
2956 extent_offset = cur - em->start;
2957 BUG_ON(extent_map_end(em) <= cur);
2958 BUG_ON(end < cur);
2959
2960 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2961 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2962 extent_set_compress_type(&this_bio_flag,
2963 em->compress_type);
2964 }
2965
2966 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2967 cur_end = min(extent_map_end(em) - 1, end);
2968 iosize = ALIGN(iosize, blocksize);
2969 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2970 disk_io_size = em->block_len;
2971 sector = em->block_start >> 9;
2972 } else {
2973 sector = (em->block_start + extent_offset) >> 9;
2974 disk_io_size = iosize;
2975 }
2976 bdev = em->bdev;
2977 block_start = em->block_start;
2978 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2979 block_start = EXTENT_MAP_HOLE;
2980
		/*
		 * If we are reading consecutive file ranges that point to the
		 * same compressed extent (possibly with different offsets or
		 * lengths into it), we must not let both ranges end up in a
		 * single bio.  The compressed read end_io code processes the
		 * extent only once, and the pages belonging to the second
		 * range would end up zeroed out.
		 *
		 * So whenever the extent backing the range changes (tracked
		 * via *prev_em_start, which the caller initializes to
		 * (u64)-1 and which records the extent's orig_start), force
		 * the current bio to be submitted and start a new one for
		 * this compressed extent.
		 */
3015 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3016 prev_em_start && *prev_em_start != (u64)-1 &&
3017 *prev_em_start != em->orig_start)
3018 force_bio_submit = true;
3019
3020 if (prev_em_start)
3021 *prev_em_start = em->orig_start;
3022
3023 free_extent_map(em);
3024 em = NULL;
3025
		/* we've found a hole, just zero and go on */
3027 if (block_start == EXTENT_MAP_HOLE) {
3028 char *userpage;
3029 struct extent_state *cached = NULL;
3030
3031 userpage = kmap_atomic(page);
3032 memset(userpage + pg_offset, 0, iosize);
3033 flush_dcache_page(page);
3034 kunmap_atomic(userpage);
3035
3036 set_extent_uptodate(tree, cur, cur + iosize - 1,
3037 &cached, GFP_NOFS);
3038 unlock_extent_cached(tree, cur,
3039 cur + iosize - 1,
3040 &cached, GFP_NOFS);
3041 cur = cur + iosize;
3042 pg_offset += iosize;
3043 continue;
3044 }
3045
3046 if (test_range_bit(tree, cur, cur_end,
3047 EXTENT_UPTODATE, 1, NULL)) {
3048 check_page_uptodate(tree, page);
3049 unlock_extent(tree, cur, cur + iosize - 1);
3050 cur = cur + iosize;
3051 pg_offset += iosize;
3052 continue;
3053 }
3054
		/*
		 * We have an inline extent but it didn't get marked up
		 * to date.  Error out.
		 */
3057 if (block_start == EXTENT_MAP_INLINE) {
3058 SetPageError(page);
3059 unlock_extent(tree, cur, cur + iosize - 1);
3060 cur = cur + iosize;
3061 pg_offset += iosize;
3062 continue;
3063 }
3064
3065 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
3066 page, sector, disk_io_size, pg_offset,
3067 bdev, bio,
3068 end_bio_extent_readpage, mirror_num,
3069 *bio_flags,
3070 this_bio_flag,
3071 force_bio_submit);
3072 if (!ret) {
3073 nr++;
3074 *bio_flags = this_bio_flag;
3075 } else {
3076 SetPageError(page);
3077 unlock_extent(tree, cur, cur + iosize - 1);
3078 goto out;
3079 }
3080 cur = cur + iosize;
3081 pg_offset += iosize;
3082 }
3083out:
3084 if (!nr) {
3085 if (!PageError(page))
3086 SetPageUptodate(page);
3087 unlock_page(page);
3088 }
3089 return ret;
3090}
3091
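/*
 * Lock the range covering a run of contiguous pages, waiting for any ordered
 * extents that overlap it to complete, then issue the reads for each page.
 */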
3092static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3093 struct page *pages[], int nr_pages,
3094 u64 start, u64 end,
3095 get_extent_t *get_extent,
3096 struct extent_map **em_cached,
3097 struct bio **bio, int mirror_num,
3098 unsigned long *bio_flags,
3099 u64 *prev_em_start)
3100{
3101 struct inode *inode;
3102 struct btrfs_ordered_extent *ordered;
3103 int index;
3104
3105 inode = pages[0]->mapping->host;
3106 while (1) {
3107 lock_extent(tree, start, end);
3108 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3109 end - start + 1);
3110 if (!ordered)
3111 break;
3112 unlock_extent(tree, start, end);
3113 btrfs_start_ordered_extent(inode, ordered, 1);
3114 btrfs_put_ordered_extent(ordered);
3115 }
3116
3117 for (index = 0; index < nr_pages; index++) {
3118 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3119 mirror_num, bio_flags, 0, prev_em_start);
3120 put_page(pages[index]);
3121 }
3122}
3123
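/*
 * Group the page array into runs that are contiguous in the file and hand
 * each run to __do_contiguous_readpages().
 */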
3124static void __extent_readpages(struct extent_io_tree *tree,
3125 struct page *pages[],
3126 int nr_pages, get_extent_t *get_extent,
3127 struct extent_map **em_cached,
3128 struct bio **bio, int mirror_num,
3129 unsigned long *bio_flags,
3130 u64 *prev_em_start)
3131{
3132 u64 start = 0;
3133 u64 end = 0;
3134 u64 page_start;
3135 int index;
3136 int first_index = 0;
3137
3138 for (index = 0; index < nr_pages; index++) {
3139 page_start = page_offset(pages[index]);
3140 if (!end) {
3141 start = page_start;
3142 end = start + PAGE_SIZE - 1;
3143 first_index = index;
3144 } else if (end + 1 == page_start) {
3145 end += PAGE_SIZE;
3146 } else {
3147 __do_contiguous_readpages(tree, &pages[first_index],
3148 index - first_index, start,
3149 end, get_extent, em_cached,
3150 bio, mirror_num, bio_flags,
3151 prev_em_start);
3152 start = page_start;
3153 end = start + PAGE_SIZE - 1;
3154 first_index = index;
3155 }
3156 }
3157
3158 if (end)
3159 __do_contiguous_readpages(tree, &pages[first_index],
3160 index - first_index, start,
3161 end, get_extent, em_cached, bio,
3162 mirror_num, bio_flags,
3163 prev_em_start);
3164}
3165
3166static int __extent_read_full_page(struct extent_io_tree *tree,
3167 struct page *page,
3168 get_extent_t *get_extent,
3169 struct bio **bio, int mirror_num,
3170 unsigned long *bio_flags,
3171 unsigned int read_flags)
3172{
3173 struct inode *inode = page->mapping->host;
3174 struct btrfs_ordered_extent *ordered;
3175 u64 start = page_offset(page);
3176 u64 end = start + PAGE_SIZE - 1;
3177 int ret;
3178
3179 while (1) {
3180 lock_extent(tree, start, end);
3181 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3182 PAGE_SIZE);
3183 if (!ordered)
3184 break;
3185 unlock_extent(tree, start, end);
3186 btrfs_start_ordered_extent(inode, ordered, 1);
3187 btrfs_put_ordered_extent(ordered);
3188 }
3189
3190 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3191 bio_flags, read_flags, NULL);
3192 return ret;
3193}
3194
3195int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3196 get_extent_t *get_extent, int mirror_num)
3197{
3198 struct bio *bio = NULL;
3199 unsigned long bio_flags = 0;
3200 int ret;
3201
3202 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3203 &bio_flags, 0);
3204 if (bio)
3205 ret = submit_one_bio(bio, mirror_num, bio_flags);
3206 return ret;
3207}
3208
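/*
 * Bookkeeping helper: charge the pages we have written against the
 * writeback_control budget.
 */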
3209static void update_nr_written(struct writeback_control *wbc,
3210 unsigned long nr_written)
3211{
3212 wbc->nr_to_write -= nr_written;
3213}
3214
/*
 * helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if our fill_delalloc function did all the work required
 * to write the page (copy into inline extent).  In this case the IO has
 * been started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked)
 * This returns < 0 if there were errors (page still locked)
 */
3225static noinline_for_stack int writepage_delalloc(struct inode *inode,
3226 struct page *page, struct writeback_control *wbc,
3227 struct extent_page_data *epd,
3228 u64 delalloc_start,
3229 unsigned long *nr_written)
3230{
3231 struct extent_io_tree *tree = epd->tree;
3232 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3233 u64 nr_delalloc;
3234 u64 delalloc_to_write = 0;
3235 u64 delalloc_end = 0;
3236 int ret;
3237 int page_started = 0;
3238
3239 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3240 return 0;
3241
3242 while (delalloc_end < page_end) {
3243 nr_delalloc = find_lock_delalloc_range(inode, tree,
3244 page,
3245 &delalloc_start,
3246 &delalloc_end,
3247 BTRFS_MAX_EXTENT_SIZE);
3248 if (nr_delalloc == 0) {
3249 delalloc_start = delalloc_end + 1;
3250 continue;
3251 }
3252 ret = tree->ops->fill_delalloc(inode, page,
3253 delalloc_start,
3254 delalloc_end,
3255 &page_started,
3256 nr_written);
3257
3258 if (ret) {
3259 SetPageError(page);
			/*
			 * fill_delalloc should return < 0 for error, but just
			 * in case, make sure we hand back a sane errno.
			 */
3265 ret = ret < 0 ? ret : -EIO;
3266 goto done;
3267 }
3268
		/*
		 * delalloc_end is already one less than the total length, so
		 * we don't subtract one from PAGE_SIZE.
		 */
3272 delalloc_to_write += (delalloc_end - delalloc_start +
3273 PAGE_SIZE) >> PAGE_SHIFT;
3274 delalloc_start = delalloc_end + 1;
3275 }
3276 if (wbc->nr_to_write < delalloc_to_write) {
3277 int thresh = 8192;
3278
3279 if (delalloc_to_write < thresh * 2)
3280 thresh = delalloc_to_write;
3281 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3282 thresh);
3283 }
3284
	/* did the fill_delalloc function already unlock and start the IO? */
3288 if (page_started) {
		/*
		 * We've unlocked the page, so we can't update the mapping's
		 * writeback index, just update nr_to_write.
		 */
3294 wbc->nr_to_write -= *nr_written;
3295 return 1;
3296 }
3297
3298 ret = 0;
3299
3300done:
3301 return ret;
3302}
3303
/*
 * helper for __extent_writepage.  This calls the writepage start hooks,
 * and does the loop to map the page into extents and bios.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked)
 * < 0 if there were errors (page still locked)
 */
3312static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3313 struct page *page,
3314 struct writeback_control *wbc,
3315 struct extent_page_data *epd,
3316 loff_t i_size,
3317 unsigned long nr_written,
3318 unsigned int write_flags, int *nr_ret)
3319{
3320 struct extent_io_tree *tree = epd->tree;
3321 u64 start = page_offset(page);
3322 u64 page_end = start + PAGE_SIZE - 1;
3323 u64 end;
3324 u64 cur = start;
3325 u64 extent_offset;
3326 u64 block_start;
3327 u64 iosize;
3328 sector_t sector;
3329 struct extent_map *em;
3330 struct block_device *bdev;
3331 size_t pg_offset = 0;
3332 size_t blocksize;
3333 int ret = 0;
3334 int nr = 0;
3335 bool compressed;
3336
3337 if (tree->ops && tree->ops->writepage_start_hook) {
3338 ret = tree->ops->writepage_start_hook(page, start,
3339 page_end);
3340 if (ret) {
			/* Fixup worker will requeue */
3342 if (ret == -EBUSY)
3343 wbc->pages_skipped++;
3344 else
3345 redirty_page_for_writepage(wbc, page);
3346
3347 update_nr_written(wbc, nr_written);
3348 unlock_page(page);
3349 return 1;
3350 }
3351 }
3352
	/*
	 * we don't want to touch the inode after unlocking the page,
	 * so we update the mapping writeback index now
	 */
3357 update_nr_written(wbc, nr_written + 1);
3358
3359 end = page_end;
3360 if (i_size <= start) {
3361 if (tree->ops && tree->ops->writepage_end_io_hook)
3362 tree->ops->writepage_end_io_hook(page, start,
3363 page_end, NULL, 1);
3364 goto done;
3365 }
3366
3367 blocksize = inode->i_sb->s_blocksize;
3368
3369 while (cur <= end) {
3370 u64 em_end;
3371
3372 if (cur >= i_size) {
3373 if (tree->ops && tree->ops->writepage_end_io_hook)
3374 tree->ops->writepage_end_io_hook(page, cur,
3375 page_end, NULL, 1);
3376 break;
3377 }
3378 em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur,
3379 end - cur + 1, 1);
3380 if (IS_ERR_OR_NULL(em)) {
3381 SetPageError(page);
3382 ret = PTR_ERR_OR_ZERO(em);
3383 break;
3384 }
3385
3386 extent_offset = cur - em->start;
3387 em_end = extent_map_end(em);
3388 BUG_ON(em_end <= cur);
3389 BUG_ON(end < cur);
3390 iosize = min(em_end - cur, end - cur + 1);
3391 iosize = ALIGN(iosize, blocksize);
3392 sector = (em->block_start + extent_offset) >> 9;
3393 bdev = em->bdev;
3394 block_start = em->block_start;
3395 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3396 free_extent_map(em);
3397 em = NULL;
3398
		/*
		 * compressed and inline extents are written through other
		 * paths in the FS
		 */
3403 if (compressed || block_start == EXTENT_MAP_HOLE ||
3404 block_start == EXTENT_MAP_INLINE) {
			/*
			 * end_io notification does not happen here for
			 * compressed extents
			 */
3409 if (!compressed && tree->ops &&
3410 tree->ops->writepage_end_io_hook)
3411 tree->ops->writepage_end_io_hook(page, cur,
3412 cur + iosize - 1,
3413 NULL, 1);
3414 else if (compressed) {
				/*
				 * we don't want to end_page_writeback on a
				 * compressed extent, this happens elsewhere
				 */
3419 nr++;
3420 }
3421
3422 cur += iosize;
3423 pg_offset += iosize;
3424 continue;
3425 }
3426
3427 set_range_writeback(tree, cur, cur + iosize - 1);
3428 if (!PageWriteback(page)) {
3429 btrfs_err(BTRFS_I(inode)->root->fs_info,
3430 "page %lu not writeback, cur %llu end %llu",
3431 page->index, cur, end);
3432 }
3433
3434 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3435 page, sector, iosize, pg_offset,
3436 bdev, &epd->bio,
3437 end_bio_extent_writepage,
3438 0, 0, 0, false);
3439 if (ret) {
3440 SetPageError(page);
3441 if (PageWriteback(page))
3442 end_page_writeback(page);
3443 }
3444
3445 cur = cur + iosize;
3446 pg_offset += iosize;
3447 nr++;
3448 }
3449done:
3450 *nr_ret = nr;
3451 return ret;
3452}
3453
/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
3460static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3461 void *data)
3462{
3463 struct inode *inode = page->mapping->host;
3464 struct extent_page_data *epd = data;
3465 u64 start = page_offset(page);
3466 u64 page_end = start + PAGE_SIZE - 1;
3467 int ret;
3468 int nr = 0;
3469 size_t pg_offset = 0;
3470 loff_t i_size = i_size_read(inode);
3471 unsigned long end_index = i_size >> PAGE_SHIFT;
3472 unsigned int write_flags = 0;
3473 unsigned long nr_written = 0;
3474
3475 write_flags = wbc_to_write_flags(wbc);
3476
3477 trace___extent_writepage(page, inode, wbc);
3478
3479 WARN_ON(!PageLocked(page));
3480
3481 ClearPageError(page);
3482
3483 pg_offset = i_size & (PAGE_SIZE - 1);
3484 if (page->index > end_index ||
3485 (page->index == end_index && !pg_offset)) {
3486 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3487 unlock_page(page);
3488 return 0;
3489 }
3490
3491 if (page->index == end_index) {
3492 char *userpage;
3493
3494 userpage = kmap_atomic(page);
3495 memset(userpage + pg_offset, 0,
3496 PAGE_SIZE - pg_offset);
3497 kunmap_atomic(userpage);
3498 flush_dcache_page(page);
3499 }
3500
3501 pg_offset = 0;
3502
3503 set_page_extent_mapped(page);
3504
3505 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3506 if (ret == 1)
3507 goto done_unlocked;
3508 if (ret)
3509 goto done;
3510
3511 ret = __extent_writepage_io(inode, page, wbc, epd,
3512 i_size, nr_written, write_flags, &nr);
3513 if (ret == 1)
3514 goto done_unlocked;
3515
3516done:
3517 if (nr == 0) {
		/* make sure the mapping tag for page dirty gets cleared */
3519 set_page_writeback(page);
3520 end_page_writeback(page);
3521 }
3522 if (PageError(page)) {
3523 ret = ret < 0 ? ret : -EIO;
3524 end_extent_writepage(page, ret, start, page_end);
3525 }
3526 unlock_page(page);
3527 return ret;
3528
3529done_unlocked:
3530 return 0;
3531}
3532
3533void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3534{
3535 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3536 TASK_UNINTERRUPTIBLE);
3537}
3538
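/*
 * Prepare an extent buffer for writeback: wait for any running writeback if
 * this is a synchronous writeout, move the buffer from dirty to writeback
 * state and, if it was dirty, lock all of its pages.  Returns 1 if the caller
 * should write the buffer out, 0 if there is nothing to do.
 */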
3539static noinline_for_stack int
3540lock_extent_buffer_for_io(struct extent_buffer *eb,
3541 struct btrfs_fs_info *fs_info,
3542 struct extent_page_data *epd)
3543{
3544 unsigned long i, num_pages;
3545 int flush = 0;
3546 int ret = 0;
3547
3548 if (!btrfs_try_tree_write_lock(eb)) {
3549 flush = 1;
3550 flush_write_bio(epd);
3551 btrfs_tree_lock(eb);
3552 }
3553
3554 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3555 btrfs_tree_unlock(eb);
3556 if (!epd->sync_io)
3557 return 0;
3558 if (!flush) {
3559 flush_write_bio(epd);
3560 flush = 1;
3561 }
3562 while (1) {
3563 wait_on_extent_buffer_writeback(eb);
3564 btrfs_tree_lock(eb);
3565 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3566 break;
3567 btrfs_tree_unlock(eb);
3568 }
3569 }
3570
	/*
	 * We need to do this to prevent races in people who check if the eb
	 * is under IO since we can end up having no IO bits set for a short
	 * period of time.
	 */
3576 spin_lock(&eb->refs_lock);
3577 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3578 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3579 spin_unlock(&eb->refs_lock);
3580 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3581 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3582 -eb->len,
3583 fs_info->dirty_metadata_batch);
3584 ret = 1;
3585 } else {
3586 spin_unlock(&eb->refs_lock);
3587 }
3588
3589 btrfs_tree_unlock(eb);
3590
3591 if (!ret)
3592 return ret;
3593
3594 num_pages = num_extent_pages(eb->start, eb->len);
3595 for (i = 0; i < num_pages; i++) {
3596 struct page *p = eb->pages[i];
3597
3598 if (!trylock_page(p)) {
3599 if (!flush) {
3600 flush_write_bio(epd);
3601 flush = 1;
3602 }
3603 lock_page(p);
3604 }
3605 }
3606
3607 return ret;
3608}
3609
3610static void end_extent_buffer_writeback(struct extent_buffer *eb)
3611{
3612 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3613 smp_mb__after_atomic();
3614 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3615}
3616
3617static void set_btree_ioerr(struct page *page)
3618{
3619 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3620
3621 SetPageError(page);
3622 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3623 return;
	/*
	 * A write error on a btree page must not get lost.  By the time the
	 * transaction commit or the log fsync code waits on writeback of the
	 * btree inode, this page may already have been cleaned and its Error
	 * bit dropped, and the mapping's error counters may have been
	 * consumed by another task, so neither of those is a reliable way to
	 * report the failure.
	 *
	 * Instead, record the error in fs_info->flags, keyed by where the
	 * buffer belongs: the regular trees (log_index == -1) or one of the
	 * two log trees (log_index 0 or 1), so that transaction commit and
	 * the tree-log sync code can detect that a metadata write failed.
	 */
3663 switch (eb->log_index) {
3664 case -1:
3665 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3666 break;
3667 case 0:
3668 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3669 break;
3670 case 1:
3671 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3672 break;
3673 default:
3674 BUG();
3675 }
3676}
3677
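/*
 * end_io handler for extent buffer writes: flag IO errors on the buffer, end
 * writeback on each page and wake anyone waiting once the last page of the
 * buffer completes.
 */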
3678static void end_bio_extent_buffer_writepage(struct bio *bio)
3679{
3680 struct bio_vec *bvec;
3681 struct extent_buffer *eb;
3682 int i, done;
3683
3684 ASSERT(!bio_flagged(bio, BIO_CLONED));
3685 bio_for_each_segment_all(bvec, bio, i) {
3686 struct page *page = bvec->bv_page;
3687
3688 eb = (struct extent_buffer *)page->private;
3689 BUG_ON(!eb);
3690 done = atomic_dec_and_test(&eb->io_pages);
3691
3692 if (bio->bi_status ||
3693 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3694 ClearPageUptodate(page);
3695 set_btree_ioerr(page);
3696 }
3697
3698 end_page_writeback(page);
3699
3700 if (!done)
3701 continue;
3702
3703 end_extent_buffer_writeback(eb);
3704 }
3705
3706 bio_put(bio);
3707}
3708
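/*
 * Write out a single extent buffer: zero the unused areas of the block so no
 * stale content leaks to disk, then submit each backing page for write.
 */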
3709static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3710 struct btrfs_fs_info *fs_info,
3711 struct writeback_control *wbc,
3712 struct extent_page_data *epd)
3713{
3714 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3715 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3716 u64 offset = eb->start;
3717 u32 nritems;
3718 unsigned long i, num_pages;
3719 unsigned long bio_flags = 0;
3720 unsigned long start, end;
3721 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3722 int ret = 0;
3723
3724 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3725 num_pages = num_extent_pages(eb->start, eb->len);
3726 atomic_set(&eb->io_pages, num_pages);
3727 if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
3728 bio_flags = EXTENT_BIO_TREE_LOG;
3729
	/* set btree blocks beyond nritems with 0 to avoid stale content */
3731 nritems = btrfs_header_nritems(eb);
3732 if (btrfs_header_level(eb) > 0) {
3733 end = btrfs_node_key_ptr_offset(nritems);
3734
3735 memzero_extent_buffer(eb, end, eb->len - end);
3736 } else {
		/*
		 * leaf:
		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
		 */
3741 start = btrfs_item_nr_offset(nritems);
3742 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
3743 memzero_extent_buffer(eb, start, end - start);
3744 }
3745
3746 for (i = 0; i < num_pages; i++) {
3747 struct page *p = eb->pages[i];
3748
3749 clear_page_dirty_for_io(p);
3750 set_page_writeback(p);
3751 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3752 p, offset >> 9, PAGE_SIZE, 0, bdev,
3753 &epd->bio,
3754 end_bio_extent_buffer_writepage,
3755 0, epd->bio_flags, bio_flags, false);
3756 epd->bio_flags = bio_flags;
3757 if (ret) {
3758 set_btree_ioerr(p);
3759 if (PageWriteback(p))
3760 end_page_writeback(p);
3761 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3762 end_extent_buffer_writeback(eb);
3763 ret = -EIO;
3764 break;
3765 }
3766 offset += PAGE_SIZE;
3767 update_nr_written(wbc, 1);
3768 unlock_page(p);
3769 }
3770
3771 if (unlikely(ret)) {
3772 for (; i < num_pages; i++) {
3773 struct page *p = eb->pages[i];
3774 clear_page_dirty_for_io(p);
3775 unlock_page(p);
3776 }
3777 }
3778
3779 return ret;
3780}
3781
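/*
 * Walk the dirty pages of the btree inode and write out the extent buffers
 * attached to them, honoring the writeback_control range and sync mode.
 */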
3782int btree_write_cache_pages(struct address_space *mapping,
3783 struct writeback_control *wbc)
3784{
3785 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3786 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3787 struct extent_buffer *eb, *prev_eb = NULL;
3788 struct extent_page_data epd = {
3789 .bio = NULL,
3790 .tree = tree,
3791 .extent_locked = 0,
3792 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3793 .bio_flags = 0,
3794 };
3795 int ret = 0;
3796 int done = 0;
3797 int nr_to_write_done = 0;
3798 struct pagevec pvec;
3799 int nr_pages;
3800 pgoff_t index;
3801 pgoff_t end;
3802 int scanned = 0;
3803 int tag;
3804
3805 pagevec_init(&pvec, 0);
3806 if (wbc->range_cyclic) {
3807 index = mapping->writeback_index;
3808 end = -1;
3809 } else {
3810 index = wbc->range_start >> PAGE_SHIFT;
3811 end = wbc->range_end >> PAGE_SHIFT;
3812 scanned = 1;
3813 }
3814 if (wbc->sync_mode == WB_SYNC_ALL)
3815 tag = PAGECACHE_TAG_TOWRITE;
3816 else
3817 tag = PAGECACHE_TAG_DIRTY;
3818retry:
3819 if (wbc->sync_mode == WB_SYNC_ALL)
3820 tag_pages_for_writeback(mapping, index, end);
3821 while (!done && !nr_to_write_done && (index <= end) &&
3822 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3823 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3824 unsigned i;
3825
3826 scanned = 1;
3827 for (i = 0; i < nr_pages; i++) {
3828 struct page *page = pvec.pages[i];
3829
3830 if (!PagePrivate(page))
3831 continue;
3832
3833 if (!wbc->range_cyclic && page->index > end) {
3834 done = 1;
3835 break;
3836 }
3837
3838 spin_lock(&mapping->private_lock);
3839 if (!PagePrivate(page)) {
3840 spin_unlock(&mapping->private_lock);
3841 continue;
3842 }
3843
3844 eb = (struct extent_buffer *)page->private;
3845
			/*
			 * Shouldn't happen and normally this would be a
			 * BUG_ON but no sense in crashing the users box for
			 * something we can survive anyway.
			 */
3851 if (WARN_ON(!eb)) {
3852 spin_unlock(&mapping->private_lock);
3853 continue;
3854 }
3855
3856 if (eb == prev_eb) {
3857 spin_unlock(&mapping->private_lock);
3858 continue;
3859 }
3860
3861 ret = atomic_inc_not_zero(&eb->refs);
3862 spin_unlock(&mapping->private_lock);
3863 if (!ret)
3864 continue;
3865
3866 prev_eb = eb;
3867 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3868 if (!ret) {
3869 free_extent_buffer(eb);
3870 continue;
3871 }
3872
3873 ret = write_one_eb(eb, fs_info, wbc, &epd);
3874 if (ret) {
3875 done = 1;
3876 free_extent_buffer(eb);
3877 break;
3878 }
3879 free_extent_buffer(eb);
			/*
			 * the filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time
			 */
3886 nr_to_write_done = wbc->nr_to_write <= 0;
3887 }
3888 pagevec_release(&pvec);
3889 cond_resched();
3890 }
3891 if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file
		 */
3896 scanned = 1;
3897 index = 0;
3898 goto retry;
3899 }
3900 flush_write_bio(&epd);
3901 return ret;
3902}
3903
/*
 * extent_write_cache_pages - walk the list of dirty pages of the given
 * address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to the writepage function
 * @flush_fn: function called to flush any bios we have built up
 *
 * If a page is already under I/O, extent_write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync.  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 */
3919static int extent_write_cache_pages(struct address_space *mapping,
3920 struct writeback_control *wbc,
3921 writepage_t writepage, void *data,
3922 void (*flush_fn)(void *))
3923{
3924 struct inode *inode = mapping->host;
3925 int ret = 0;
3926 int done = 0;
3927 int nr_to_write_done = 0;
3928 struct pagevec pvec;
3929 int nr_pages;
3930 pgoff_t index;
3931 pgoff_t end;
3932 pgoff_t done_index;
3933 int range_whole = 0;
3934 int scanned = 0;
3935 int tag;
3936
3937
	/*
	 * We have to hold onto the inode so that ordered extents can do their
	 * work when the IO finishes.  The alternative to this is failing to
	 * add an ordered extent if the igrab() fails there and that is a huge
	 * pain to deal with, so instead just hold onto the inode throughout
	 * the writepages operation.  If it fails here we are freeing up the
	 * inode anyway and we'd rather not waste our time writing out stuff
	 * that is going to be truncated anyway.
	 */
3946 if (!igrab(inode))
3947 return 0;
3948
3949 pagevec_init(&pvec, 0);
3950 if (wbc->range_cyclic) {
3951 index = mapping->writeback_index;
3952 end = -1;
3953 } else {
3954 index = wbc->range_start >> PAGE_SHIFT;
3955 end = wbc->range_end >> PAGE_SHIFT;
3956 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3957 range_whole = 1;
3958 scanned = 1;
3959 }
3960 if (wbc->sync_mode == WB_SYNC_ALL)
3961 tag = PAGECACHE_TAG_TOWRITE;
3962 else
3963 tag = PAGECACHE_TAG_DIRTY;
3964retry:
3965 if (wbc->sync_mode == WB_SYNC_ALL)
3966 tag_pages_for_writeback(mapping, index, end);
3967 done_index = index;
3968 while (!done && !nr_to_write_done && (index <= end) &&
3969 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3970 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3971 unsigned i;
3972
3973 scanned = 1;
3974 for (i = 0; i < nr_pages; i++) {
3975 struct page *page = pvec.pages[i];
3976
3977 done_index = page->index;
3978
			/*
			 * At this point we hold neither mapping->tree_lock
			 * nor lock on the page itself: the page may be
			 * truncated or invalidated (changing page->mapping to
			 * NULL), or even swizzled back from swapper_space to
			 * tmpfs file mapping
			 */
3985 if (!trylock_page(page)) {
3986 flush_fn(data);
3987 lock_page(page);
3988 }
3989
3990 if (unlikely(page->mapping != mapping)) {
3991 unlock_page(page);
3992 continue;
3993 }
3994
3995 if (!wbc->range_cyclic && page->index > end) {
3996 done = 1;
3997 unlock_page(page);
3998 continue;
3999 }
4000
4001 if (wbc->sync_mode != WB_SYNC_NONE) {
4002 if (PageWriteback(page))
4003 flush_fn(data);
4004 wait_on_page_writeback(page);
4005 }
4006
4007 if (PageWriteback(page) ||
4008 !clear_page_dirty_for_io(page)) {
4009 unlock_page(page);
4010 continue;
4011 }
4012
4013 ret = (*writepage)(page, wbc, data);
4014
4015 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
4016 unlock_page(page);
4017 ret = 0;
4018 }
4019 if (ret < 0) {
				/*
				 * done_index is set past this page, so media
				 * errors will not choke background writeout
				 * for the entire file.  This has consequences
				 * for range_cyclic semantics (ie. it may not
				 * be suitable for data integrity writeout).
				 */
4029 done_index = page->index + 1;
4030 done = 1;
4031 break;
4032 }
4033
			/*
			 * the filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time
			 */
4039 nr_to_write_done = wbc->nr_to_write <= 0;
4040 }
4041 pagevec_release(&pvec);
4042 cond_resched();
4043 }
4044 if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file
		 */
4049 scanned = 1;
4050 index = 0;
4051 goto retry;
4052 }
4053
4054 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4055 mapping->writeback_index = done_index;
4056
4057 btrfs_add_delayed_iput(inode);
4058 return ret;
4059}
4060
4061static void flush_epd_write_bio(struct extent_page_data *epd)
4062{
4063 if (epd->bio) {
4064 int ret;
4065
4066 ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
4067 BUG_ON(ret < 0);
4068 epd->bio = NULL;
4069 }
4070}
4071
4072static noinline void flush_write_bio(void *data)
4073{
4074 struct extent_page_data *epd = data;
4075 flush_epd_write_bio(epd);
4076}
4077
4078int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
4079 get_extent_t *get_extent,
4080 struct writeback_control *wbc)
4081{
4082 int ret;
4083 struct extent_page_data epd = {
4084 .bio = NULL,
4085 .tree = tree,
4086 .get_extent = get_extent,
4087 .extent_locked = 0,
4088 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4089 .bio_flags = 0,
4090 };
4091
4092 ret = __extent_writepage(page, wbc, &epd);
4093
4094 flush_epd_write_bio(&epd);
4095 return ret;
4096}
4097
4098int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
4099 u64 start, u64 end, get_extent_t *get_extent,
4100 int mode)
4101{
4102 int ret = 0;
4103 struct address_space *mapping = inode->i_mapping;
4104 struct page *page;
4105 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4106 PAGE_SHIFT;
4107
4108 struct extent_page_data epd = {
4109 .bio = NULL,
4110 .tree = tree,
4111 .get_extent = get_extent,
4112 .extent_locked = 1,
4113 .sync_io = mode == WB_SYNC_ALL,
4114 .bio_flags = 0,
4115 };
4116 struct writeback_control wbc_writepages = {
4117 .sync_mode = mode,
4118 .nr_to_write = nr_pages * 2,
4119 .range_start = start,
4120 .range_end = end + 1,
4121 };
4122
4123 while (start <= end) {
4124 page = find_get_page(mapping, start >> PAGE_SHIFT);
4125 if (clear_page_dirty_for_io(page))
4126 ret = __extent_writepage(page, &wbc_writepages, &epd);
4127 else {
4128 if (tree->ops && tree->ops->writepage_end_io_hook)
4129 tree->ops->writepage_end_io_hook(page, start,
4130 start + PAGE_SIZE - 1,
4131 NULL, 1);
4132 unlock_page(page);
4133 }
4134 put_page(page);
4135 start += PAGE_SIZE;
4136 }
4137
4138 flush_epd_write_bio(&epd);
4139 return ret;
4140}
4141
4142int extent_writepages(struct extent_io_tree *tree,
4143 struct address_space *mapping,
4144 get_extent_t *get_extent,
4145 struct writeback_control *wbc)
4146{
4147 int ret = 0;
4148 struct extent_page_data epd = {
4149 .bio = NULL,
4150 .tree = tree,
4151 .get_extent = get_extent,
4152 .extent_locked = 0,
4153 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4154 .bio_flags = 0,
4155 };
4156
4157 ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
4158 flush_write_bio);
4159 flush_epd_write_bio(&epd);
4160 return ret;
4161}
4162
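/*
 * readpages entry point: pull pages off the readahead list, add them to the
 * page cache and submit contiguous batches of reads.
 */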
4163int extent_readpages(struct extent_io_tree *tree,
4164 struct address_space *mapping,
4165 struct list_head *pages, unsigned nr_pages,
4166 get_extent_t get_extent)
4167{
4168 struct bio *bio = NULL;
4169 unsigned page_idx;
4170 unsigned long bio_flags = 0;
4171 struct page *pagepool[16];
4172 struct page *page;
4173 struct extent_map *em_cached = NULL;
4174 int nr = 0;
4175 u64 prev_em_start = (u64)-1;
4176
4177 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4178 page = list_entry(pages->prev, struct page, lru);
4179
4180 prefetchw(&page->flags);
4181 list_del(&page->lru);
4182 if (add_to_page_cache_lru(page, mapping,
4183 page->index,
4184 readahead_gfp_mask(mapping))) {
4185 put_page(page);
4186 continue;
4187 }
4188
4189 pagepool[nr++] = page;
4190 if (nr < ARRAY_SIZE(pagepool))
4191 continue;
4192 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4193 &bio, 0, &bio_flags, &prev_em_start);
4194 nr = 0;
4195 }
4196 if (nr)
4197 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4198 &bio, 0, &bio_flags, &prev_em_start);
4199
4200 if (em_cached)
4201 free_extent_map(em_cached);
4202
4203 BUG_ON(!list_empty(pages));
4204 if (bio)
4205 return submit_one_bio(bio, 0, bio_flags);
4206 return 0;
4207}
4208
/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
4214int extent_invalidatepage(struct extent_io_tree *tree,
4215 struct page *page, unsigned long offset)
4216{
4217 struct extent_state *cached_state = NULL;
4218 u64 start = page_offset(page);
4219 u64 end = start + PAGE_SIZE - 1;
4220 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4221
4222 start += ALIGN(offset, blocksize);
4223 if (start > end)
4224 return 0;
4225
4226 lock_extent_bits(tree, start, end, &cached_state);
4227 wait_on_page_writeback(page);
4228 clear_extent_bit(tree, start, end,
4229 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4230 EXTENT_DO_ACCOUNTING,
4231 1, 1, &cached_state, GFP_NOFS);
4232 return 0;
4233}
4234
/*
 * a helper for releasepage, this tests for areas of the page that
 * are locked or under IO and drops the related state bits if it is safe
 * to drop the page
 */
4240static int try_release_extent_state(struct extent_map_tree *map,
4241 struct extent_io_tree *tree,
4242 struct page *page, gfp_t mask)
4243{
4244 u64 start = page_offset(page);
4245 u64 end = start + PAGE_SIZE - 1;
4246 int ret = 1;
4247
4248 if (test_range_bit(tree, start, end,
4249 EXTENT_IOBITS, 0, NULL))
4250 ret = 0;
4251 else {
		/*
		 * at this point we can safely clear everything except the
		 * locked bit and the nodatasum bit
		 */
4256 ret = clear_extent_bit(tree, start, end,
4257 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4258 0, 0, NULL, mask);
4259
		/*
		 * if clear_extent_bit failed for enomem reasons,
		 * we can't allow the release to continue.
		 */
4263 if (ret < 0)
4264 ret = 0;
4265 else
4266 ret = 1;
4267 }
4268 return ret;
4269}
4270
/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
4276int try_release_extent_mapping(struct extent_map_tree *map,
4277 struct extent_io_tree *tree, struct page *page,
4278 gfp_t mask)
4279{
4280 struct extent_map *em;
4281 u64 start = page_offset(page);
4282 u64 end = start + PAGE_SIZE - 1;
4283
4284 if (gfpflags_allow_blocking(mask) &&
4285 page->mapping->host->i_size > SZ_16M) {
4286 u64 len;
4287 while (start <= end) {
4288 len = end - start + 1;
4289 write_lock(&map->lock);
4290 em = lookup_extent_mapping(map, start, len);
4291 if (!em) {
4292 write_unlock(&map->lock);
4293 break;
4294 }
4295 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4296 em->start != start) {
4297 write_unlock(&map->lock);
4298 free_extent_map(em);
4299 break;
4300 }
4301 if (!test_range_bit(tree, em->start,
4302 extent_map_end(em) - 1,
4303 EXTENT_LOCKED | EXTENT_WRITEBACK,
4304 0, NULL)) {
4305 remove_extent_mapping(map, em);
4306
4307 free_extent_map(em);
4308 }
4309 start = extent_map_end(em);
4310 write_unlock(&map->lock);
4311
4312
4313 free_extent_map(em);
4314 }
4315 }
4316 return try_release_extent_state(map, tree, page, mask);
4317}
4318
/*
 * helper function for fiemap, which doesn't want to see any holes.
 * This maps until we find something past 'last'
 */
4323static struct extent_map *get_extent_skip_holes(struct inode *inode,
4324 u64 offset,
4325 u64 last,
4326 get_extent_t *get_extent)
4327{
4328 u64 sectorsize = btrfs_inode_sectorsize(inode);
4329 struct extent_map *em;
4330 u64 len;
4331
4332 if (offset >= last)
4333 return NULL;
4334
4335 while (1) {
4336 len = last - offset;
4337 if (len == 0)
4338 break;
4339 len = ALIGN(len, sectorsize);
4340 em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0);
4341 if (IS_ERR_OR_NULL(em))
4342 return em;
4343
4344
4345 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
4346 em->block_start != EXTENT_MAP_HOLE) {
4347 return em;
4348 }
4349
4350
4351 offset = extent_map_end(em);
4352 free_extent_map(em);
4353 if (offset >= last)
4354 break;
4355 }
4356 return NULL;
4357}
4358
/*
 * Cache the previous fiemap extent.
 *
 * Will be used for merging adjacent fiemap extents.
 */
4364struct fiemap_cache {
4365 u64 offset;
4366 u64 phys;
4367 u64 len;
4368 u32 flags;
4369 bool cached;
4370};
4371
/*
 * Helper to submit a fiemap extent.
 *
 * Will try to merge the current fiemap extent specified by @offset, @phys,
 * @len and @flags with the cached one.
 * Only when we fail to merge is the cached extent submitted as a fiemap
 * extent.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
4382static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4383 struct fiemap_cache *cache,
4384 u64 offset, u64 phys, u64 len, u32 flags)
4385{
4386 int ret = 0;
4387
4388 if (!cache->cached)
4389 goto assign;
4390
	/*
	 * Sanity check, extent_fiemap() should have ensured that the new
	 * fiemap extent won't overlap with the cached one.
	 * Not recoverable.
	 *
	 * NOTE: Physical addresses can overlap, due to compression
	 */
4398 if (cache->offset + cache->len > offset) {
4399 WARN_ON(1);
4400 return -EINVAL;
4401 }
4402
	/*
	 * Only merge fiemap extents if
	 * 1) Their logical addresses are continuous
	 *
	 * 2) Their physical addresses are continuous
	 *    So truly compressed (physical size smaller than logical size)
	 *    extents won't get merged with each other
	 *
	 * 3) Share same flags except FIEMAP_EXTENT_LAST
	 *    So regular extent won't get merged with prealloc extent
	 */
4414 if (cache->offset + cache->len == offset &&
4415 cache->phys + cache->len == phys &&
4416 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4417 (flags & ~FIEMAP_EXTENT_LAST)) {
4418 cache->len += len;
4419 cache->flags |= flags;
4420 goto try_submit_last;
4421 }
4422
	/* Not mergeable, need to submit cached one */
4424 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4425 cache->len, cache->flags);
4426 cache->cached = false;
4427 if (ret)
4428 return ret;
4429assign:
4430 cache->cached = true;
4431 cache->offset = offset;
4432 cache->phys = phys;
4433 cache->len = len;
4434 cache->flags = flags;
4435try_submit_last:
4436 if (cache->flags & FIEMAP_EXTENT_LAST) {
4437 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4438 cache->phys, cache->len, cache->flags);
4439 cache->cached = false;
4440 }
4441 return ret;
4442}
4443
/*
 * Emit the last fiemap cache.
 *
 * The last fiemap cache may still be cached in the following case:
 * 0		      4k		    8k
 * |<- Fiemap range ->|
 * |<------------  First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */
4455static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
4456 struct fiemap_extent_info *fieinfo,
4457 struct fiemap_cache *cache)
4458{
4459 int ret;
4460
4461 if (!cache->cached)
4462 return 0;
4463
4464 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4465 cache->len, cache->flags);
4466 cache->cached = false;
4467 if (ret > 0)
4468 ret = 0;
4469 return ret;
4470}
4471
4472int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4473 __u64 start, __u64 len, get_extent_t *get_extent)
4474{
4475 int ret = 0;
4476 u64 off = start;
4477 u64 max = start + len;
4478 u32 flags = 0;
4479 u32 found_type;
4480 u64 last;
4481 u64 last_for_get_extent = 0;
4482 u64 disko = 0;
4483 u64 isize = i_size_read(inode);
4484 struct btrfs_key found_key;
4485 struct extent_map *em = NULL;
4486 struct extent_state *cached_state = NULL;
4487 struct btrfs_path *path;
4488 struct btrfs_root *root = BTRFS_I(inode)->root;
4489 struct fiemap_cache cache = { 0 };
4490 int end = 0;
4491 u64 em_start = 0;
4492 u64 em_len = 0;
4493 u64 em_end = 0;
4494
4495 if (len == 0)
4496 return -EINVAL;
4497
4498 path = btrfs_alloc_path();
4499 if (!path)
4500 return -ENOMEM;
4501 path->leave_spinning = 1;
4502
4503 start = round_down(start, btrfs_inode_sectorsize(inode));
4504 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4505
	/*
	 * lookup the last file extent.  We're not using i_size here
	 * because there might be preallocation past i_size
	 */
4510 ret = btrfs_lookup_file_extent(NULL, root, path,
4511 btrfs_ino(BTRFS_I(inode)), -1, 0);
4512 if (ret < 0) {
4513 btrfs_free_path(path);
4514 return ret;
4515 } else {
4516 WARN_ON(!ret);
4517 if (ret == 1)
4518 ret = 0;
4519 }
4520
4521 path->slots[0]--;
4522 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4523 found_type = found_key.type;
4524
4525
4526 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4527 found_type != BTRFS_EXTENT_DATA_KEY) {
4528
4529 last = (u64)-1;
4530 last_for_get_extent = isize;
4531 } else {
		/*
		 * remember the start of the last extent.  There are a
		 * bunch of different factors that go into the length of the
		 * extent, so it's much less complex to remember where it
		 * started
		 */
4537 last = found_key.offset;
4538 last_for_get_extent = last + 1;
4539 }
4540 btrfs_release_path(path);
4541
	/*
	 * we might have some extents allocated but more delalloc past those
	 * extents.  so, we trust isize unless the start of the last extent
	 * is beyond isize
	 */
4547 if (last < isize) {
4548 last = (u64)-1;
4549 last_for_get_extent = isize;
4550 }
4551
4552 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4553 &cached_state);
4554
4555 em = get_extent_skip_holes(inode, start, last_for_get_extent,
4556 get_extent);
4557 if (!em)
4558 goto out;
4559 if (IS_ERR(em)) {
4560 ret = PTR_ERR(em);
4561 goto out;
4562 }
4563
4564 while (!end) {
4565 u64 offset_in_extent = 0;
4566
4567
4568 if (em->start >= max || extent_map_end(em) < off)
4569 break;
4570
		/*
		 * get_extent may return an extent that starts before our
		 * requested range.  We have to make sure the ranges
		 * we return to fiemap always move forward and don't
		 * overlap, so adjust the offsets here
		 */
4577 em_start = max(em->start, off);
4578
		/*
		 * record the extent offset so we can adjust the disk offset
		 * below.  Only do this for uncompressed extents, since for a
		 * compressed extent our in-ram offset may be past what we
		 * have actually allocated on disk.
		 */
4585 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4586 offset_in_extent = em_start - em->start;
4587 em_end = extent_map_end(em);
4588 em_len = em_end - em_start;
4589 disko = 0;
4590 flags = 0;
4591
4592
4593
4594
4595 off = extent_map_end(em);
4596 if (off >= max)
4597 end = 1;
4598
4599 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4600 end = 1;
4601 flags |= FIEMAP_EXTENT_LAST;
4602 } else if (em->block_start == EXTENT_MAP_INLINE) {
4603 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4604 FIEMAP_EXTENT_NOT_ALIGNED);
4605 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4606 flags |= (FIEMAP_EXTENT_DELALLOC |
4607 FIEMAP_EXTENT_UNKNOWN);
4608 } else if (fieinfo->fi_extents_max) {
4609 u64 bytenr = em->block_start -
4610 (em->start - em->orig_start);
4611
4612 disko = em->block_start + offset_in_extent;
4613
			/*
			 * As btrfs supports shared space, this information
			 * can be exported to userspace tools via
			 * flag FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
			 * then we're just getting a count and we can skip the
			 * lookup stuff.
			 */
4621 ret = btrfs_check_shared(root,
4622 btrfs_ino(BTRFS_I(inode)),
4623 bytenr);
4624 if (ret < 0)
4625 goto out_free;
4626 if (ret)
4627 flags |= FIEMAP_EXTENT_SHARED;
4628 ret = 0;
4629 }
4630 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4631 flags |= FIEMAP_EXTENT_ENCODED;
4632 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4633 flags |= FIEMAP_EXTENT_UNWRITTEN;
4634
4635 free_extent_map(em);
4636 em = NULL;
4637 if ((em_start >= last) || em_len == (u64)-1 ||
4638 (last == (u64)-1 && isize <= em_end)) {
4639 flags |= FIEMAP_EXTENT_LAST;
4640 end = 1;
4641 }
4642
4643
4644 em = get_extent_skip_holes(inode, off, last_for_get_extent,
4645 get_extent);
4646 if (IS_ERR(em)) {
4647 ret = PTR_ERR(em);
4648 goto out;
4649 }
4650 if (!em) {
4651 flags |= FIEMAP_EXTENT_LAST;
4652 end = 1;
4653 }
4654 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4655 em_len, flags);
4656 if (ret) {
4657 if (ret == 1)
4658 ret = 0;
4659 goto out_free;
4660 }
4661 }
4662out_free:
4663 if (!ret)
4664 ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
4665 free_extent_map(em);
4666out:
4667 btrfs_free_path(path);
4668 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4669 &cached_state, GFP_NOFS);
4670 return ret;
4671}
4672
4673static void __free_extent_buffer(struct extent_buffer *eb)
4674{
4675 btrfs_leak_debug_del(&eb->leak_list);
4676 kmem_cache_free(extent_buffer_cache, eb);
4677}
4678
4679int extent_buffer_under_io(struct extent_buffer *eb)
4680{
4681 return (atomic_read(&eb->io_pages) ||
4682 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4683 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4684}
4685
/*
 * Release all pages attached to the extent buffer.
 */
4689static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4690{
4691 unsigned long index;
4692 struct page *page;
4693 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4694
4695 BUG_ON(extent_buffer_under_io(eb));
4696
4697 index = num_extent_pages(eb->start, eb->len);
4698 if (index == 0)
4699 return;
4700
4701 do {
4702 index--;
4703 page = eb->pages[index];
4704 if (!page)
4705 continue;
4706 if (mapped)
4707 spin_lock(&page->mapping->private_lock);
4708
		/*
		 * We do this since we'll remove the pages after we've
		 * removed the eb from the radix tree, so we could race
		 * and have this page now attached to the new eb.  So
		 * only clear page_private if it's still connected to
		 * this eb.
		 */
4715 if (PagePrivate(page) &&
4716 page->private == (unsigned long)eb) {
4717 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4718 BUG_ON(PageDirty(page));
4719 BUG_ON(PageWriteback(page));
4720
			/*
			 * We need to make sure we haven't been attached
			 * to a new eb.
			 */
4724 ClearPagePrivate(page);
4725 set_page_private(page, 0);
4726
4727 put_page(page);
4728 }
4729
4730 if (mapped)
4731 spin_unlock(&page->mapping->private_lock);
4732
4733
4734 put_page(page);
4735 } while (index != 0);
4736}
4737
/*
 * Helper for releasing the extent buffer.
 */
4741static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4742{
4743 btrfs_release_extent_buffer_page(eb);
4744 __free_extent_buffer(eb);
4745}
4746
4747static struct extent_buffer *
4748__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4749 unsigned long len)
4750{
4751 struct extent_buffer *eb = NULL;
4752
4753 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4754 eb->start = start;
4755 eb->len = len;
4756 eb->fs_info = fs_info;
4757 eb->bflags = 0;
4758 rwlock_init(&eb->lock);
4759 atomic_set(&eb->write_locks, 0);
4760 atomic_set(&eb->read_locks, 0);
4761 atomic_set(&eb->blocking_readers, 0);
4762 atomic_set(&eb->blocking_writers, 0);
4763 atomic_set(&eb->spinning_readers, 0);
4764 atomic_set(&eb->spinning_writers, 0);
4765 eb->lock_nested = 0;
4766 init_waitqueue_head(&eb->write_lock_wq);
4767 init_waitqueue_head(&eb->read_lock_wq);
4768
4769 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4770
4771 spin_lock_init(&eb->refs_lock);
4772 atomic_set(&eb->refs, 1);
4773 atomic_set(&eb->io_pages, 0);
4774
	/*
	 * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
	 */
4778 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4779 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4780 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4781
4782 return eb;
4783}
4784
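/*
 * Allocate a new (dummy) extent buffer and copy the contents of @src into
 * its freshly allocated pages.
 */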
4785struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4786{
4787 unsigned long i;
4788 struct page *p;
4789 struct extent_buffer *new;
4790 unsigned long num_pages = num_extent_pages(src->start, src->len);
4791
4792 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4793 if (new == NULL)
4794 return NULL;
4795
4796 for (i = 0; i < num_pages; i++) {
4797 p = alloc_page(GFP_NOFS);
4798 if (!p) {
4799 btrfs_release_extent_buffer(new);
4800 return NULL;
4801 }
4802 attach_extent_buffer_page(new, p);
4803 WARN_ON(PageDirty(p));
4804 SetPageUptodate(p);
4805 new->pages[i] = p;
4806 copy_page(page_address(p), page_address(src->pages[i]));
4807 }
4808
4809 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4810 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4811
4812 return new;
4813}
4814
4815struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4816 u64 start, unsigned long len)
4817{
4818 struct extent_buffer *eb;
4819 unsigned long num_pages;
4820 unsigned long i;
4821
4822 num_pages = num_extent_pages(start, len);
4823
4824 eb = __alloc_extent_buffer(fs_info, start, len);
4825 if (!eb)
4826 return NULL;
4827
4828 for (i = 0; i < num_pages; i++) {
4829 eb->pages[i] = alloc_page(GFP_NOFS);
4830 if (!eb->pages[i])
4831 goto err;
4832 }
4833 set_extent_buffer_uptodate(eb);
4834 btrfs_set_header_nritems(eb, 0);
4835 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4836
4837 return eb;
4838err:
4839 for (; i > 0; i--)
4840 __free_page(eb->pages[i - 1]);
4841 __free_extent_buffer(eb);
4842 return NULL;
4843}
4844
4845struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4846 u64 start)
4847{
4848 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
4849}
4850
4851static void check_buffer_tree_ref(struct extent_buffer *eb)
4852{
4853 int refs;
4854
	/*
	 * The tree ref is tricky.  We have to make sure it is set if we have
	 * the buffer dirty, otherwise the code to free a buffer can end up
	 * dropping a dirty page.
	 *
	 * Once the ref bit is set, it won't go away while the buffer is
	 * dirty or in writeback, and it also won't go away while we have the
	 * reference count on the eb bumped.
	 *
	 * We can't just set the ref bit without bumping the ref on the eb
	 * because free_extent_buffer might see the ref bit and try to clear
	 * it, dropping the tree reference underneath us.  So we set the bit
	 * and take the extra reference together under eb->refs_lock; if the
	 * bit was already set, someone else already owns that reference and
	 * we leave the count alone.
	 */
4874 refs = atomic_read(&eb->refs);
4875 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4876 return;
4877
4878 spin_lock(&eb->refs_lock);
4879 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4880 atomic_inc(&eb->refs);
4881 spin_unlock(&eb->refs_lock);
4882}
4883
4884static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4885 struct page *accessed)
4886{
4887 unsigned long num_pages, i;
4888
4889 check_buffer_tree_ref(eb);
4890
4891 num_pages = num_extent_pages(eb->start, eb->len);
4892 for (i = 0; i < num_pages; i++) {
4893 struct page *p = eb->pages[i];
4894
4895 if (p != accessed)
4896 mark_page_accessed(p);
4897 }
4898}
4899
4900struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4901 u64 start)
4902{
4903 struct extent_buffer *eb;
4904
4905 rcu_read_lock();
4906 eb = radix_tree_lookup(&fs_info->buffer_radix,
4907 start >> PAGE_SHIFT);
4908 if (eb && atomic_inc_not_zero(&eb->refs)) {
4909 rcu_read_unlock();
4910
		/*
		 * Lock our eb's refs_lock to avoid races with
		 * free_extent_buffer().  When we get our eb it might be
		 * flagged with EXTENT_BUFFER_STALE and another task running
		 * free_extent_buffer() might have seen that flag set,
		 * eb->refs == 2, that the buffer isn't under IO (dirty and
		 * writeback flags not set) and it's still in the tree (flag
		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
		 * of decrementing the extent buffer's reference count twice.
		 * So here we could race and increment the eb's reference
		 * count, clear its stale flag, mark it as dirty and drop our
		 * reference before the other task finishes executing
		 * free_extent_buffer, which would later result in an attempt
		 * to free an extent buffer that is dirty.
		 */
4925 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4926 spin_lock(&eb->refs_lock);
4927 spin_unlock(&eb->refs_lock);
4928 }
4929 mark_extent_buffer_accessed(eb, NULL);
4930 return eb;
4931 }
4932 rcu_read_unlock();
4933
4934 return NULL;
4935}
4936
4937#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4938struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4939 u64 start)
4940{
4941 struct extent_buffer *eb, *exists = NULL;
4942 int ret;
4943
4944 eb = find_extent_buffer(fs_info, start);
4945 if (eb)
4946 return eb;
4947 eb = alloc_dummy_extent_buffer(fs_info, start);
4948 if (!eb)
4949 return NULL;
4950 eb->fs_info = fs_info;
4951again:
4952 ret = radix_tree_preload(GFP_NOFS);
4953 if (ret)
4954 goto free_eb;
4955 spin_lock(&fs_info->buffer_lock);
4956 ret = radix_tree_insert(&fs_info->buffer_radix,
4957 start >> PAGE_SHIFT, eb);
4958 spin_unlock(&fs_info->buffer_lock);
4959 radix_tree_preload_end();
4960 if (ret == -EEXIST) {
4961 exists = find_extent_buffer(fs_info, start);
4962 if (exists)
4963 goto free_eb;
4964 else
4965 goto again;
4966 }
4967 check_buffer_tree_ref(eb);
4968 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4969
	/*
	 * We will free dummy extent buffers if they come into
	 * free_extent_buffer with a ref count of 2, but if we are using this
	 * we want the buffers to stay in memory until we're done with them,
	 * so bump the ref count again.
	 */
4976 atomic_inc(&eb->refs);
4977 return eb;
4978free_eb:
4979 btrfs_release_extent_buffer(eb);
4980 return exists;
4981}
4982#endif
4983
4984struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4985 u64 start)
4986{
4987 unsigned long len = fs_info->nodesize;
4988 unsigned long num_pages = num_extent_pages(start, len);
4989 unsigned long i;
4990 unsigned long index = start >> PAGE_SHIFT;
4991 struct extent_buffer *eb;
4992 struct extent_buffer *exists = NULL;
4993 struct page *p;
4994 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4995 int uptodate = 1;
4996 int ret;
4997
4998 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
4999 btrfs_err(fs_info, "bad tree block start %llu", start);
5000 return ERR_PTR(-EINVAL);
5001 }
5002
5003 eb = find_extent_buffer(fs_info, start);
5004 if (eb)
5005 return eb;
5006
5007 eb = __alloc_extent_buffer(fs_info, start, len);
5008 if (!eb)
5009 return ERR_PTR(-ENOMEM);
5010
5011 for (i = 0; i < num_pages; i++, index++) {
5012 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
5013 if (!p) {
5014 exists = ERR_PTR(-ENOMEM);
5015 goto free_eb;
5016 }
5017
5018 spin_lock(&mapping->private_lock);
5019 if (PagePrivate(p)) {
5020 /*
5021  * We could have already allocated an eb for this page and
5022  * attached one, so let's see if we can get a ref on the
5023  * existing eb.  If we can, we know it's good and we can just
5024  * return that one, else we know we can just overwrite
5025  * page->private.
5026  */
5027 exists = (struct extent_buffer *)p->private;
5028 if (atomic_inc_not_zero(&exists->refs)) {
5029 spin_unlock(&mapping->private_lock);
5030 unlock_page(p);
5031 put_page(p);
5032 mark_extent_buffer_accessed(exists, p);
5033 goto free_eb;
5034 }
5035 exists = NULL;
5036
5037 /*
5038  * Do this so attach doesn't complain and we need to drop the
5039  * ref the old guy had.
5040  */
5041 ClearPagePrivate(p);
5042 WARN_ON(PageDirty(p));
5043 put_page(p);
5044 }
5045 attach_extent_buffer_page(eb, p);
5046 spin_unlock(&mapping->private_lock);
5047 WARN_ON(PageDirty(p));
5048 eb->pages[i] = p;
5049 if (!PageUptodate(p))
5050 uptodate = 0;
5051
5052 /*
5053  * See below about how we avoid a nasty race with releasepage and
5054  * why we unlock the pages only after the radix tree insertion.
5055  */
5056 }
5057 if (uptodate)
5058 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5059again:
5060 ret = radix_tree_preload(GFP_NOFS);
5061 if (ret) {
5062 exists = ERR_PTR(ret);
5063 goto free_eb;
5064 }
5065
5066 spin_lock(&fs_info->buffer_lock);
5067 ret = radix_tree_insert(&fs_info->buffer_radix,
5068 start >> PAGE_SHIFT, eb);
5069 spin_unlock(&fs_info->buffer_lock);
5070 radix_tree_preload_end();
5071 if (ret == -EEXIST) {
5072 exists = find_extent_buffer(fs_info, start);
5073 if (exists)
5074 goto free_eb;
5075 else
5076 goto again;
5077 }
5078
5079 check_buffer_tree_ref(eb);
5080 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
5081
5082 /*
5083  * There is a race where releasepage may have tried to find this
5084  * extent buffer in the radix tree but failed.  In that case it
5085  * will tell the VM it is safe to reclaim the page and it will
5086  * clear the page private bit.
5087  *
5088  * We must make sure to set the page private bit properly after
5089  * the extent buffer is in the radix tree so it doesn't get lost.
5090  */
5091 SetPageChecked(eb->pages[0]);
5092 for (i = 1; i < num_pages; i++) {
5093 p = eb->pages[i];
5094 ClearPageChecked(p);
5095 unlock_page(p);
5096 }
5097 unlock_page(eb->pages[0]);
5098 return eb;
5099
5100free_eb:
5101 WARN_ON(!atomic_dec_and_test(&eb->refs));
5102 for (i = 0; i < num_pages; i++) {
5103 if (eb->pages[i])
5104 unlock_page(eb->pages[i]);
5105 }
5106
5107 btrfs_release_extent_buffer(eb);
5108 return exists;
5109}
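/*
 * Illustrative sketch: unlike find_extent_buffer(), alloc_extent_buffer()
 * never returns NULL; failures come back as an ERR_PTR(), so callers check
 * with IS_ERR().  This is roughly what btrfs_find_create_tree_block() in
 * disk-io.c boils down to; the helper name here is hypothetical.
 */
static inline struct extent_buffer *
example_get_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
{
        struct extent_buffer *eb;

        eb = alloc_extent_buffer(fs_info, bytenr);
        if (IS_ERR(eb))                 /* never NULL on failure */
                return eb;              /* propagate the ERR_PTR */

        /* ... the buffer is cached and referenced; use it ... */
        return eb;
}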
5110
5111static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5112{
5113 struct extent_buffer *eb =
5114 container_of(head, struct extent_buffer, rcu_head);
5115
5116 __free_extent_buffer(eb);
5117}
5118
5119/* Expects to have eb->refs_lock already held */
5120static int release_extent_buffer(struct extent_buffer *eb)
5121{
5122 WARN_ON(atomic_read(&eb->refs) == 0);
5123 if (atomic_dec_and_test(&eb->refs)) {
5124 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5125 struct btrfs_fs_info *fs_info = eb->fs_info;
5126
5127 spin_unlock(&eb->refs_lock);
5128
5129 spin_lock(&fs_info->buffer_lock);
5130 radix_tree_delete(&fs_info->buffer_radix,
5131 eb->start >> PAGE_SHIFT);
5132 spin_unlock(&fs_info->buffer_lock);
5133 } else {
5134 spin_unlock(&eb->refs_lock);
5135 }
5136
5137 /* Should be safe to release our pages at this point */
5138 btrfs_release_extent_buffer_page(eb);
5139#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5140 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
5141 __free_extent_buffer(eb);
5142 return 1;
5143 }
5144#endif
5145 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5146 return 1;
5147 }
5148 spin_unlock(&eb->refs_lock);
5149
5150 return 0;
5151}
5152
5153void free_extent_buffer(struct extent_buffer *eb)
5154{
5155 int refs;
5156 int old;
5157 if (!eb)
5158 return;
5159
5160 while (1) {
5161 refs = atomic_read(&eb->refs);
5162 if (refs <= 3)
5163 break;
5164 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5165 if (old == refs)
5166 return;
5167 }
5168
5169 spin_lock(&eb->refs_lock);
5170 if (atomic_read(&eb->refs) == 2 &&
5171 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5172 atomic_dec(&eb->refs);
5173
5174 if (atomic_read(&eb->refs) == 2 &&
5175 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5176 !extent_buffer_under_io(eb) &&
5177 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5178 atomic_dec(&eb->refs);
5179
5180 /*
5181  * I know this is terrible, but it's temporary until we stop
5182  * tracking the uptodate bits and such for the extent buffers.
5183  */
5184 release_extent_buffer(eb);
5185}
5186
5187void free_extent_buffer_stale(struct extent_buffer *eb)
5188{
5189 if (!eb)
5190 return;
5191
5192 spin_lock(&eb->refs_lock);
5193 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5194
5195 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5196 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5197 atomic_dec(&eb->refs);
5198 release_extent_buffer(eb);
5199}
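/*
 * Illustrative sketch: a caller that knows a tree block will not be used
 * again (for example after it has been COWed or emptied) drops its
 * reference with free_extent_buffer_stale() instead of
 * free_extent_buffer(), so the buffer is dropped from the radix tree as
 * soon as the last reference goes away instead of lingering in cache.
 * The helper name is hypothetical.
 */
static inline void example_drop_dead_tree_block(struct extent_buffer *eb)
{
        free_extent_buffer_stale(eb);
}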
5200
5201void clear_extent_buffer_dirty(struct extent_buffer *eb)
5202{
5203 unsigned long i;
5204 unsigned long num_pages;
5205 struct page *page;
5206
5207 num_pages = num_extent_pages(eb->start, eb->len);
5208
5209 for (i = 0; i < num_pages; i++) {
5210 page = eb->pages[i];
5211 if (!PageDirty(page))
5212 continue;
5213
5214 lock_page(page);
5215 WARN_ON(!PagePrivate(page));
5216
5217 clear_page_dirty_for_io(page);
5218 spin_lock_irq(&page->mapping->tree_lock);
5219 if (!PageDirty(page)) {
5220 radix_tree_tag_clear(&page->mapping->page_tree,
5221 page_index(page),
5222 PAGECACHE_TAG_DIRTY);
5223 }
5224 spin_unlock_irq(&page->mapping->tree_lock);
5225 ClearPageError(page);
5226 unlock_page(page);
5227 }
5228 WARN_ON(atomic_read(&eb->refs) == 0);
5229}
5230
5231int set_extent_buffer_dirty(struct extent_buffer *eb)
5232{
5233 unsigned long i;
5234 unsigned long num_pages;
5235 int was_dirty = 0;
5236
5237 check_buffer_tree_ref(eb);
5238
5239 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5240
5241 num_pages = num_extent_pages(eb->start, eb->len);
5242 WARN_ON(atomic_read(&eb->refs) == 0);
5243 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5244
5245 for (i = 0; i < num_pages; i++)
5246 set_page_dirty(eb->pages[i]);
5247 return was_dirty;
5248}
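/*
 * Illustrative sketch: after modifying a tree block's contents the caller
 * marks the whole buffer dirty so it gets written back; in btrfs proper
 * btrfs_mark_buffer_dirty() is essentially a wrapper around
 * set_extent_buffer_dirty().  The helper and its parameters below are
 * hypothetical.
 */
static inline void example_update_and_dirty(struct extent_buffer *leaf,
                                            unsigned long offset,
                                            const void *data,
                                            unsigned long len)
{
        write_extent_buffer(leaf, data, offset, len);
        set_extent_buffer_dirty(leaf);
}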
5249
5250void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5251{
5252 unsigned long i;
5253 struct page *page;
5254 unsigned long num_pages;
5255
5256 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5257 num_pages = num_extent_pages(eb->start, eb->len);
5258 for (i = 0; i < num_pages; i++) {
5259 page = eb->pages[i];
5260 if (page)
5261 ClearPageUptodate(page);
5262 }
5263}
5264
5265void set_extent_buffer_uptodate(struct extent_buffer *eb)
5266{
5267 unsigned long i;
5268 struct page *page;
5269 unsigned long num_pages;
5270
5271 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5272 num_pages = num_extent_pages(eb->start, eb->len);
5273 for (i = 0; i < num_pages; i++) {
5274 page = eb->pages[i];
5275 SetPageUptodate(page);
5276 }
5277}
5278
5279int extent_buffer_uptodate(struct extent_buffer *eb)
5280{
5281 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5282}
5283
5284int read_extent_buffer_pages(struct extent_io_tree *tree,
5285 struct extent_buffer *eb, int wait,
5286 get_extent_t *get_extent, int mirror_num)
5287{
5288 unsigned long i;
5289 struct page *page;
5290 int err;
5291 int ret = 0;
5292 int locked_pages = 0;
5293 int all_uptodate = 1;
5294 unsigned long num_pages;
5295 unsigned long num_reads = 0;
5296 struct bio *bio = NULL;
5297 unsigned long bio_flags = 0;
5298
5299 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5300 return 0;
5301
5302 num_pages = num_extent_pages(eb->start, eb->len);
5303 for (i = 0; i < num_pages; i++) {
5304 page = eb->pages[i];
5305 if (wait == WAIT_NONE) {
5306 if (!trylock_page(page))
5307 goto unlock_exit;
5308 } else {
5309 lock_page(page);
5310 }
5311 locked_pages++;
5312 }
5313 /*
5314  * We need to firstly lock all pages to make sure that
5315  * the uptodate bit of our pages won't be affected by
5316  * clear_extent_buffer_uptodate().
5317  */
5318 for (i = 0; i < num_pages; i++) {
5319 page = eb->pages[i];
5320 if (!PageUptodate(page)) {
5321 num_reads++;
5322 all_uptodate = 0;
5323 }
5324 }
5325
5326 if (all_uptodate) {
5327 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5328 goto unlock_exit;
5329 }
5330
5331 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5332 eb->read_mirror = 0;
5333 atomic_set(&eb->io_pages, num_reads);
5334 for (i = 0; i < num_pages; i++) {
5335 page = eb->pages[i];
5336
5337 if (!PageUptodate(page)) {
5338 if (ret) {
5339 atomic_dec(&eb->io_pages);
5340 unlock_page(page);
5341 continue;
5342 }
5343
5344 ClearPageError(page);
5345 err = __extent_read_full_page(tree, page,
5346 get_extent, &bio,
5347 mirror_num, &bio_flags,
5348 REQ_META);
5349 if (err) {
5350 ret = err;
5351 /*
5352  * We use &bio in the above __extent_read_full_page()
5353  * call, so we ensure that if it returns error, the
5354  * current page fails to add itself to the bio and
5355  * it's been unlocked.
5356  *
5357  * We must decrement io_pages by ourselves for the
5358  * failed page.
5359 atomic_dec(&eb->io_pages);
5360 }
5361 } else {
5362 unlock_page(page);
5363 }
5364 }
5365
5366 if (bio) {
5367 err = submit_one_bio(bio, mirror_num, bio_flags);
5368 if (err)
5369 return err;
5370 }
5371
5372 if (ret || wait != WAIT_COMPLETE)
5373 return ret;
5374
5375 for (i = 0; i < num_pages; i++) {
5376 page = eb->pages[i];
5377 wait_on_page_locked(page);
5378 if (!PageUptodate(page))
5379 ret = -EIO;
5380 }
5381
5382 return ret;
5383
5384unlock_exit:
5385 while (locked_pages > 0) {
5386 locked_pages--;
5387 page = eb->pages[locked_pages];
5388 unlock_page(page);
5389 }
5390 return ret;
5391}
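/*
 * Illustrative sketch: combining alloc_extent_buffer() with
 * read_extent_buffer_pages() and WAIT_COMPLETE yields an uptodate tree
 * block, similar in spirit to the btree read path in disk-io.c.  The
 * io_tree and get_extent callback are assumed to be supplied by the
 * caller; the helper name is hypothetical.
 */
static inline struct extent_buffer *
example_read_tree_block(struct btrfs_fs_info *fs_info,
                        struct extent_io_tree *io_tree,
                        get_extent_t *get_extent, u64 bytenr)
{
        struct extent_buffer *eb;
        int ret;

        eb = alloc_extent_buffer(fs_info, bytenr);
        if (IS_ERR(eb))
                return eb;

        ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
                                       get_extent, 0 /* mirror_num */);
        if (ret) {
                free_extent_buffer(eb);
                return ERR_PTR(ret);
        }
        return eb;
}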
5392
5393void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
5394 unsigned long start, unsigned long len)
5395{
5396 size_t cur;
5397 size_t offset;
5398 struct page *page;
5399 char *kaddr;
5400 char *dst = (char *)dstv;
5401 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5402 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5403
5404 if (start + len > eb->len) {
5405 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5406 eb->start, eb->len, start, len);
5407 memset(dst, 0, len);
5408 return;
5409 }
5410
5411 offset = (start_offset + start) & (PAGE_SIZE - 1);
5412
5413 while (len > 0) {
5414 page = eb->pages[i];
5415
5416 cur = min(len, (PAGE_SIZE - offset));
5417 kaddr = page_address(page);
5418 memcpy(dst, kaddr + offset, cur);
5419
5420 dst += cur;
5421 len -= cur;
5422 offset = 0;
5423 i++;
5424 }
5425}
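/*
 * Illustrative sketch: read_extent_buffer() copies from the possibly
 * multi-page buffer into a linear destination.  Here the on-disk header's
 * generation field is pulled into a local variable; in btrfs proper this
 * is normally done through the btrfs_header_generation() accessor.  The
 * helper name is hypothetical.
 */
static inline u64 example_read_header_generation(const struct extent_buffer *eb)
{
        __le64 gen;

        read_extent_buffer(eb, &gen,
                           offsetof(struct btrfs_header, generation),
                           sizeof(gen));
        return le64_to_cpu(gen);
}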
5426
5427int read_extent_buffer_to_user(const struct extent_buffer *eb,
5428 void __user *dstv,
5429 unsigned long start, unsigned long len)
5430{
5431 size_t cur;
5432 size_t offset;
5433 struct page *page;
5434 char *kaddr;
5435 char __user *dst = (char __user *)dstv;
5436 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5437 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5438 int ret = 0;
5439
5440 WARN_ON(start > eb->len);
5441 WARN_ON(start + len > eb->start + eb->len);
5442
5443 offset = (start_offset + start) & (PAGE_SIZE - 1);
5444
5445 while (len > 0) {
5446 page = eb->pages[i];
5447
5448 cur = min(len, (PAGE_SIZE - offset));
5449 kaddr = page_address(page);
5450 if (copy_to_user(dst, kaddr + offset, cur)) {
5451 ret = -EFAULT;
5452 break;
5453 }
5454
5455 dst += cur;
5456 len -= cur;
5457 offset = 0;
5458 i++;
5459 }
5460
5461 return ret;
5462}
5463
5464/*
5465 * return 0 if the item is found within a page.
5466 * return 1 if the item spans two pages.
5467 * return -EINVAL otherwise.
5468 */
5469int map_private_extent_buffer(const struct extent_buffer *eb,
5470 unsigned long start, unsigned long min_len,
5471 char **map, unsigned long *map_start,
5472 unsigned long *map_len)
5473{
5474 size_t offset = start & (PAGE_SIZE - 1);
5475 char *kaddr;
5476 struct page *p;
5477 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5478 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5479 unsigned long end_i = (start_offset + start + min_len - 1) >>
5480 PAGE_SHIFT;
5481
5482 if (start + min_len > eb->len) {
5483 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5484 eb->start, eb->len, start, min_len);
5485 return -EINVAL;
5486 }
5487
5488 if (i != end_i)
5489 return 1;
5490
5491 if (i == 0) {
5492 offset = start_offset;
5493 *map_start = 0;
5494 } else {
5495 offset = 0;
5496 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
5497 }
5498
5499 p = eb->pages[i];
5500 kaddr = page_address(p);
5501 *map = kaddr + offset;
5502 *map_len = PAGE_SIZE - offset;
5503 return 0;
5504}
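/*
 * Illustrative sketch of the usual calling convention for
 * map_private_extent_buffer(): when the requested range fits within a
 * single page the caller gets a direct pointer (adjusted by
 * "start - map_start"), otherwise it falls back to a copy via
 * read_extent_buffer().  The helper name is hypothetical.
 */
static inline void example_read_item_data(const struct extent_buffer *eb,
                                          unsigned long start, void *dst,
                                          unsigned long len)
{
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
        int ret;

        ret = map_private_extent_buffer(eb, start, len, &kaddr,
                                        &map_start, &map_len);
        if (ret < 0)
                return;                 /* range is outside the buffer */
        if (ret == 0)                   /* whole range within one page */
                memcpy(dst, kaddr + (start - map_start), len);
        else                            /* range straddles a page boundary */
                read_extent_buffer(eb, dst, start, len);
}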
5505
5506int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
5507 unsigned long start, unsigned long len)
5508{
5509 size_t cur;
5510 size_t offset;
5511 struct page *page;
5512 char *kaddr;
5513 char *ptr = (char *)ptrv;
5514 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5515 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5516 int ret = 0;
5517
5518 WARN_ON(start > eb->len);
5519 WARN_ON(start + len > eb->start + eb->len);
5520
5521 offset = (start_offset + start) & (PAGE_SIZE - 1);
5522
5523 while (len > 0) {
5524 page = eb->pages[i];
5525
5526 cur = min(len, (PAGE_SIZE - offset));
5527
5528 kaddr = page_address(page);
5529 ret = memcmp(ptr, kaddr + offset, cur);
5530 if (ret)
5531 break;
5532
5533 ptr += cur;
5534 len -= cur;
5535 offset = 0;
5536 i++;
5537 }
5538 return ret;
5539}
5540
5541void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
5542 const void *srcv)
5543{
5544 char *kaddr;
5545
5546 WARN_ON(!PageUptodate(eb->pages[0]));
5547 kaddr = page_address(eb->pages[0]);
5548 memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
5549 BTRFS_FSID_SIZE);
5550}
5551
5552void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
5553{
5554 char *kaddr;
5555
5556 WARN_ON(!PageUptodate(eb->pages[0]));
5557 kaddr = page_address(eb->pages[0]);
5558 memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
5559 BTRFS_FSID_SIZE);
5560}
5561
5562void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5563 unsigned long start, unsigned long len)
5564{
5565 size_t cur;
5566 size_t offset;
5567 struct page *page;
5568 char *kaddr;
5569 char *src = (char *)srcv;
5570 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5571 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5572
5573 WARN_ON(start > eb->len);
5574 WARN_ON(start + len > eb->start + eb->len);
5575
5576 offset = (start_offset + start) & (PAGE_SIZE - 1);
5577
5578 while (len > 0) {
5579 page = eb->pages[i];
5580 WARN_ON(!PageUptodate(page));
5581
5582 cur = min(len, PAGE_SIZE - offset);
5583 kaddr = page_address(page);
5584 memcpy(kaddr + offset, src, cur);
5585
5586 src += cur;
5587 len -= cur;
5588 offset = 0;
5589 i++;
5590 }
5591}
5592
5593void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
5594 unsigned long len)
5595{
5596 size_t cur;
5597 size_t offset;
5598 struct page *page;
5599 char *kaddr;
5600 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5601 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5602
5603 WARN_ON(start > eb->len);
5604 WARN_ON(start + len > eb->start + eb->len);
5605
5606 offset = (start_offset + start) & (PAGE_SIZE - 1);
5607
5608 while (len > 0) {
5609 page = eb->pages[i];
5610 WARN_ON(!PageUptodate(page));
5611
5612 cur = min(len, PAGE_SIZE - offset);
5613 kaddr = page_address(page);
5614 memset(kaddr + offset, 0, cur);
5615
5616 len -= cur;
5617 offset = 0;
5618 i++;
5619 }
5620}
5621
5622void copy_extent_buffer_full(struct extent_buffer *dst,
5623 struct extent_buffer *src)
5624{
5625 int i;
5626 unsigned num_pages;
5627
5628 ASSERT(dst->len == src->len);
5629
5630 num_pages = num_extent_pages(dst->start, dst->len);
5631 for (i = 0; i < num_pages; i++)
5632 copy_page(page_address(dst->pages[i]),
5633 page_address(src->pages[i]));
5634}
5635
5636void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5637 unsigned long dst_offset, unsigned long src_offset,
5638 unsigned long len)
5639{
5640 u64 dst_len = dst->len;
5641 size_t cur;
5642 size_t offset;
5643 struct page *page;
5644 char *kaddr;
5645 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5646 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
5647
5648 WARN_ON(src->len != dst_len);
5649
5650 offset = (start_offset + dst_offset) &
5651 (PAGE_SIZE - 1);
5652
5653 while (len > 0) {
5654 page = dst->pages[i];
5655 WARN_ON(!PageUptodate(page));
5656
5657 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
5658
5659 kaddr = page_address(page);
5660 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5661
5662 src_offset += cur;
5663 len -= cur;
5664 offset = 0;
5665 i++;
5666 }
5667}
5668
5669void le_bitmap_set(u8 *map, unsigned int start, int len)
5670{
5671 u8 *p = map + BIT_BYTE(start);
5672 const unsigned int size = start + len;
5673 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
5674 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
5675
5676 while (len - bits_to_set >= 0) {
5677 *p |= mask_to_set;
5678 len -= bits_to_set;
5679 bits_to_set = BITS_PER_BYTE;
5680 mask_to_set = ~0;
5681 p++;
5682 }
5683 if (len) {
5684 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5685 *p |= mask_to_set;
5686 }
5687}
5688
5689void le_bitmap_clear(u8 *map, unsigned int start, int len)
5690{
5691 u8 *p = map + BIT_BYTE(start);
5692 const unsigned int size = start + len;
5693 int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
5694 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
5695
5696 while (len - bits_to_clear >= 0) {
5697 *p &= ~mask_to_clear;
5698 len -= bits_to_clear;
5699 bits_to_clear = BITS_PER_BYTE;
5700 mask_to_clear = ~0;
5701 p++;
5702 }
5703 if (len) {
5704 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5705 *p &= ~mask_to_clear;
5706 }
5707}
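/*
 * Illustrative sketch (worked example): le_bitmap_set()/le_bitmap_clear()
 * operate on a little-endian, byte-granular bitmap.  Setting 4 bits
 * starting at bit 6 of a zeroed two-byte map touches both bytes: bits 6-7
 * of byte 0 and bits 0-1 of byte 1.  The buffer below exists purely for
 * demonstration.
 */
static inline void example_le_bitmap_usage(void)
{
        u8 map[2] = { 0, 0 };

        le_bitmap_set(map, 6, 4);       /* map is now { 0xc0, 0x03 } */
        le_bitmap_clear(map, 7, 2);     /* map is now { 0x40, 0x02 } */
}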
5708
5709/*
5710 * eb_bitmap_offset - calculate the page and offset of the byte
5711 * containing the given bit number
5712 * @eb: the extent buffer
5713 * @start: offset of the bitmap item in the extent buffer
5714 * @nr: bit number
5715 * @page_index: return index of the page in the extent buffer that
5716 * contains the given bit number
5717 * @page_offset: return offset into the page given by page_index
5718 *
5719 * This helper hides the ugliness of finding the byte in an extent
5720 * buffer which contains a given bit.
5721 */
5722static inline void eb_bitmap_offset(struct extent_buffer *eb,
5723 unsigned long start, unsigned long nr,
5724 unsigned long *page_index,
5725 size_t *page_offset)
5726{
5727 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5728 size_t byte_offset = BIT_BYTE(nr);
5729 size_t offset;
5730
5731 /*
5732  * The byte we want is the offset of the extent buffer + the
5733  * offset of the bitmap item in the extent buffer + the offset of
5734  * the byte in the bitmap item.
5735  */
5736 offset = start_offset + start + byte_offset;
5737
5738 *page_index = offset >> PAGE_SHIFT;
5739 *page_offset = offset & (PAGE_SIZE - 1);
5740}
5741
5742/**
5743 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
5744 * @eb: the extent buffer
5745 * @start: offset of the bitmap item in the extent buffer
5746 * @nr: bit number to test
5747 */
5748int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5749 unsigned long nr)
5750{
5751 u8 *kaddr;
5752 struct page *page;
5753 unsigned long i;
5754 size_t offset;
5755
5756 eb_bitmap_offset(eb, start, nr, &i, &offset);
5757 page = eb->pages[i];
5758 WARN_ON(!PageUptodate(page));
5759 kaddr = page_address(page);
5760 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5761}
5762
5763/**
5764 * extent_buffer_bitmap_set - set an area of a bitmap
5765 * @eb: the extent buffer
5766 * @start: offset of the bitmap item in the extent buffer
5767 * @pos: bit number of the first bit to set
5768 * @len: number of bits to set
5769 */
5770void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5771 unsigned long pos, unsigned long len)
5772{
5773 u8 *kaddr;
5774 struct page *page;
5775 unsigned long i;
5776 size_t offset;
5777 const unsigned int size = pos + len;
5778 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5779 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5780
5781 eb_bitmap_offset(eb, start, pos, &i, &offset);
5782 page = eb->pages[i];
5783 WARN_ON(!PageUptodate(page));
5784 kaddr = page_address(page);
5785
5786 while (len >= bits_to_set) {
5787 kaddr[offset] |= mask_to_set;
5788 len -= bits_to_set;
5789 bits_to_set = BITS_PER_BYTE;
5790 mask_to_set = ~0;
5791 if (++offset >= PAGE_SIZE && len > 0) {
5792 offset = 0;
5793 page = eb->pages[++i];
5794 WARN_ON(!PageUptodate(page));
5795 kaddr = page_address(page);
5796 }
5797 }
5798 if (len) {
5799 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5800 kaddr[offset] |= mask_to_set;
5801 }
5802}
5803
5804
5805/**
5806 * extent_buffer_bitmap_clear - clear an area of a bitmap
5807 * @eb: the extent buffer
5808 * @start: offset of the bitmap item in the extent buffer
5809 * @pos: bit number of the first bit to clear
5810 * @len: number of bits to clear
5811 */
5812void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5813 unsigned long pos, unsigned long len)
5814{
5815 u8 *kaddr;
5816 struct page *page;
5817 unsigned long i;
5818 size_t offset;
5819 const unsigned int size = pos + len;
5820 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5821 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5822
5823 eb_bitmap_offset(eb, start, pos, &i, &offset);
5824 page = eb->pages[i];
5825 WARN_ON(!PageUptodate(page));
5826 kaddr = page_address(page);
5827
5828 while (len >= bits_to_clear) {
5829 kaddr[offset] &= ~mask_to_clear;
5830 len -= bits_to_clear;
5831 bits_to_clear = BITS_PER_BYTE;
5832 mask_to_clear = ~0;
5833 if (++offset >= PAGE_SIZE && len > 0) {
5834 offset = 0;
5835 page = eb->pages[++i];
5836 WARN_ON(!PageUptodate(page));
5837 kaddr = page_address(page);
5838 }
5839 }
5840 if (len) {
5841 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5842 kaddr[offset] &= ~mask_to_clear;
5843 }
5844}
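/*
 * Illustrative sketch: the bitmap helpers above are used on bitmap items
 * stored inside an extent buffer, for example by the free space tree.
 * The bitmap offset and bit range below are hypothetical, and the pages
 * of the buffer are assumed to be uptodate.
 */
static inline void example_eb_bitmap_usage(struct extent_buffer *leaf,
                                           unsigned long bitmap_offset)
{
        /* mark bits 10..19 of the bitmap item as set */
        extent_buffer_bitmap_set(leaf, bitmap_offset, 10, 10);

        WARN_ON(!extent_buffer_test_bit(leaf, bitmap_offset, 15));

        /* ... and clear them again */
        extent_buffer_bitmap_clear(leaf, bitmap_offset, 10, 10);
}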
5845
5846static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5847{
5848 unsigned long distance = (src > dst) ? src - dst : dst - src;
5849 return distance < len;
5850}
5851
5852static void copy_pages(struct page *dst_page, struct page *src_page,
5853 unsigned long dst_off, unsigned long src_off,
5854 unsigned long len)
5855{
5856 char *dst_kaddr = page_address(dst_page);
5857 char *src_kaddr;
5858 int must_memmove = 0;
5859
5860 if (dst_page != src_page) {
5861 src_kaddr = page_address(src_page);
5862 } else {
5863 src_kaddr = dst_kaddr;
5864 if (areas_overlap(src_off, dst_off, len))
5865 must_memmove = 1;
5866 }
5867
5868 if (must_memmove)
5869 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5870 else
5871 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5872}
5873
5874void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5875 unsigned long src_offset, unsigned long len)
5876{
5877 struct btrfs_fs_info *fs_info = dst->fs_info;
5878 size_t cur;
5879 size_t dst_off_in_page;
5880 size_t src_off_in_page;
5881 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5882 unsigned long dst_i;
5883 unsigned long src_i;
5884
5885 if (src_offset + len > dst->len) {
5886 btrfs_err(fs_info,
5887 "memmove bogus src_offset %lu move len %lu dst len %lu",
5888 src_offset, len, dst->len);
5889 BUG_ON(1);
5890 }
5891 if (dst_offset + len > dst->len) {
5892 btrfs_err(fs_info,
5893 "memmove bogus dst_offset %lu move len %lu dst len %lu",
5894 dst_offset, len, dst->len);
5895 BUG_ON(1);
5896 }
5897
5898 while (len > 0) {
5899 dst_off_in_page = (start_offset + dst_offset) &
5900 (PAGE_SIZE - 1);
5901 src_off_in_page = (start_offset + src_offset) &
5902 (PAGE_SIZE - 1);
5903
5904 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
5905 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
5906
5907 cur = min(len, (unsigned long)(PAGE_SIZE -
5908 src_off_in_page));
5909 cur = min_t(unsigned long, cur,
5910 (unsigned long)(PAGE_SIZE - dst_off_in_page));
5911
5912 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5913 dst_off_in_page, src_off_in_page, cur);
5914
5915 src_offset += cur;
5916 dst_offset += cur;
5917 len -= cur;
5918 }
5919}
5920
5921void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5922 unsigned long src_offset, unsigned long len)
5923{
5924 struct btrfs_fs_info *fs_info = dst->fs_info;
5925 size_t cur;
5926 size_t dst_off_in_page;
5927 size_t src_off_in_page;
5928 unsigned long dst_end = dst_offset + len - 1;
5929 unsigned long src_end = src_offset + len - 1;
5930 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5931 unsigned long dst_i;
5932 unsigned long src_i;
5933
5934 if (src_offset + len > dst->len) {
5935 btrfs_err(fs_info,
5936 "memmove bogus src_offset %lu move len %lu len %lu",
5937 src_offset, len, dst->len);
5938 BUG_ON(1);
5939 }
5940 if (dst_offset + len > dst->len) {
5941 btrfs_err(fs_info,
5942 "memmove bogus dst_offset %lu move len %lu len %lu",
5943 dst_offset, len, dst->len);
5944 BUG_ON(1);
5945 }
5946 if (dst_offset < src_offset) {
5947 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5948 return;
5949 }
5950 while (len > 0) {
5951 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
5952 src_i = (start_offset + src_end) >> PAGE_SHIFT;
5953
5954 dst_off_in_page = (start_offset + dst_end) &
5955 (PAGE_SIZE - 1);
5956 src_off_in_page = (start_offset + src_end) &
5957 (PAGE_SIZE - 1);
5958
5959 cur = min_t(unsigned long, len, src_off_in_page + 1);
5960 cur = min(cur, dst_off_in_page + 1);
5961 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5962 dst_off_in_page - cur + 1,
5963 src_off_in_page - cur + 1, cur);
5964
5965 dst_end -= cur;
5966 src_end -= cur;
5967 len -= cur;
5968 }
5969}
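/*
 * Illustrative sketch: memmove_extent_buffer() handles overlapping source
 * and destination ranges by copying backwards when the destination sits
 * above the source, which is what the ctree code relies on when shifting
 * item data inside a leaf; memcpy_extent_buffer() only copies forwards.
 * The offsets below are hypothetical and assume the default 16K nodesize.
 */
static inline void example_shift_leaf_data(struct extent_buffer *leaf)
{
        /* move 512 bytes of item data 64 bytes towards the end of the leaf */
        memmove_extent_buffer(leaf, 4096 + 64, 4096, 512);
}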
5970
5971int try_release_extent_buffer(struct page *page)
5972{
5973 struct extent_buffer *eb;
5974
5975 /*
5976  * We need to make sure nobody is attaching this page to an eb
5977  * right now.
5978  */
5979 spin_lock(&page->mapping->private_lock);
5980 if (!PagePrivate(page)) {
5981 spin_unlock(&page->mapping->private_lock);
5982 return 1;
5983 }
5984
5985 eb = (struct extent_buffer *)page->private;
5986 BUG_ON(!eb);
5987
5988 /*
5989  * Take the refs lock to check that ours is the only reference and
5990  * that the eb isn't under IO.  release_extent_buffer() below
5991  * expects the refs lock to be held and will drop it for us.
5992  */
5993 spin_lock(&eb->refs_lock);
5994 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5995 spin_unlock(&eb->refs_lock);
5996 spin_unlock(&page->mapping->private_lock);
5997 return 0;
5998 }
5999 spin_unlock(&page->mapping->private_lock);
6000
6001 /*
6002  * If the tree ref isn't set then we know the ref on this eb is a
6003  * real ref, so just return, this page will likely be freed soon.
6004  */
6005 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
6006 spin_unlock(&eb->refs_lock);
6007 return 0;
6008 }
6009
6010 return release_extent_buffer(eb);
6011}
6012