1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/pagemap.h>
6#include <linux/page-flags.h>
7#include <linux/spinlock.h>
8#include <linux/blkdev.h>
9#include <linux/swap.h>
10#include <linux/writeback.h>
11#include <linux/pagevec.h>
12#include <linux/prefetch.h>
13#include <linux/cleancache.h>
14#include "extent_io.h"
15#include "extent_map.h"
16#include "ctree.h"
17#include "btrfs_inode.h"
18#include "volumes.h"
19#include "check-integrity.h"
20#include "locking.h"
21#include "rcu-string.h"
22#include "backref.h"
23
24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache;
26static struct bio_set *btrfs_bioset;
27
28static inline bool extent_state_in_tree(const struct extent_state *state)
29{
30 return !RB_EMPTY_NODE(&state->rb_node);
31}
32
33#ifdef CONFIG_BTRFS_DEBUG
34static LIST_HEAD(buffers);
35static LIST_HEAD(states);
36
37static DEFINE_SPINLOCK(leak_lock);
38
39static inline
40void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
41{
42 unsigned long flags;
43
44 spin_lock_irqsave(&leak_lock, flags);
45 list_add(new, head);
46 spin_unlock_irqrestore(&leak_lock, flags);
47}
48
49static inline
50void btrfs_leak_debug_del(struct list_head *entry)
51{
52 unsigned long flags;
53
54 spin_lock_irqsave(&leak_lock, flags);
55 list_del(entry);
56 spin_unlock_irqrestore(&leak_lock, flags);
57}
58
59static inline
60void btrfs_leak_debug_check(void)
61{
62 struct extent_state *state;
63 struct extent_buffer *eb;
64
65 while (!list_empty(&states)) {
66 state = list_entry(states.next, struct extent_state, leak_list);
67 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
68 state->start, state->end, state->state,
69 extent_state_in_tree(state),
70 atomic_read(&state->refs));
71 list_del(&state->leak_list);
72 kmem_cache_free(extent_state_cache, state);
73 }
74
75 while (!list_empty(&buffers)) {
76 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
77 printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
78 "refs %d\n",
79 eb->start, eb->len, atomic_read(&eb->refs));
80 list_del(&eb->leak_list);
81 kmem_cache_free(extent_buffer_cache, eb);
82 }
83}
84
85#define btrfs_debug_check_extent_io_range(tree, start, end) \
86 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
87static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 struct extent_io_tree *tree, u64 start, u64 end)
89{
90 struct inode *inode;
91 u64 isize;
92
93 if (!tree->mapping)
94 return;
95
96 inode = tree->mapping->host;
97 isize = i_size_read(inode);
98 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
99 btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
100 "%s: ino %llu isize %llu odd range [%llu,%llu]",
101 caller, btrfs_ino(inode), isize, start, end);
102 }
103}
104#else
105#define btrfs_leak_debug_add(new, head) do {} while (0)
106#define btrfs_leak_debug_del(entry) do {} while (0)
107#define btrfs_leak_debug_check() do {} while (0)
108#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
109#endif
110
111#define BUFFER_LRU_MAX 64
112
113struct tree_entry {
114 u64 start;
115 u64 end;
116 struct rb_node rb_node;
117};
118
119struct extent_page_data {
120 struct bio *bio;
121 struct extent_io_tree *tree;
122 get_extent_t *get_extent;
123 unsigned long bio_flags;
124
	/*
	 * Tell writepage not to lock the state bits for this range; it
	 * still does the unlocking.
	 */
128 unsigned int extent_locked:1;

	/* tell the submit_bio code to use WRITE_SYNC */
131 unsigned int sync_io:1;
132};
133
134static void add_extent_changeset(struct extent_state *state, unsigned bits,
135 struct extent_changeset *changeset,
136 int set)
137{
138 int ret;
139
140 if (!changeset)
141 return;
142 if (set && (state->state & bits) == bits)
143 return;
144 if (!set && (state->state & bits) == 0)
145 return;
146 changeset->bytes_changed += state->end - state->start + 1;
147 ret = ulist_add(changeset->range_changed, state->start, state->end,
148 GFP_ATOMIC);
	/* ulist_add() can only fail on allocation failure here */
150 BUG_ON(ret < 0);
151}
152
153static noinline void flush_write_bio(void *data);
154static inline struct btrfs_fs_info *
155tree_fs_info(struct extent_io_tree *tree)
156{
157 if (!tree->mapping)
158 return NULL;
159 return btrfs_sb(tree->mapping->host->i_sb);
160}
161
162int __init extent_io_init(void)
163{
164 extent_state_cache = kmem_cache_create("btrfs_extent_state",
165 sizeof(struct extent_state), 0,
166 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
167 if (!extent_state_cache)
168 return -ENOMEM;
169
170 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
171 sizeof(struct extent_buffer), 0,
172 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
173 if (!extent_buffer_cache)
174 goto free_state_cache;
175
176 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
177 offsetof(struct btrfs_io_bio, bio));
178 if (!btrfs_bioset)
179 goto free_buffer_cache;
180
181 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
182 goto free_bioset;
183
184 return 0;
185
186free_bioset:
187 bioset_free(btrfs_bioset);
188 btrfs_bioset = NULL;
189
190free_buffer_cache:
191 kmem_cache_destroy(extent_buffer_cache);
192 extent_buffer_cache = NULL;
193
194free_state_cache:
195 kmem_cache_destroy(extent_state_cache);
196 extent_state_cache = NULL;
197 return -ENOMEM;
198}
199
200void extent_io_exit(void)
201{
202 btrfs_leak_debug_check();
203
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
208 rcu_barrier();
209 kmem_cache_destroy(extent_state_cache);
210 kmem_cache_destroy(extent_buffer_cache);
211 if (btrfs_bioset)
212 bioset_free(btrfs_bioset);
213}
214
215void extent_io_tree_init(struct extent_io_tree *tree,
216 struct address_space *mapping)
217{
218 tree->state = RB_ROOT;
219 tree->ops = NULL;
220 tree->dirty_bytes = 0;
221 spin_lock_init(&tree->lock);
222 tree->mapping = mapping;
223}
224
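/*
 * Allocate and initialize a new extent_state.  The caller holds the only
 * reference; NULL is returned if the slab allocation fails.
 */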
225static struct extent_state *alloc_extent_state(gfp_t mask)
226{
227 struct extent_state *state;
228
229 state = kmem_cache_alloc(extent_state_cache, mask);
230 if (!state)
231 return state;
232 state->state = 0;
233 state->failrec = NULL;
234 RB_CLEAR_NODE(&state->rb_node);
235 btrfs_leak_debug_add(&state->leak_list, &states);
236 atomic_set(&state->refs, 1);
237 init_waitqueue_head(&state->wq);
238 trace_alloc_extent_state(state, mask, _RET_IP_);
239 return state;
240}
241
242void free_extent_state(struct extent_state *state)
243{
244 if (!state)
245 return;
246 if (atomic_dec_and_test(&state->refs)) {
247 WARN_ON(extent_state_in_tree(state));
248 btrfs_leak_debug_del(&state->leak_list);
249 trace_free_extent_state(state, _RET_IP_);
250 kmem_cache_free(extent_state_cache, state);
251 }
252}
253
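/*
 * Insert 'node' into the rb tree keyed by [start, end].  A precomputed
 * insertion point (p_in/parent_in) is used when supplied; otherwise the
 * search starts at 'search_start' or the root.  Returns the existing node
 * if 'offset' is already covered, NULL on successful insertion.
 */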
254static struct rb_node *tree_insert(struct rb_root *root,
255 struct rb_node *search_start,
256 u64 offset,
257 struct rb_node *node,
258 struct rb_node ***p_in,
259 struct rb_node **parent_in)
260{
261 struct rb_node **p;
262 struct rb_node *parent = NULL;
263 struct tree_entry *entry;
264
265 if (p_in && parent_in) {
266 p = *p_in;
267 parent = *parent_in;
268 goto do_insert;
269 }
270
271 p = search_start ? &search_start : &root->rb_node;
272 while (*p) {
273 parent = *p;
274 entry = rb_entry(parent, struct tree_entry, rb_node);
275
276 if (offset < entry->start)
277 p = &(*p)->rb_left;
278 else if (offset > entry->end)
279 p = &(*p)->rb_right;
280 else
281 return parent;
282 }
283
284do_insert:
285 rb_link_node(node, parent, p);
286 rb_insert_color(node, root);
287 return NULL;
288}
289
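/*
 * Search the tree for the extent_state containing 'offset'.  On an exact
 * hit the matching node is returned; otherwise NULL is returned and the
 * neighbouring entries and the insertion point (p_ret/parent_ret) are
 * optionally handed back for a later insert.
 */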
290static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
291 struct rb_node **prev_ret,
292 struct rb_node **next_ret,
293 struct rb_node ***p_ret,
294 struct rb_node **parent_ret)
295{
296 struct rb_root *root = &tree->state;
297 struct rb_node **n = &root->rb_node;
298 struct rb_node *prev = NULL;
299 struct rb_node *orig_prev = NULL;
300 struct tree_entry *entry;
301 struct tree_entry *prev_entry = NULL;
302
303 while (*n) {
304 prev = *n;
305 entry = rb_entry(prev, struct tree_entry, rb_node);
306 prev_entry = entry;
307
308 if (offset < entry->start)
309 n = &(*n)->rb_left;
310 else if (offset > entry->end)
311 n = &(*n)->rb_right;
312 else
313 return *n;
314 }
315
316 if (p_ret)
317 *p_ret = n;
318 if (parent_ret)
319 *parent_ret = prev;
320
321 if (prev_ret) {
322 orig_prev = prev;
323 while (prev && offset > prev_entry->end) {
324 prev = rb_next(prev);
325 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
326 }
327 *prev_ret = prev;
328 prev = orig_prev;
329 }
330
331 if (next_ret) {
332 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
333 while (prev && offset < prev_entry->start) {
334 prev = rb_prev(prev);
335 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
336 }
337 *next_ret = prev;
338 }
339 return NULL;
340}
341
342static inline struct rb_node *
343tree_search_for_insert(struct extent_io_tree *tree,
344 u64 offset,
345 struct rb_node ***p_ret,
346 struct rb_node **parent_ret)
347{
348 struct rb_node *prev = NULL;
349 struct rb_node *ret;
350
351 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
352 if (!ret)
353 return prev;
354 return ret;
355}
356
357static inline struct rb_node *tree_search(struct extent_io_tree *tree,
358 u64 offset)
359{
360 return tree_search_for_insert(tree, offset, NULL, NULL);
361}
362
363static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
364 struct extent_state *other)
365{
366 if (tree->ops && tree->ops->merge_extent_hook)
367 tree->ops->merge_extent_hook(tree->mapping->host, new,
368 other);
369}
370
/*
 * Utility function to look for merge candidates inside a given range.  Any
 * extents with matching state are merged together into a single extent in
 * the tree.  Extents with EXTENT_IOBITS or EXTENT_BOUNDARY in their state
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or do deadlocks).
 *
 * This should be called with the tree lock held.
 */
380static void merge_state(struct extent_io_tree *tree,
381 struct extent_state *state)
382{
383 struct extent_state *other;
384 struct rb_node *other_node;
385
386 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
387 return;
388
389 other_node = rb_prev(&state->rb_node);
390 if (other_node) {
391 other = rb_entry(other_node, struct extent_state, rb_node);
392 if (other->end == state->start - 1 &&
393 other->state == state->state) {
394 merge_cb(tree, state, other);
395 state->start = other->start;
396 rb_erase(&other->rb_node, &tree->state);
397 RB_CLEAR_NODE(&other->rb_node);
398 free_extent_state(other);
399 }
400 }
401 other_node = rb_next(&state->rb_node);
402 if (other_node) {
403 other = rb_entry(other_node, struct extent_state, rb_node);
404 if (other->start == state->end + 1 &&
405 other->state == state->state) {
406 merge_cb(tree, state, other);
407 state->end = other->end;
408 rb_erase(&other->rb_node, &tree->state);
409 RB_CLEAR_NODE(&other->rb_node);
410 free_extent_state(other);
411 }
412 }
413}
414
415static void set_state_cb(struct extent_io_tree *tree,
416 struct extent_state *state, unsigned *bits)
417{
418 if (tree->ops && tree->ops->set_bit_hook)
419 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
420}
421
422static void clear_state_cb(struct extent_io_tree *tree,
423 struct extent_state *state, unsigned *bits)
424{
425 if (tree->ops && tree->ops->clear_bit_hook)
426 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
427}
428
429static void set_state_bits(struct extent_io_tree *tree,
430 struct extent_state *state, unsigned *bits,
431 struct extent_changeset *changeset);
432
/*
 * Insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
443static int insert_state(struct extent_io_tree *tree,
444 struct extent_state *state, u64 start, u64 end,
445 struct rb_node ***p,
446 struct rb_node **parent,
447 unsigned *bits, struct extent_changeset *changeset)
448{
449 struct rb_node *node;
450
451 if (end < start)
452 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
453 end, start);
454 state->start = start;
455 state->end = end;
456
457 set_state_bits(tree, state, bits, changeset);
458
459 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
460 if (node) {
461 struct extent_state *found;
462 found = rb_entry(node, struct extent_state, rb_node);
463 printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
464 "%llu %llu\n",
465 found->start, found->end, start, end);
466 return -EEXIST;
467 }
468 merge_state(tree, state);
469 return 0;
470}
471
472static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
473 u64 split)
474{
475 if (tree->ops && tree->ops->split_extent_hook)
476 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
477}
478
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].  After
 * calling, there are two extent state structs in the tree:
 *	prealloc: [orig->start, split - 1]
 *	orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held by
 * the caller.
 */
493static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
494 struct extent_state *prealloc, u64 split)
495{
496 struct rb_node *node;
497
498 split_cb(tree, orig, split);
499
500 prealloc->start = orig->start;
501 prealloc->end = split - 1;
502 prealloc->state = orig->state;
503 orig->start = split;
504
505 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
506 &prealloc->rb_node, NULL, NULL);
507 if (node) {
508 free_extent_state(prealloc);
509 return -EEXIST;
510 }
511 return 0;
512}
513
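/* Return the in-tree successor of 'state', or NULL if it is the last one. */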
514static struct extent_state *next_state(struct extent_state *state)
515{
516 struct rb_node *next = rb_next(&state->rb_node);
517 if (next)
518 return rb_entry(next, struct extent_state, rb_node);
519 else
520 return NULL;
521}
522
/*
 * Utility function to clear some bits in an extent state struct.  It will
 * optionally wake up any one waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the struct
 * is freed and removed from the tree.
 */
530static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
531 struct extent_state *state,
532 unsigned *bits, int wake,
533 struct extent_changeset *changeset)
534{
535 struct extent_state *next;
536 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
537
538 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
539 u64 range = state->end - state->start + 1;
540 WARN_ON(range > tree->dirty_bytes);
541 tree->dirty_bytes -= range;
542 }
543 clear_state_cb(tree, state, bits);
544 add_extent_changeset(state, bits_to_clear, changeset, 0);
545 state->state &= ~bits_to_clear;
546 if (wake)
547 wake_up(&state->wq);
548 if (state->state == 0) {
549 next = next_state(state);
550 if (extent_state_in_tree(state)) {
551 rb_erase(&state->rb_node, &tree->state);
552 RB_CLEAR_NODE(&state->rb_node);
553 free_extent_state(state);
554 } else {
555 WARN_ON(1);
556 }
557 } else {
558 merge_state(tree, state);
559 next = next_state(state);
560 }
561 return next;
562}
563
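/*
 * Make sure we have a preallocated extent_state, falling back to a
 * GFP_ATOMIC allocation if the caller didn't preallocate one.
 */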
564static struct extent_state *
565alloc_extent_state_atomic(struct extent_state *prealloc)
566{
567 if (!prealloc)
568 prealloc = alloc_extent_state(GFP_ATOMIC);
569
570 return prealloc;
571}
572
573static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
574{
575 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
576 "Extent tree was modified by another "
577 "thread while locked.");
578}
579
/*
 * Clear some bits on a range in the tree.  This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate which
 * allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
 * given range from the tree regardless of state (ie for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock and returns 0 on success.
 */
592static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
593 unsigned bits, int wake, int delete,
594 struct extent_state **cached_state,
595 gfp_t mask, struct extent_changeset *changeset)
596{
597 struct extent_state *state;
598 struct extent_state *cached;
599 struct extent_state *prealloc = NULL;
600 struct rb_node *node;
601 u64 last_end;
602 int err;
603 int clear = 0;
604
605 btrfs_debug_check_extent_io_range(tree, start, end);
606
607 if (bits & EXTENT_DELALLOC)
608 bits |= EXTENT_NORESERVE;
609
610 if (delete)
611 bits |= ~EXTENT_CTLBITS;
612 bits |= EXTENT_FIRST_DELALLOC;
613
614 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
615 clear = 1;
616again:
617 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care about allocation failure here: we might end up
		 * not needing the preallocated extent state at all, which is
		 * the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.  If
		 * we end up needing a new extent state we allocate it later.
		 */
625 prealloc = alloc_extent_state(mask);
626 }
627
628 spin_lock(&tree->lock);
629 if (cached_state) {
630 cached = *cached_state;
631
632 if (clear) {
633 *cached_state = NULL;
634 cached_state = NULL;
635 }
636
637 if (cached && extent_state_in_tree(cached) &&
638 cached->start <= start && cached->end > start) {
639 if (clear)
640 atomic_dec(&cached->refs);
641 state = cached;
642 goto hit_next;
643 }
644 if (clear)
645 free_extent_state(cached);
646 }
647
	/* this search will find the extents that end after our range starts */
651 node = tree_search(tree, start);
652 if (!node)
653 goto out;
654 state = rb_entry(node, struct extent_state, rb_node);
655hit_next:
656 if (state->start > end)
657 goto out;
658 WARN_ON(state->end < start);
659 last_end = state->end;
660
	/* the state doesn't have the wanted bits, go ahead */
662 if (!(state->state & bits)) {
663 state = next_state(state);
664 goto next;
665 }
666
	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired
	 * bit on it.
	 */
683 if (state->start < start) {
684 prealloc = alloc_extent_state_atomic(prealloc);
685 BUG_ON(!prealloc);
686 err = split_state(tree, state, prealloc, start);
687 if (err)
688 extent_io_tree_panic(tree, err);
689
690 prealloc = NULL;
691 if (err)
692 goto out;
693 if (state->end <= end) {
694 state = clear_state_bit(tree, state, &bits, wake,
695 changeset);
696 goto next;
697 }
698 goto search_again;
699 }
700
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit on the first half.
	 */
706 if (state->start <= end && state->end > end) {
707 prealloc = alloc_extent_state_atomic(prealloc);
708 BUG_ON(!prealloc);
709 err = split_state(tree, state, prealloc, end + 1);
710 if (err)
711 extent_io_tree_panic(tree, err);
712
713 if (wake)
714 wake_up(&state->wq);
715
716 clear_state_bit(tree, prealloc, &bits, wake, changeset);
717
718 prealloc = NULL;
719 goto out;
720 }
721
722 state = clear_state_bit(tree, state, &bits, wake, changeset);
723next:
724 if (last_end == (u64)-1)
725 goto out;
726 start = last_end + 1;
727 if (start <= end && state && !need_resched())
728 goto hit_next;
729
730search_again:
731 if (start > end)
732 goto out;
733 spin_unlock(&tree->lock);
734 if (gfpflags_allow_blocking(mask))
735 cond_resched();
736 goto again;
737
738out:
739 spin_unlock(&tree->lock);
740 if (prealloc)
741 free_extent_state(prealloc);
742
743 return 0;
744
745}
746
747static void wait_on_state(struct extent_io_tree *tree,
748 struct extent_state *state)
749 __releases(tree->lock)
750 __acquires(tree->lock)
751{
752 DEFINE_WAIT(wait);
753 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
754 spin_unlock(&tree->lock);
755 schedule();
756 spin_lock(&tree->lock);
757 finish_wait(&state->wq, &wait);
758}
759
/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
765static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
766 unsigned long bits)
767{
768 struct extent_state *state;
769 struct rb_node *node;
770
771 btrfs_debug_check_extent_io_range(tree, start, end);
772
773 spin_lock(&tree->lock);
774again:
775 while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
780 node = tree_search(tree, start);
781process_node:
782 if (!node)
783 break;
784
785 state = rb_entry(node, struct extent_state, rb_node);
786
787 if (state->start > end)
788 goto out;
789
790 if (state->state & bits) {
791 start = state->start;
792 atomic_inc(&state->refs);
793 wait_on_state(tree, state);
794 free_extent_state(state);
795 goto again;
796 }
797 start = state->end + 1;
798
799 if (start > end)
800 break;
801
802 if (!cond_resched_lock(&tree->lock)) {
803 node = rb_next(node);
804 goto process_node;
805 }
806 }
807out:
808 spin_unlock(&tree->lock);
809}
810
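/*
 * Set the given bits (minus EXTENT_CTLBITS) on 'state', updating the
 * dirty_bytes accounting and recording the range in 'changeset' when one
 * is supplied.
 */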
811static void set_state_bits(struct extent_io_tree *tree,
812 struct extent_state *state,
813 unsigned *bits, struct extent_changeset *changeset)
814{
815 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
816
817 set_state_cb(tree, state, bits);
818 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
819 u64 range = state->end - state->start + 1;
820 tree->dirty_bytes += range;
821 }
822 add_extent_changeset(state, bits_to_set, changeset, 1);
823 state->state |= bits_to_set;
824}
825
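/*
 * Cache a pointer to 'state' in *cached_ptr (taking a reference) if nothing
 * is cached yet and the state carries one of the requested flags.
 */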
826static void cache_state_if_flags(struct extent_state *state,
827 struct extent_state **cached_ptr,
828 unsigned flags)
829{
830 if (cached_ptr && !(*cached_ptr)) {
831 if (!flags || (state->state & flags)) {
832 *cached_ptr = state;
833 atomic_inc(&state->refs);
834 }
835 }
836}
837
838static void cache_state(struct extent_state *state,
839 struct extent_state **cached_ptr)
840{
841 return cache_state_if_flags(state, cached_ptr,
842 EXTENT_IOBITS | EXTENT_BOUNDARY);
843}
844
/*
 * Set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set.  The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */
856static int __must_check
857__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
858 unsigned bits, unsigned exclusive_bits,
859 u64 *failed_start, struct extent_state **cached_state,
860 gfp_t mask, struct extent_changeset *changeset)
861{
862 struct extent_state *state;
863 struct extent_state *prealloc = NULL;
864 struct rb_node *node;
865 struct rb_node **p;
866 struct rb_node *parent;
867 int err = 0;
868 u64 last_start;
869 u64 last_end;
870
871 btrfs_debug_check_extent_io_range(tree, start, end);
872
873 bits |= EXTENT_FIRST_DELALLOC;
874again:
875 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care about allocation failure here: we might end up
		 * not needing the preallocated extent state at all, which is
		 * the case if we only have in the tree extent states that
		 * cover our input range and don't cover any other range.  If
		 * we end up needing a new extent state we allocate it later.
		 */
883 prealloc = alloc_extent_state(mask);
884 }
885
886 spin_lock(&tree->lock);
887 if (cached_state && *cached_state) {
888 state = *cached_state;
889 if (state->start <= start && state->end > start &&
890 extent_state_in_tree(state)) {
891 node = &state->rb_node;
892 goto hit_next;
893 }
894 }
895
	/*
	 * this search will find all the extents that end after
	 * our range starts
	 */
899 node = tree_search_for_insert(tree, start, &p, &parent);
900 if (!node) {
901 prealloc = alloc_extent_state_atomic(prealloc);
902 BUG_ON(!prealloc);
903 err = insert_state(tree, prealloc, start, end,
904 &p, &parent, &bits, changeset);
905 if (err)
906 extent_io_tree_panic(tree, err);
907
908 cache_state(prealloc, cached_state);
909 prealloc = NULL;
910 goto out;
911 }
912 state = rb_entry(node, struct extent_state, rb_node);
913hit_next:
914 last_start = state->start;
915 last_end = state->end;
916
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
923 if (state->start == start && state->end <= end) {
924 if (state->state & exclusive_bits) {
925 *failed_start = state->start;
926 err = -EEXIST;
927 goto out;
928 }
929
930 set_state_bits(tree, state, &bits, changeset);
931 cache_state(state, cached_state);
932 merge_state(tree, state);
933 if (last_end == (u64)-1)
934 goto out;
935 start = last_end + 1;
936 state = next_state(state);
937 if (start < end && state && state->start == start &&
938 !need_resched())
939 goto hit_next;
940 goto search_again;
941 }
942
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired
	 * bit on it.
	 */
959 if (state->start < start) {
960 if (state->state & exclusive_bits) {
961 *failed_start = start;
962 err = -EEXIST;
963 goto out;
964 }
965
966 prealloc = alloc_extent_state_atomic(prealloc);
967 BUG_ON(!prealloc);
968 err = split_state(tree, state, prealloc, start);
969 if (err)
970 extent_io_tree_panic(tree, err);
971
972 prealloc = NULL;
973 if (err)
974 goto out;
975 if (state->end <= end) {
976 set_state_bits(tree, state, &bits, changeset);
977 cache_state(state, cached_state);
978 merge_state(tree, state);
979 if (last_end == (u64)-1)
980 goto out;
981 start = last_end + 1;
982 state = next_state(state);
983 if (start < end && state && state->start == start &&
984 !need_resched())
985 goto hit_next;
986 }
987 goto search_again;
988 }
989
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
996 if (state->start > start) {
997 u64 this_end;
998 if (end < last_start)
999 this_end = end;
1000 else
1001 this_end = last_start - 1;
1002
1003 prealloc = alloc_extent_state_atomic(prealloc);
1004 BUG_ON(!prealloc);
1005
		/*
		 * Avoid freeing 'prealloc' if it can be merged with the
		 * later extent.
		 */
1010 err = insert_state(tree, prealloc, start, this_end,
1011 NULL, NULL, &bits, changeset);
1012 if (err)
1013 extent_io_tree_panic(tree, err);
1014
1015 cache_state(prealloc, cached_state);
1016 prealloc = NULL;
1017 start = this_end + 1;
1018 goto search_again;
1019 }
1020
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
1026 if (state->start <= end && state->end > end) {
1027 if (state->state & exclusive_bits) {
1028 *failed_start = start;
1029 err = -EEXIST;
1030 goto out;
1031 }
1032
1033 prealloc = alloc_extent_state_atomic(prealloc);
1034 BUG_ON(!prealloc);
1035 err = split_state(tree, state, prealloc, end + 1);
1036 if (err)
1037 extent_io_tree_panic(tree, err);
1038
1039 set_state_bits(tree, prealloc, &bits, changeset);
1040 cache_state(prealloc, cached_state);
1041 merge_state(tree, prealloc);
1042 prealloc = NULL;
1043 goto out;
1044 }
1045
1046search_again:
1047 if (start > end)
1048 goto out;
1049 spin_unlock(&tree->lock);
1050 if (gfpflags_allow_blocking(mask))
1051 cond_resched();
1052 goto again;
1053
1054out:
1055 spin_unlock(&tree->lock);
1056 if (prealloc)
1057 free_extent_state(prealloc);
1058
1059 return err;
1060
1061}
1062
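/* Non-exclusive wrapper around __set_extent_bit() without a changeset. */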
1063int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   unsigned bits, u64 *failed_start,
1065 struct extent_state **cached_state, gfp_t mask)
1066{
1067 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1068 cached_state, mask, NULL);
1069}
1070
/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 *			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range.  If any states
 * exist already in this range they are set with the given bit and cleared
 * of the clear_bits.  This is only meant to be used by things that are
 * mergeable, ie. converting from say DELALLOC to DIRTY.  This is not meant
 * to be used with clearing bits twice; once they're set they're left there
 * until the converted bit is cleared or the range is explicitly converted.
 */
1090int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1091 unsigned bits, unsigned clear_bits,
1092 struct extent_state **cached_state)
1093{
1094 struct extent_state *state;
1095 struct extent_state *prealloc = NULL;
1096 struct rb_node *node;
1097 struct rb_node **p;
1098 struct rb_node *parent;
1099 int err = 0;
1100 u64 last_start;
1101 u64 last_end;
1102 bool first_iteration = true;
1103
1104 btrfs_debug_check_extent_io_range(tree, start, end);
1105
1106again:
1107 if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration.  We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed.  We'll only know this
		 * after locking the tree.
		 */
1115 prealloc = alloc_extent_state(GFP_NOFS);
1116 if (!prealloc && !first_iteration)
1117 return -ENOMEM;
1118 }
1119
1120 spin_lock(&tree->lock);
1121 if (cached_state && *cached_state) {
1122 state = *cached_state;
1123 if (state->start <= start && state->end > start &&
1124 extent_state_in_tree(state)) {
1125 node = &state->rb_node;
1126 goto hit_next;
1127 }
1128 }
1129
1130
1131
1132
1133
1134 node = tree_search_for_insert(tree, start, &p, &parent);
1135 if (!node) {
1136 prealloc = alloc_extent_state_atomic(prealloc);
1137 if (!prealloc) {
1138 err = -ENOMEM;
1139 goto out;
1140 }
1141 err = insert_state(tree, prealloc, start, end,
1142 &p, &parent, &bits, NULL);
1143 if (err)
1144 extent_io_tree_panic(tree, err);
1145 cache_state(prealloc, cached_state);
1146 prealloc = NULL;
1147 goto out;
1148 }
1149 state = rb_entry(node, struct extent_state, rb_node);
1150hit_next:
1151 last_start = state->start;
1152 last_end = state->end;
1153
1154
1155
1156
1157
1158
1159
1160 if (state->start == start && state->end <= end) {
1161 set_state_bits(tree, state, &bits, NULL);
1162 cache_state(state, cached_state);
1163 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1164 if (last_end == (u64)-1)
1165 goto out;
1166 start = last_end + 1;
1167 if (start < end && state && state->start == start &&
1168 !need_resched())
1169 goto hit_next;
1170 goto search_again;
1171 }
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189 if (state->start < start) {
1190 prealloc = alloc_extent_state_atomic(prealloc);
1191 if (!prealloc) {
1192 err = -ENOMEM;
1193 goto out;
1194 }
1195 err = split_state(tree, state, prealloc, start);
1196 if (err)
1197 extent_io_tree_panic(tree, err);
1198 prealloc = NULL;
1199 if (err)
1200 goto out;
1201 if (state->end <= end) {
1202 set_state_bits(tree, state, &bits, NULL);
1203 cache_state(state, cached_state);
1204 state = clear_state_bit(tree, state, &clear_bits, 0,
1205 NULL);
1206 if (last_end == (u64)-1)
1207 goto out;
1208 start = last_end + 1;
1209 if (start < end && state && state->start == start &&
1210 !need_resched())
1211 goto hit_next;
1212 }
1213 goto search_again;
1214 }
1215
1216
1217
1218
1219
1220
1221
1222 if (state->start > start) {
1223 u64 this_end;
1224 if (end < last_start)
1225 this_end = end;
1226 else
1227 this_end = last_start - 1;
1228
1229 prealloc = alloc_extent_state_atomic(prealloc);
1230 if (!prealloc) {
1231 err = -ENOMEM;
1232 goto out;
1233 }
1234
1235
1236
1237
1238
1239 err = insert_state(tree, prealloc, start, this_end,
1240 NULL, NULL, &bits, NULL);
1241 if (err)
1242 extent_io_tree_panic(tree, err);
1243 cache_state(prealloc, cached_state);
1244 prealloc = NULL;
1245 start = this_end + 1;
1246 goto search_again;
1247 }
1248
1249
1250
1251
1252
1253
1254 if (state->start <= end && state->end > end) {
1255 prealloc = alloc_extent_state_atomic(prealloc);
1256 if (!prealloc) {
1257 err = -ENOMEM;
1258 goto out;
1259 }
1260
1261 err = split_state(tree, state, prealloc, end + 1);
1262 if (err)
1263 extent_io_tree_panic(tree, err);
1264
1265 set_state_bits(tree, prealloc, &bits, NULL);
1266 cache_state(prealloc, cached_state);
1267 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1268 prealloc = NULL;
1269 goto out;
1270 }
1271
1272search_again:
1273 if (start > end)
1274 goto out;
1275 spin_unlock(&tree->lock);
1276 cond_resched();
1277 first_iteration = false;
1278 goto again;
1279
1280out:
1281 spin_unlock(&tree->lock);
1282 if (prealloc)
1283 free_extent_state(prealloc);
1284
1285 return err;
1286}
1287
/* wrappers around set/clear extent bit */
1289int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1290 unsigned bits, struct extent_changeset *changeset)
1291{
	/*
	 * We don't support EXTENT_LOCKED yet, as the current changeset will
	 * record any bits changed, so for the EXTENT_LOCKED case it would
	 * either fail with -EEXIST or the changeset would record the whole
	 * range.
	 */
1298 BUG_ON(bits & EXTENT_LOCKED);
1299
1300 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1301 changeset);
1302}
1303
1304int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1305 unsigned bits, int wake, int delete,
1306 struct extent_state **cached, gfp_t mask)
1307{
1308 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1309 cached, mask, NULL);
1310}
1311
1312int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1313 unsigned bits, struct extent_changeset *changeset)
1314{
	/*
	 * Don't support the EXTENT_LOCKED case, for the same reason as
	 * set_record_extent_bits().
	 */
1319 BUG_ON(bits & EXTENT_LOCKED);
1320
1321 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1322 changeset);
1323}
1324
/*
 * Either insert or lock the state struct between start and end, waiting for
 * the EXTENT_LOCKED bit to clear if another task already holds the range.
 */
1329int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1330 struct extent_state **cached_state)
1331{
1332 int err;
1333 u64 failed_start;
1334
1335 while (1) {
1336 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1337 EXTENT_LOCKED, &failed_start,
1338 cached_state, GFP_NOFS, NULL);
1339 if (err == -EEXIST) {
1340 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1341 start = failed_start;
1342 } else
1343 break;
1344 WARN_ON(start > end);
1345 }
1346 return err;
1347}
1348
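/*
 * Try to take EXTENT_LOCKED on [start, end] without waiting.  Returns 1 on
 * success and 0 if part of the range was already locked; anything we did
 * manage to lock is cleared again before returning.
 */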
1349int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1350{
1351 int err;
1352 u64 failed_start;
1353
1354 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1355 &failed_start, NULL, GFP_NOFS, NULL);
1356 if (err == -EEXIST) {
1357 if (failed_start > start)
1358 clear_extent_bit(tree, start, failed_start - 1,
1359 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1360 return 0;
1361 }
1362 return 1;
1363}
1364
1365void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1366{
1367 unsigned long index = start >> PAGE_SHIFT;
1368 unsigned long end_index = end >> PAGE_SHIFT;
1369 struct page *page;
1370
1371 while (index <= end_index) {
1372 page = find_get_page(inode->i_mapping, index);
1373 BUG_ON(!page);
1374 clear_page_dirty_for_io(page);
1375 put_page(page);
1376 index++;
1377 }
1378}
1379
1380void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1381{
1382 unsigned long index = start >> PAGE_SHIFT;
1383 unsigned long end_index = end >> PAGE_SHIFT;
1384 struct page *page;
1385
1386 while (index <= end_index) {
1387 page = find_get_page(inode->i_mapping, index);
1388 BUG_ON(!page);
1389 __set_page_dirty_nobuffers(page);
1390 account_page_redirty(page);
1391 put_page(page);
1392 index++;
1393 }
1394}
1395
/*
 * Helper function to mark every page in the given range as under writeback.
 */
1399static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1400{
1401 unsigned long index = start >> PAGE_SHIFT;
1402 unsigned long end_index = end >> PAGE_SHIFT;
1403 struct page *page;
1404
1405 while (index <= end_index) {
1406 page = find_get_page(tree->mapping, index);
1407 BUG_ON(!page);
1408 set_page_writeback(page);
1409 put_page(page);
1410 index++;
1411 }
1412}
1413
/*
 * Find the first state struct with 'bits' set after 'start' and return it.
 * tree->lock must be held.  NULL is returned if nothing was found after
 * 'start'.
 */
1418static struct extent_state *
1419find_first_extent_bit_state(struct extent_io_tree *tree,
1420 u64 start, unsigned bits)
1421{
1422 struct rb_node *node;
1423 struct extent_state *state;
1424
1425
1426
1427
1428
1429 node = tree_search(tree, start);
1430 if (!node)
1431 goto out;
1432
1433 while (1) {
1434 state = rb_entry(node, struct extent_state, rb_node);
1435 if (state->end >= start && (state->state & bits))
1436 return state;
1437
1438 node = rb_next(node);
1439 if (!node)
1440 break;
1441 }
1442out:
1443 return NULL;
1444}
1445
/*
 * Find the first offset in the io tree with 'bits' set.  Zero is returned
 * if we find something, and *start_ret and *end_ret are set to reflect the
 * state struct that was found.
 *
 * If nothing was found, 1 is returned.  If found something, return 0.
 */
1453int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1454 u64 *start_ret, u64 *end_ret, unsigned bits,
1455 struct extent_state **cached_state)
1456{
1457 struct extent_state *state;
1458 struct rb_node *n;
1459 int ret = 1;
1460
1461 spin_lock(&tree->lock);
1462 if (cached_state && *cached_state) {
1463 state = *cached_state;
1464 if (state->end == start - 1 && extent_state_in_tree(state)) {
1465 n = rb_next(&state->rb_node);
1466 while (n) {
1467 state = rb_entry(n, struct extent_state,
1468 rb_node);
1469 if (state->state & bits)
1470 goto got_it;
1471 n = rb_next(n);
1472 }
1473 free_extent_state(*cached_state);
1474 *cached_state = NULL;
1475 goto out;
1476 }
1477 free_extent_state(*cached_state);
1478 *cached_state = NULL;
1479 }
1480
1481 state = find_first_extent_bit_state(tree, start, bits);
1482got_it:
1483 if (state) {
1484 cache_state_if_flags(state, cached_state, 0);
1485 *start_ret = state->start;
1486 *end_ret = state->end;
1487 ret = 0;
1488 }
1489out:
1490 spin_unlock(&tree->lock);
1491 return ret;
1492}
1493
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'.  start and end are used to return the range.
 *
 * Non-zero is returned if we find something, 0 if nothing was in the tree.
 */
1500static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1501 u64 *start, u64 *end, u64 max_bytes,
1502 struct extent_state **cached_state)
1503{
1504 struct rb_node *node;
1505 struct extent_state *state;
1506 u64 cur_start = *start;
1507 u64 found = 0;
1508 u64 total_bytes = 0;
1509
1510 spin_lock(&tree->lock);
1511
1512
1513
1514
1515
1516 node = tree_search(tree, cur_start);
1517 if (!node) {
1518 if (!found)
1519 *end = (u64)-1;
1520 goto out;
1521 }
1522
1523 while (1) {
1524 state = rb_entry(node, struct extent_state, rb_node);
1525 if (found && (state->start != cur_start ||
1526 (state->state & EXTENT_BOUNDARY))) {
1527 goto out;
1528 }
1529 if (!(state->state & EXTENT_DELALLOC)) {
1530 if (!found)
1531 *end = state->end;
1532 goto out;
1533 }
1534 if (!found) {
1535 *start = state->start;
1536 *cached_state = state;
1537 atomic_inc(&state->refs);
1538 }
1539 found++;
1540 *end = state->end;
1541 cur_start = state->end + 1;
1542 node = rb_next(node);
1543 total_bytes += state->end - state->start + 1;
1544 if (total_bytes >= max_bytes)
1545 break;
1546 if (!node)
1547 break;
1548 }
1549out:
1550 spin_unlock(&tree->lock);
1551 return found;
1552}
1553
1554static noinline void __unlock_for_delalloc(struct inode *inode,
1555 struct page *locked_page,
1556 u64 start, u64 end)
1557{
1558 int ret;
1559 struct page *pages[16];
1560 unsigned long index = start >> PAGE_SHIFT;
1561 unsigned long end_index = end >> PAGE_SHIFT;
1562 unsigned long nr_pages = end_index - index + 1;
1563 int i;
1564
1565 if (index == locked_page->index && end_index == index)
1566 return;
1567
1568 while (nr_pages > 0) {
1569 ret = find_get_pages_contig(inode->i_mapping, index,
1570 min_t(unsigned long, nr_pages,
1571 ARRAY_SIZE(pages)), pages);
1572 for (i = 0; i < ret; i++) {
1573 if (pages[i] != locked_page)
1574 unlock_page(pages[i]);
1575 put_page(pages[i]);
1576 }
1577 nr_pages -= ret;
1578 index += ret;
1579 cond_resched();
1580 }
1581}
1582
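/*
 * Lock every page in [delalloc_start, delalloc_end] except locked_page,
 * which the caller already holds.  Returns -EAGAIN (after unlocking what
 * was taken) if a page went away or was cleaned underneath us.
 */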
1583static noinline int lock_delalloc_pages(struct inode *inode,
1584 struct page *locked_page,
1585 u64 delalloc_start,
1586 u64 delalloc_end)
1587{
1588 unsigned long index = delalloc_start >> PAGE_SHIFT;
1589 unsigned long start_index = index;
1590 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1591 unsigned long pages_locked = 0;
1592 struct page *pages[16];
1593 unsigned long nrpages;
1594 int ret;
1595 int i;
1596
1597
1598 if (index == locked_page->index && index == end_index)
1599 return 0;
1600
1601
1602 nrpages = end_index - index + 1;
1603 while (nrpages > 0) {
1604 ret = find_get_pages_contig(inode->i_mapping, index,
1605 min_t(unsigned long,
1606 nrpages, ARRAY_SIZE(pages)), pages);
1607 if (ret == 0) {
1608 ret = -EAGAIN;
1609 goto done;
1610 }
1611
1612 for (i = 0; i < ret; i++) {
1613
1614
1615
1616
1617 if (pages[i] != locked_page) {
1618 lock_page(pages[i]);
1619 if (!PageDirty(pages[i]) ||
1620 pages[i]->mapping != inode->i_mapping) {
1621 ret = -EAGAIN;
1622 unlock_page(pages[i]);
1623 put_page(pages[i]);
1624 goto done;
1625 }
1626 }
1627 put_page(pages[i]);
1628 pages_locked++;
1629 }
1630 nrpages -= ret;
1631 index += ret;
1632 cond_resched();
1633 }
1634 ret = 0;
1635done:
1636 if (ret && pages_locked) {
1637 __unlock_for_delalloc(inode, locked_page,
1638 delalloc_start,
1639 ((u64)(start_index + pages_locked - 1)) <<
1640 PAGE_SHIFT);
1641 }
1642 return ret;
1643}
1644
/*
 * Find and lock a contiguous range of bytes in the file marked as delalloc,
 * not more than 'max_bytes'.  start and end are used to return the range.
 *
 * Non-zero is returned if we find something, 0 if nothing was in the tree.
 */
1651STATIC u64 find_lock_delalloc_range(struct inode *inode,
1652 struct extent_io_tree *tree,
1653 struct page *locked_page, u64 *start,
1654 u64 *end, u64 max_bytes)
1655{
1656 u64 delalloc_start;
1657 u64 delalloc_end;
1658 u64 found;
1659 struct extent_state *cached_state = NULL;
1660 int ret;
1661 int loops = 0;
1662
1663again:
	/* step one, find a bunch of delalloc bytes starting at start */
1665 delalloc_start = *start;
1666 delalloc_end = 0;
1667 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1668 max_bytes, &cached_state);
1669 if (!found || delalloc_end <= *start) {
1670 *start = delalloc_start;
1671 *end = delalloc_end;
1672 free_extent_state(cached_state);
1673 return 0;
1674 }
1675
	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page.
	 */
1681 if (delalloc_start < *start)
1682 delalloc_start = *start;
1683
	/* make sure to limit the number of pages we try to lock down */
1687 if (delalloc_end + 1 - delalloc_start > max_bytes)
1688 delalloc_end = delalloc_start + max_bytes - 1;
1689
	/* step two, lock all the pages after the page that has start */
1691 ret = lock_delalloc_pages(inode, locked_page,
1692 delalloc_start, delalloc_end);
1693 if (ret == -EAGAIN) {
		/*
		 * Some of the pages are gone, let's avoid looping by
		 * shortening the size of the delalloc range we're searching.
		 */
1697 free_extent_state(cached_state);
1698 cached_state = NULL;
1699 if (!loops) {
1700 max_bytes = PAGE_SIZE;
1701 loops = 1;
1702 goto again;
1703 } else {
1704 found = 0;
1705 goto out_failed;
1706 }
1707 }
1708 BUG_ON(ret);
1709
	/* step three, lock the state bits for the whole range */
1711 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1712
	/* then test to make sure it is all still delalloc */
1714 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1715 EXTENT_DELALLOC, 1, cached_state);
1716 if (!ret) {
1717 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1718 &cached_state, GFP_NOFS);
1719 __unlock_for_delalloc(inode, locked_page,
1720 delalloc_start, delalloc_end);
1721 cond_resched();
1722 goto again;
1723 }
1724 free_extent_state(cached_state);
1725 *start = delalloc_start;
1726 *end = delalloc_end;
1727out_failed:
1728 return found;
1729}
1730
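/*
 * Clear the given bits on [start, end] in the inode's io tree and apply the
 * requested page operations (clear dirty, set/end writeback, set error,
 * unlock) to every page in the range except locked_page.
 */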
1731void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1732 struct page *locked_page,
1733 unsigned clear_bits,
1734 unsigned long page_ops)
1735{
1736 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1737 int ret;
1738 struct page *pages[16];
1739 unsigned long index = start >> PAGE_SHIFT;
1740 unsigned long end_index = end >> PAGE_SHIFT;
1741 unsigned long nr_pages = end_index - index + 1;
1742 int i;
1743
1744 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1745 if (page_ops == 0)
1746 return;
1747
1748 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1749 mapping_set_error(inode->i_mapping, -EIO);
1750
1751 while (nr_pages > 0) {
1752 ret = find_get_pages_contig(inode->i_mapping, index,
1753 min_t(unsigned long,
1754 nr_pages, ARRAY_SIZE(pages)), pages);
1755 for (i = 0; i < ret; i++) {
1756
1757 if (page_ops & PAGE_SET_PRIVATE2)
1758 SetPagePrivate2(pages[i]);
1759
1760 if (pages[i] == locked_page) {
1761 put_page(pages[i]);
1762 continue;
1763 }
1764 if (page_ops & PAGE_CLEAR_DIRTY)
1765 clear_page_dirty_for_io(pages[i]);
1766 if (page_ops & PAGE_SET_WRITEBACK)
1767 set_page_writeback(pages[i]);
1768 if (page_ops & PAGE_SET_ERROR)
1769 SetPageError(pages[i]);
1770 if (page_ops & PAGE_END_WRITEBACK)
1771 end_page_writeback(pages[i]);
1772 if (page_ops & PAGE_UNLOCK)
1773 unlock_page(pages[i]);
1774 put_page(pages[i]);
1775 }
1776 nr_pages -= ret;
1777 index += ret;
1778 cond_resched();
1779 }
1780}
1781
/*
 * Count the number of bytes in the tree that have a given bit(s) set.  This
 * can be fairly slow, except for EXTENT_DIRTY which is cached.  The total
 * number found is returned.
 */
1787u64 count_range_bits(struct extent_io_tree *tree,
1788 u64 *start, u64 search_end, u64 max_bytes,
1789 unsigned bits, int contig)
1790{
1791 struct rb_node *node;
1792 struct extent_state *state;
1793 u64 cur_start = *start;
1794 u64 total_bytes = 0;
1795 u64 last = 0;
1796 int found = 0;
1797
1798 if (WARN_ON(search_end <= cur_start))
1799 return 0;
1800
1801 spin_lock(&tree->lock);
1802 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1803 total_bytes = tree->dirty_bytes;
1804 goto out;
1805 }
1806
1807
1808
1809
1810 node = tree_search(tree, cur_start);
1811 if (!node)
1812 goto out;
1813
1814 while (1) {
1815 state = rb_entry(node, struct extent_state, rb_node);
1816 if (state->start > search_end)
1817 break;
1818 if (contig && found && state->start > last + 1)
1819 break;
1820 if (state->end >= cur_start && (state->state & bits) == bits) {
1821 total_bytes += min(search_end, state->end) + 1 -
1822 max(cur_start, state->start);
1823 if (total_bytes >= max_bytes)
1824 break;
1825 if (!found) {
1826 *start = max(cur_start, state->start);
1827 found = 1;
1828 }
1829 last = state->end;
1830 } else if (contig && found) {
1831 break;
1832 }
1833 node = rb_next(node);
1834 if (!node)
1835 break;
1836 }
1837out:
1838 spin_unlock(&tree->lock);
1839 return total_bytes;
1840}
1841
/*
 * Set the failure record for a given byte offset in the tree.  If there
 * isn't an extent_state starting exactly at 'start', -ENOENT is returned.
 */
1846static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1847 struct io_failure_record *failrec)
1848{
1849 struct rb_node *node;
1850 struct extent_state *state;
1851 int ret = 0;
1852
1853 spin_lock(&tree->lock);
1854
1855
1856
1857
1858 node = tree_search(tree, start);
1859 if (!node) {
1860 ret = -ENOENT;
1861 goto out;
1862 }
1863 state = rb_entry(node, struct extent_state, rb_node);
1864 if (state->start != start) {
1865 ret = -ENOENT;
1866 goto out;
1867 }
1868 state->failrec = failrec;
1869out:
1870 spin_unlock(&tree->lock);
1871 return ret;
1872}
1873
1874static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1875 struct io_failure_record **failrec)
1876{
1877 struct rb_node *node;
1878 struct extent_state *state;
1879 int ret = 0;
1880
1881 spin_lock(&tree->lock);
1882
1883
1884
1885
1886 node = tree_search(tree, start);
1887 if (!node) {
1888 ret = -ENOENT;
1889 goto out;
1890 }
1891 state = rb_entry(node, struct extent_state, rb_node);
1892 if (state->start != start) {
1893 ret = -ENOENT;
1894 goto out;
1895 }
1896 *failrec = state->failrec;
1897out:
1898 spin_unlock(&tree->lock);
1899 return ret;
1900}
1901
/*
 * Search a range in the state tree for a given mask.  If 'filled' == 1,
 * this returns 1 only if every extent in the range has the bits set.
 * Otherwise, 1 is returned if any bit in the range is found set.
 */
1908int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1909 unsigned bits, int filled, struct extent_state *cached)
1910{
1911 struct extent_state *state = NULL;
1912 struct rb_node *node;
1913 int bitset = 0;
1914
1915 spin_lock(&tree->lock);
1916 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1917 cached->end > start)
1918 node = &cached->rb_node;
1919 else
1920 node = tree_search(tree, start);
1921 while (node && start <= end) {
1922 state = rb_entry(node, struct extent_state, rb_node);
1923
1924 if (filled && state->start > start) {
1925 bitset = 0;
1926 break;
1927 }
1928
1929 if (state->start > end)
1930 break;
1931
1932 if (state->state & bits) {
1933 bitset = 1;
1934 if (!filled)
1935 break;
1936 } else if (filled) {
1937 bitset = 0;
1938 break;
1939 }
1940
1941 if (state->end == (u64)-1)
1942 break;
1943
1944 start = state->end + 1;
1945 if (start > end)
1946 break;
1947 node = rb_next(node);
1948 if (!node) {
1949 if (filled)
1950 bitset = 0;
1951 break;
1952 }
1953 }
1954 spin_unlock(&tree->lock);
1955 return bitset;
1956}
1957
/*
 * Helper function to set a given page up to date if all the extents in the
 * tree for that page are up to date.
 */
1962static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1963{
1964 u64 start = page_offset(page);
1965 u64 end = start + PAGE_SIZE - 1;
1966 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1967 SetPageUptodate(page);
1968}
1969
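/*
 * Drop the io_failure_record for rec->start: clear its bits in the failure
 * tree and the EXTENT_DAMAGED bit in the io tree, then free the record.
 */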
1970int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1971{
1972 int ret;
1973 int err = 0;
1974 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1975
1976 set_state_failrec(failure_tree, rec->start, NULL);
1977 ret = clear_extent_bits(failure_tree, rec->start,
1978 rec->start + rec->len - 1,
1979 EXTENT_LOCKED | EXTENT_DIRTY);
1980 if (ret)
1981 err = ret;
1982
1983 ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
1984 rec->start + rec->len - 1,
1985 EXTENT_DAMAGED);
1986 if (ret && !err)
1987 err = ret;
1988
1989 kfree(rec);
1990 return err;
1991}
1992
/*
 * This bypasses the standard btrfs submit functions deliberately, as the
 * standard behavior is to write all copies in a raid setup.  Here we only
 * want to write the one bad copy, so we do the mapping ourselves and submit
 * the bio directly.
 *
 * To avoid any synchronization issues, wait for the data after writing,
 * which actually prevents the read that triggered the error from finishing.
 * Currently, there can be no more than two copies of every data bit, thus
 * exactly one rewrite is required.
 */
2003int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
2004 struct page *page, unsigned int pg_offset, int mirror_num)
2005{
2006 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2007 struct bio *bio;
2008 struct btrfs_device *dev;
2009 u64 map_length = 0;
2010 u64 sector;
2011 struct btrfs_bio *bbio = NULL;
2012 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
2013 int ret;
2014
2015 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
2016 BUG_ON(!mirror_num);
2017
	/* we can't repair anything in raid56 yet */
2019 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
2020 return 0;
2021
2022 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2023 if (!bio)
2024 return -EIO;
2025 bio->bi_iter.bi_size = 0;
2026 map_length = length;
2027
	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are doing the
	 * read repair operation.
	 */
2033 btrfs_bio_counter_inc_blocked(fs_info);
2034 ret = btrfs_map_block(fs_info, WRITE, logical,
2035 &map_length, &bbio, mirror_num);
2036 if (ret) {
2037 btrfs_bio_counter_dec(fs_info);
2038 bio_put(bio);
2039 return -EIO;
2040 }
2041 BUG_ON(mirror_num != bbio->mirror_num);
2042 sector = bbio->stripes[mirror_num-1].physical >> 9;
2043 bio->bi_iter.bi_sector = sector;
2044 dev = bbio->stripes[mirror_num-1].dev;
2045 btrfs_put_bbio(bbio);
2046 if (!dev || !dev->bdev || !dev->writeable) {
2047 btrfs_bio_counter_dec(fs_info);
2048 bio_put(bio);
2049 return -EIO;
2050 }
2051 bio->bi_bdev = dev->bdev;
2052 bio_add_page(bio, page, length, pg_offset);
2053
2054 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
		/* try to remap that extent elsewhere? */
2056 btrfs_bio_counter_dec(fs_info);
2057 bio_put(bio);
2058 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2059 return -EIO;
2060 }
2061
2062 btrfs_info_rl_in_rcu(fs_info,
2063 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2064 btrfs_ino(inode), start,
2065 rcu_str_deref(dev->name), sector);
2066 btrfs_bio_counter_dec(fs_info);
2067 bio_put(bio);
2068 return 0;
2069}
2070
2071int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
2072 int mirror_num)
2073{
2074 u64 start = eb->start;
2075 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2076 int ret = 0;
2077
2078 if (root->fs_info->sb->s_flags & MS_RDONLY)
2079 return -EROFS;
2080
2081 for (i = 0; i < num_pages; i++) {
2082 struct page *p = eb->pages[i];
2083
2084 ret = repair_io_failure(root->fs_info->btree_inode, start,
2085 PAGE_SIZE, start, p,
2086 start - page_offset(p), mirror_num);
2087 if (ret)
2088 break;
2089 start += PAGE_SIZE;
2090 }
2091
2092 return ret;
2093}
2094
/*
 * Each time an IO finishes, we do a fast check in the IO failure tree to
 * see if we need to process or clean up an io_failure_record.
 */
2099int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2100 unsigned int pg_offset)
2101{
2102 u64 private;
2103 struct io_failure_record *failrec;
2104 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2105 struct extent_state *state;
2106 int num_copies;
2107 int ret;
2108
2109 private = 0;
2110 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
2111 (u64)-1, 1, EXTENT_DIRTY, 0);
2112 if (!ret)
2113 return 0;
2114
2115 ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start,
2116 &failrec);
2117 if (ret)
2118 return 0;
2119
2120 BUG_ON(!failrec->this_mirror);
2121
2122 if (failrec->in_validation) {
		/* there was no real error, just free the record */
2124 pr_debug("clean_io_failure: freeing dummy error at %llu\n",
2125 failrec->start);
2126 goto out;
2127 }
2128 if (fs_info->sb->s_flags & MS_RDONLY)
2129 goto out;
2130
2131 spin_lock(&BTRFS_I(inode)->io_tree.lock);
2132 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
2133 failrec->start,
2134 EXTENT_LOCKED);
2135 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2136
2137 if (state && state->start <= failrec->start &&
2138 state->end >= failrec->start + failrec->len - 1) {
2139 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2140 failrec->len);
2141 if (num_copies > 1) {
2142 repair_io_failure(inode, start, failrec->len,
2143 failrec->logical, page,
2144 pg_offset, failrec->failed_mirror);
2145 }
2146 }
2147
2148out:
2149 free_io_failure(inode, failrec);
2150
2151 return 0;
2152}
2153
/*
 * Can be called when
 * - the extent lock is held
 * - under an ordered extent
 * - the inode is being freed
 */
2160void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2161{
2162 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2163 struct io_failure_record *failrec;
2164 struct extent_state *state, *next;
2165
2166 if (RB_EMPTY_ROOT(&failure_tree->state))
2167 return;
2168
2169 spin_lock(&failure_tree->lock);
2170 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2171 while (state) {
2172 if (state->start > end)
2173 break;
2174
2175 ASSERT(state->end <= end);
2176
2177 next = next_state(state);
2178
2179 failrec = state->failrec;
2180 free_extent_state(state);
2181 kfree(failrec);
2182
2183 state = next;
2184 }
2185 spin_unlock(&failure_tree->lock);
2186}
2187
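/*
 * Look up the io_failure_record for 'start', or create and insert a new one
 * describing [start, end] (logical block, length, compression flags) if
 * this is the first failure seen for the range.
 */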
2188int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2189 struct io_failure_record **failrec_ret)
2190{
2191 struct io_failure_record *failrec;
2192 struct extent_map *em;
2193 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2194 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2195 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2196 int ret;
2197 u64 logical;
2198
2199 ret = get_state_failrec(failure_tree, start, &failrec);
2200 if (ret) {
2201 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2202 if (!failrec)
2203 return -ENOMEM;
2204
2205 failrec->start = start;
2206 failrec->len = end - start + 1;
2207 failrec->this_mirror = 0;
2208 failrec->bio_flags = 0;
2209 failrec->in_validation = 0;
2210
2211 read_lock(&em_tree->lock);
2212 em = lookup_extent_mapping(em_tree, start, failrec->len);
2213 if (!em) {
2214 read_unlock(&em_tree->lock);
2215 kfree(failrec);
2216 return -EIO;
2217 }
2218
2219 if (em->start > start || em->start + em->len <= start) {
2220 free_extent_map(em);
2221 em = NULL;
2222 }
2223 read_unlock(&em_tree->lock);
2224 if (!em) {
2225 kfree(failrec);
2226 return -EIO;
2227 }
2228
2229 logical = start - em->start;
2230 logical = em->block_start + logical;
2231 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2232 logical = em->block_start;
2233 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2234 extent_set_compress_type(&failrec->bio_flags,
2235 em->compress_type);
2236 }
2237
2238 pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
2239 logical, start, failrec->len);
2240
2241 failrec->logical = logical;
2242 free_extent_map(em);
2243
		/* set the bits in the private failure tree */
2245 ret = set_extent_bits(failure_tree, start, end,
2246 EXTENT_LOCKED | EXTENT_DIRTY);
2247 if (ret >= 0)
2248 ret = set_state_failrec(failure_tree, start, failrec);
2249
2250 if (ret >= 0)
2251 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2252 if (ret < 0) {
2253 kfree(failrec);
2254 return ret;
2255 }
2256 } else {
2257 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
2258 failrec->logical, failrec->start, failrec->len,
2259 failrec->in_validation);
		/*
		 * When data can be on disk more than twice, add to failrec
		 * here (e.g. with a list for failed_mirror) to make
		 * clean_io_failure() clean all those errors at once.
		 */
2265 }
2266
2267 *failrec_ret = failrec;
2268
2269 return 0;
2270}
2271
2272int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2273 struct io_failure_record *failrec, int failed_mirror)
2274{
2275 int num_copies;
2276
2277 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
2278 failrec->logical, failrec->len);
2279 if (num_copies == 1) {
		/*
		 * We only have a single copy of the data, so don't bother
		 * with all the retry and error correction code that follows.
		 * No matter what the error is, it is very likely to persist.
		 */
2285 pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2286 num_copies, failrec->this_mirror, failed_mirror);
2287 return 0;
2288 }
2289
	/*
	 * There are two premises:
	 *	a) deliver good data to the caller
	 *	b) correct the bad sectors on disk
	 */
2295 if (failed_bio->bi_vcnt > 1) {
		/*
		 * To fulfill b), we need to know the exact failing sectors, as
		 * we don't want to rewrite any more than the failed ones.  Thus
		 * we need separate read requests for the failed bio.
		 *
		 * If the following BUG_ON triggers, our validation request got
		 * merged; we need separate requests for our algorithm to work.
		 */
2304 BUG_ON(failrec->in_validation);
2305 failrec->in_validation = 1;
2306 failrec->this_mirror = failed_mirror;
2307 } else {
		/*
		 * We're ready to fulfill a) and b) alongside.  Get a good copy
		 * of the failed sector and if we succeed, we have set up
		 * everything for repair_io_failure to do the rest for us.
		 */
2313 if (failrec->in_validation) {
2314 BUG_ON(failrec->this_mirror != failed_mirror);
2315 failrec->in_validation = 0;
2316 failrec->this_mirror = 0;
2317 }
2318 failrec->failed_mirror = failed_mirror;
2319 failrec->this_mirror++;
2320 if (failrec->this_mirror == failed_mirror)
2321 failrec->this_mirror++;
2322 }
2323
2324 if (failrec->this_mirror > num_copies) {
2325 pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2326 num_copies, failrec->this_mirror, failed_mirror);
2327 return 0;
2328 }
2329
2330 return 1;
2331}
2332
2333
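/*
 * Build a single-page repair bio aimed at failrec->logical, copying the
 * relevant checksum from the failed bio so the read end_io can verify it.
 */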
2334struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2335 struct io_failure_record *failrec,
2336 struct page *page, int pg_offset, int icsum,
2337 bio_end_io_t *endio_func, void *data)
2338{
2339 struct bio *bio;
2340 struct btrfs_io_bio *btrfs_failed_bio;
2341 struct btrfs_io_bio *btrfs_bio;
2342
2343 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2344 if (!bio)
2345 return NULL;
2346
2347 bio->bi_end_io = endio_func;
2348 bio->bi_iter.bi_sector = failrec->logical >> 9;
2349 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2350 bio->bi_iter.bi_size = 0;
2351 bio->bi_private = data;
2352
2353 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2354 if (btrfs_failed_bio->csum) {
2355 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2356 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2357
2358 btrfs_bio = btrfs_io_bio(bio);
2359 btrfs_bio->csum = btrfs_bio->csum_inline;
2360 icsum *= csum_size;
2361 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2362 csum_size);
2363 }
2364
2365 bio_add_page(bio, page, failrec->len, pg_offset);
2366
2367 return bio;
2368}
2369
/*
 * This is a generic handler for readpage errors (default
 * readpage_io_failed_hook).  If other copies exist, read those and write
 * back good data to the failed position.  Does not investigate in remapping
 * the failed extent elsewhere, hoping the device will be smart enough to do
 * this within its own copy management mechanisms.
 */
2378static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2379 struct page *page, u64 start, u64 end,
2380 int failed_mirror)
2381{
2382 struct io_failure_record *failrec;
2383 struct inode *inode = page->mapping->host;
2384 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2385 struct bio *bio;
2386 int read_mode;
2387 int ret;
2388
2389 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2390
2391 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2392 if (ret)
2393 return ret;
2394
2395 ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
2396 if (!ret) {
2397 free_io_failure(inode, failrec);
2398 return -EIO;
2399 }
2400
2401 if (failed_bio->bi_vcnt > 1)
2402 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2403 else
2404 read_mode = READ_SYNC;
2405
2406 phy_offset >>= inode->i_sb->s_blocksize_bits;
2407 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2408 start - page_offset(page),
2409 (int)phy_offset, failed_bio->bi_end_io,
2410 NULL);
2411 if (!bio) {
2412 free_io_failure(inode, failrec);
2413 return -EIO;
2414 }
2415
2416 pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
2417 read_mode, failrec->this_mirror, failrec->in_validation);
2418
2419 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2420 failrec->this_mirror,
2421 failrec->bio_flags, 0);
2422 if (ret) {
2423 free_io_failure(inode, failrec);
2424 bio_put(bio);
2425 }
2426
2427 return ret;
2428}
2429
2430
2431
2432void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2433{
2434 int uptodate = (err == 0);
2435 struct extent_io_tree *tree;
2436 int ret = 0;
2437
2438 tree = &BTRFS_I(page->mapping->host)->io_tree;
2439
2440 if (tree->ops && tree->ops->writepage_end_io_hook) {
2441 ret = tree->ops->writepage_end_io_hook(page, start,
2442 end, NULL, uptodate);
2443 if (ret)
2444 uptodate = 0;
2445 }
2446
2447 if (!uptodate) {
2448 ClearPageUptodate(page);
2449 SetPageError(page);
2450 ret = ret < 0 ? ret : -EIO;
2451 mapping_set_error(page->mapping, ret);
2452 }
2453}
2454
/*
 * After a writepage IO is done, we need to:
 * - clear the uptodate bits on error
 * - clear the writeback bits in the extent tree for this IO
 * - end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2464static void end_bio_extent_writepage(struct bio *bio)
2465{
2466 struct bio_vec *bvec;
2467 u64 start;
2468 u64 end;
2469 int i;
2470
2471 bio_for_each_segment_all(bvec, bio, i) {
2472 struct page *page = bvec->bv_page;
2473
		/*
		 * We always issue full-page writes, but if some block in a
		 * page fails to write, blk_update_request() will advance
		 * bv_offset and adjust bv_len to compensate.  Print a warning
		 * for nonzero offsets, and an error if they don't add up to
		 * a full page.
		 */
2479 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2480 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2481 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2482 "partial page write in btrfs with offset %u and length %u",
2483 bvec->bv_offset, bvec->bv_len);
2484 else
2485 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2486 "incomplete page write in btrfs with offset %u and "
2487 "length %u",
2488 bvec->bv_offset, bvec->bv_len);
2489 }
2490
2491 start = page_offset(page);
2492 end = start + bvec->bv_offset + bvec->bv_len - 1;
2493
2494 end_extent_writepage(page, bio->bi_error, start, end);
2495 end_page_writeback(page);
2496 }
2497
2498 bio_put(bio);
2499}
2500
2501static void
2502endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2503 int uptodate)
2504{
2505 struct extent_state *cached = NULL;
2506 u64 end = start + len - 1;
2507
2508 if (uptodate && tree->track_uptodate)
2509 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2510 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2511}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2524static void end_bio_extent_readpage(struct bio *bio)
2525{
2526 struct bio_vec *bvec;
2527 int uptodate = !bio->bi_error;
2528 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2529 struct extent_io_tree *tree;
2530 u64 offset = 0;
2531 u64 start;
2532 u64 end;
2533 u64 len;
2534 u64 extent_start = 0;
2535 u64 extent_len = 0;
2536 int mirror;
2537 int ret;
2538 int i;
2539
2540 bio_for_each_segment_all(bvec, bio, i) {
2541 struct page *page = bvec->bv_page;
2542 struct inode *inode = page->mapping->host;
2543
2544 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2545 "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
2546 bio->bi_error, io_bio->mirror_num);
2547 tree = &BTRFS_I(inode)->io_tree;

		/*
		 * We always issue full-page reads, but if some block
		 * in a page fails to read, blk_update_request() will
		 * advance bv_offset and adjust bv_len to compensate.
		 * Print a warning for nonzero offsets, and an error
		 * if they don't add up to a full page.
		 */
2554 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2555 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2556 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2557 "partial page read in btrfs with offset %u and length %u",
2558 bvec->bv_offset, bvec->bv_len);
2559 else
2560 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2561 "incomplete page read in btrfs with offset %u and "
2562 "length %u",
2563 bvec->bv_offset, bvec->bv_len);
2564 }
2565
2566 start = page_offset(page);
2567 end = start + bvec->bv_offset + bvec->bv_len - 1;
2568 len = bvec->bv_len;
2569
2570 mirror = io_bio->mirror_num;
2571 if (likely(uptodate && tree->ops &&
2572 tree->ops->readpage_end_io_hook)) {
2573 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2574 page, start, end,
2575 mirror);
2576 if (ret)
2577 uptodate = 0;
2578 else
2579 clean_io_failure(inode, start, page, 0);
2580 }
2581
2582 if (likely(uptodate))
2583 goto readpage_ok;
2584
2585 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2586 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2587 if (!ret && !bio->bi_error)
2588 uptodate = 1;
2589 } else {
			/*
			 * The generic bio_readpage_error handles errors the
			 * following way: if possible, new read requests are
			 * created and submitted and will end up in
			 * end_bio_extent_readpage as well (if we're lucky,
			 * not in the !uptodate case).  In that case it
			 * returns 0 and we just go on with the next page in
			 * our bio.  If it can't handle the error it will
			 * return -EIO and we remain responsible for that
			 * page.
			 */
2600 ret = bio_readpage_error(bio, offset, page, start, end,
2601 mirror);
2602 if (ret == 0) {
2603 uptodate = !bio->bi_error;
2604 offset += len;
2605 continue;
2606 }
2607 }
2608readpage_ok:
2609 if (likely(uptodate)) {
2610 loff_t i_size = i_size_read(inode);
2611 pgoff_t end_index = i_size >> PAGE_SHIFT;
2612 unsigned off;

			/* Zero out the end if this page straddles i_size */
2615 off = i_size & (PAGE_SIZE-1);
2616 if (page->index == end_index && off)
2617 zero_user_segment(page, off, PAGE_SIZE);
2618 SetPageUptodate(page);
2619 } else {
2620 ClearPageUptodate(page);
2621 SetPageError(page);
2622 }
2623 unlock_page(page);
2624 offset += len;
2625
2626 if (unlikely(!uptodate)) {
2627 if (extent_len) {
2628 endio_readpage_release_extent(tree,
2629 extent_start,
2630 extent_len, 1);
2631 extent_start = 0;
2632 extent_len = 0;
2633 }
2634 endio_readpage_release_extent(tree, start,
2635 end - start + 1, 0);
2636 } else if (!extent_len) {
2637 extent_start = start;
2638 extent_len = end + 1 - start;
2639 } else if (extent_start + extent_len == start) {
2640 extent_len += end + 1 - start;
2641 } else {
2642 endio_readpage_release_extent(tree, extent_start,
2643 extent_len, uptodate);
2644 extent_start = start;
2645 extent_len = end + 1 - start;
2646 }
2647 }
2648
2649 if (extent_len)
2650 endio_readpage_release_extent(tree, extent_start, extent_len,
2651 uptodate);
2652 if (io_bio->end_io)
2653 io_bio->end_io(io_bio, bio->bi_error);
2654 bio_put(bio);
2655}
2656
/*
 * this allocates from the btrfs_bioset; a plain struct bio is returned, but
 * it is always embedded in a struct btrfs_io_bio, so callers can use
 * btrfs_io_bio() to get at the container when they need the csum fields.
 */
2661struct bio *
2662btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2663 gfp_t gfp_flags)
2664{
2665 struct btrfs_io_bio *btrfs_bio;
2666 struct bio *bio;
2667
2668 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
2669
2670 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2671 while (!bio && (nr_vecs /= 2)) {
2672 bio = bio_alloc_bioset(gfp_flags,
2673 nr_vecs, btrfs_bioset);
2674 }
2675 }
2676
2677 if (bio) {
2678 bio->bi_bdev = bdev;
2679 bio->bi_iter.bi_sector = first_sector;
2680 btrfs_bio = btrfs_io_bio(bio);
2681 btrfs_bio->csum = NULL;
2682 btrfs_bio->csum_allocated = NULL;
2683 btrfs_bio->end_io = NULL;
2684 }
2685 return bio;
2686}
2687
2688struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2689{
2690 struct btrfs_io_bio *btrfs_bio;
2691 struct bio *new;
2692
2693 new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
2694 if (new) {
2695 btrfs_bio = btrfs_io_bio(new);
2696 btrfs_bio->csum = NULL;
2697 btrfs_bio->csum_allocated = NULL;
2698 btrfs_bio->end_io = NULL;
2699
2700#ifdef CONFIG_BLK_CGROUP
2701
2702 if (bio->bi_css)
2703 bio_associate_blkcg(new, bio->bi_css);
2704#endif
2705 }
2706 return new;
2707}
2708
2709
2710struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2711{
2712 struct btrfs_io_bio *btrfs_bio;
2713 struct bio *bio;
2714
2715 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2716 if (bio) {
2717 btrfs_bio = btrfs_io_bio(bio);
2718 btrfs_bio->csum = NULL;
2719 btrfs_bio->csum_allocated = NULL;
2720 btrfs_bio->end_io = NULL;
2721 }
2722 return bio;
2723}
2724
2725
2726static int __must_check submit_one_bio(int rw, struct bio *bio,
2727 int mirror_num, unsigned long bio_flags)
2728{
2729 int ret = 0;
2730 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2731 struct page *page = bvec->bv_page;
2732 struct extent_io_tree *tree = bio->bi_private;
2733 u64 start;
2734
2735 start = page_offset(page) + bvec->bv_offset;
2736
2737 bio->bi_private = NULL;
2738
2739 bio_get(bio);
2740
2741 if (tree->ops && tree->ops->submit_bio_hook)
2742 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
2743 mirror_num, bio_flags, start);
2744 else
2745 btrfsic_submit_bio(rw, bio);
2746
2747 bio_put(bio);
2748 return ret;
2749}
2750
2751static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
2752 unsigned long offset, size_t size, struct bio *bio,
2753 unsigned long bio_flags)
2754{
2755 int ret = 0;
2756 if (tree->ops && tree->ops->merge_bio_hook)
2757 ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
2758 bio_flags);
2759 BUG_ON(ret < 0);
2760 return ret;
2761
2762}
2763
2764static int submit_extent_page(int rw, struct extent_io_tree *tree,
2765 struct writeback_control *wbc,
2766 struct page *page, sector_t sector,
2767 size_t size, unsigned long offset,
2768 struct block_device *bdev,
2769 struct bio **bio_ret,
2770 unsigned long max_pages,
2771 bio_end_io_t end_io_func,
2772 int mirror_num,
2773 unsigned long prev_bio_flags,
2774 unsigned long bio_flags,
2775 bool force_bio_submit)
2776{
2777 int ret = 0;
2778 struct bio *bio;
2779 int contig = 0;
2780 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
2781 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2782
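	/*
	 * If the caller passed in an in-flight bio, try to append this page
	 * to it.  Submit the old bio first when the new page is not
	 * contiguous with it, the bio flags changed, submission is being
	 * forced, the merge hook refuses the page, or bio_add_page() cannot
	 * fit it.
	 */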
2783 if (bio_ret && *bio_ret) {
2784 bio = *bio_ret;
2785 if (old_compressed)
2786 contig = bio->bi_iter.bi_sector == sector;
2787 else
2788 contig = bio_end_sector(bio) == sector;
2789
2790 if (prev_bio_flags != bio_flags || !contig ||
2791 force_bio_submit ||
2792 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
2793 bio_add_page(bio, page, page_size, offset) < page_size) {
2794 ret = submit_one_bio(rw, bio, mirror_num,
2795 prev_bio_flags);
2796 if (ret < 0) {
2797 *bio_ret = NULL;
2798 return ret;
2799 }
2800 bio = NULL;
2801 } else {
2802 if (wbc)
2803 wbc_account_io(wbc, page, page_size);
2804 return 0;
2805 }
2806 }
2807
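	/*
	 * No reusable bio in flight (or we just submitted it): allocate a
	 * new one, add the page, and either hand it back through bio_ret or
	 * submit it right away.
	 */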
2808 bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
2809 GFP_NOFS | __GFP_HIGH);
2810 if (!bio)
2811 return -ENOMEM;
2812
2813 bio_add_page(bio, page, page_size, offset);
2814 bio->bi_end_io = end_io_func;
2815 bio->bi_private = tree;
2816 if (wbc) {
2817 wbc_init_bio(wbc, bio);
2818 wbc_account_io(wbc, page, page_size);
2819 }
2820
2821 if (bio_ret)
2822 *bio_ret = bio;
2823 else
2824 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
2825
2826 return ret;
2827}
2828
2829static void attach_extent_buffer_page(struct extent_buffer *eb,
2830 struct page *page)
2831{
2832 if (!PagePrivate(page)) {
2833 SetPagePrivate(page);
2834 get_page(page);
2835 set_page_private(page, (unsigned long)eb);
2836 } else {
2837 WARN_ON(page->private != (unsigned long)eb);
2838 }
2839}
2840
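/*
 * Tag a data page as claimed by the extent IO code: set PagePrivate, take
 * an extra page reference and store the EXTENT_PAGE_PRIVATE marker so
 * later calls (and releasepage) know the page is already set up.
 */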
2841void set_page_extent_mapped(struct page *page)
2842{
2843 if (!PagePrivate(page)) {
2844 SetPagePrivate(page);
2845 get_page(page);
2846 set_page_private(page, EXTENT_PAGE_PRIVATE);
2847 }
2848}
2849
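/*
 * Look up the extent map covering 'start', reusing *em_cached when it
 * still covers the range; otherwise drop the cached map, ask get_extent
 * for a new one and cache that for the next call.
 */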
2850static struct extent_map *
2851__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2852 u64 start, u64 len, get_extent_t *get_extent,
2853 struct extent_map **em_cached)
2854{
2855 struct extent_map *em;
2856
2857 if (em_cached && *em_cached) {
2858 em = *em_cached;
2859 if (extent_map_in_tree(em) && start >= em->start &&
2860 start < extent_map_end(em)) {
2861 atomic_inc(&em->refs);
2862 return em;
2863 }
2864
2865 free_extent_map(em);
2866 *em_cached = NULL;
2867 }
2868
2869 em = get_extent(inode, page, pg_offset, start, len, 0);
2870 if (em_cached && !IS_ERR_OR_NULL(em)) {
2871 BUG_ON(*em_cached);
2872 atomic_inc(&em->refs);
2873 *em_cached = em;
2874 }
2875 return em;
2876}
2877
/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree for the range being read and are removed when the IO is
 * done (by the end_io handlers).
 */
2883static int __do_readpage(struct extent_io_tree *tree,
2884 struct page *page,
2885 get_extent_t *get_extent,
2886 struct extent_map **em_cached,
2887 struct bio **bio, int mirror_num,
2888 unsigned long *bio_flags, int rw,
2889 u64 *prev_em_start)
2890{
2891 struct inode *inode = page->mapping->host;
2892 u64 start = page_offset(page);
2893 u64 page_end = start + PAGE_SIZE - 1;
2894 u64 end;
2895 u64 cur = start;
2896 u64 extent_offset;
2897 u64 last_byte = i_size_read(inode);
2898 u64 block_start;
2899 u64 cur_end;
2900 sector_t sector;
2901 struct extent_map *em;
2902 struct block_device *bdev;
2903 int ret;
2904 int nr = 0;
2905 size_t pg_offset = 0;
2906 size_t iosize;
2907 size_t disk_io_size;
2908 size_t blocksize = inode->i_sb->s_blocksize;
2909 unsigned long this_bio_flag = 0;
2910
2911 set_page_extent_mapped(page);
2912
2913 end = page_end;
2914 if (!PageUptodate(page)) {
2915 if (cleancache_get_page(page) == 0) {
2916 BUG_ON(blocksize != PAGE_SIZE);
2917 unlock_extent(tree, start, end);
2918 goto out;
2919 }
2920 }
2921
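	/*
	 * If this is the last page of the file, zero out the part of it
	 * that lies beyond i_size so the read never exposes stale data.
	 */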
2922 if (page->index == last_byte >> PAGE_SHIFT) {
2923 char *userpage;
2924 size_t zero_offset = last_byte & (PAGE_SIZE - 1);
2925
2926 if (zero_offset) {
2927 iosize = PAGE_SIZE - zero_offset;
2928 userpage = kmap_atomic(page);
2929 memset(userpage + zero_offset, 0, iosize);
2930 flush_dcache_page(page);
2931 kunmap_atomic(userpage);
2932 }
2933 }
2934 while (cur <= end) {
2935 unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
2936 bool force_bio_submit = false;
2937
2938 if (cur >= last_byte) {
2939 char *userpage;
2940 struct extent_state *cached = NULL;
2941
2942 iosize = PAGE_SIZE - pg_offset;
2943 userpage = kmap_atomic(page);
2944 memset(userpage + pg_offset, 0, iosize);
2945 flush_dcache_page(page);
2946 kunmap_atomic(userpage);
2947 set_extent_uptodate(tree, cur, cur + iosize - 1,
2948 &cached, GFP_NOFS);
2949 unlock_extent_cached(tree, cur,
2950 cur + iosize - 1,
2951 &cached, GFP_NOFS);
2952 break;
2953 }
2954 em = __get_extent_map(inode, page, pg_offset, cur,
2955 end - cur + 1, get_extent, em_cached);
2956 if (IS_ERR_OR_NULL(em)) {
2957 SetPageError(page);
2958 unlock_extent(tree, cur, end);
2959 break;
2960 }
2961 extent_offset = cur - em->start;
2962 BUG_ON(extent_map_end(em) <= cur);
2963 BUG_ON(end < cur);
2964
2965 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2966 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2967 extent_set_compress_type(&this_bio_flag,
2968 em->compress_type);
2969 }
2970
2971 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2972 cur_end = min(extent_map_end(em) - 1, end);
2973 iosize = ALIGN(iosize, blocksize);
2974 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2975 disk_io_size = em->block_len;
2976 sector = em->block_start >> 9;
2977 } else {
2978 sector = (em->block_start + extent_offset) >> 9;
2979 disk_io_size = iosize;
2980 }
2981 bdev = em->bdev;
2982 block_start = em->block_start;
2983 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2984 block_start = EXTENT_MAP_HOLE;
2985
		/*
		 * If this is a compressed extent and the previous range we
		 * added to the bio points to the same compressed extent but
		 * with a different extent offset (which can happen after
		 * cloning part of a compressed extent to a neighbouring file
		 * range), we must not merge the two ranges into one bio.
		 * The compressed read end_io decompresses the extent once
		 * for the whole bio, so the pages belonging to the second
		 * range would end up zeroed or filled with the wrong data.
		 * Track the orig_start of the last extent map we used and
		 * force a bio submit whenever it changes.
		 */
3020 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3021 prev_em_start && *prev_em_start != (u64)-1 &&
3022 *prev_em_start != em->orig_start)
3023 force_bio_submit = true;
3024
3025 if (prev_em_start)
3026 *prev_em_start = em->orig_start;
3027
3028 free_extent_map(em);
3029 em = NULL;

		/* we've found a hole, just zero the page and go on */
3032 if (block_start == EXTENT_MAP_HOLE) {
3033 char *userpage;
3034 struct extent_state *cached = NULL;
3035
3036 userpage = kmap_atomic(page);
3037 memset(userpage + pg_offset, 0, iosize);
3038 flush_dcache_page(page);
3039 kunmap_atomic(userpage);
3040
3041 set_extent_uptodate(tree, cur, cur + iosize - 1,
3042 &cached, GFP_NOFS);
3043 unlock_extent_cached(tree, cur,
3044 cur + iosize - 1,
3045 &cached, GFP_NOFS);
3046 cur = cur + iosize;
3047 pg_offset += iosize;
3048 continue;
3049 }
		/* the get_extent function already copied into the page */
3051 if (test_range_bit(tree, cur, cur_end,
3052 EXTENT_UPTODATE, 1, NULL)) {
3053 check_page_uptodate(tree, page);
3054 unlock_extent(tree, cur, cur + iosize - 1);
3055 cur = cur + iosize;
3056 pg_offset += iosize;
3057 continue;
3058 }
		/* we have an inline extent but it didn't get marked up
		 * to date.  Error out
		 */
3062 if (block_start == EXTENT_MAP_INLINE) {
3063 SetPageError(page);
3064 unlock_extent(tree, cur, cur + iosize - 1);
3065 cur = cur + iosize;
3066 pg_offset += iosize;
3067 continue;
3068 }
3069
3070 pnr -= page->index;
3071 ret = submit_extent_page(rw, tree, NULL, page,
3072 sector, disk_io_size, pg_offset,
3073 bdev, bio, pnr,
3074 end_bio_extent_readpage, mirror_num,
3075 *bio_flags,
3076 this_bio_flag,
3077 force_bio_submit);
3078 if (!ret) {
3079 nr++;
3080 *bio_flags = this_bio_flag;
3081 } else {
3082 SetPageError(page);
3083 unlock_extent(tree, cur, cur + iosize - 1);
3084 }
3085 cur = cur + iosize;
3086 pg_offset += iosize;
3087 }
3088out:
3089 if (!nr) {
3090 if (!PageError(page))
3091 SetPageUptodate(page);
3092 unlock_page(page);
3093 }
3094 return 0;
3095}
3096
3097static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3098 struct page *pages[], int nr_pages,
3099 u64 start, u64 end,
3100 get_extent_t *get_extent,
3101 struct extent_map **em_cached,
3102 struct bio **bio, int mirror_num,
3103 unsigned long *bio_flags, int rw,
3104 u64 *prev_em_start)
3105{
3106 struct inode *inode;
3107 struct btrfs_ordered_extent *ordered;
3108 int index;
3109
3110 inode = pages[0]->mapping->host;
3111 while (1) {
3112 lock_extent(tree, start, end);
3113 ordered = btrfs_lookup_ordered_range(inode, start,
3114 end - start + 1);
3115 if (!ordered)
3116 break;
3117 unlock_extent(tree, start, end);
3118 btrfs_start_ordered_extent(inode, ordered, 1);
3119 btrfs_put_ordered_extent(ordered);
3120 }
3121
3122 for (index = 0; index < nr_pages; index++) {
3123 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3124 mirror_num, bio_flags, rw, prev_em_start);
3125 put_page(pages[index]);
3126 }
3127}
3128
3129static void __extent_readpages(struct extent_io_tree *tree,
3130 struct page *pages[],
3131 int nr_pages, get_extent_t *get_extent,
3132 struct extent_map **em_cached,
3133 struct bio **bio, int mirror_num,
3134 unsigned long *bio_flags, int rw,
3135 u64 *prev_em_start)
3136{
3137 u64 start = 0;
3138 u64 end = 0;
3139 u64 page_start;
3140 int index;
3141 int first_index = 0;
3142
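	/*
	 * Walk the pages and batch up runs that are contiguous in the file,
	 * so each contiguous run is locked and read with a single call.
	 */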
3143 for (index = 0; index < nr_pages; index++) {
3144 page_start = page_offset(pages[index]);
3145 if (!end) {
3146 start = page_start;
3147 end = start + PAGE_SIZE - 1;
3148 first_index = index;
3149 } else if (end + 1 == page_start) {
3150 end += PAGE_SIZE;
3151 } else {
3152 __do_contiguous_readpages(tree, &pages[first_index],
3153 index - first_index, start,
3154 end, get_extent, em_cached,
3155 bio, mirror_num, bio_flags,
3156 rw, prev_em_start);
3157 start = page_start;
3158 end = start + PAGE_SIZE - 1;
3159 first_index = index;
3160 }
3161 }
3162
3163 if (end)
3164 __do_contiguous_readpages(tree, &pages[first_index],
3165 index - first_index, start,
3166 end, get_extent, em_cached, bio,
3167 mirror_num, bio_flags, rw,
3168 prev_em_start);
3169}
3170
3171static int __extent_read_full_page(struct extent_io_tree *tree,
3172 struct page *page,
3173 get_extent_t *get_extent,
3174 struct bio **bio, int mirror_num,
3175 unsigned long *bio_flags, int rw)
3176{
3177 struct inode *inode = page->mapping->host;
3178 struct btrfs_ordered_extent *ordered;
3179 u64 start = page_offset(page);
3180 u64 end = start + PAGE_SIZE - 1;
3181 int ret;
3182
3183 while (1) {
3184 lock_extent(tree, start, end);
3185 ordered = btrfs_lookup_ordered_range(inode, start,
3186 PAGE_SIZE);
3187 if (!ordered)
3188 break;
3189 unlock_extent(tree, start, end);
3190 btrfs_start_ordered_extent(inode, ordered, 1);
3191 btrfs_put_ordered_extent(ordered);
3192 }
3193
3194 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3195 bio_flags, rw, NULL);
3196 return ret;
3197}
3198
3199int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3200 get_extent_t *get_extent, int mirror_num)
3201{
3202 struct bio *bio = NULL;
3203 unsigned long bio_flags = 0;
3204 int ret;
3205
3206 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3207 &bio_flags, READ);
3208 if (bio)
3209 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3210 return ret;
3211}
3212
3213static void update_nr_written(struct page *page, struct writeback_control *wbc,
3214 unsigned long nr_written)
3215{
3216 wbc->nr_to_write -= nr_written;
3217}
3218
/*
 * helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if our fill_delalloc function did all the work required
 * to write the page (copy into inline extent).  In this case the IO has
 * been started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked)
 * This returns < 0 if there were errors (page still locked)
 */
3229static noinline_for_stack int writepage_delalloc(struct inode *inode,
3230 struct page *page, struct writeback_control *wbc,
3231 struct extent_page_data *epd,
3232 u64 delalloc_start,
3233 unsigned long *nr_written)
3234{
3235 struct extent_io_tree *tree = epd->tree;
3236 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3237 u64 nr_delalloc;
3238 u64 delalloc_to_write = 0;
3239 u64 delalloc_end = 0;
3240 int ret;
3241 int page_started = 0;
3242
3243 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3244 return 0;
3245
3246 while (delalloc_end < page_end) {
3247 nr_delalloc = find_lock_delalloc_range(inode, tree,
3248 page,
3249 &delalloc_start,
3250 &delalloc_end,
3251 BTRFS_MAX_EXTENT_SIZE);
3252 if (nr_delalloc == 0) {
3253 delalloc_start = delalloc_end + 1;
3254 continue;
3255 }
3256 ret = tree->ops->fill_delalloc(inode, page,
3257 delalloc_start,
3258 delalloc_end,
3259 &page_started,
3260 nr_written);
3261
3262 if (ret) {
3263 SetPageError(page);
			/*
			 * fill_delalloc should return < 0 on error, but just
			 * in case it returned > 0 convert it to -EIO here:
			 * a positive return means IO was started and we only
			 * want to report that when things are going well.
			 */
3269 ret = ret < 0 ? ret : -EIO;
3270 goto done;
3271 }
		/*
		 * delalloc_end is already one less than the total
		 * length, so we don't subtract one from
		 * PAGE_SIZE
		 */
3276 delalloc_to_write += (delalloc_end - delalloc_start +
3277 PAGE_SIZE) >> PAGE_SHIFT;
3278 delalloc_start = delalloc_end + 1;
3279 }
3280 if (wbc->nr_to_write < delalloc_to_write) {
3281 int thresh = 8192;
3282
3283 if (delalloc_to_write < thresh * 2)
3284 thresh = delalloc_to_write;
3285 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3286 thresh);
3287 }

	/* did the fill delalloc function already unlock and start
	 * the IO?
	 */
	if (page_started) {
		/*
		 * we've unlocked the page, so we can't update
		 * the mapping's writeback index, just update
		 * nr_to_write.
		 */
3298 wbc->nr_to_write -= *nr_written;
3299 return 1;
3300 }
3301
3302 ret = 0;
3303
3304done:
3305 return ret;
3306}
3307
/*
 * helper for __extent_writepage.  This calls the writepage start hooks,
 * and does all of the IO.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked)
 * < 0 if there were errors (page still locked)
 */
3316static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3317 struct page *page,
3318 struct writeback_control *wbc,
3319 struct extent_page_data *epd,
3320 loff_t i_size,
3321 unsigned long nr_written,
3322 int write_flags, int *nr_ret)
3323{
3324 struct extent_io_tree *tree = epd->tree;
3325 u64 start = page_offset(page);
3326 u64 page_end = start + PAGE_SIZE - 1;
3327 u64 end;
3328 u64 cur = start;
3329 u64 extent_offset;
3330 u64 block_start;
3331 u64 iosize;
3332 sector_t sector;
3333 struct extent_state *cached_state = NULL;
3334 struct extent_map *em;
3335 struct block_device *bdev;
3336 size_t pg_offset = 0;
3337 size_t blocksize;
3338 int ret = 0;
3339 int nr = 0;
3340 bool compressed;
3341
3342 if (tree->ops && tree->ops->writepage_start_hook) {
3343 ret = tree->ops->writepage_start_hook(page, start,
3344 page_end);
3345 if (ret) {
3346
3347 if (ret == -EBUSY)
3348 wbc->pages_skipped++;
3349 else
3350 redirty_page_for_writepage(wbc, page);
3351
3352 update_nr_written(page, wbc, nr_written);
3353 unlock_page(page);
3354 ret = 1;
3355 goto done_unlocked;
3356 }
3357 }
3358
	/*
	 * we don't want to touch the inode after unlocking the page,
	 * so we update the mapping writeback index now
	 */
3363 update_nr_written(page, wbc, nr_written + 1);
3364
3365 end = page_end;
3366 if (i_size <= start) {
3367 if (tree->ops && tree->ops->writepage_end_io_hook)
3368 tree->ops->writepage_end_io_hook(page, start,
3369 page_end, NULL, 1);
3370 goto done;
3371 }
3372
3373 blocksize = inode->i_sb->s_blocksize;
3374
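	/*
	 * Walk the extents backing this page: each mapped range is marked
	 * writeback and submitted, while holes, inline and compressed
	 * ranges are finished here without issuing any IO.
	 */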
3375 while (cur <= end) {
3376 u64 em_end;
3377 unsigned long max_nr;
3378
3379 if (cur >= i_size) {
3380 if (tree->ops && tree->ops->writepage_end_io_hook)
3381 tree->ops->writepage_end_io_hook(page, cur,
3382 page_end, NULL, 1);
3383 break;
3384 }
3385 em = epd->get_extent(inode, page, pg_offset, cur,
3386 end - cur + 1, 1);
3387 if (IS_ERR_OR_NULL(em)) {
3388 SetPageError(page);
3389 ret = PTR_ERR_OR_ZERO(em);
3390 break;
3391 }
3392
3393 extent_offset = cur - em->start;
3394 em_end = extent_map_end(em);
3395 BUG_ON(em_end <= cur);
3396 BUG_ON(end < cur);
3397 iosize = min(em_end - cur, end - cur + 1);
3398 iosize = ALIGN(iosize, blocksize);
3399 sector = (em->block_start + extent_offset) >> 9;
3400 bdev = em->bdev;
3401 block_start = em->block_start;
3402 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3403 free_extent_map(em);
3404 em = NULL;
3405
		/*
		 * compressed and inline extents are written through other
		 * paths in the FS
		 */
3410 if (compressed || block_start == EXTENT_MAP_HOLE ||
3411 block_start == EXTENT_MAP_INLINE) {
3412
3413
3414
3415
3416 if (!compressed && tree->ops &&
3417 tree->ops->writepage_end_io_hook)
3418 tree->ops->writepage_end_io_hook(page, cur,
3419 cur + iosize - 1,
3420 NULL, 1);
3421 else if (compressed) {
				/* we don't want to end_page_writeback on
				 * a compressed extent.  this happens
				 * elsewhere
				 */
3426 nr++;
3427 }
3428
3429 cur += iosize;
3430 pg_offset += iosize;
3431 continue;
3432 }
3433
3434 max_nr = (i_size >> PAGE_SHIFT) + 1;
3435
3436 set_range_writeback(tree, cur, cur + iosize - 1);
3437 if (!PageWriteback(page)) {
3438 btrfs_err(BTRFS_I(inode)->root->fs_info,
3439 "page %lu not writeback, cur %llu end %llu",
3440 page->index, cur, end);
3441 }
3442
3443 ret = submit_extent_page(write_flags, tree, wbc, page,
3444 sector, iosize, pg_offset,
3445 bdev, &epd->bio, max_nr,
3446 end_bio_extent_writepage,
3447 0, 0, 0, false);
3448 if (ret)
3449 SetPageError(page);
3450
3451 cur = cur + iosize;
3452 pg_offset += iosize;
3453 nr++;
3454 }
3455done:
3456 *nr_ret = nr;
3457
3458done_unlocked:

	/* drop our reference on any cached states */
3461 free_extent_state(cached_state);
3462 return ret;
3463}
3464
/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
3471static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3472 void *data)
3473{
3474 struct inode *inode = page->mapping->host;
3475 struct extent_page_data *epd = data;
3476 u64 start = page_offset(page);
3477 u64 page_end = start + PAGE_SIZE - 1;
3478 int ret;
3479 int nr = 0;
3480 size_t pg_offset = 0;
3481 loff_t i_size = i_size_read(inode);
3482 unsigned long end_index = i_size >> PAGE_SHIFT;
3483 int write_flags;
3484 unsigned long nr_written = 0;
3485
3486 if (wbc->sync_mode == WB_SYNC_ALL)
3487 write_flags = WRITE_SYNC;
3488 else
3489 write_flags = WRITE;
3490
3491 trace___extent_writepage(page, inode, wbc);
3492
3493 WARN_ON(!PageLocked(page));
3494
3495 ClearPageError(page);
3496
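	/*
	 * Pages entirely beyond i_size have nothing to write back; just
	 * invalidate and unlock them.
	 */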
3497 pg_offset = i_size & (PAGE_SIZE - 1);
3498 if (page->index > end_index ||
3499 (page->index == end_index && !pg_offset)) {
3500 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3501 unlock_page(page);
3502 return 0;
3503 }
3504
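	/* the page straddles i_size; zero the part beyond EOF before writing */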
3505 if (page->index == end_index) {
3506 char *userpage;
3507
3508 userpage = kmap_atomic(page);
3509 memset(userpage + pg_offset, 0,
3510 PAGE_SIZE - pg_offset);
3511 kunmap_atomic(userpage);
3512 flush_dcache_page(page);
3513 }
3514
3515 pg_offset = 0;
3516
3517 set_page_extent_mapped(page);
3518
3519 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3520 if (ret == 1)
3521 goto done_unlocked;
3522 if (ret)
3523 goto done;
3524
3525 ret = __extent_writepage_io(inode, page, wbc, epd,
3526 i_size, nr_written, write_flags, &nr);
3527 if (ret == 1)
3528 goto done_unlocked;
3529
3530done:
3531 if (nr == 0) {
		/* make sure the mapping tag for page dirty gets cleared */
3533 set_page_writeback(page);
3534 end_page_writeback(page);
3535 }
3536 if (PageError(page)) {
3537 ret = ret < 0 ? ret : -EIO;
3538 end_extent_writepage(page, ret, start, page_end);
3539 }
3540 unlock_page(page);
3541 return ret;
3542
3543done_unlocked:
3544 return 0;
3545}
3546
3547void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3548{
3549 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3550 TASK_UNINTERRUPTIBLE);
3551}
3552
3553static noinline_for_stack int
3554lock_extent_buffer_for_io(struct extent_buffer *eb,
3555 struct btrfs_fs_info *fs_info,
3556 struct extent_page_data *epd)
3557{
3558 unsigned long i, num_pages;
3559 int flush = 0;
3560 int ret = 0;
3561
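	/*
	 * Get the tree write lock on the buffer; if we have to block for
	 * it, flush any bios we have queued first so they are not held up
	 * while we sleep.
	 */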
3562 if (!btrfs_try_tree_write_lock(eb)) {
3563 flush = 1;
3564 flush_write_bio(epd);
3565 btrfs_tree_lock(eb);
3566 }
3567
3568 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3569 btrfs_tree_unlock(eb);
3570 if (!epd->sync_io)
3571 return 0;
3572 if (!flush) {
3573 flush_write_bio(epd);
3574 flush = 1;
3575 }
3576 while (1) {
3577 wait_on_extent_buffer_writeback(eb);
3578 btrfs_tree_lock(eb);
3579 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3580 break;
3581 btrfs_tree_unlock(eb);
3582 }
3583 }
3584
	/*
	 * We need to do this to prevent races in people who check if the eb
	 * is under IO since we can end up having no IO bits set for a short
	 * period of time.
	 */
3590 spin_lock(&eb->refs_lock);
3591 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3592 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3593 spin_unlock(&eb->refs_lock);
3594 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3595 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
3596 -eb->len,
3597 fs_info->dirty_metadata_batch);
3598 ret = 1;
3599 } else {
3600 spin_unlock(&eb->refs_lock);
3601 }
3602
3603 btrfs_tree_unlock(eb);
3604
3605 if (!ret)
3606 return ret;
3607
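	/*
	 * Lock every page of the buffer, flushing our queued bios before
	 * the first page lock we have to block on.
	 */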
3608 num_pages = num_extent_pages(eb->start, eb->len);
3609 for (i = 0; i < num_pages; i++) {
3610 struct page *p = eb->pages[i];
3611
3612 if (!trylock_page(p)) {
3613 if (!flush) {
3614 flush_write_bio(epd);
3615 flush = 1;
3616 }
3617 lock_page(p);
3618 }
3619 }
3620
3621 return ret;
3622}
3623
3624static void end_extent_buffer_writeback(struct extent_buffer *eb)
3625{
3626 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3627 smp_mb__after_atomic();
3628 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3629}
3630
3631static void set_btree_ioerr(struct page *page)
3632{
3633 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3634 struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
3635
3636 SetPageError(page);
3637 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3638 return;
3639
	/*
	 * A write failure of a btree node must be seen by whoever later
	 * waits on that node's writeback: a transaction commit or a log
	 * sync that misses the error could write a superblock pointing to
	 * metadata that never made it to disk.  We cannot rely on the page
	 * or mapping error bits for this, because filemap_fdatawait style
	 * checks clear those bits for whoever looks first, and the eb pages
	 * may be cleaned (or released) again before the commit gets to
	 * them.  Instead, record the failure in the btree inode's runtime
	 * flags: one bit for ordinary tree writes and one per log tree
	 * index, so the transaction commit and the log sync paths each see
	 * the errors that matter to them.
	 */
3678 switch (eb->log_index) {
3679 case -1:
3680 set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
3681 break;
3682 case 0:
3683 set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
3684 break;
3685 case 1:
3686 set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
3687 break;
3688 default:
3689 BUG();
3690 }
3691}
3692
3693static void end_bio_extent_buffer_writepage(struct bio *bio)
3694{
3695 struct bio_vec *bvec;
3696 struct extent_buffer *eb;
3697 int i, done;
3698
3699 bio_for_each_segment_all(bvec, bio, i) {
3700 struct page *page = bvec->bv_page;
3701
3702 eb = (struct extent_buffer *)page->private;
3703 BUG_ON(!eb);
3704 done = atomic_dec_and_test(&eb->io_pages);
3705
3706 if (bio->bi_error ||
3707 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3708 ClearPageUptodate(page);
3709 set_btree_ioerr(page);
3710 }
3711
3712 end_page_writeback(page);
3713
3714 if (!done)
3715 continue;
3716
3717 end_extent_buffer_writeback(eb);
3718 }
3719
3720 bio_put(bio);
3721}
3722
3723static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3724 struct btrfs_fs_info *fs_info,
3725 struct writeback_control *wbc,
3726 struct extent_page_data *epd)
3727{
3728 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3729 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3730 u64 offset = eb->start;
3731 unsigned long i, num_pages;
3732 unsigned long bio_flags = 0;
3733 int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
3734 int ret = 0;
3735
3736 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3737 num_pages = num_extent_pages(eb->start, eb->len);
3738 atomic_set(&eb->io_pages, num_pages);
3739 if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
3740 bio_flags = EXTENT_BIO_TREE_LOG;
3741
3742 for (i = 0; i < num_pages; i++) {
3743 struct page *p = eb->pages[i];
3744
3745 clear_page_dirty_for_io(p);
3746 set_page_writeback(p);
3747 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
3748 PAGE_SIZE, 0, bdev, &epd->bio,
3749 -1, end_bio_extent_buffer_writepage,
3750 0, epd->bio_flags, bio_flags, false);
3751 epd->bio_flags = bio_flags;
3752 if (ret) {
3753 set_btree_ioerr(p);
3754 end_page_writeback(p);
3755 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3756 end_extent_buffer_writeback(eb);
3757 ret = -EIO;
3758 break;
3759 }
3760 offset += PAGE_SIZE;
3761 update_nr_written(p, wbc, 1);
3762 unlock_page(p);
3763 }
3764
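	/* on error, clean up the pages we never got around to submitting */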
3765 if (unlikely(ret)) {
3766 for (; i < num_pages; i++) {
3767 struct page *p = eb->pages[i];
3768 clear_page_dirty_for_io(p);
3769 unlock_page(p);
3770 }
3771 }
3772
3773 return ret;
3774}
3775
3776int btree_write_cache_pages(struct address_space *mapping,
3777 struct writeback_control *wbc)
3778{
3779 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3780 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3781 struct extent_buffer *eb, *prev_eb = NULL;
3782 struct extent_page_data epd = {
3783 .bio = NULL,
3784 .tree = tree,
3785 .extent_locked = 0,
3786 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3787 .bio_flags = 0,
3788 };
3789 int ret = 0;
3790 int done = 0;
3791 int nr_to_write_done = 0;
3792 struct pagevec pvec;
3793 int nr_pages;
3794 pgoff_t index;
3795 pgoff_t end;
3796 int scanned = 0;
3797 int tag;
3798
3799 pagevec_init(&pvec, 0);
3800 if (wbc->range_cyclic) {
3801 index = mapping->writeback_index;
3802 end = -1;
3803 } else {
3804 index = wbc->range_start >> PAGE_SHIFT;
3805 end = wbc->range_end >> PAGE_SHIFT;
3806 scanned = 1;
3807 }
3808 if (wbc->sync_mode == WB_SYNC_ALL)
3809 tag = PAGECACHE_TAG_TOWRITE;
3810 else
3811 tag = PAGECACHE_TAG_DIRTY;
3812retry:
3813 if (wbc->sync_mode == WB_SYNC_ALL)
3814 tag_pages_for_writeback(mapping, index, end);
3815 while (!done && !nr_to_write_done && (index <= end) &&
3816 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3817 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3818 unsigned i;
3819
3820 scanned = 1;
3821 for (i = 0; i < nr_pages; i++) {
3822 struct page *page = pvec.pages[i];
3823
3824 if (!PagePrivate(page))
3825 continue;
3826
3827 if (!wbc->range_cyclic && page->index > end) {
3828 done = 1;
3829 break;
3830 }
3831
3832 spin_lock(&mapping->private_lock);
3833 if (!PagePrivate(page)) {
3834 spin_unlock(&mapping->private_lock);
3835 continue;
3836 }
3837
3838 eb = (struct extent_buffer *)page->private;
3839
3840
3841
3842
3843
3844
3845 if (WARN_ON(!eb)) {
3846 spin_unlock(&mapping->private_lock);
3847 continue;
3848 }
3849
3850 if (eb == prev_eb) {
3851 spin_unlock(&mapping->private_lock);
3852 continue;
3853 }
3854
3855 ret = atomic_inc_not_zero(&eb->refs);
3856 spin_unlock(&mapping->private_lock);
3857 if (!ret)
3858 continue;
3859
3860 prev_eb = eb;
3861 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3862 if (!ret) {
3863 free_extent_buffer(eb);
3864 continue;
3865 }
3866
3867 ret = write_one_eb(eb, fs_info, wbc, &epd);
3868 if (ret) {
3869 done = 1;
3870 free_extent_buffer(eb);
3871 break;
3872 }
3873 free_extent_buffer(eb);
3874
3875
3876
3877
3878
3879
3880 nr_to_write_done = wbc->nr_to_write <= 0;
3881 }
3882 pagevec_release(&pvec);
3883 cond_resched();
3884 }
3885 if (!scanned && !done) {
3886
3887
3888
3889
3890 scanned = 1;
3891 index = 0;
3892 goto retry;
3893 }
3894 flush_write_bio(&epd);
3895 return ret;
3896}
3897
/**
 * extent_write_cache_pages - walk the dirty pages of the given address
 * space and write all of them.
 * @tree:      the extent_io_tree for the pages
 * @mapping:   address space structure to write
 * @wbc:       subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data:      data passed to the writepage function
 * @flush_fn:  called to flush any queued bios before we sleep
 *
 * If a page is already under IO it is skipped, even if it is dirty.  That
 * is the right behaviour for memory-cleaning writeback, but it is not
 * enough for data-integrity callers such as fsync(), which must start new
 * IO against everything that was dirty at the time of the call; for that
 * reason, when wbc->sync_mode is WB_SYNC_ALL we wait for in-flight
 * writeback to finish before writing a page again.
 */
3913static int extent_write_cache_pages(struct extent_io_tree *tree,
3914 struct address_space *mapping,
3915 struct writeback_control *wbc,
3916 writepage_t writepage, void *data,
3917 void (*flush_fn)(void *))
3918{
3919 struct inode *inode = mapping->host;
3920 int ret = 0;
3921 int done = 0;
3922 int nr_to_write_done = 0;
3923 struct pagevec pvec;
3924 int nr_pages;
3925 pgoff_t index;
3926 pgoff_t end;
3927 pgoff_t done_index;
3928 int range_whole = 0;
3929 int scanned = 0;
3930 int tag;
3931
	/*
	 * We have to hold onto the inode so that ordered extents can do
	 * their work when the IO finishes.  The alternative to this is
	 * failing to add an ordered extent if the igrab() fails there and
	 * that is a huge pain to deal with, so instead just hold onto the
	 * inode throughout the writepages operation.  If it fails here we
	 * are freeing up the inode anyway and we'd rather not waste our
	 * time writing out stuff that is going to fail anyway.
	 */
3941 if (!igrab(inode))
3942 return 0;
3943
3944 pagevec_init(&pvec, 0);
3945 if (wbc->range_cyclic) {
3946 index = mapping->writeback_index;
3947 end = -1;
3948 } else {
3949 index = wbc->range_start >> PAGE_SHIFT;
3950 end = wbc->range_end >> PAGE_SHIFT;
3951 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3952 range_whole = 1;
3953 scanned = 1;
3954 }
3955 if (wbc->sync_mode == WB_SYNC_ALL)
3956 tag = PAGECACHE_TAG_TOWRITE;
3957 else
3958 tag = PAGECACHE_TAG_DIRTY;
3959retry:
3960 if (wbc->sync_mode == WB_SYNC_ALL)
3961 tag_pages_for_writeback(mapping, index, end);
3962 done_index = index;
3963 while (!done && !nr_to_write_done && (index <= end) &&
3964 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3965 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3966 unsigned i;
3967
3968 scanned = 1;
3969 for (i = 0; i < nr_pages; i++) {
3970 struct page *page = pvec.pages[i];
3971
3972 done_index = page->index;
			/*
			 * At this point we hold neither mapping->tree_lock
			 * nor lock on the page itself: the page may be
			 * truncated or invalidated (changing page->mapping
			 * to NULL), or even swizzled back from swapper_space
			 * to tmpfs file mapping
			 */
3980 if (!trylock_page(page)) {
3981 flush_fn(data);
3982 lock_page(page);
3983 }
3984
3985 if (unlikely(page->mapping != mapping)) {
3986 unlock_page(page);
3987 continue;
3988 }
3989
3990 if (!wbc->range_cyclic && page->index > end) {
3991 done = 1;
3992 unlock_page(page);
3993 continue;
3994 }
3995
3996 if (wbc->sync_mode != WB_SYNC_NONE) {
3997 if (PageWriteback(page))
3998 flush_fn(data);
3999 wait_on_page_writeback(page);
4000 }
4001
4002 if (PageWriteback(page) ||
4003 !clear_page_dirty_for_io(page)) {
4004 unlock_page(page);
4005 continue;
4006 }
4007
4008 ret = (*writepage)(page, wbc, data);
4009
4010 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
4011 unlock_page(page);
4012 ret = 0;
4013 }
4014 if (ret < 0) {
				/*
				 * done_index is set past this page,
				 * so media errors will not choke
				 * background writeout for the entire
				 * file. This has consequences for
				 * range_cyclic semantics (ie. cycle
				 * the whole file).
				 */
4024 done_index = page->index + 1;
4025 done = 1;
4026 break;
4027 }
4028
4029
4030
4031
4032
4033
4034 nr_to_write_done = wbc->nr_to_write <= 0;
4035 }
4036 pagevec_release(&pvec);
4037 cond_resched();
4038 }
4039 if (!scanned && !done) {
4040
4041
4042
4043
4044 scanned = 1;
4045 index = 0;
4046 goto retry;
4047 }
4048
4049 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4050 mapping->writeback_index = done_index;
4051
4052 btrfs_add_delayed_iput(inode);
4053 return ret;
4054}
4055
4056static void flush_epd_write_bio(struct extent_page_data *epd)
4057{
4058 if (epd->bio) {
4059 int rw = WRITE;
4060 int ret;
4061
4062 if (epd->sync_io)
4063 rw = WRITE_SYNC;
4064
4065 ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
4066 BUG_ON(ret < 0);
4067 epd->bio = NULL;
4068 }
4069}
4070
4071static noinline void flush_write_bio(void *data)
4072{
4073 struct extent_page_data *epd = data;
4074 flush_epd_write_bio(epd);
4075}
4076
4077int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
4078 get_extent_t *get_extent,
4079 struct writeback_control *wbc)
4080{
4081 int ret;
4082 struct extent_page_data epd = {
4083 .bio = NULL,
4084 .tree = tree,
4085 .get_extent = get_extent,
4086 .extent_locked = 0,
4087 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4088 .bio_flags = 0,
4089 };
4090
4091 ret = __extent_writepage(page, wbc, &epd);
4092
4093 flush_epd_write_bio(&epd);
4094 return ret;
4095}
4096
4097int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
4098 u64 start, u64 end, get_extent_t *get_extent,
4099 int mode)
4100{
4101 int ret = 0;
4102 struct address_space *mapping = inode->i_mapping;
4103 struct page *page;
4104 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4105 PAGE_SHIFT;
4106
4107 struct extent_page_data epd = {
4108 .bio = NULL,
4109 .tree = tree,
4110 .get_extent = get_extent,
4111 .extent_locked = 1,
4112 .sync_io = mode == WB_SYNC_ALL,
4113 .bio_flags = 0,
4114 };
4115 struct writeback_control wbc_writepages = {
4116 .sync_mode = mode,
4117 .nr_to_write = nr_pages * 2,
4118 .range_start = start,
4119 .range_end = end + 1,
4120 };
4121
4122 while (start <= end) {
4123 page = find_get_page(mapping, start >> PAGE_SHIFT);
4124 if (clear_page_dirty_for_io(page))
4125 ret = __extent_writepage(page, &wbc_writepages, &epd);
4126 else {
4127 if (tree->ops && tree->ops->writepage_end_io_hook)
4128 tree->ops->writepage_end_io_hook(page, start,
4129 start + PAGE_SIZE - 1,
4130 NULL, 1);
4131 unlock_page(page);
4132 }
4133 put_page(page);
4134 start += PAGE_SIZE;
4135 }
4136
4137 flush_epd_write_bio(&epd);
4138 return ret;
4139}
4140
4141int extent_writepages(struct extent_io_tree *tree,
4142 struct address_space *mapping,
4143 get_extent_t *get_extent,
4144 struct writeback_control *wbc)
4145{
4146 int ret = 0;
4147 struct extent_page_data epd = {
4148 .bio = NULL,
4149 .tree = tree,
4150 .get_extent = get_extent,
4151 .extent_locked = 0,
4152 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4153 .bio_flags = 0,
4154 };
4155
4156 ret = extent_write_cache_pages(tree, mapping, wbc,
4157 __extent_writepage, &epd,
4158 flush_write_bio);
4159 flush_epd_write_bio(&epd);
4160 return ret;
4161}
4162
4163int extent_readpages(struct extent_io_tree *tree,
4164 struct address_space *mapping,
4165 struct list_head *pages, unsigned nr_pages,
4166 get_extent_t get_extent)
4167{
4168 struct bio *bio = NULL;
4169 unsigned page_idx;
4170 unsigned long bio_flags = 0;
4171 struct page *pagepool[16];
4172 struct page *page;
4173 struct extent_map *em_cached = NULL;
4174 int nr = 0;
4175 u64 prev_em_start = (u64)-1;
4176
4177 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4178 page = list_entry(pages->prev, struct page, lru);
4179
4180 prefetchw(&page->flags);
4181 list_del(&page->lru);
4182 if (add_to_page_cache_lru(page, mapping,
4183 page->index, GFP_NOFS)) {
4184 put_page(page);
4185 continue;
4186 }
4187
4188 pagepool[nr++] = page;
4189 if (nr < ARRAY_SIZE(pagepool))
4190 continue;
4191 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4192 &bio, 0, &bio_flags, READ, &prev_em_start);
4193 nr = 0;
4194 }
4195 if (nr)
4196 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4197 &bio, 0, &bio_flags, READ, &prev_em_start);
4198
4199 if (em_cached)
4200 free_extent_map(em_cached);
4201
4202 BUG_ON(!list_empty(pages));
4203 if (bio)
4204 return submit_one_bio(READ, bio, 0, bio_flags);
4205 return 0;
4206}
4207
/*
 * basic invalidatepage code: waits on any locked or writeback ranges
 * corresponding to the page, and then deletes the related extent state
 * records from the tree
 */
4213int extent_invalidatepage(struct extent_io_tree *tree,
4214 struct page *page, unsigned long offset)
4215{
4216 struct extent_state *cached_state = NULL;
4217 u64 start = page_offset(page);
4218 u64 end = start + PAGE_SIZE - 1;
4219 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4220
4221 start += ALIGN(offset, blocksize);
4222 if (start > end)
4223 return 0;
4224
4225 lock_extent_bits(tree, start, end, &cached_state);
4226 wait_on_page_writeback(page);
4227 clear_extent_bit(tree, start, end,
4228 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4229 EXTENT_DO_ACCOUNTING,
4230 1, 1, &cached_state, GFP_NOFS);
4231 return 0;
4232}
4233
/*
 * a helper for releasepage: tests whether any part of the page is locked
 * or under IO and, if not, drops the related extent state bits so the page
 * can be released
 */
4239static int try_release_extent_state(struct extent_map_tree *map,
4240 struct extent_io_tree *tree,
4241 struct page *page, gfp_t mask)
4242{
4243 u64 start = page_offset(page);
4244 u64 end = start + PAGE_SIZE - 1;
4245 int ret = 1;
4246
4247 if (test_range_bit(tree, start, end,
4248 EXTENT_IOBITS, 0, NULL))
4249 ret = 0;
4250 else {
4251 if ((mask & GFP_NOFS) == GFP_NOFS)
4252 mask = GFP_NOFS;
4253
4254
4255
4256
4257 ret = clear_extent_bit(tree, start, end,
4258 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4259 0, 0, NULL, mask);
4260
4261
4262
4263
4264 if (ret < 0)
4265 ret = 0;
4266 else
4267 ret = 1;
4268 }
4269 return ret;
4270}
4271
/*
 * a helper for releasepage.  As long as there are no locked extents in the
 * range corresponding to the page, both extent state records and extent
 * map records are removed
 */
4277int try_release_extent_mapping(struct extent_map_tree *map,
4278 struct extent_io_tree *tree, struct page *page,
4279 gfp_t mask)
4280{
4281 struct extent_map *em;
4282 u64 start = page_offset(page);
4283 u64 end = start + PAGE_SIZE - 1;
4284
4285 if (gfpflags_allow_blocking(mask) &&
4286 page->mapping->host->i_size > SZ_16M) {
4287 u64 len;
4288 while (start <= end) {
4289 len = end - start + 1;
4290 write_lock(&map->lock);
4291 em = lookup_extent_mapping(map, start, len);
4292 if (!em) {
4293 write_unlock(&map->lock);
4294 break;
4295 }
4296 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4297 em->start != start) {
4298 write_unlock(&map->lock);
4299 free_extent_map(em);
4300 break;
4301 }
4302 if (!test_range_bit(tree, em->start,
4303 extent_map_end(em) - 1,
4304 EXTENT_LOCKED | EXTENT_WRITEBACK,
4305 0, NULL)) {
4306 remove_extent_mapping(map, em);
4307
4308 free_extent_map(em);
4309 }
4310 start = extent_map_end(em);
4311 write_unlock(&map->lock);
4312
4313
4314 free_extent_map(em);
4315 }
4316 }
4317 return try_release_extent_state(map, tree, page, mask);
4318}
4319
/*
 * helper function for fiemap, which doesn't want to see any holes: keep
 * mapping forward from 'offset' until something other than a hole is found
 * or we pass 'last'
 */
4324static struct extent_map *get_extent_skip_holes(struct inode *inode,
4325 u64 offset,
4326 u64 last,
4327 get_extent_t *get_extent)
4328{
4329 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
4330 struct extent_map *em;
4331 u64 len;
4332
4333 if (offset >= last)
4334 return NULL;
4335
4336 while (1) {
4337 len = last - offset;
4338 if (len == 0)
4339 break;
4340 len = ALIGN(len, sectorsize);
4341 em = get_extent(inode, NULL, 0, offset, len, 0);
4342 if (IS_ERR_OR_NULL(em))
4343 return em;
4344
4345
4346 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
4347 em->block_start != EXTENT_MAP_HOLE) {
4348 return em;
4349 }
4350
4351
4352 offset = extent_map_end(em);
4353 free_extent_map(em);
4354 if (offset >= last)
4355 break;
4356 }
4357 return NULL;
4358}
4359
4360int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4361 __u64 start, __u64 len, get_extent_t *get_extent)
4362{
4363 int ret = 0;
4364 u64 off = start;
4365 u64 max = start + len;
4366 u32 flags = 0;
4367 u32 found_type;
4368 u64 last;
4369 u64 last_for_get_extent = 0;
4370 u64 disko = 0;
4371 u64 isize = i_size_read(inode);
4372 struct btrfs_key found_key;
4373 struct extent_map *em = NULL;
4374 struct extent_state *cached_state = NULL;
4375 struct btrfs_path *path;
4376 struct btrfs_root *root = BTRFS_I(inode)->root;
4377 int end = 0;
4378 u64 em_start = 0;
4379 u64 em_len = 0;
4380 u64 em_end = 0;
4381
4382 if (len == 0)
4383 return -EINVAL;
4384
4385 path = btrfs_alloc_path();
4386 if (!path)
4387 return -ENOMEM;
4388 path->leave_spinning = 1;
4389
4390 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4391 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4392
4393
4394
4395
4396
4397 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
4398 0);
4399 if (ret < 0) {
4400 btrfs_free_path(path);
4401 return ret;
4402 } else {
4403 WARN_ON(!ret);
4404 if (ret == 1)
4405 ret = 0;
4406 }
4407
4408 path->slots[0]--;
4409 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4410 found_type = found_key.type;
4411
4412
4413 if (found_key.objectid != btrfs_ino(inode) ||
4414 found_type != BTRFS_EXTENT_DATA_KEY) {
4415
4416 last = (u64)-1;
4417 last_for_get_extent = isize;
4418 } else {
4419
4420
4421
4422
4423
4424 last = found_key.offset;
4425 last_for_get_extent = last + 1;
4426 }
4427 btrfs_release_path(path);
4428
4429
4430
4431
4432
4433
4434 if (last < isize) {
4435 last = (u64)-1;
4436 last_for_get_extent = isize;
4437 }
4438
4439 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4440 &cached_state);
4441
4442 em = get_extent_skip_holes(inode, start, last_for_get_extent,
4443 get_extent);
4444 if (!em)
4445 goto out;
4446 if (IS_ERR(em)) {
4447 ret = PTR_ERR(em);
4448 goto out;
4449 }
4450
4451 while (!end) {
4452 u64 offset_in_extent = 0;
4453
4454
4455 if (em->start >= max || extent_map_end(em) < off)
4456 break;
4457
4458
4459
4460
4461
4462
4463
4464 em_start = max(em->start, off);
4465
4466
4467
4468
4469
4470
4471
4472 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4473 offset_in_extent = em_start - em->start;
4474 em_end = extent_map_end(em);
4475 em_len = em_end - em_start;
4476 disko = 0;
4477 flags = 0;
4478
4479
4480
4481
4482 off = extent_map_end(em);
4483 if (off >= max)
4484 end = 1;
4485
4486 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4487 end = 1;
4488 flags |= FIEMAP_EXTENT_LAST;
4489 } else if (em->block_start == EXTENT_MAP_INLINE) {
4490 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4491 FIEMAP_EXTENT_NOT_ALIGNED);
4492 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4493 flags |= (FIEMAP_EXTENT_DELALLOC |
4494 FIEMAP_EXTENT_UNKNOWN);
4495 } else if (fieinfo->fi_extents_max) {
4496 u64 bytenr = em->block_start -
4497 (em->start - em->orig_start);
4498
4499 disko = em->block_start + offset_in_extent;
4500
4501
4502
4503
4504
4505
4506
4507
4508 ret = btrfs_check_shared(NULL, root->fs_info,
4509 root->objectid,
4510 btrfs_ino(inode), bytenr);
4511 if (ret < 0)
4512 goto out_free;
4513 if (ret)
4514 flags |= FIEMAP_EXTENT_SHARED;
4515 ret = 0;
4516 }
4517 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4518 flags |= FIEMAP_EXTENT_ENCODED;
4519 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4520 flags |= FIEMAP_EXTENT_UNWRITTEN;
4521
4522 free_extent_map(em);
4523 em = NULL;
4524 if ((em_start >= last) || em_len == (u64)-1 ||
4525 (last == (u64)-1 && isize <= em_end)) {
4526 flags |= FIEMAP_EXTENT_LAST;
4527 end = 1;
4528 }
4529
4530
4531 em = get_extent_skip_holes(inode, off, last_for_get_extent,
4532 get_extent);
4533 if (IS_ERR(em)) {
4534 ret = PTR_ERR(em);
4535 goto out;
4536 }
4537 if (!em) {
4538 flags |= FIEMAP_EXTENT_LAST;
4539 end = 1;
4540 }
4541 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
4542 em_len, flags);
4543 if (ret) {
4544 if (ret == 1)
4545 ret = 0;
4546 goto out_free;
4547 }
4548 }
4549out_free:
4550 free_extent_map(em);
4551out:
4552 btrfs_free_path(path);
4553 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4554 &cached_state, GFP_NOFS);
4555 return ret;
4556}
4557
4558static void __free_extent_buffer(struct extent_buffer *eb)
4559{
4560 btrfs_leak_debug_del(&eb->leak_list);
4561 kmem_cache_free(extent_buffer_cache, eb);
4562}
4563
4564int extent_buffer_under_io(struct extent_buffer *eb)
4565{
4566 return (atomic_read(&eb->io_pages) ||
4567 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4568 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4569}
4570
4571
4572
4573
4574static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4575{
4576 unsigned long index;
4577 struct page *page;
4578 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4579
4580 BUG_ON(extent_buffer_under_io(eb));
4581
4582 index = num_extent_pages(eb->start, eb->len);
4583 if (index == 0)
4584 return;
4585
4586 do {
4587 index--;
4588 page = eb->pages[index];
4589 if (!page)
4590 continue;
4591 if (mapped)
4592 spin_lock(&page->mapping->private_lock);
4593
4594
4595
4596
4597
4598
4599
4600 if (PagePrivate(page) &&
4601 page->private == (unsigned long)eb) {
4602 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4603 BUG_ON(PageDirty(page));
4604 BUG_ON(PageWriteback(page));
4605
4606
4607
4608
4609 ClearPagePrivate(page);
4610 set_page_private(page, 0);
4611
4612 put_page(page);
4613 }
4614
4615 if (mapped)
4616 spin_unlock(&page->mapping->private_lock);
4617
4618
4619 put_page(page);
4620 } while (index != 0);
4621}
4622
4623
4624
4625
4626static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4627{
4628 btrfs_release_extent_buffer_page(eb);
4629 __free_extent_buffer(eb);
4630}
4631
4632static struct extent_buffer *
4633__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4634 unsigned long len)
4635{
4636 struct extent_buffer *eb = NULL;
4637
4638 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4639 eb->start = start;
4640 eb->len = len;
4641 eb->fs_info = fs_info;
4642 eb->bflags = 0;
4643 rwlock_init(&eb->lock);
4644 atomic_set(&eb->write_locks, 0);
4645 atomic_set(&eb->read_locks, 0);
4646 atomic_set(&eb->blocking_readers, 0);
4647 atomic_set(&eb->blocking_writers, 0);
4648 atomic_set(&eb->spinning_readers, 0);
4649 atomic_set(&eb->spinning_writers, 0);
4650 eb->lock_nested = 0;
4651 init_waitqueue_head(&eb->write_lock_wq);
4652 init_waitqueue_head(&eb->read_lock_wq);
4653
4654 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4655
4656 spin_lock_init(&eb->refs_lock);
4657 atomic_set(&eb->refs, 1);
4658 atomic_set(&eb->io_pages, 0);
4659
4660
4661
4662
4663 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4664 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4665 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4666
4667 return eb;
4668}
4669
4670struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4671{
4672 unsigned long i;
4673 struct page *p;
4674 struct extent_buffer *new;
4675 unsigned long num_pages = num_extent_pages(src->start, src->len);
4676
4677 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4678 if (new == NULL)
4679 return NULL;
4680
4681 for (i = 0; i < num_pages; i++) {
4682 p = alloc_page(GFP_NOFS);
4683 if (!p) {
4684 btrfs_release_extent_buffer(new);
4685 return NULL;
4686 }
4687 attach_extent_buffer_page(new, p);
4688 WARN_ON(PageDirty(p));
4689 SetPageUptodate(p);
4690 new->pages[i] = p;
4691 }
4692
4693 copy_extent_buffer(new, src, 0, 0, src->len);
4694 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4695 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4696
4697 return new;
4698}
4699
4700struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4701 u64 start, unsigned long len)
4702{
4703 struct extent_buffer *eb;
4704 unsigned long num_pages;
4705 unsigned long i;
4706
4707 num_pages = num_extent_pages(start, len);
4708
4709 eb = __alloc_extent_buffer(fs_info, start, len);
4710 if (!eb)
4711 return NULL;
4712
4713 for (i = 0; i < num_pages; i++) {
4714 eb->pages[i] = alloc_page(GFP_NOFS);
4715 if (!eb->pages[i])
4716 goto err;
4717 }
4718 set_extent_buffer_uptodate(eb);
4719 btrfs_set_header_nritems(eb, 0);
4720 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4721
4722 return eb;
4723err:
4724 for (; i > 0; i--)
4725 __free_page(eb->pages[i - 1]);
4726 __free_extent_buffer(eb);
4727 return NULL;
4728}
4729
4730struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4731 u64 start, u32 nodesize)
4732{
4733 unsigned long len;
4734
4735 if (!fs_info) {
4736
4737
4738
4739
4740 len = nodesize;
4741 } else {
4742 len = fs_info->tree_root->nodesize;
4743 }
4744
4745 return __alloc_dummy_extent_buffer(fs_info, start, len);
4746}
4747
4748static void check_buffer_tree_ref(struct extent_buffer *eb)
4749{
4750 int refs;
4751
	/* the ref bit is tricky.  We have to make sure it is set
	 * if we have the buffer dirty.  Otherwise the code to free
	 * a buffer can end up dropping a dirty page.
	 *
	 * Once the ref bit is set, it won't go away while the
	 * buffer is dirty or in writeback, and it also won't go
	 * away while we have the reference count on the eb bumped.
	 *
	 * We can't just set the ref bit without bumping the ref on
	 * the eb because free_extent_buffer might see the ref bit
	 * and try to clear it.  If this happens free_extent_buffer
	 * might end up dropping our original ref by mistake and
	 * freeing the page before we drop the ref on the eb.
	 */
4771 refs = atomic_read(&eb->refs);
4772 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4773 return;
4774
4775 spin_lock(&eb->refs_lock);
4776 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4777 atomic_inc(&eb->refs);
4778 spin_unlock(&eb->refs_lock);
4779}
4780
4781static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4782 struct page *accessed)
4783{
4784 unsigned long num_pages, i;
4785
4786 check_buffer_tree_ref(eb);
4787
4788 num_pages = num_extent_pages(eb->start, eb->len);
4789 for (i = 0; i < num_pages; i++) {
4790 struct page *p = eb->pages[i];
4791
4792 if (p != accessed)
4793 mark_page_accessed(p);
4794 }
4795}
4796
4797struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4798 u64 start)
4799{
4800 struct extent_buffer *eb;
4801
4802 rcu_read_lock();
4803 eb = radix_tree_lookup(&fs_info->buffer_radix,
4804 start >> PAGE_SHIFT);
4805 if (eb && atomic_inc_not_zero(&eb->refs)) {
4806 rcu_read_unlock();
		/*
		 * Lock and unlock the eb's refs_lock to avoid racing with
		 * free_extent_buffer(): the buffer may be flagged
		 * EXTENT_BUFFER_STALE while another task is in the middle
		 * of dropping the stale reference.  Cycling refs_lock here
		 * makes sure that task finishes (or sees our new reference)
		 * before we hand the buffer out, so we never return an eb
		 * whose reference count is about to be dropped twice.
		 */
4822 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4823 spin_lock(&eb->refs_lock);
4824 spin_unlock(&eb->refs_lock);
4825 }
4826 mark_extent_buffer_accessed(eb, NULL);
4827 return eb;
4828 }
4829 rcu_read_unlock();
4830
4831 return NULL;
4832}
4833
4834#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4835struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4836 u64 start, u32 nodesize)
4837{
4838 struct extent_buffer *eb, *exists = NULL;
4839 int ret;
4840
4841 eb = find_extent_buffer(fs_info, start);
4842 if (eb)
4843 return eb;
4844 eb = alloc_dummy_extent_buffer(fs_info, start, nodesize);
4845 if (!eb)
4846 return NULL;
4847 eb->fs_info = fs_info;
4848again:
4849 ret = radix_tree_preload(GFP_NOFS);
4850 if (ret)
4851 goto free_eb;
4852 spin_lock(&fs_info->buffer_lock);
4853 ret = radix_tree_insert(&fs_info->buffer_radix,
4854 start >> PAGE_SHIFT, eb);
4855 spin_unlock(&fs_info->buffer_lock);
4856 radix_tree_preload_end();
4857 if (ret == -EEXIST) {
4858 exists = find_extent_buffer(fs_info, start);
4859 if (exists)
4860 goto free_eb;
4861 else
4862 goto again;
4863 }
4864 check_buffer_tree_ref(eb);
4865 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4866
4867
4868
4869
4870
4871
4872
4873 atomic_inc(&eb->refs);
4874 return eb;
4875free_eb:
4876 btrfs_release_extent_buffer(eb);
4877 return exists;
4878}
4879#endif
4880
4881struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4882 u64 start)
4883{
4884 unsigned long len = fs_info->tree_root->nodesize;
4885 unsigned long num_pages = num_extent_pages(start, len);
4886 unsigned long i;
4887 unsigned long index = start >> PAGE_SHIFT;
4888 struct extent_buffer *eb;
4889 struct extent_buffer *exists = NULL;
4890 struct page *p;
4891 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4892 int uptodate = 1;
4893 int ret;
4894
4895 if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) {
4896 btrfs_err(fs_info, "bad tree block start %llu", start);
4897 return ERR_PTR(-EINVAL);
4898 }
4899
4900 eb = find_extent_buffer(fs_info, start);
4901 if (eb)
4902 return eb;
4903
4904 eb = __alloc_extent_buffer(fs_info, start, len);
4905 if (!eb)
4906 return ERR_PTR(-ENOMEM);
4907
4908 for (i = 0; i < num_pages; i++, index++) {
4909 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4910 if (!p) {
4911 exists = ERR_PTR(-ENOMEM);
4912 goto free_eb;
4913 }
4914
4915 spin_lock(&mapping->private_lock);
4916 if (PagePrivate(p)) {
4917
4918
4919
4920
4921
4922
4923
4924 exists = (struct extent_buffer *)p->private;
4925 if (atomic_inc_not_zero(&exists->refs)) {
4926 spin_unlock(&mapping->private_lock);
4927 unlock_page(p);
4928 put_page(p);
4929 mark_extent_buffer_accessed(exists, p);
4930 goto free_eb;
4931 }
4932 exists = NULL;
4933
4934
4935
4936
4937
4938 ClearPagePrivate(p);
4939 WARN_ON(PageDirty(p));
4940 put_page(p);
4941 }
4942 attach_extent_buffer_page(eb, p);
4943 spin_unlock(&mapping->private_lock);
4944 WARN_ON(PageDirty(p));
4945 eb->pages[i] = p;
4946 if (!PageUptodate(p))
4947 uptodate = 0;

		/*
		 * We can't unlock the pages just yet since the extent buffer
		 * hasn't been properly inserted in the radix tree, this
		 * opens a race with btree_releasepage which can free a page
		 * while we are still filling in all pages for the buffer and
		 * we could crash.
		 */
4953 }
4954 if (uptodate)
4955 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4956again:
4957 ret = radix_tree_preload(GFP_NOFS);
4958 if (ret) {
4959 exists = ERR_PTR(ret);
4960 goto free_eb;
4961 }
4962
4963 spin_lock(&fs_info->buffer_lock);
4964 ret = radix_tree_insert(&fs_info->buffer_radix,
4965 start >> PAGE_SHIFT, eb);
4966 spin_unlock(&fs_info->buffer_lock);
4967 radix_tree_preload_end();
4968 if (ret == -EEXIST) {
4969 exists = find_extent_buffer(fs_info, start);
4970 if (exists)
4971 goto free_eb;
4972 else
4973 goto again;
4974 }
4975
4976 check_buffer_tree_ref(eb);
4977 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4978
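 /*
  * The buffer is now in the radix tree and visible to lookups and
  * releasepage. Fix up the per-page Checked bits before unlocking
  * the pages below.
  */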
4988 SetPageChecked(eb->pages[0]);
4989 for (i = 1; i < num_pages; i++) {
4990 p = eb->pages[i];
4991 ClearPageChecked(p);
4992 unlock_page(p);
4993 }
4994 unlock_page(eb->pages[0]);
4995 return eb;
4996
4997free_eb:
4998 WARN_ON(!atomic_dec_and_test(&eb->refs));
4999 for (i = 0; i < num_pages; i++) {
5000 if (eb->pages[i])
5001 unlock_page(eb->pages[i]);
5002 }
5003
5004 btrfs_release_extent_buffer(eb);
5005 return exists;
5006}
5007
5008static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5009{
5010 struct extent_buffer *eb =
5011 container_of(head, struct extent_buffer, rcu_head);
5012
5013 __free_extent_buffer(eb);
5014}
5015
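/*
 * Expects eb->refs_lock to be held; drops one reference and always releases
 * the lock before returning.
 */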
5017static int release_extent_buffer(struct extent_buffer *eb)
5018{
5019 WARN_ON(atomic_read(&eb->refs) == 0);
5020 if (atomic_dec_and_test(&eb->refs)) {
5021 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5022 struct btrfs_fs_info *fs_info = eb->fs_info;
5023
5024 spin_unlock(&eb->refs_lock);
5025
5026 spin_lock(&fs_info->buffer_lock);
5027 radix_tree_delete(&fs_info->buffer_radix,
5028 eb->start >> PAGE_SHIFT);
5029 spin_unlock(&fs_info->buffer_lock);
5030 } else {
5031 spin_unlock(&eb->refs_lock);
5032 }
5033
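 /* The last reference is gone, safe to release the buffer's pages. */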
5035 btrfs_release_extent_buffer_page(eb);
5036#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5037 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
5038 __free_extent_buffer(eb);
5039 return 1;
5040 }
5041#endif
5042 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5043 return 1;
5044 }
5045 spin_unlock(&eb->refs_lock);
5046
5047 return 0;
5048}
5049
5050void free_extent_buffer(struct extent_buffer *eb)
5051{
5052 int refs;
5053 int old;
5054 if (!eb)
5055 return;
5056
5057 while (1) {
5058 refs = atomic_read(&eb->refs);
5059 if (refs <= 3)
5060 break;
5061 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5062 if (old == refs)
5063 return;
5064 }
5065
5066 spin_lock(&eb->refs_lock);
5067 if (atomic_read(&eb->refs) == 2 &&
5068 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5069 atomic_dec(&eb->refs);
5070
5071 if (atomic_read(&eb->refs) == 2 &&
5072 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5073 !extent_buffer_under_io(eb) &&
5074 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5075 atomic_dec(&eb->refs);
5076
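 /*
  * Drop our reference; release_extent_buffer() takes care of
  * releasing eb->refs_lock.
  */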
5081 release_extent_buffer(eb);
5082}
5083
5084void free_extent_buffer_stale(struct extent_buffer *eb)
5085{
5086 if (!eb)
5087 return;
5088
5089 spin_lock(&eb->refs_lock);
5090 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5091
5092 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5093 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5094 atomic_dec(&eb->refs);
5095 release_extent_buffer(eb);
5096}
5097
5098void clear_extent_buffer_dirty(struct extent_buffer *eb)
5099{
5100 unsigned long i;
5101 unsigned long num_pages;
5102 struct page *page;
5103
5104 num_pages = num_extent_pages(eb->start, eb->len);
5105
5106 for (i = 0; i < num_pages; i++) {
5107 page = eb->pages[i];
5108 if (!PageDirty(page))
5109 continue;
5110
5111 lock_page(page);
5112 WARN_ON(!PagePrivate(page));
5113
5114 clear_page_dirty_for_io(page);
5115 spin_lock_irq(&page->mapping->tree_lock);
5116 if (!PageDirty(page)) {
5117 radix_tree_tag_clear(&page->mapping->page_tree,
5118 page_index(page),
5119 PAGECACHE_TAG_DIRTY);
5120 }
5121 spin_unlock_irq(&page->mapping->tree_lock);
5122 ClearPageError(page);
5123 unlock_page(page);
5124 }
5125 WARN_ON(atomic_read(&eb->refs) == 0);
5126}
5127
5128int set_extent_buffer_dirty(struct extent_buffer *eb)
5129{
5130 unsigned long i;
5131 unsigned long num_pages;
5132 int was_dirty = 0;
5133
5134 check_buffer_tree_ref(eb);
5135
5136 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5137
5138 num_pages = num_extent_pages(eb->start, eb->len);
5139 WARN_ON(atomic_read(&eb->refs) == 0);
5140 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5141
5142 for (i = 0; i < num_pages; i++)
5143 set_page_dirty(eb->pages[i]);
5144 return was_dirty;
5145}
5146
5147void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5148{
5149 unsigned long i;
5150 struct page *page;
5151 unsigned long num_pages;
5152
5153 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5154 num_pages = num_extent_pages(eb->start, eb->len);
5155 for (i = 0; i < num_pages; i++) {
5156 page = eb->pages[i];
5157 if (page)
5158 ClearPageUptodate(page);
5159 }
5160}
5161
5162void set_extent_buffer_uptodate(struct extent_buffer *eb)
5163{
5164 unsigned long i;
5165 struct page *page;
5166 unsigned long num_pages;
5167
5168 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5169 num_pages = num_extent_pages(eb->start, eb->len);
5170 for (i = 0; i < num_pages; i++) {
5171 page = eb->pages[i];
5172 SetPageUptodate(page);
5173 }
5174}
5175
5176int extent_buffer_uptodate(struct extent_buffer *eb)
5177{
5178 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5179}
5180
5181int read_extent_buffer_pages(struct extent_io_tree *tree,
5182 struct extent_buffer *eb, u64 start, int wait,
5183 get_extent_t *get_extent, int mirror_num)
5184{
5185 unsigned long i;
5186 unsigned long start_i;
5187 struct page *page;
5188 int err;
5189 int ret = 0;
5190 int locked_pages = 0;
5191 int all_uptodate = 1;
5192 unsigned long num_pages;
5193 unsigned long num_reads = 0;
5194 struct bio *bio = NULL;
5195 unsigned long bio_flags = 0;
5196
5197 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5198 return 0;
5199
5200 if (start) {
5201 WARN_ON(start < eb->start);
5202 start_i = (start >> PAGE_SHIFT) -
5203 (eb->start >> PAGE_SHIFT);
5204 } else {
5205 start_i = 0;
5206 }
5207
5208 num_pages = num_extent_pages(eb->start, eb->len);
5209 for (i = start_i; i < num_pages; i++) {
5210 page = eb->pages[i];
5211 if (wait == WAIT_NONE) {
5212 if (!trylock_page(page))
5213 goto unlock_exit;
5214 } else {
5215 lock_page(page);
5216 }
5217 locked_pages++;
5218 if (!PageUptodate(page)) {
5219 num_reads++;
5220 all_uptodate = 0;
5221 }
5222 }
5223 if (all_uptodate) {
5224 if (start_i == 0)
5225 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5226 goto unlock_exit;
5227 }
5228
5229 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5230 eb->read_mirror = 0;
5231 atomic_set(&eb->io_pages, num_reads);
5232 for (i = start_i; i < num_pages; i++) {
5233 page = eb->pages[i];
5234 if (!PageUptodate(page)) {
5235 ClearPageError(page);
5236 err = __extent_read_full_page(tree, page,
5237 get_extent, &bio,
5238 mirror_num, &bio_flags,
5239 READ | REQ_META);
5240 if (err)
5241 ret = err;
5242 } else {
5243 unlock_page(page);
5244 }
5245 }
5246
5247 if (bio) {
5248 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
5249 bio_flags);
5250 if (err)
5251 return err;
5252 }
5253
5254 if (ret || wait != WAIT_COMPLETE)
5255 return ret;
5256
5257 for (i = start_i; i < num_pages; i++) {
5258 page = eb->pages[i];
5259 wait_on_page_locked(page);
5260 if (!PageUptodate(page))
5261 ret = -EIO;
5262 }
5263
5264 return ret;
5265
5266unlock_exit:
5267 i = start_i;
5268 while (locked_pages > 0) {
5269 page = eb->pages[i];
5270 i++;
5271 unlock_page(page);
5272 locked_pages--;
5273 }
5274 return ret;
5275}
5276
5277void read_extent_buffer(struct extent_buffer *eb, void *dstv,
5278 unsigned long start,
5279 unsigned long len)
5280{
5281 size_t cur;
5282 size_t offset;
5283 struct page *page;
5284 char *kaddr;
5285 char *dst = (char *)dstv;
5286 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5287 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5288
5289 WARN_ON(start > eb->len);
5290 WARN_ON(start + len > eb->start + eb->len);
5291
5292 offset = (start_offset + start) & (PAGE_SIZE - 1);
5293
5294 while (len > 0) {
5295 page = eb->pages[i];
5296
5297 cur = min(len, (PAGE_SIZE - offset));
5298 kaddr = page_address(page);
5299 memcpy(dst, kaddr + offset, cur);
5300
5301 dst += cur;
5302 len -= cur;
5303 offset = 0;
5304 i++;
5305 }
5306}
5307
5308int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
5309 unsigned long start,
5310 unsigned long len)
5311{
5312 size_t cur;
5313 size_t offset;
5314 struct page *page;
5315 char *kaddr;
5316 char __user *dst = (char __user *)dstv;
5317 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5318 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5319 int ret = 0;
5320
5321 WARN_ON(start > eb->len);
5322 WARN_ON(start + len > eb->start + eb->len);
5323
5324 offset = (start_offset + start) & (PAGE_SIZE - 1);
5325
5326 while (len > 0) {
5327 page = eb->pages[i];
5328
5329 cur = min(len, (PAGE_SIZE - offset));
5330 kaddr = page_address(page);
5331 if (copy_to_user(dst, kaddr + offset, cur)) {
5332 ret = -EFAULT;
5333 break;
5334 }
5335
5336 dst += cur;
5337 len -= cur;
5338 offset = 0;
5339 i++;
5340 }
5341
5342 return ret;
5343}
5344
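/*
 * Map a contiguous area of the extent buffer.
 *
 * Returns 0 if the area fits within a single page (*map, *map_start and
 * *map_len are set), 1 if it spans two pages, or -EINVAL if it falls outside
 * the extent buffer.
 */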
5350int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
5351 unsigned long min_len, char **map,
5352 unsigned long *map_start,
5353 unsigned long *map_len)
5354{
5355 size_t offset = start & (PAGE_SIZE - 1);
5356 char *kaddr;
5357 struct page *p;
5358 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5359 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5360 unsigned long end_i = (start_offset + start + min_len - 1) >>
5361 PAGE_SHIFT;
5362
5363 if (i != end_i)
5364 return 1;
5365
5366 if (i == 0) {
5367 offset = start_offset;
5368 *map_start = 0;
5369 } else {
5370 offset = 0;
5371 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
5372 }
5373
5374 if (start + min_len > eb->len) {
5375 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
5376 "wanted %lu %lu\n",
5377 eb->start, eb->len, start, min_len);
5378 return -EINVAL;
5379 }
5380
5381 p = eb->pages[i];
5382 kaddr = page_address(p);
5383 *map = kaddr + offset;
5384 *map_len = PAGE_SIZE - offset;
5385 return 0;
5386}
5387
5388int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
5389 unsigned long start,
5390 unsigned long len)
5391{
5392 size_t cur;
5393 size_t offset;
5394 struct page *page;
5395 char *kaddr;
5396 char *ptr = (char *)ptrv;
5397 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5398 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5399 int ret = 0;
5400
5401 WARN_ON(start > eb->len);
5402 WARN_ON(start + len > eb->start + eb->len);
5403
5404 offset = (start_offset + start) & (PAGE_SIZE - 1);
5405
5406 while (len > 0) {
5407 page = eb->pages[i];
5408
5409 cur = min(len, (PAGE_SIZE - offset));
5410
5411 kaddr = page_address(page);
5412 ret = memcmp(ptr, kaddr + offset, cur);
5413 if (ret)
5414 break;
5415
5416 ptr += cur;
5417 len -= cur;
5418 offset = 0;
5419 i++;
5420 }
5421 return ret;
5422}
5423
5424void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5425 unsigned long start, unsigned long len)
5426{
5427 size_t cur;
5428 size_t offset;
5429 struct page *page;
5430 char *kaddr;
5431 char *src = (char *)srcv;
5432 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5433 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5434
5435 WARN_ON(start > eb->len);
5436 WARN_ON(start + len > eb->start + eb->len);
5437
5438 offset = (start_offset + start) & (PAGE_SIZE - 1);
5439
5440 while (len > 0) {
5441 page = eb->pages[i];
5442 WARN_ON(!PageUptodate(page));
5443
5444 cur = min(len, PAGE_SIZE - offset);
5445 kaddr = page_address(page);
5446 memcpy(kaddr + offset, src, cur);
5447
5448 src += cur;
5449 len -= cur;
5450 offset = 0;
5451 i++;
5452 }
5453}
5454
5455void memset_extent_buffer(struct extent_buffer *eb, char c,
5456 unsigned long start, unsigned long len)
5457{
5458 size_t cur;
5459 size_t offset;
5460 struct page *page;
5461 char *kaddr;
5462 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5463 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5464
5465 WARN_ON(start > eb->len);
5466 WARN_ON(start + len > eb->start + eb->len);
5467
5468 offset = (start_offset + start) & (PAGE_SIZE - 1);
5469
5470 while (len > 0) {
5471 page = eb->pages[i];
5472 WARN_ON(!PageUptodate(page));
5473
5474 cur = min(len, PAGE_SIZE - offset);
5475 kaddr = page_address(page);
5476 memset(kaddr + offset, c, cur);
5477
5478 len -= cur;
5479 offset = 0;
5480 i++;
5481 }
5482}
5483
5484void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5485 unsigned long dst_offset, unsigned long src_offset,
5486 unsigned long len)
5487{
5488 u64 dst_len = dst->len;
5489 size_t cur;
5490 size_t offset;
5491 struct page *page;
5492 char *kaddr;
5493 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5494 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
5495
5496 WARN_ON(src->len != dst_len);
5497
5498 offset = (start_offset + dst_offset) &
5499 (PAGE_SIZE - 1);
5500
5501 while (len > 0) {
5502 page = dst->pages[i];
5503 WARN_ON(!PageUptodate(page));
5504
5505 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
5506
5507 kaddr = page_address(page);
5508 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5509
5510 src_offset += cur;
5511 len -= cur;
5512 offset = 0;
5513 i++;
5514 }
5515}
5516
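/*
 * The bitmap helpers below work with byte granularity, since a bitmap item
 * is not guaranteed to be aligned to a word boundary and a single word may
 * therefore straddle two pages of the extent buffer.
 */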
5522#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
5523#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
5524#define BITMAP_FIRST_BYTE_MASK(start) \
5525 ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
5526#define BITMAP_LAST_BYTE_MASK(nbits) \
5527 (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
5528
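/*
 * eb_bitmap_offset - calculate the page and byte offset of a given bit
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page that contains the byte with the bit
 * @page_offset: return offset of that byte within the page
 */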
5542static inline void eb_bitmap_offset(struct extent_buffer *eb,
5543 unsigned long start, unsigned long nr,
5544 unsigned long *page_index,
5545 size_t *page_offset)
5546{
5547 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5548 size_t byte_offset = BIT_BYTE(nr);
5549 size_t offset;
5550
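 /*
  * The byte we want sits at: the extent buffer's offset within its
  * first page + the offset of the bitmap item + the offset of the
  * byte inside the bitmap item.
  */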
5556 offset = start_offset + start + byte_offset;
5557
5558 *page_index = offset >> PAGE_SHIFT;
5559 *page_offset = offset & (PAGE_SIZE - 1);
5560}
5561
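/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */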
5568int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5569 unsigned long nr)
5570{
5571 char *kaddr;
5572 struct page *page;
5573 unsigned long i;
5574 size_t offset;
5575
5576 eb_bitmap_offset(eb, start, nr, &i, &offset);
5577 page = eb->pages[i];
5578 WARN_ON(!PageUptodate(page));
5579 kaddr = page_address(page);
5580 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5581}
5582
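/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to set
 * @len: number of bits to set
 */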
5590void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5591 unsigned long pos, unsigned long len)
5592{
5593 char *kaddr;
5594 struct page *page;
5595 unsigned long i;
5596 size_t offset;
5597 const unsigned int size = pos + len;
5598 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5599 unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5600
5601 eb_bitmap_offset(eb, start, pos, &i, &offset);
5602 page = eb->pages[i];
5603 WARN_ON(!PageUptodate(page));
5604 kaddr = page_address(page);
5605
5606 while (len >= bits_to_set) {
5607 kaddr[offset] |= mask_to_set;
5608 len -= bits_to_set;
5609 bits_to_set = BITS_PER_BYTE;
5610 mask_to_set = ~0U;
5611 if (++offset >= PAGE_SIZE && len > 0) {
5612 offset = 0;
5613 page = eb->pages[++i];
5614 WARN_ON(!PageUptodate(page));
5615 kaddr = page_address(page);
5616 }
5617 }
5618 if (len) {
5619 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5620 kaddr[offset] |= mask_to_set;
5621 }
5622}
5623
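/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to clear
 * @len: number of bits to clear
 */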
5632void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5633 unsigned long pos, unsigned long len)
5634{
5635 char *kaddr;
5636 struct page *page;
5637 unsigned long i;
5638 size_t offset;
5639 const unsigned int size = pos + len;
5640 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5641 unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5642
5643 eb_bitmap_offset(eb, start, pos, &i, &offset);
5644 page = eb->pages[i];
5645 WARN_ON(!PageUptodate(page));
5646 kaddr = page_address(page);
5647
5648 while (len >= bits_to_clear) {
5649 kaddr[offset] &= ~mask_to_clear;
5650 len -= bits_to_clear;
5651 bits_to_clear = BITS_PER_BYTE;
5652 mask_to_clear = ~0U;
5653 if (++offset >= PAGE_SIZE && len > 0) {
5654 offset = 0;
5655 page = eb->pages[++i];
5656 WARN_ON(!PageUptodate(page));
5657 kaddr = page_address(page);
5658 }
5659 }
5660 if (len) {
5661 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5662 kaddr[offset] &= ~mask_to_clear;
5663 }
5664}
5665
5666static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5667{
5668 unsigned long distance = (src > dst) ? src - dst : dst - src;
5669 return distance < len;
5670}
5671
5672static void copy_pages(struct page *dst_page, struct page *src_page,
5673 unsigned long dst_off, unsigned long src_off,
5674 unsigned long len)
5675{
5676 char *dst_kaddr = page_address(dst_page);
5677 char *src_kaddr;
5678 int must_memmove = 0;
5679
5680 if (dst_page != src_page) {
5681 src_kaddr = page_address(src_page);
5682 } else {
5683 src_kaddr = dst_kaddr;
5684 if (areas_overlap(src_off, dst_off, len))
5685 must_memmove = 1;
5686 }
5687
5688 if (must_memmove)
5689 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5690 else
5691 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5692}
5693
5694void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5695 unsigned long src_offset, unsigned long len)
5696{
5697 size_t cur;
5698 size_t dst_off_in_page;
5699 size_t src_off_in_page;
5700 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5701 unsigned long dst_i;
5702 unsigned long src_i;
5703
5704 if (src_offset + len > dst->len) {
5705 btrfs_err(dst->fs_info,
5706 "memmove bogus src_offset %lu move "
5707 "len %lu dst len %lu", src_offset, len, dst->len);
5708 BUG_ON(1);
5709 }
5710 if (dst_offset + len > dst->len) {
5711 btrfs_err(dst->fs_info,
5712 "memmove bogus dst_offset %lu move "
5713 "len %lu dst len %lu", dst_offset, len, dst->len);
5714 BUG_ON(1);
5715 }
5716
5717 while (len > 0) {
5718 dst_off_in_page = (start_offset + dst_offset) &
5719 (PAGE_SIZE - 1);
5720 src_off_in_page = (start_offset + src_offset) &
5721 (PAGE_SIZE - 1);
5722
5723 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
5724 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
5725
5726 cur = min(len, (unsigned long)(PAGE_SIZE -
5727 src_off_in_page));
5728 cur = min_t(unsigned long, cur,
5729 (unsigned long)(PAGE_SIZE - dst_off_in_page));
5730
5731 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5732 dst_off_in_page, src_off_in_page, cur);
5733
5734 src_offset += cur;
5735 dst_offset += cur;
5736 len -= cur;
5737 }
5738}
5739
5740void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5741 unsigned long src_offset, unsigned long len)
5742{
5743 size_t cur;
5744 size_t dst_off_in_page;
5745 size_t src_off_in_page;
5746 unsigned long dst_end = dst_offset + len - 1;
5747 unsigned long src_end = src_offset + len - 1;
5748 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5749 unsigned long dst_i;
5750 unsigned long src_i;
5751
5752 if (src_offset + len > dst->len) {
5753 btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
5754 "len %lu len %lu", src_offset, len, dst->len);
5755 BUG_ON(1);
5756 }
5757 if (dst_offset + len > dst->len) {
5758 btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
5759 "len %lu len %lu", dst_offset, len, dst->len);
5760 BUG_ON(1);
5761 }
5762 if (dst_offset < src_offset) {
5763 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5764 return;
5765 }
5766 while (len > 0) {
5767 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
5768 src_i = (start_offset + src_end) >> PAGE_SHIFT;
5769
5770 dst_off_in_page = (start_offset + dst_end) &
5771 (PAGE_SIZE - 1);
5772 src_off_in_page = (start_offset + src_end) &
5773 (PAGE_SIZE - 1);
5774
5775 cur = min_t(unsigned long, len, src_off_in_page + 1);
5776 cur = min(cur, dst_off_in_page + 1);
5777 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5778 dst_off_in_page - cur + 1,
5779 src_off_in_page - cur + 1, cur);
5780
5781 dst_end -= cur;
5782 src_end -= cur;
5783 len -= cur;
5784 }
5785}
5786
5787int try_release_extent_buffer(struct page *page)
5788{
5789 struct extent_buffer *eb;
5790
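 /*
  * Take the mapping's private_lock so nobody can attach a new extent
  * buffer to this page while we look at it.
  */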
5795 spin_lock(&page->mapping->private_lock);
5796 if (!PagePrivate(page)) {
5797 spin_unlock(&page->mapping->private_lock);
5798 return 1;
5799 }
5800
5801 eb = (struct extent_buffer *)page->private;
5802 BUG_ON(!eb);
5803
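 /*
  * With refs_lock held, check that only the tree reference remains
  * and that the buffer is not under IO; otherwise it cannot be
  * released here.
  */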
5809 spin_lock(&eb->refs_lock);
5810 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5811 spin_unlock(&eb->refs_lock);
5812 spin_unlock(&page->mapping->private_lock);
5813 return 0;
5814 }
5815 spin_unlock(&page->mapping->private_lock);
5816
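 /*
  * If the tree reference bit is not set, the remaining reference is
  * held by a real user of the buffer and it cannot be released here.
  */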
5821 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
5822 spin_unlock(&eb->refs_lock);
5823 return 0;
5824 }
5825
5826 return release_extent_buffer(eb);
5827}
5828