#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set *btrfs_bioset;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}
32
33#ifdef CONFIG_BTRFS_DEBUG
34static LIST_HEAD(buffers);
35static LIST_HEAD(states);
36
37static DEFINE_SPINLOCK(leak_lock);
38
39static inline
40void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
41{
42 unsigned long flags;
43
44 spin_lock_irqsave(&leak_lock, flags);
45 list_add(new, head);
46 spin_unlock_irqrestore(&leak_lock, flags);
47}
48
49static inline
50void btrfs_leak_debug_del(struct list_head *entry)
51{
52 unsigned long flags;
53
54 spin_lock_irqsave(&leak_lock, flags);
55 list_del(entry);
56 spin_unlock_irqrestore(&leak_lock, flags);
57}
58
59static inline
60void btrfs_leak_debug_check(void)
61{
62 struct extent_state *state;
63 struct extent_buffer *eb;
64
65 while (!list_empty(&states)) {
66 state = list_entry(states.next, struct extent_state, leak_list);
67 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
68 state->start, state->end, state->state,
69 extent_state_in_tree(state),
70 atomic_read(&state->refs));
71 list_del(&state->leak_list);
72 kmem_cache_free(extent_state_cache, state);
73 }
74
75 while (!list_empty(&buffers)) {
76 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
77 printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
78 "refs %d\n",
79 eb->start, eb->len, atomic_read(&eb->refs));
80 list_del(&eb->leak_list);
81 kmem_cache_free(extent_buffer_cache, eb);
82 }
83}
84
85#define btrfs_debug_check_extent_io_range(tree, start, end) \
86 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
87static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 struct extent_io_tree *tree, u64 start, u64 end)
89{
90 struct inode *inode;
91 u64 isize;
92
93 if (!tree->mapping)
94 return;
95
96 inode = tree->mapping->host;
97 isize = i_size_read(inode);
98 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
99 printk_ratelimited(KERN_DEBUG
100 "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
101 caller, btrfs_ino(inode), isize, start, end);
102 }
103}
104#else
105#define btrfs_leak_debug_add(new, head) do {} while (0)
106#define btrfs_leak_debug_del(entry) do {} while (0)
107#define btrfs_leak_debug_check() do {} while (0)
108#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
109#endif
110
#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;
	unsigned long bio_flags;

	/* tells writepage not to lock the state bits for this range;
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use a WRITE_SYNC */
	unsigned int sync_io:1;
};

static noinline void flush_write_bio(void *data);
static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree)
{
	if (!tree->mapping)
		return NULL;
	return btrfs_sb(tree->mapping->host->i_sb);
}
142
143int __init extent_io_init(void)
144{
145 extent_state_cache = kmem_cache_create("btrfs_extent_state",
146 sizeof(struct extent_state), 0,
147 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
148 if (!extent_state_cache)
149 return -ENOMEM;
150
151 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
152 sizeof(struct extent_buffer), 0,
153 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
154 if (!extent_buffer_cache)
155 goto free_state_cache;
156
157 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
158 offsetof(struct btrfs_io_bio, bio));
159 if (!btrfs_bioset)
160 goto free_buffer_cache;
161
162 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
163 goto free_bioset;
164
165 return 0;
166
167free_bioset:
168 bioset_free(btrfs_bioset);
169 btrfs_bioset = NULL;
170
171free_buffer_cache:
172 kmem_cache_destroy(extent_buffer_cache);
173 extent_buffer_cache = NULL;
174
175free_state_cache:
176 kmem_cache_destroy(extent_state_cache);
177 extent_state_cache = NULL;
178 return -ENOMEM;
179}
180
void extent_io_exit(void)
{
	btrfs_leak_debug_check();

	/*
	 * Make sure all delayed rcu frees are flushed before we
	 * destroy the caches.
	 */
	rcu_barrier();
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
	if (btrfs_bioset)
		bioset_free(btrfs_bioset);
}
197
198void extent_io_tree_init(struct extent_io_tree *tree,
199 struct address_space *mapping)
200{
201 tree->state = RB_ROOT;
202 tree->ops = NULL;
203 tree->dirty_bytes = 0;
204 spin_lock_init(&tree->lock);
205 tree->mapping = mapping;
206}
207
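/*
 * Allocate a new extent_state from the slab cache and initialise its
 * refcount, wait queue and leak-debug bookkeeping.  Returns NULL if the
 * allocation fails.
 */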
208static struct extent_state *alloc_extent_state(gfp_t mask)
209{
210 struct extent_state *state;
211
212 state = kmem_cache_alloc(extent_state_cache, mask);
213 if (!state)
214 return state;
215 state->state = 0;
216 state->private = 0;
217 RB_CLEAR_NODE(&state->rb_node);
218 btrfs_leak_debug_add(&state->leak_list, &states);
219 atomic_set(&state->refs, 1);
220 init_waitqueue_head(&state->wq);
221 trace_alloc_extent_state(state, mask, _RET_IP_);
222 return state;
223}
224
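/*
 * Drop a reference on an extent_state and free it back to the slab cache
 * once the last reference is gone.  The state must no longer be linked
 * into any tree at that point.
 */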
225void free_extent_state(struct extent_state *state)
226{
227 if (!state)
228 return;
229 if (atomic_dec_and_test(&state->refs)) {
230 WARN_ON(extent_state_in_tree(state));
231 btrfs_leak_debug_del(&state->leak_list);
232 trace_free_extent_state(state, _RET_IP_);
233 kmem_cache_free(extent_state_cache, state);
234 }
235}
236
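/*
 * Link 'node' into the rb-tree, keyed by end offset.  If insertion hints
 * (p_in/parent_in) are supplied they are used directly, otherwise the
 * tree is walked from 'search_start' (or the root).  Returns the
 * conflicting node if the offset is already covered, NULL on success.
 */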
237static struct rb_node *tree_insert(struct rb_root *root,
238 struct rb_node *search_start,
239 u64 offset,
240 struct rb_node *node,
241 struct rb_node ***p_in,
242 struct rb_node **parent_in)
243{
244 struct rb_node **p;
245 struct rb_node *parent = NULL;
246 struct tree_entry *entry;
247
248 if (p_in && parent_in) {
249 p = *p_in;
250 parent = *parent_in;
251 goto do_insert;
252 }
253
254 p = search_start ? &search_start : &root->rb_node;
255 while (*p) {
256 parent = *p;
257 entry = rb_entry(parent, struct tree_entry, rb_node);
258
259 if (offset < entry->start)
260 p = &(*p)->rb_left;
261 else if (offset > entry->end)
262 p = &(*p)->rb_right;
263 else
264 return parent;
265 }
266
267do_insert:
268 rb_link_node(node, parent, p);
269 rb_insert_color(node, root);
270 return NULL;
271}
272
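/*
 * Search the tree for an extent_state containing 'offset'.  On a miss,
 * the neighbouring entries around the offset can be returned via
 * prev_ret and next_ret, and p_ret/parent_ret receive the rb-tree
 * insertion hints for the caller to reuse.
 */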
273static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
274 struct rb_node **prev_ret,
275 struct rb_node **next_ret,
276 struct rb_node ***p_ret,
277 struct rb_node **parent_ret)
278{
279 struct rb_root *root = &tree->state;
280 struct rb_node **n = &root->rb_node;
281 struct rb_node *prev = NULL;
282 struct rb_node *orig_prev = NULL;
283 struct tree_entry *entry;
284 struct tree_entry *prev_entry = NULL;
285
286 while (*n) {
287 prev = *n;
288 entry = rb_entry(prev, struct tree_entry, rb_node);
289 prev_entry = entry;
290
291 if (offset < entry->start)
292 n = &(*n)->rb_left;
293 else if (offset > entry->end)
294 n = &(*n)->rb_right;
295 else
296 return *n;
297 }
298
299 if (p_ret)
300 *p_ret = n;
301 if (parent_ret)
302 *parent_ret = prev;
303
304 if (prev_ret) {
305 orig_prev = prev;
306 while (prev && offset > prev_entry->end) {
307 prev = rb_next(prev);
308 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
309 }
310 *prev_ret = prev;
311 prev = orig_prev;
312 }
313
314 if (next_ret) {
315 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
316 while (prev && offset < prev_entry->start) {
317 prev = rb_prev(prev);
318 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
319 }
320 *next_ret = prev;
321 }
322 return NULL;
323}
324
325static inline struct rb_node *
326tree_search_for_insert(struct extent_io_tree *tree,
327 u64 offset,
328 struct rb_node ***p_ret,
329 struct rb_node **parent_ret)
330{
331 struct rb_node *prev = NULL;
332 struct rb_node *ret;
333
334 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
335 if (!ret)
336 return prev;
337 return ret;
338}
339
340static inline struct rb_node *tree_search(struct extent_io_tree *tree,
341 u64 offset)
342{
343 return tree_search_for_insert(tree, offset, NULL, NULL);
344}
345
346static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
347 struct extent_state *other)
348{
349 if (tree->ops && tree->ops->merge_extent_hook)
350 tree->ops->merge_extent_hook(tree->mapping->host, new,
351 other);
352}

/*
 * Look for adjacent extent states in the tree and merge them with the
 * given state when they cover contiguous ranges and carry exactly the
 * same state bits.  States with EXTENT_IOBITS or EXTENT_BOUNDARY set are
 * never merged.
 *
 * This should be called with the tree lock held.
 */
363static void merge_state(struct extent_io_tree *tree,
364 struct extent_state *state)
365{
366 struct extent_state *other;
367 struct rb_node *other_node;
368
369 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
370 return;
371
372 other_node = rb_prev(&state->rb_node);
373 if (other_node) {
374 other = rb_entry(other_node, struct extent_state, rb_node);
375 if (other->end == state->start - 1 &&
376 other->state == state->state) {
377 merge_cb(tree, state, other);
378 state->start = other->start;
379 rb_erase(&other->rb_node, &tree->state);
380 RB_CLEAR_NODE(&other->rb_node);
381 free_extent_state(other);
382 }
383 }
384 other_node = rb_next(&state->rb_node);
385 if (other_node) {
386 other = rb_entry(other_node, struct extent_state, rb_node);
387 if (other->start == state->end + 1 &&
388 other->state == state->state) {
389 merge_cb(tree, state, other);
390 state->end = other->end;
391 rb_erase(&other->rb_node, &tree->state);
392 RB_CLEAR_NODE(&other->rb_node);
393 free_extent_state(other);
394 }
395 }
396}
397
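/* Per-inode hooks invoked when bits are set or cleared on a state. */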
398static void set_state_cb(struct extent_io_tree *tree,
399 struct extent_state *state, unsigned *bits)
400{
401 if (tree->ops && tree->ops->set_bit_hook)
402 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
403}
404
405static void clear_state_cb(struct extent_io_tree *tree,
406 struct extent_state *state, unsigned *bits)
407{
408 if (tree->ops && tree->ops->clear_bit_hook)
409 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
410}
411
412static void set_state_bits(struct extent_io_tree *tree,
413 struct extent_state *state, unsigned *bits);

/*
 * Insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case
 * the state struct should be freed by the caller.
 *
 * The tree lock is not taken internally; this is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
425static int insert_state(struct extent_io_tree *tree,
426 struct extent_state *state, u64 start, u64 end,
427 struct rb_node ***p,
428 struct rb_node **parent,
429 unsigned *bits)
430{
431 struct rb_node *node;
432
433 if (end < start)
434 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
435 end, start);
436 state->start = start;
437 state->end = end;
438
439 set_state_bits(tree, state, bits);
440
441 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
442 if (node) {
443 struct extent_state *found;
444 found = rb_entry(node, struct extent_state, rb_node);
445 printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
446 "%llu %llu\n",
447 found->start, found->end, start, end);
448 return -EEXIST;
449 }
450 merge_state(tree, state);
451 return 0;
452}
453
454static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
455 u64 split)
456{
457 if (tree->ops && tree->ops->split_extent_hook)
458 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
459}

/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates
 * an offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].
 * After calling, there are two tree nodes, one representing
 * [orig->start, split - 1] and the other representing [split, orig->end].
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
475static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
476 struct extent_state *prealloc, u64 split)
477{
478 struct rb_node *node;
479
480 split_cb(tree, orig, split);
481
482 prealloc->start = orig->start;
483 prealloc->end = split - 1;
484 prealloc->state = orig->state;
485 orig->start = split;
486
487 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
488 &prealloc->rb_node, NULL, NULL);
489 if (node) {
490 free_extent_state(prealloc);
491 return -EEXIST;
492 }
493 return 0;
494}
495
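/* Return the extent_state that follows 'state' in the tree, or NULL. */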
496static struct extent_state *next_state(struct extent_state *state)
497{
498 struct rb_node *next = rb_next(&state->rb_node);
499 if (next)
500 return rb_entry(next, struct extent_state, rb_node);
501 else
502 return NULL;
503}

/*
 * Utility function to clear some bits in an extent state struct.  It will
 * optionally wake up any one waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
512static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
513 struct extent_state *state,
514 unsigned *bits, int wake)
515{
516 struct extent_state *next;
517 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
518
519 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
520 u64 range = state->end - state->start + 1;
521 WARN_ON(range > tree->dirty_bytes);
522 tree->dirty_bytes -= range;
523 }
524 clear_state_cb(tree, state, bits);
525 state->state &= ~bits_to_clear;
526 if (wake)
527 wake_up(&state->wq);
528 if (state->state == 0) {
529 next = next_state(state);
530 if (extent_state_in_tree(state)) {
531 rb_erase(&state->rb_node, &tree->state);
532 RB_CLEAR_NODE(&state->rb_node);
533 free_extent_state(state);
534 } else {
535 WARN_ON(1);
536 }
537 } else {
538 merge_state(tree, state);
539 next = next_state(state);
540 }
541 return next;
542}
543
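/*
 * Make sure a preallocated extent_state is available, falling back to a
 * GFP_ATOMIC allocation if the caller did not preallocate one.
 */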
544static struct extent_state *
545alloc_extent_state_atomic(struct extent_state *prealloc)
546{
547 if (!prealloc)
548 prealloc = alloc_extent_state(GFP_ATOMIC);
549
550 return prealloc;
551}
552
553static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
554{
555 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
556 "Extent tree was modified by another "
557 "thread while locked.");
558}

/*
 * Clear some bits on a range in the tree.  This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate
 * which allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
 * given range from the tree regardless of state (ie. for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock.
 */
572int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
573 unsigned bits, int wake, int delete,
574 struct extent_state **cached_state,
575 gfp_t mask)
576{
577 struct extent_state *state;
578 struct extent_state *cached;
579 struct extent_state *prealloc = NULL;
580 struct rb_node *node;
581 u64 last_end;
582 int err;
583 int clear = 0;
584
585 btrfs_debug_check_extent_io_range(tree, start, end);
586
587 if (bits & EXTENT_DELALLOC)
588 bits |= EXTENT_NORESERVE;
589
590 if (delete)
591 bits |= ~EXTENT_CTLBITS;
592 bits |= EXTENT_FIRST_DELALLOC;
593
594 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
595 clear = 1;
596again:
597 if (!prealloc && (mask & __GFP_WAIT)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
605 prealloc = alloc_extent_state(mask);
606 }
607
608 spin_lock(&tree->lock);
609 if (cached_state) {
610 cached = *cached_state;
611
612 if (clear) {
613 *cached_state = NULL;
614 cached_state = NULL;
615 }
616
617 if (cached && extent_state_in_tree(cached) &&
618 cached->start <= start && cached->end > start) {
619 if (clear)
620 atomic_dec(&cached->refs);
621 state = cached;
622 goto hit_next;
623 }
624 if (clear)
625 free_extent_state(cached);
626 }
627
	/*
	 * This search will find the extents that end after our range starts.
	 */
631 node = tree_search(tree, start);
632 if (!node)
633 goto out;
634 state = rb_entry(node, struct extent_state, rb_node);
635hit_next:
636 if (state->start > end)
637 goto out;
638 WARN_ON(state->end < start);
639 last_end = state->end;

	/* the state doesn't have the wanted bits, go ahead */
642 if (!(state->state & bits)) {
643 state = next_state(state);
644 goto next;
645 }

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on the second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
663 if (state->start < start) {
664 prealloc = alloc_extent_state_atomic(prealloc);
665 BUG_ON(!prealloc);
666 err = split_state(tree, state, prealloc, start);
667 if (err)
668 extent_io_tree_panic(tree, err);
669
670 prealloc = NULL;
671 if (err)
672 goto out;
673 if (state->end <= end) {
674 state = clear_state_bit(tree, state, &bits, wake);
675 goto next;
676 }
677 goto search_again;
678 }

	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit on the first half.
	 */
685 if (state->start <= end && state->end > end) {
686 prealloc = alloc_extent_state_atomic(prealloc);
687 BUG_ON(!prealloc);
688 err = split_state(tree, state, prealloc, end + 1);
689 if (err)
690 extent_io_tree_panic(tree, err);
691
692 if (wake)
693 wake_up(&state->wq);
694
695 clear_state_bit(tree, prealloc, &bits, wake);
696
697 prealloc = NULL;
698 goto out;
699 }
700
701 state = clear_state_bit(tree, state, &bits, wake);
702next:
703 if (last_end == (u64)-1)
704 goto out;
705 start = last_end + 1;
706 if (start <= end && state && !need_resched())
707 goto hit_next;
708 goto search_again;
709
710out:
711 spin_unlock(&tree->lock);
712 if (prealloc)
713 free_extent_state(prealloc);
714
715 return 0;
716
717search_again:
718 if (start > end)
719 goto out;
720 spin_unlock(&tree->lock);
721 if (mask & __GFP_WAIT)
722 cond_resched();
723 goto again;
724}
725
726static void wait_on_state(struct extent_io_tree *tree,
727 struct extent_state *state)
728 __releases(tree->lock)
729 __acquires(tree->lock)
730{
731 DEFINE_WAIT(wait);
732 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
733 spin_unlock(&tree->lock);
734 schedule();
735 spin_lock(&tree->lock);
736 finish_wait(&state->wq, &wait);
737}

/*
 * Waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
744static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
745 unsigned long bits)
746{
747 struct extent_state *state;
748 struct rb_node *node;
749
750 btrfs_debug_check_extent_io_range(tree, start, end);
751
752 spin_lock(&tree->lock);
753again:
754 while (1) {
		/*
		 * This search will find all the extents that end after our
		 * range starts.
		 */
759 node = tree_search(tree, start);
760process_node:
761 if (!node)
762 break;
763
764 state = rb_entry(node, struct extent_state, rb_node);
765
766 if (state->start > end)
767 goto out;
768
769 if (state->state & bits) {
770 start = state->start;
771 atomic_inc(&state->refs);
772 wait_on_state(tree, state);
773 free_extent_state(state);
774 goto again;
775 }
776 start = state->end + 1;
777
778 if (start > end)
779 break;
780
781 if (!cond_resched_lock(&tree->lock)) {
782 node = rb_next(node);
783 goto process_node;
784 }
785 }
786out:
787 spin_unlock(&tree->lock);
788}
789
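/*
 * Set bits on a single extent_state, update the dirty byte accounting
 * and call the owner's set_bit hook.  Control bits in EXTENT_CTLBITS are
 * filtered out before they reach state->state.
 */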
790static void set_state_bits(struct extent_io_tree *tree,
791 struct extent_state *state,
792 unsigned *bits)
793{
794 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
795
796 set_state_cb(tree, state, bits);
797 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
798 u64 range = state->end - state->start + 1;
799 tree->dirty_bytes += range;
800 }
801 state->state |= bits_to_set;
802}
803
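/*
 * Remember 'state' in *cached_ptr (taking a reference) so later calls on
 * the same range can skip the tree search.  With a non-zero 'flags' mask
 * the state is only cached when one of those bits is set.
 */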
804static void cache_state_if_flags(struct extent_state *state,
805 struct extent_state **cached_ptr,
806 unsigned flags)
807{
808 if (cached_ptr && !(*cached_ptr)) {
809 if (!flags || (state->state & flags)) {
810 *cached_ptr = state;
811 atomic_inc(&state->refs);
812 }
813 }
814}
815
816static void cache_state(struct extent_state *state,
817 struct extent_state **cached_ptr)
818{
819 return cache_state_if_flags(state, cached_ptr,
820 EXTENT_IOBITS | EXTENT_BOUNDARY);
821}

/*
 * Set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to tell what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if
 * some part of the range already has the desired bits set.  The start of
 * the existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */
834static int __must_check
835__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
836 unsigned bits, unsigned exclusive_bits,
837 u64 *failed_start, struct extent_state **cached_state,
838 gfp_t mask)
839{
840 struct extent_state *state;
841 struct extent_state *prealloc = NULL;
842 struct rb_node *node;
843 struct rb_node **p;
844 struct rb_node *parent;
845 int err = 0;
846 u64 last_start;
847 u64 last_end;
848
849 btrfs_debug_check_extent_io_range(tree, start, end);
850
851 bits |= EXTENT_FIRST_DELALLOC;
852again:
853 if (!prealloc && (mask & __GFP_WAIT)) {
854 prealloc = alloc_extent_state(mask);
855 BUG_ON(!prealloc);
856 }
857
858 spin_lock(&tree->lock);
859 if (cached_state && *cached_state) {
860 state = *cached_state;
861 if (state->start <= start && state->end > start &&
862 extent_state_in_tree(state)) {
863 node = &state->rb_node;
864 goto hit_next;
865 }
866 }
867
	/*
	 * This search will find all the extents that end after our
	 * range starts.
	 */
871 node = tree_search_for_insert(tree, start, &p, &parent);
872 if (!node) {
873 prealloc = alloc_extent_state_atomic(prealloc);
874 BUG_ON(!prealloc);
875 err = insert_state(tree, prealloc, start, end,
876 &p, &parent, &bits);
877 if (err)
878 extent_io_tree_panic(tree, err);
879
880 cache_state(prealloc, cached_state);
881 prealloc = NULL;
882 goto out;
883 }
884 state = rb_entry(node, struct extent_state, rb_node);
885hit_next:
886 last_start = state->start;
887 last_end = state->end;
888
889
890
891
892
893
894
895 if (state->start == start && state->end <= end) {
896 if (state->state & exclusive_bits) {
897 *failed_start = state->start;
898 err = -EEXIST;
899 goto out;
900 }
901
902 set_state_bits(tree, state, &bits);
903 cache_state(state, cached_state);
904 merge_state(tree, state);
905 if (last_end == (u64)-1)
906 goto out;
907 start = last_end + 1;
908 state = next_state(state);
909 if (start < end && state && state->start == start &&
910 !need_resched())
911 goto hit_next;
912 goto search_again;
913 }
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931 if (state->start < start) {
932 if (state->state & exclusive_bits) {
933 *failed_start = start;
934 err = -EEXIST;
935 goto out;
936 }
937
938 prealloc = alloc_extent_state_atomic(prealloc);
939 BUG_ON(!prealloc);
940 err = split_state(tree, state, prealloc, start);
941 if (err)
942 extent_io_tree_panic(tree, err);
943
944 prealloc = NULL;
945 if (err)
946 goto out;
947 if (state->end <= end) {
948 set_state_bits(tree, state, &bits);
949 cache_state(state, cached_state);
950 merge_state(tree, state);
951 if (last_end == (u64)-1)
952 goto out;
953 start = last_end + 1;
954 state = next_state(state);
955 if (start < end && state && state->start == start &&
956 !need_resched())
957 goto hit_next;
958 }
959 goto search_again;
960 }
961
962
963
964
965
966
967
968 if (state->start > start) {
969 u64 this_end;
970 if (end < last_start)
971 this_end = end;
972 else
973 this_end = last_start - 1;
974
975 prealloc = alloc_extent_state_atomic(prealloc);
976 BUG_ON(!prealloc);
977
978
979
980
981
982 err = insert_state(tree, prealloc, start, this_end,
983 NULL, NULL, &bits);
984 if (err)
985 extent_io_tree_panic(tree, err);
986
987 cache_state(prealloc, cached_state);
988 prealloc = NULL;
989 start = this_end + 1;
990 goto search_again;
991 }
992
993
994
995
996
997
998 if (state->start <= end && state->end > end) {
999 if (state->state & exclusive_bits) {
1000 *failed_start = start;
1001 err = -EEXIST;
1002 goto out;
1003 }
1004
1005 prealloc = alloc_extent_state_atomic(prealloc);
1006 BUG_ON(!prealloc);
1007 err = split_state(tree, state, prealloc, end + 1);
1008 if (err)
1009 extent_io_tree_panic(tree, err);
1010
1011 set_state_bits(tree, prealloc, &bits);
1012 cache_state(prealloc, cached_state);
1013 merge_state(tree, prealloc);
1014 prealloc = NULL;
1015 goto out;
1016 }
1017
1018 goto search_again;
1019
1020out:
1021 spin_unlock(&tree->lock);
1022 if (prealloc)
1023 free_extent_state(prealloc);
1024
1025 return err;
1026
1027search_again:
1028 if (start > end)
1029 goto out;
1030 spin_unlock(&tree->lock);
1031 if (mask & __GFP_WAIT)
1032 cond_resched();
1033 goto again;
1034}
1035
1036int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1037 unsigned bits, u64 * failed_start,
1038 struct extent_state **cached_state, gfp_t mask)
1039{
1040 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1041 cached_state, mask);
1042}

/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 *			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 * @mask:	the allocation mask
 *
 * This will go through and set bits for the given range.  If any states
 * exist already in this range they are set with the given bit and cleared
 * of the clear_bits.  This is only meant to be used by things that are
 * mergeable, ie. converting from say DELALLOC to DIRTY.  This is not meant
 * for it to be used with things that are not mergeable like extent states
 * that are doing delalloc accounting or i/o.
 */
1062int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1063 unsigned bits, unsigned clear_bits,
1064 struct extent_state **cached_state, gfp_t mask)
1065{
1066 struct extent_state *state;
1067 struct extent_state *prealloc = NULL;
1068 struct rb_node *node;
1069 struct rb_node **p;
1070 struct rb_node *parent;
1071 int err = 0;
1072 u64 last_start;
1073 u64 last_end;
1074 bool first_iteration = true;
1075
1076 btrfs_debug_check_extent_io_range(tree, start, end);
1077
1078again:
1079 if (!prealloc && (mask & __GFP_WAIT)) {
1080
1081
1082
1083
1084
1085
1086
1087 prealloc = alloc_extent_state(mask);
1088 if (!prealloc && !first_iteration)
1089 return -ENOMEM;
1090 }
1091
1092 spin_lock(&tree->lock);
1093 if (cached_state && *cached_state) {
1094 state = *cached_state;
1095 if (state->start <= start && state->end > start &&
1096 extent_state_in_tree(state)) {
1097 node = &state->rb_node;
1098 goto hit_next;
1099 }
1100 }
1101
	/*
	 * This search will find all the extents that end after our
	 * range starts.
	 */
1106 node = tree_search_for_insert(tree, start, &p, &parent);
1107 if (!node) {
1108 prealloc = alloc_extent_state_atomic(prealloc);
1109 if (!prealloc) {
1110 err = -ENOMEM;
1111 goto out;
1112 }
1113 err = insert_state(tree, prealloc, start, end,
1114 &p, &parent, &bits);
1115 if (err)
1116 extent_io_tree_panic(tree, err);
1117 cache_state(prealloc, cached_state);
1118 prealloc = NULL;
1119 goto out;
1120 }
1121 state = rb_entry(node, struct extent_state, rb_node);
1122hit_next:
1123 last_start = state->start;
1124 last_end = state->end;
1125
1126
1127
1128
1129
1130
1131
1132 if (state->start == start && state->end <= end) {
1133 set_state_bits(tree, state, &bits);
1134 cache_state(state, cached_state);
1135 state = clear_state_bit(tree, state, &clear_bits, 0);
1136 if (last_end == (u64)-1)
1137 goto out;
1138 start = last_end + 1;
1139 if (start < end && state && state->start == start &&
1140 !need_resched())
1141 goto hit_next;
1142 goto search_again;
1143 }
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161 if (state->start < start) {
1162 prealloc = alloc_extent_state_atomic(prealloc);
1163 if (!prealloc) {
1164 err = -ENOMEM;
1165 goto out;
1166 }
1167 err = split_state(tree, state, prealloc, start);
1168 if (err)
1169 extent_io_tree_panic(tree, err);
1170 prealloc = NULL;
1171 if (err)
1172 goto out;
1173 if (state->end <= end) {
1174 set_state_bits(tree, state, &bits);
1175 cache_state(state, cached_state);
1176 state = clear_state_bit(tree, state, &clear_bits, 0);
1177 if (last_end == (u64)-1)
1178 goto out;
1179 start = last_end + 1;
1180 if (start < end && state && state->start == start &&
1181 !need_resched())
1182 goto hit_next;
1183 }
1184 goto search_again;
1185 }
1186
1187
1188
1189
1190
1191
1192
1193 if (state->start > start) {
1194 u64 this_end;
1195 if (end < last_start)
1196 this_end = end;
1197 else
1198 this_end = last_start - 1;
1199
1200 prealloc = alloc_extent_state_atomic(prealloc);
1201 if (!prealloc) {
1202 err = -ENOMEM;
1203 goto out;
1204 }
1205
1206
1207
1208
1209
1210 err = insert_state(tree, prealloc, start, this_end,
1211 NULL, NULL, &bits);
1212 if (err)
1213 extent_io_tree_panic(tree, err);
1214 cache_state(prealloc, cached_state);
1215 prealloc = NULL;
1216 start = this_end + 1;
1217 goto search_again;
1218 }
1219
1220
1221
1222
1223
1224
1225 if (state->start <= end && state->end > end) {
1226 prealloc = alloc_extent_state_atomic(prealloc);
1227 if (!prealloc) {
1228 err = -ENOMEM;
1229 goto out;
1230 }
1231
1232 err = split_state(tree, state, prealloc, end + 1);
1233 if (err)
1234 extent_io_tree_panic(tree, err);
1235
1236 set_state_bits(tree, prealloc, &bits);
1237 cache_state(prealloc, cached_state);
1238 clear_state_bit(tree, prealloc, &clear_bits, 0);
1239 prealloc = NULL;
1240 goto out;
1241 }
1242
1243 goto search_again;
1244
1245out:
1246 spin_unlock(&tree->lock);
1247 if (prealloc)
1248 free_extent_state(prealloc);
1249
1250 return err;
1251
1252search_again:
1253 if (start > end)
1254 goto out;
1255 spin_unlock(&tree->lock);
1256 if (mask & __GFP_WAIT)
1257 cond_resched();
1258 first_iteration = false;
1259 goto again;
1260}

/* wrappers around set/clear extent bit */
1263int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1264 gfp_t mask)
1265{
1266 return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
1267 NULL, mask);
1268}
1269
1270int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1271 unsigned bits, gfp_t mask)
1272{
1273 return set_extent_bit(tree, start, end, bits, NULL,
1274 NULL, mask);
1275}
1276
1277int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1278 unsigned bits, gfp_t mask)
1279{
1280 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
1281}
1282
1283int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
1284 struct extent_state **cached_state, gfp_t mask)
1285{
1286 return set_extent_bit(tree, start, end,
1287 EXTENT_DELALLOC | EXTENT_UPTODATE,
1288 NULL, cached_state, mask);
1289}
1290
1291int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
1292 struct extent_state **cached_state, gfp_t mask)
1293{
1294 return set_extent_bit(tree, start, end,
1295 EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
1296 NULL, cached_state, mask);
1297}
1298
1299int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1300 gfp_t mask)
1301{
1302 return clear_extent_bit(tree, start, end,
1303 EXTENT_DIRTY | EXTENT_DELALLOC |
1304 EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
1305}
1306
1307int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
1308 gfp_t mask)
1309{
1310 return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
1311 NULL, mask);
1312}
1313
1314int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1315 struct extent_state **cached_state, gfp_t mask)
1316{
1317 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
1318 cached_state, mask);
1319}
1320
1321int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1322 struct extent_state **cached_state, gfp_t mask)
1323{
1324 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
1325 cached_state, mask);
1326}

/*
 * Either insert or lock the state struct between start and end.  If the
 * range is already locked this waits for the EXTENT_LOCKED bit to clear
 * and retries.  Returns < 0 on error.
 */
1332int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1333 unsigned bits, struct extent_state **cached_state)
1334{
1335 int err;
1336 u64 failed_start;
1337
1338 while (1) {
1339 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
1340 EXTENT_LOCKED, &failed_start,
1341 cached_state, GFP_NOFS);
1342 if (err == -EEXIST) {
1343 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1344 start = failed_start;
1345 } else
1346 break;
1347 WARN_ON(start > end);
1348 }
1349 return err;
1350}
1351
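/*
 * Typical usage (illustrative sketch, not taken from this file): callers
 * bracket work on a byte range with lock_extent()/unlock_extent(), e.g.
 *
 *	lock_extent(&BTRFS_I(inode)->io_tree, start, end);
 *	...do work on [start, end]...
 *	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
 */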
1352int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1353{
1354 return lock_extent_bits(tree, start, end, 0, NULL);
1355}
1356
1357int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1358{
1359 int err;
1360 u64 failed_start;
1361
1362 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1363 &failed_start, NULL, GFP_NOFS);
1364 if (err == -EEXIST) {
1365 if (failed_start > start)
1366 clear_extent_bit(tree, start, failed_start - 1,
1367 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1368 return 0;
1369 }
1370 return 1;
1371}
1372
1373int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1374 struct extent_state **cached, gfp_t mask)
1375{
1376 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
1377 mask);
1378}
1379
1380int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1381{
1382 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1383 GFP_NOFS);
1384}
1385
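/*
 * Clear the dirty bit on every page cache page in [start, end] so the
 * pages can be resubmitted for IO.  The caller must ensure the pages are
 * present (find_get_page() is expected to succeed here).
 */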
1386int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1387{
1388 unsigned long index = start >> PAGE_CACHE_SHIFT;
1389 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1390 struct page *page;
1391
1392 while (index <= end_index) {
1393 page = find_get_page(inode->i_mapping, index);
1394 BUG_ON(!page);
1395 clear_page_dirty_for_io(page);
1396 page_cache_release(page);
1397 index++;
1398 }
1399 return 0;
1400}
1401
1402int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1403{
1404 unsigned long index = start >> PAGE_CACHE_SHIFT;
1405 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1406 struct page *page;
1407
1408 while (index <= end_index) {
1409 page = find_get_page(inode->i_mapping, index);
1410 BUG_ON(!page);
1411 __set_page_dirty_nobuffers(page);
1412 account_page_redirty(page);
1413 page_cache_release(page);
1414 index++;
1415 }
1416 return 0;
1417}

/*
 * Helper function to set every page in the range [start, end] writeback
 * in the page cache.
 */
1422static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1423{
1424 unsigned long index = start >> PAGE_CACHE_SHIFT;
1425 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1426 struct page *page;
1427
1428 while (index <= end_index) {
1429 page = find_get_page(tree->mapping, index);
1430 BUG_ON(!page);
1431 set_page_writeback(page);
1432 page_cache_release(page);
1433 index++;
1434 }
1435 return 0;
1436}

/*
 * Find the first state struct with 'bits' set after 'start', and return
 * it.  tree->lock must be held.  NULL will be returned if nothing was
 * found after 'start'.
 */
1442static struct extent_state *
1443find_first_extent_bit_state(struct extent_io_tree *tree,
1444 u64 start, unsigned bits)
1445{
1446 struct rb_node *node;
1447 struct extent_state *state;
1448
1449
1450
1451
1452
1453 node = tree_search(tree, start);
1454 if (!node)
1455 goto out;
1456
1457 while (1) {
1458 state = rb_entry(node, struct extent_state, rb_node);
1459 if (state->end >= start && (state->state & bits))
1460 return state;
1461
1462 node = rb_next(node);
1463 if (!node)
1464 break;
1465 }
1466out:
1467 return NULL;
1468}

/*
 * Find the first offset in the io tree with 'bits' set.  Zero is returned
 * if something was found, and *start_ret and *end_ret are set to reflect
 * the state struct that was found.
 *
 * If nothing was found, 1 is returned.  If found something, return 0.
 */
1477int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1478 u64 *start_ret, u64 *end_ret, unsigned bits,
1479 struct extent_state **cached_state)
1480{
1481 struct extent_state *state;
1482 struct rb_node *n;
1483 int ret = 1;
1484
1485 spin_lock(&tree->lock);
1486 if (cached_state && *cached_state) {
1487 state = *cached_state;
1488 if (state->end == start - 1 && extent_state_in_tree(state)) {
1489 n = rb_next(&state->rb_node);
1490 while (n) {
1491 state = rb_entry(n, struct extent_state,
1492 rb_node);
1493 if (state->state & bits)
1494 goto got_it;
1495 n = rb_next(n);
1496 }
1497 free_extent_state(*cached_state);
1498 *cached_state = NULL;
1499 goto out;
1500 }
1501 free_extent_state(*cached_state);
1502 *cached_state = NULL;
1503 }
1504
1505 state = find_first_extent_bit_state(tree, start, bits);
1506got_it:
1507 if (state) {
1508 cache_state_if_flags(state, cached_state, 0);
1509 *start_ret = state->start;
1510 *end_ret = state->end;
1511 ret = 0;
1512 }
1513out:
1514 spin_unlock(&tree->lock);
1515 return ret;
1516}

/*
 * Find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  *start and *end are used to return the range.
 *
 * Non-zero is returned if we find something, 0 if nothing was in the tree.
 */
1524static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1525 u64 *start, u64 *end, u64 max_bytes,
1526 struct extent_state **cached_state)
1527{
1528 struct rb_node *node;
1529 struct extent_state *state;
1530 u64 cur_start = *start;
1531 u64 found = 0;
1532 u64 total_bytes = 0;
1533
1534 spin_lock(&tree->lock);
1535
1536
1537
1538
1539
1540 node = tree_search(tree, cur_start);
1541 if (!node) {
1542 if (!found)
1543 *end = (u64)-1;
1544 goto out;
1545 }
1546
1547 while (1) {
1548 state = rb_entry(node, struct extent_state, rb_node);
1549 if (found && (state->start != cur_start ||
1550 (state->state & EXTENT_BOUNDARY))) {
1551 goto out;
1552 }
1553 if (!(state->state & EXTENT_DELALLOC)) {
1554 if (!found)
1555 *end = state->end;
1556 goto out;
1557 }
1558 if (!found) {
1559 *start = state->start;
1560 *cached_state = state;
1561 atomic_inc(&state->refs);
1562 }
1563 found++;
1564 *end = state->end;
1565 cur_start = state->end + 1;
1566 node = rb_next(node);
1567 total_bytes += state->end - state->start + 1;
1568 if (total_bytes >= max_bytes)
1569 break;
1570 if (!node)
1571 break;
1572 }
1573out:
1574 spin_unlock(&tree->lock);
1575 return found;
1576}
1577
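/*
 * Unlock all pages in [start, end] except 'locked_page', dropping the
 * page cache references taken by find_get_pages_contig().
 */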
1578static noinline void __unlock_for_delalloc(struct inode *inode,
1579 struct page *locked_page,
1580 u64 start, u64 end)
1581{
1582 int ret;
1583 struct page *pages[16];
1584 unsigned long index = start >> PAGE_CACHE_SHIFT;
1585 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1586 unsigned long nr_pages = end_index - index + 1;
1587 int i;
1588
1589 if (index == locked_page->index && end_index == index)
1590 return;
1591
1592 while (nr_pages > 0) {
1593 ret = find_get_pages_contig(inode->i_mapping, index,
1594 min_t(unsigned long, nr_pages,
1595 ARRAY_SIZE(pages)), pages);
1596 for (i = 0; i < ret; i++) {
1597 if (pages[i] != locked_page)
1598 unlock_page(pages[i]);
1599 page_cache_release(pages[i]);
1600 }
1601 nr_pages -= ret;
1602 index += ret;
1603 cond_resched();
1604 }
1605}
1606
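/*
 * Lock every page cache page covering [delalloc_start, delalloc_end],
 * except 'locked_page' which the caller already holds.  Returns -EAGAIN,
 * after unlocking everything taken so far, if a page went away or was
 * cleaned under us.
 */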
1607static noinline int lock_delalloc_pages(struct inode *inode,
1608 struct page *locked_page,
1609 u64 delalloc_start,
1610 u64 delalloc_end)
1611{
1612 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
1613 unsigned long start_index = index;
1614 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
1615 unsigned long pages_locked = 0;
1616 struct page *pages[16];
1617 unsigned long nrpages;
1618 int ret;
1619 int i;
1620
1621
1622 if (index == locked_page->index && index == end_index)
1623 return 0;
1624
1625
1626 nrpages = end_index - index + 1;
1627 while (nrpages > 0) {
1628 ret = find_get_pages_contig(inode->i_mapping, index,
1629 min_t(unsigned long,
1630 nrpages, ARRAY_SIZE(pages)), pages);
1631 if (ret == 0) {
1632 ret = -EAGAIN;
1633 goto done;
1634 }
1635
1636 for (i = 0; i < ret; i++) {
1637
1638
1639
1640
1641 if (pages[i] != locked_page) {
1642 lock_page(pages[i]);
1643 if (!PageDirty(pages[i]) ||
1644 pages[i]->mapping != inode->i_mapping) {
1645 ret = -EAGAIN;
1646 unlock_page(pages[i]);
1647 page_cache_release(pages[i]);
1648 goto done;
1649 }
1650 }
1651 page_cache_release(pages[i]);
1652 pages_locked++;
1653 }
1654 nrpages -= ret;
1655 index += ret;
1656 cond_resched();
1657 }
1658 ret = 0;
1659done:
1660 if (ret && pages_locked) {
1661 __unlock_for_delalloc(inode, locked_page,
1662 delalloc_start,
1663 ((u64)(start_index + pages_locked - 1)) <<
1664 PAGE_CACHE_SHIFT);
1665 }
1666 return ret;
1667}

/*
 * Find and lock a contiguous range of bytes in the file marked as
 * delalloc, not more than 'max_bytes'.  Both the pages and the extent
 * range are locked on success.  *start and *end are used to return the
 * range; non-zero is returned if we find something, 0 if nothing was in
 * the tree.
 */
1675STATIC u64 find_lock_delalloc_range(struct inode *inode,
1676 struct extent_io_tree *tree,
1677 struct page *locked_page, u64 *start,
1678 u64 *end, u64 max_bytes)
1679{
1680 u64 delalloc_start;
1681 u64 delalloc_end;
1682 u64 found;
1683 struct extent_state *cached_state = NULL;
1684 int ret;
1685 int loops = 0;
1686
1687again:
1688
1689 delalloc_start = *start;
1690 delalloc_end = 0;
1691 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1692 max_bytes, &cached_state);
1693 if (!found || delalloc_end <= *start) {
1694 *start = delalloc_start;
1695 *end = delalloc_end;
1696 free_extent_state(cached_state);
1697 return 0;
1698 }
1699
1700
1701
1702
1703
1704
1705 if (delalloc_start < *start)
1706 delalloc_start = *start;
1707
1708
1709
1710
1711 if (delalloc_end + 1 - delalloc_start > max_bytes)
1712 delalloc_end = delalloc_start + max_bytes - 1;
1713
1714
1715 ret = lock_delalloc_pages(inode, locked_page,
1716 delalloc_start, delalloc_end);
1717 if (ret == -EAGAIN) {
1718
1719
1720
1721 free_extent_state(cached_state);
1722 cached_state = NULL;
1723 if (!loops) {
1724 max_bytes = PAGE_CACHE_SIZE;
1725 loops = 1;
1726 goto again;
1727 } else {
1728 found = 0;
1729 goto out_failed;
1730 }
1731 }
1732 BUG_ON(ret);
1733
1734
1735 lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
1736
1737
1738 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1739 EXTENT_DELALLOC, 1, cached_state);
1740 if (!ret) {
1741 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1742 &cached_state, GFP_NOFS);
1743 __unlock_for_delalloc(inode, locked_page,
1744 delalloc_start, delalloc_end);
1745 cond_resched();
1746 goto again;
1747 }
1748 free_extent_state(cached_state);
1749 *start = delalloc_start;
1750 *end = delalloc_end;
1751out_failed:
1752 return found;
1753}
1754
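/*
 * Clear the given bits on the io tree for [start, end] and apply the
 * requested page operations (unlock, clear dirty, set/end writeback, set
 * error) to every page in the range except 'locked_page'.
 */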
1755int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1756 struct page *locked_page,
1757 unsigned clear_bits,
1758 unsigned long page_ops)
1759{
1760 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1761 int ret;
1762 struct page *pages[16];
1763 unsigned long index = start >> PAGE_CACHE_SHIFT;
1764 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1765 unsigned long nr_pages = end_index - index + 1;
1766 int i;
1767
1768 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1769 if (page_ops == 0)
1770 return 0;
1771
1772 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1773 mapping_set_error(inode->i_mapping, -EIO);
1774
1775 while (nr_pages > 0) {
1776 ret = find_get_pages_contig(inode->i_mapping, index,
1777 min_t(unsigned long,
1778 nr_pages, ARRAY_SIZE(pages)), pages);
1779 for (i = 0; i < ret; i++) {
1780
1781 if (page_ops & PAGE_SET_PRIVATE2)
1782 SetPagePrivate2(pages[i]);
1783
1784 if (pages[i] == locked_page) {
1785 page_cache_release(pages[i]);
1786 continue;
1787 }
1788 if (page_ops & PAGE_CLEAR_DIRTY)
1789 clear_page_dirty_for_io(pages[i]);
1790 if (page_ops & PAGE_SET_WRITEBACK)
1791 set_page_writeback(pages[i]);
1792 if (page_ops & PAGE_SET_ERROR)
1793 SetPageError(pages[i]);
1794 if (page_ops & PAGE_END_WRITEBACK)
1795 end_page_writeback(pages[i]);
1796 if (page_ops & PAGE_UNLOCK)
1797 unlock_page(pages[i]);
1798 page_cache_release(pages[i]);
1799 }
1800 nr_pages -= ret;
1801 index += ret;
1802 cond_resched();
1803 }
1804 return 0;
1805}

/*
 * Count the number of bytes in the tree that have all the given bits set.
 * This can be fairly slow, except for EXTENT_DIRTY counted from offset 0,
 * which is cached in tree->dirty_bytes.  With 'contig' set, counting
 * stops at the first gap.  The total number of bytes found is returned.
 */
1812u64 count_range_bits(struct extent_io_tree *tree,
1813 u64 *start, u64 search_end, u64 max_bytes,
1814 unsigned bits, int contig)
1815{
1816 struct rb_node *node;
1817 struct extent_state *state;
1818 u64 cur_start = *start;
1819 u64 total_bytes = 0;
1820 u64 last = 0;
1821 int found = 0;
1822
1823 if (WARN_ON(search_end <= cur_start))
1824 return 0;
1825
1826 spin_lock(&tree->lock);
1827 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1828 total_bytes = tree->dirty_bytes;
1829 goto out;
1830 }
1831
1832
1833
1834
1835 node = tree_search(tree, cur_start);
1836 if (!node)
1837 goto out;
1838
1839 while (1) {
1840 state = rb_entry(node, struct extent_state, rb_node);
1841 if (state->start > search_end)
1842 break;
1843 if (contig && found && state->start > last + 1)
1844 break;
1845 if (state->end >= cur_start && (state->state & bits) == bits) {
1846 total_bytes += min(search_end, state->end) + 1 -
1847 max(cur_start, state->start);
1848 if (total_bytes >= max_bytes)
1849 break;
1850 if (!found) {
1851 *start = max(cur_start, state->start);
1852 found = 1;
1853 }
1854 last = state->end;
1855 } else if (contig && found) {
1856 break;
1857 }
1858 node = rb_next(node);
1859 if (!node)
1860 break;
1861 }
1862out:
1863 spin_unlock(&tree->lock);
1864 return total_bytes;
1865}

/*
 * Set the private field for the extent state that starts exactly at
 * 'start'.  Returns -ENOENT if there is no such state in the tree.
 */
1871static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1872{
1873 struct rb_node *node;
1874 struct extent_state *state;
1875 int ret = 0;
1876
1877 spin_lock(&tree->lock);
1878
1879
1880
1881
1882 node = tree_search(tree, start);
1883 if (!node) {
1884 ret = -ENOENT;
1885 goto out;
1886 }
1887 state = rb_entry(node, struct extent_state, rb_node);
1888 if (state->start != start) {
1889 ret = -ENOENT;
1890 goto out;
1891 }
1892 state->private = private;
1893out:
1894 spin_unlock(&tree->lock);
1895 return ret;
1896}
1897
1898int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1899{
1900 struct rb_node *node;
1901 struct extent_state *state;
1902 int ret = 0;
1903
1904 spin_lock(&tree->lock);
1905
1906
1907
1908
1909 node = tree_search(tree, start);
1910 if (!node) {
1911 ret = -ENOENT;
1912 goto out;
1913 }
1914 state = rb_entry(node, struct extent_state, rb_node);
1915 if (state->start != start) {
1916 ret = -ENOENT;
1917 goto out;
1918 }
1919 *private = state->private;
1920out:
1921 spin_unlock(&tree->lock);
1922 return ret;
1923}

/*
 * Searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree has
 * the bits set.  Otherwise, 1 is returned if any bit in the range is
 * found set.
 */
1931int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1932 unsigned bits, int filled, struct extent_state *cached)
1933{
1934 struct extent_state *state = NULL;
1935 struct rb_node *node;
1936 int bitset = 0;
1937
1938 spin_lock(&tree->lock);
1939 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1940 cached->end > start)
1941 node = &cached->rb_node;
1942 else
1943 node = tree_search(tree, start);
1944 while (node && start <= end) {
1945 state = rb_entry(node, struct extent_state, rb_node);
1946
1947 if (filled && state->start > start) {
1948 bitset = 0;
1949 break;
1950 }
1951
1952 if (state->start > end)
1953 break;
1954
1955 if (state->state & bits) {
1956 bitset = 1;
1957 if (!filled)
1958 break;
1959 } else if (filled) {
1960 bitset = 0;
1961 break;
1962 }
1963
1964 if (state->end == (u64)-1)
1965 break;
1966
1967 start = state->end + 1;
1968 if (start > end)
1969 break;
1970 node = rb_next(node);
1971 if (!node) {
1972 if (filled)
1973 bitset = 0;
1974 break;
1975 }
1976 }
1977 spin_unlock(&tree->lock);
1978 return bitset;
1979}

/*
 * Helper function to set a given page up to date if all the extents in
 * the tree for that page are up to date.
 */
1985static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1986{
1987 u64 start = page_offset(page);
1988 u64 end = start + PAGE_CACHE_SIZE - 1;
1989 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1990 SetPageUptodate(page);
1991}
1992
1993int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1994{
1995 int ret;
1996 int err = 0;
1997 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1998
1999 set_state_private(failure_tree, rec->start, 0);
2000 ret = clear_extent_bits(failure_tree, rec->start,
2001 rec->start + rec->len - 1,
2002 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2003 if (ret)
2004 err = ret;
2005
2006 ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
2007 rec->start + rec->len - 1,
2008 EXTENT_DAMAGED, GFP_NOFS);
2009 if (ret && !err)
2010 err = ret;
2011
2012 kfree(rec);
2013 return err;
2014}

/*
 * This deliberately bypasses the standard btrfs submit functions: the
 * standard behaviour is to write all copies in a raid setup, but here we
 * only want to rewrite the one bad copy.  So we do the mapping ourselves
 * and issue the bio directly.
 *
 * The write is submitted synchronously, which also prevents the read that
 * triggered the error from finishing before the repair is on disk.
 */
2026int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
2027 struct page *page, unsigned int pg_offset, int mirror_num)
2028{
2029 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2030 struct bio *bio;
2031 struct btrfs_device *dev;
2032 u64 map_length = 0;
2033 u64 sector;
2034 struct btrfs_bio *bbio = NULL;
2035 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
2036 int ret;
2037
2038 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
2039 BUG_ON(!mirror_num);
2040
2041
2042 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
2043 return 0;
2044
2045 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2046 if (!bio)
2047 return -EIO;
2048 bio->bi_iter.bi_size = 0;
2049 map_length = length;
2050
2051 ret = btrfs_map_block(fs_info, WRITE, logical,
2052 &map_length, &bbio, mirror_num);
2053 if (ret) {
2054 bio_put(bio);
2055 return -EIO;
2056 }
2057 BUG_ON(mirror_num != bbio->mirror_num);
2058 sector = bbio->stripes[mirror_num-1].physical >> 9;
2059 bio->bi_iter.bi_sector = sector;
2060 dev = bbio->stripes[mirror_num-1].dev;
2061 btrfs_put_bbio(bbio);
2062 if (!dev || !dev->bdev || !dev->writeable) {
2063 bio_put(bio);
2064 return -EIO;
2065 }
2066 bio->bi_bdev = dev->bdev;
2067 bio_add_page(bio, page, length, pg_offset);
2068
2069 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
2070
2071 bio_put(bio);
2072 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2073 return -EIO;
2074 }
2075
2076 printk_ratelimited_in_rcu(KERN_INFO
2077 "BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n",
2078 btrfs_ino(inode), start,
2079 rcu_str_deref(dev->name), sector);
2080 bio_put(bio);
2081 return 0;
2082}
2083
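/*
 * Write each page of the extent buffer back to the given mirror to
 * repair a metadata copy that failed its read checks.
 */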
2084int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
2085 int mirror_num)
2086{
2087 u64 start = eb->start;
2088 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2089 int ret = 0;
2090
2091 if (root->fs_info->sb->s_flags & MS_RDONLY)
2092 return -EROFS;
2093
2094 for (i = 0; i < num_pages; i++) {
2095 struct page *p = eb->pages[i];
2096
2097 ret = repair_io_failure(root->fs_info->btree_inode, start,
2098 PAGE_CACHE_SIZE, start, p,
2099 start - page_offset(p), mirror_num);
2100 if (ret)
2101 break;
2102 start += PAGE_CACHE_SIZE;
2103 }
2104
2105 return ret;
2106}

/*
 * Called from the read completion path once good data for 'start' is in
 * 'page': look up any io_failure_record covering that offset, rewrite the
 * mirror that failed earlier if possible, and free the record.
 */
2112int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2113 unsigned int pg_offset)
2114{
2115 u64 private;
2116 u64 private_failure;
2117 struct io_failure_record *failrec;
2118 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2119 struct extent_state *state;
2120 int num_copies;
2121 int ret;
2122
2123 private = 0;
2124 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
2125 (u64)-1, 1, EXTENT_DIRTY, 0);
2126 if (!ret)
2127 return 0;
2128
2129 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
2130 &private_failure);
2131 if (ret)
2132 return 0;
2133
2134 failrec = (struct io_failure_record *)(unsigned long) private_failure;
2135 BUG_ON(!failrec->this_mirror);
2136
2137 if (failrec->in_validation) {
2138
2139 pr_debug("clean_io_failure: freeing dummy error at %llu\n",
2140 failrec->start);
2141 goto out;
2142 }
2143 if (fs_info->sb->s_flags & MS_RDONLY)
2144 goto out;
2145
2146 spin_lock(&BTRFS_I(inode)->io_tree.lock);
2147 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
2148 failrec->start,
2149 EXTENT_LOCKED);
2150 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2151
2152 if (state && state->start <= failrec->start &&
2153 state->end >= failrec->start + failrec->len - 1) {
2154 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2155 failrec->len);
2156 if (num_copies > 1) {
2157 repair_io_failure(inode, start, failrec->len,
2158 failrec->logical, page,
2159 pg_offset, failrec->failed_mirror);
2160 }
2161 }
2162
2163out:
2164 free_io_failure(inode, failrec);
2165
2166 return 0;
2167}

/*
 * Release every io_failure_record in [start, end] from the failure tree,
 * freeing the records and the extent states that carried them.  The
 * caller must make sure no new failures can be added for this range
 * while it runs (extent locked, under an ordered extent, or inode being
 * freed).
 */
2175void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2176{
2177 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2178 struct io_failure_record *failrec;
2179 struct extent_state *state, *next;
2180
2181 if (RB_EMPTY_ROOT(&failure_tree->state))
2182 return;
2183
2184 spin_lock(&failure_tree->lock);
2185 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2186 while (state) {
2187 if (state->start > end)
2188 break;
2189
2190 ASSERT(state->end <= end);
2191
2192 next = next_state(state);
2193
2194 failrec = (struct io_failure_record *)(unsigned long)state->private;
2195 free_extent_state(state);
2196 kfree(failrec);
2197
2198 state = next;
2199 }
2200 spin_unlock(&failure_tree->lock);
2201}
2202
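/*
 * Look up the io_failure_record for 'start', creating a new one (and
 * registering it in the failure tree via the state private pointer) the
 * first time this range fails.
 */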
2203int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2204 struct io_failure_record **failrec_ret)
2205{
2206 struct io_failure_record *failrec;
2207 u64 private;
2208 struct extent_map *em;
2209 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2210 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2211 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2212 int ret;
2213 u64 logical;
2214
2215 ret = get_state_private(failure_tree, start, &private);
2216 if (ret) {
2217 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2218 if (!failrec)
2219 return -ENOMEM;
2220
2221 failrec->start = start;
2222 failrec->len = end - start + 1;
2223 failrec->this_mirror = 0;
2224 failrec->bio_flags = 0;
2225 failrec->in_validation = 0;
2226
2227 read_lock(&em_tree->lock);
2228 em = lookup_extent_mapping(em_tree, start, failrec->len);
2229 if (!em) {
2230 read_unlock(&em_tree->lock);
2231 kfree(failrec);
2232 return -EIO;
2233 }
2234
2235 if (em->start > start || em->start + em->len <= start) {
2236 free_extent_map(em);
2237 em = NULL;
2238 }
2239 read_unlock(&em_tree->lock);
2240 if (!em) {
2241 kfree(failrec);
2242 return -EIO;
2243 }
2244
2245 logical = start - em->start;
2246 logical = em->block_start + logical;
2247 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2248 logical = em->block_start;
2249 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2250 extent_set_compress_type(&failrec->bio_flags,
2251 em->compress_type);
2252 }
2253
2254 pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
2255 logical, start, failrec->len);
2256
2257 failrec->logical = logical;
2258 free_extent_map(em);
2259
2260
2261 ret = set_extent_bits(failure_tree, start, end,
2262 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2263 if (ret >= 0)
2264 ret = set_state_private(failure_tree, start,
2265 (u64)(unsigned long)failrec);
2266
2267 if (ret >= 0)
2268 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
2269 GFP_NOFS);
2270 if (ret < 0) {
2271 kfree(failrec);
2272 return ret;
2273 }
2274 } else {
2275 failrec = (struct io_failure_record *)(unsigned long)private;
2276 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
2277 failrec->logical, failrec->start, failrec->len,
2278 failrec->in_validation);
2279
2280
2281
2282
2283
2284 }
2285
2286 *failrec_ret = failrec;
2287
2288 return 0;
2289}
2290
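/*
 * Decide whether a failed read can be retried from another mirror and,
 * if so, pick the next mirror to try.  Returns 1 when a repair read
 * should be submitted, 0 when the error is not recoverable.
 */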
2291int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2292 struct io_failure_record *failrec, int failed_mirror)
2293{
2294 int num_copies;
2295
2296 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
2297 failrec->logical, failrec->len);
2298 if (num_copies == 1) {
2299
2300
2301
2302
2303
2304 pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2305 num_copies, failrec->this_mirror, failed_mirror);
2306 return 0;
2307 }
2308
2309
2310
2311
2312
2313
2314 if (failed_bio->bi_vcnt > 1) {
2315
2316
2317
2318
2319
2320
2321
2322
2323 BUG_ON(failrec->in_validation);
2324 failrec->in_validation = 1;
2325 failrec->this_mirror = failed_mirror;
2326 } else {
2327
2328
2329
2330
2331
2332 if (failrec->in_validation) {
2333 BUG_ON(failrec->this_mirror != failed_mirror);
2334 failrec->in_validation = 0;
2335 failrec->this_mirror = 0;
2336 }
2337 failrec->failed_mirror = failed_mirror;
2338 failrec->this_mirror++;
2339 if (failrec->this_mirror == failed_mirror)
2340 failrec->this_mirror++;
2341 }
2342
2343 if (failrec->this_mirror > num_copies) {
2344 pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2345 num_copies, failrec->this_mirror, failed_mirror);
2346 return 0;
2347 }
2348
2349 return 1;
2350}
2351
2352
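/*
 * Build a single-page read bio aimed at failrec->logical, copying the
 * relevant checksum from the failed bio so the repair read can still be
 * verified on completion.
 */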
2353struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2354 struct io_failure_record *failrec,
2355 struct page *page, int pg_offset, int icsum,
2356 bio_end_io_t *endio_func, void *data)
2357{
2358 struct bio *bio;
2359 struct btrfs_io_bio *btrfs_failed_bio;
2360 struct btrfs_io_bio *btrfs_bio;
2361
2362 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2363 if (!bio)
2364 return NULL;
2365
2366 bio->bi_end_io = endio_func;
2367 bio->bi_iter.bi_sector = failrec->logical >> 9;
2368 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2369 bio->bi_iter.bi_size = 0;
2370 bio->bi_private = data;
2371
2372 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2373 if (btrfs_failed_bio->csum) {
2374 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2375 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2376
2377 btrfs_bio = btrfs_io_bio(bio);
2378 btrfs_bio->csum = btrfs_bio->csum_inline;
2379 icsum *= csum_size;
2380 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2381 csum_size);
2382 }
2383
2384 bio_add_page(bio, page, failrec->len, pg_offset);
2385
2386 return bio;
2387}

/*
 * This is a generic handler for readpage errors.  If another copy of the
 * data exists, submit a repair read against the next mirror; the good
 * data is later written back over the failed position.  No attempt is
 * made to remap the failed extent elsewhere; the device is expected to
 * be smart enough to do that as needed.
 */
2397static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2398 struct page *page, u64 start, u64 end,
2399 int failed_mirror)
2400{
2401 struct io_failure_record *failrec;
2402 struct inode *inode = page->mapping->host;
2403 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2404 struct bio *bio;
2405 int read_mode;
2406 int ret;
2407
2408 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2409
2410 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2411 if (ret)
2412 return ret;
2413
2414 ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
2415 if (!ret) {
2416 free_io_failure(inode, failrec);
2417 return -EIO;
2418 }
2419
2420 if (failed_bio->bi_vcnt > 1)
2421 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2422 else
2423 read_mode = READ_SYNC;
2424
2425 phy_offset >>= inode->i_sb->s_blocksize_bits;
2426 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2427 start - page_offset(page),
2428 (int)phy_offset, failed_bio->bi_end_io,
2429 NULL);
2430 if (!bio) {
2431 free_io_failure(inode, failrec);
2432 return -EIO;
2433 }
2434
2435 pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
2436 read_mode, failrec->this_mirror, failrec->in_validation);
2437
2438 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2439 failrec->this_mirror,
2440 failrec->bio_flags, 0);
2441 if (ret) {
2442 free_io_failure(inode, failrec);
2443 bio_put(bio);
2444 }
2445
2446 return ret;
2447}

/*
 * Per-page completion helper for writes: run the writepage_end_io hook
 * and flag the page and mapping on error.
 */
2451int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2452{
2453 int uptodate = (err == 0);
2454 struct extent_io_tree *tree;
2455 int ret = 0;
2456
2457 tree = &BTRFS_I(page->mapping->host)->io_tree;
2458
2459 if (tree->ops && tree->ops->writepage_end_io_hook) {
2460 ret = tree->ops->writepage_end_io_hook(page, start,
2461 end, NULL, uptodate);
2462 if (ret)
2463 uptodate = 0;
2464 }
2465
2466 if (!uptodate) {
2467 ClearPageUptodate(page);
2468 SetPageError(page);
2469 ret = ret < 0 ? ret : -EIO;
2470 mapping_set_error(page->mapping, ret);
2471 }
2472 return 0;
2473}
2474
2475 /*
2476 * Called when a writepage bio completes.  For every segment of the
2477 * bio this:
2478 *
2479 * - warns if the segment does not cover a whole page
2480 * - calls end_extent_writepage() to run the end_io hook and record
2481 *   errors on the page and its mapping
2482 * - ends writeback on the page
2483 */
2484static void end_bio_extent_writepage(struct bio *bio, int err)
2485{
2486 struct bio_vec *bvec;
2487 u64 start;
2488 u64 end;
2489 int i;
2490
2491 bio_for_each_segment_all(bvec, bio, i) {
2492 struct page *page = bvec->bv_page;
2493
2494 /*
2495 * Data writes always cover whole pages, so a bvec with a nonzero
2496 * offset or a short length means something was adjusted below us;
2497 * log whether the segment is merely incomplete or truly partial.
2498 */
2499 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2500 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2501 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2502 "partial page write in btrfs with offset %u and length %u",
2503 bvec->bv_offset, bvec->bv_len);
2504 else
2505 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2506 "incomplete page write in btrfs with offset %u and "
2507 "length %u",
2508 bvec->bv_offset, bvec->bv_len);
2509 }
2510
2511 start = page_offset(page);
2512 end = start + bvec->bv_offset + bvec->bv_len - 1;
2513
2514 if (end_extent_writepage(page, err, start, end))
2515 continue;
2516
2517 end_page_writeback(page);
2518 }
2519
2520 bio_put(bio);
2521}
2522
2523static void
2524endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2525 int uptodate)
2526{
2527 struct extent_state *cached = NULL;
2528 u64 end = start + len - 1;
2529
2530 if (uptodate && tree->track_uptodate)
2531 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2532 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2533}
2534
2535 /*
2536 * Called when a readpage bio completes.  For every segment this:
2537 *
2538 * - lets the readpage_end_io hook verify checksums, and attempts a
2539 *   repair read from another mirror when verification fails
2540 * - zeroes the tail of the last page beyond i_size
2541 * - sets the page uptodate or flags an error, then unlocks it
2542 * - batches the extent tree unlock/uptodate updates for contiguous
2543 *   ranges through endio_readpage_release_extent()
2544 */
2546static void end_bio_extent_readpage(struct bio *bio, int err)
2547{
2548 struct bio_vec *bvec;
2549 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2550 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2551 struct extent_io_tree *tree;
2552 u64 offset = 0;
2553 u64 start;
2554 u64 end;
2555 u64 len;
2556 u64 extent_start = 0;
2557 u64 extent_len = 0;
2558 int mirror;
2559 int ret;
2560 int i;
2561
2562 if (err)
2563 uptodate = 0;
2564
2565 bio_for_each_segment_all(bvec, bio, i) {
2566 struct page *page = bvec->bv_page;
2567 struct inode *inode = page->mapping->host;
2568
2569 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2570 "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err,
2571 io_bio->mirror_num);
2572 tree = &BTRFS_I(inode)->io_tree;
2573
2574 /*
2575 * Reads are issued for whole pages as well; warn if this segment
2576 * does not cover a full page, mirroring the check on the write
2577 * side.
2578 */
2579 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2580 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2581 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2582 "partial page read in btrfs with offset %u and length %u",
2583 bvec->bv_offset, bvec->bv_len);
2584 else
2585 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2586 "incomplete page read in btrfs with offset %u and "
2587 "length %u",
2588 bvec->bv_offset, bvec->bv_len);
2589 }
2590
2591 start = page_offset(page);
2592 end = start + bvec->bv_offset + bvec->bv_len - 1;
2593 len = bvec->bv_len;
2594
2595 mirror = io_bio->mirror_num;
2596 if (likely(uptodate && tree->ops &&
2597 tree->ops->readpage_end_io_hook)) {
2598 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2599 page, start, end,
2600 mirror);
2601 if (ret)
2602 uptodate = 0;
2603 else
2604 clean_io_failure(inode, start, page, 0);
2605 }
2606
2607 if (likely(uptodate))
2608 goto readpage_ok;
2609
2610 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2611 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2612 if (!ret && !err &&
2613 test_bit(BIO_UPTODATE, &bio->bi_flags))
2614 uptodate = 1;
2615 } else {
2616 /*
2617 * No readpage_io_failed_hook is set, so fall back to the generic
2618 * repair path: bio_readpage_error() tries to read the failed
2619 * range from another mirror.  When it returns 0 the repair bio
2620 * has been submitted and its own completion will verify the data
2621 * and unlock the page, so skip the unlock below and move on to
2622 * the next segment.
2623 */
2626 ret = bio_readpage_error(bio, offset, page, start, end,
2627 mirror);
2628 if (ret == 0) {
2629 uptodate =
2630 test_bit(BIO_UPTODATE, &bio->bi_flags);
2631 if (err)
2632 uptodate = 0;
2633 offset += len;
2634 continue;
2635 }
2636 }
2637readpage_ok:
2638 if (likely(uptodate)) {
2639 loff_t i_size = i_size_read(inode);
2640 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2641 unsigned off;
2642 /* zero the part of the last page that lies beyond i_size */
2644 off = i_size & (PAGE_CACHE_SIZE-1);
2645 if (page->index == end_index && off)
2646 zero_user_segment(page, off, PAGE_CACHE_SIZE);
2647 SetPageUptodate(page);
2648 } else {
2649 ClearPageUptodate(page);
2650 SetPageError(page);
2651 }
2652 unlock_page(page);
2653 offset += len;
2654
2655 if (unlikely(!uptodate)) {
2656 if (extent_len) {
2657 endio_readpage_release_extent(tree,
2658 extent_start,
2659 extent_len, 1);
2660 extent_start = 0;
2661 extent_len = 0;
2662 }
2663 endio_readpage_release_extent(tree, start,
2664 end - start + 1, 0);
2665 } else if (!extent_len) {
2666 extent_start = start;
2667 extent_len = end + 1 - start;
2668 } else if (extent_start + extent_len == start) {
2669 extent_len += end + 1 - start;
2670 } else {
2671 endio_readpage_release_extent(tree, extent_start,
2672 extent_len, uptodate);
2673 extent_start = start;
2674 extent_len = end + 1 - start;
2675 }
2676 }
2677
2678 if (extent_len)
2679 endio_readpage_release_extent(tree, extent_start, extent_len,
2680 uptodate);
2681 if (io_bio->end_io)
2682 io_bio->end_io(io_bio, err);
2683 bio_put(bio);
2684}
2685
2686 /*
2687 * Allocate a bio from the btrfs bio_set so it can later be turned
2688 * back into a btrfs_io_bio with btrfs_io_bio().
2689 */
2690struct bio *
2691btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2692 gfp_t gfp_flags)
2693{
2694 struct btrfs_io_bio *btrfs_bio;
2695 struct bio *bio;
2696
2697 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
2698
2699 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2700 while (!bio && (nr_vecs /= 2)) {
2701 bio = bio_alloc_bioset(gfp_flags,
2702 nr_vecs, btrfs_bioset);
2703 }
2704 }
2705
2706 if (bio) {
2707 bio->bi_bdev = bdev;
2708 bio->bi_iter.bi_sector = first_sector;
2709 btrfs_bio = btrfs_io_bio(bio);
2710 btrfs_bio->csum = NULL;
2711 btrfs_bio->csum_allocated = NULL;
2712 btrfs_bio->end_io = NULL;
2713 }
2714 return bio;
2715}
2716
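/* Clone @bio from the btrfs bio_set and reset the btrfs_io_bio csum fields. */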
2717struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2718{
2719 struct btrfs_io_bio *btrfs_bio;
2720 struct bio *new;
2721
2722 new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
2723 if (new) {
2724 btrfs_bio = btrfs_io_bio(new);
2725 btrfs_bio->csum = NULL;
2726 btrfs_bio->csum_allocated = NULL;
2727 btrfs_bio->end_io = NULL;
2728 }
2729 return new;
2730}
2731
2732
2733struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2734{
2735 struct btrfs_io_bio *btrfs_bio;
2736 struct bio *bio;
2737
2738 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2739 if (bio) {
2740 btrfs_bio = btrfs_io_bio(bio);
2741 btrfs_bio->csum = NULL;
2742 btrfs_bio->csum_allocated = NULL;
2743 btrfs_bio->end_io = NULL;
2744 }
2745 return bio;
2746}
2747
2748
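/*
 * Hand a fully built bio to the submit_bio_hook of the owning tree (or
 * straight to the block layer when no hook is set).  The extra
 * bio_get/bio_put pair keeps the bio alive long enough to check
 * BIO_EOPNOTSUPP after submission.
 */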
2749static int __must_check submit_one_bio(int rw, struct bio *bio,
2750 int mirror_num, unsigned long bio_flags)
2751{
2752 int ret = 0;
2753 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2754 struct page *page = bvec->bv_page;
2755 struct extent_io_tree *tree = bio->bi_private;
2756 u64 start;
2757
2758 start = page_offset(page) + bvec->bv_offset;
2759
2760 bio->bi_private = NULL;
2761
2762 bio_get(bio);
2763
2764 if (tree->ops && tree->ops->submit_bio_hook)
2765 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
2766 mirror_num, bio_flags, start);
2767 else
2768 btrfsic_submit_bio(rw, bio);
2769
2770 if (bio_flagged(bio, BIO_EOPNOTSUPP))
2771 ret = -EOPNOTSUPP;
2772 bio_put(bio);
2773 return ret;
2774}
2775
2776static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
2777 unsigned long offset, size_t size, struct bio *bio,
2778 unsigned long bio_flags)
2779{
2780 int ret = 0;
2781 if (tree->ops && tree->ops->merge_bio_hook)
2782 ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
2783 bio_flags);
2784 BUG_ON(ret < 0);
2785 return ret;
2786
2787}
2788
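/*
 * Add one page range to the bio cached in *bio_ret, submitting the old
 * bio first whenever the new range is not contiguous, has different
 * bio_flags, fails the merge_bio hook, or simply does not fit.  If no
 * bio is cached, a new one is allocated and either stored back in
 * *bio_ret for further merging or submitted immediately.
 */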
2789static int submit_extent_page(int rw, struct extent_io_tree *tree,
2790 struct page *page, sector_t sector,
2791 size_t size, unsigned long offset,
2792 struct block_device *bdev,
2793 struct bio **bio_ret,
2794 unsigned long max_pages,
2795 bio_end_io_t end_io_func,
2796 int mirror_num,
2797 unsigned long prev_bio_flags,
2798 unsigned long bio_flags)
2799{
2800 int ret = 0;
2801 struct bio *bio;
2802 int nr;
2803 int contig = 0;
2804 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
2805 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
2806 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
2807
2808 if (bio_ret && *bio_ret) {
2809 bio = *bio_ret;
2810 if (old_compressed)
2811 contig = bio->bi_iter.bi_sector == sector;
2812 else
2813 contig = bio_end_sector(bio) == sector;
2814
2815 if (prev_bio_flags != bio_flags || !contig ||
2816 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
2817 bio_add_page(bio, page, page_size, offset) < page_size) {
2818 ret = submit_one_bio(rw, bio, mirror_num,
2819 prev_bio_flags);
2820 if (ret < 0) {
2821 *bio_ret = NULL;
2822 return ret;
2823 }
2824 bio = NULL;
2825 } else {
2826 return 0;
2827 }
2828 }
2829 if (this_compressed)
2830 nr = BIO_MAX_PAGES;
2831 else
2832 nr = bio_get_nr_vecs(bdev);
2833
2834 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
2835 if (!bio)
2836 return -ENOMEM;
2837
2838 bio_add_page(bio, page, page_size, offset);
2839 bio->bi_end_io = end_io_func;
2840 bio->bi_private = tree;
2841
2842 if (bio_ret)
2843 *bio_ret = bio;
2844 else
2845 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
2846
2847 return ret;
2848}
2849
2850static void attach_extent_buffer_page(struct extent_buffer *eb,
2851 struct page *page)
2852{
2853 if (!PagePrivate(page)) {
2854 SetPagePrivate(page);
2855 page_cache_get(page);
2856 set_page_private(page, (unsigned long)eb);
2857 } else {
2858 WARN_ON(page->private != (unsigned long)eb);
2859 }
2860}
2861
2862void set_page_extent_mapped(struct page *page)
2863{
2864 if (!PagePrivate(page)) {
2865 SetPagePrivate(page);
2866 page_cache_get(page);
2867 set_page_private(page, EXTENT_PAGE_PRIVATE);
2868 }
2869}
2870
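/*
 * Small wrapper around get_extent() that caches the last extent map in
 * *em_cached, so contiguous pages of the same extent can reuse it
 * instead of doing a fresh lookup for every page.
 */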
2871static struct extent_map *
2872__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2873 u64 start, u64 len, get_extent_t *get_extent,
2874 struct extent_map **em_cached)
2875{
2876 struct extent_map *em;
2877
2878 if (em_cached && *em_cached) {
2879 em = *em_cached;
2880 if (extent_map_in_tree(em) && start >= em->start &&
2881 start < extent_map_end(em)) {
2882 atomic_inc(&em->refs);
2883 return em;
2884 }
2885
2886 free_extent_map(em);
2887 *em_cached = NULL;
2888 }
2889
2890 em = get_extent(inode, page, pg_offset, start, len, 0);
2891 if (em_cached && !IS_ERR_OR_NULL(em)) {
2892 BUG_ON(*em_cached);
2893 atomic_inc(&em->refs);
2894 *em_cached = em;
2895 }
2896 return em;
2897}
2898
2899 /*
2900 * Core of readpage: map the page one block at a time with get_extent,
2901 * zero holes, and queue reads via submit_extent_page().  Ranges are
2902 * unlocked here unless EXTENT_BIO_PARENT_LOCKED says the caller will.
2903 */
2904static int __do_readpage(struct extent_io_tree *tree,
2905 struct page *page,
2906 get_extent_t *get_extent,
2907 struct extent_map **em_cached,
2908 struct bio **bio, int mirror_num,
2909 unsigned long *bio_flags, int rw)
2910{
2911 struct inode *inode = page->mapping->host;
2912 u64 start = page_offset(page);
2913 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2914 u64 end;
2915 u64 cur = start;
2916 u64 extent_offset;
2917 u64 last_byte = i_size_read(inode);
2918 u64 block_start;
2919 u64 cur_end;
2920 sector_t sector;
2921 struct extent_map *em;
2922 struct block_device *bdev;
2923 int ret;
2924 int nr = 0;
2925 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2926 size_t pg_offset = 0;
2927 size_t iosize;
2928 size_t disk_io_size;
2929 size_t blocksize = inode->i_sb->s_blocksize;
2930 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2931
2932 set_page_extent_mapped(page);
2933
2934 end = page_end;
2935 if (!PageUptodate(page)) {
2936 if (cleancache_get_page(page) == 0) {
2937 BUG_ON(blocksize != PAGE_SIZE);
2938 unlock_extent(tree, start, end);
2939 goto out;
2940 }
2941 }
2942
2943 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2944 char *userpage;
2945 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2946
2947 if (zero_offset) {
2948 iosize = PAGE_CACHE_SIZE - zero_offset;
2949 userpage = kmap_atomic(page);
2950 memset(userpage + zero_offset, 0, iosize);
2951 flush_dcache_page(page);
2952 kunmap_atomic(userpage);
2953 }
2954 }
2955 while (cur <= end) {
2956 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2957
2958 if (cur >= last_byte) {
2959 char *userpage;
2960 struct extent_state *cached = NULL;
2961
2962 iosize = PAGE_CACHE_SIZE - pg_offset;
2963 userpage = kmap_atomic(page);
2964 memset(userpage + pg_offset, 0, iosize);
2965 flush_dcache_page(page);
2966 kunmap_atomic(userpage);
2967 set_extent_uptodate(tree, cur, cur + iosize - 1,
2968 &cached, GFP_NOFS);
2969 if (!parent_locked)
2970 unlock_extent_cached(tree, cur,
2971 cur + iosize - 1,
2972 &cached, GFP_NOFS);
2973 break;
2974 }
2975 em = __get_extent_map(inode, page, pg_offset, cur,
2976 end - cur + 1, get_extent, em_cached);
2977 if (IS_ERR_OR_NULL(em)) {
2978 SetPageError(page);
2979 if (!parent_locked)
2980 unlock_extent(tree, cur, end);
2981 break;
2982 }
2983 extent_offset = cur - em->start;
2984 BUG_ON(extent_map_end(em) <= cur);
2985 BUG_ON(end < cur);
2986
2987 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2988 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2989 extent_set_compress_type(&this_bio_flag,
2990 em->compress_type);
2991 }
2992
2993 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2994 cur_end = min(extent_map_end(em) - 1, end);
2995 iosize = ALIGN(iosize, blocksize);
2996 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2997 disk_io_size = em->block_len;
2998 sector = em->block_start >> 9;
2999 } else {
3000 sector = (em->block_start + extent_offset) >> 9;
3001 disk_io_size = iosize;
3002 }
3003 bdev = em->bdev;
3004 block_start = em->block_start;
3005 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3006 block_start = EXTENT_MAP_HOLE;
3007 free_extent_map(em);
3008 em = NULL;
3009
3010
3011 if (block_start == EXTENT_MAP_HOLE) {
3012 char *userpage;
3013 struct extent_state *cached = NULL;
3014
3015 userpage = kmap_atomic(page);
3016 memset(userpage + pg_offset, 0, iosize);
3017 flush_dcache_page(page);
3018 kunmap_atomic(userpage);
3019
3020 set_extent_uptodate(tree, cur, cur + iosize - 1,
3021 &cached, GFP_NOFS);
3022 unlock_extent_cached(tree, cur, cur + iosize - 1,
3023 &cached, GFP_NOFS);
3024 cur = cur + iosize;
3025 pg_offset += iosize;
3026 continue;
3027 }
3028
3029 if (test_range_bit(tree, cur, cur_end,
3030 EXTENT_UPTODATE, 1, NULL)) {
3031 check_page_uptodate(tree, page);
3032 if (!parent_locked)
3033 unlock_extent(tree, cur, cur + iosize - 1);
3034 cur = cur + iosize;
3035 pg_offset += iosize;
3036 continue;
3037 }
3038 /*
3039 * An inline extent that was not marked uptodate above: error out.
3040 */
3041 if (block_start == EXTENT_MAP_INLINE) {
3042 SetPageError(page);
3043 if (!parent_locked)
3044 unlock_extent(tree, cur, cur + iosize - 1);
3045 cur = cur + iosize;
3046 pg_offset += iosize;
3047 continue;
3048 }
3049
3050 pnr -= page->index;
3051 ret = submit_extent_page(rw, tree, page,
3052 sector, disk_io_size, pg_offset,
3053 bdev, bio, pnr,
3054 end_bio_extent_readpage, mirror_num,
3055 *bio_flags,
3056 this_bio_flag);
3057 if (!ret) {
3058 nr++;
3059 *bio_flags = this_bio_flag;
3060 } else {
3061 SetPageError(page);
3062 if (!parent_locked)
3063 unlock_extent(tree, cur, cur + iosize - 1);
3064 }
3065 cur = cur + iosize;
3066 pg_offset += iosize;
3067 }
3068out:
3069 if (!nr) {
3070 if (!PageError(page))
3071 SetPageUptodate(page);
3072 unlock_page(page);
3073 }
3074 return 0;
3075}
3076
3077static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3078 struct page *pages[], int nr_pages,
3079 u64 start, u64 end,
3080 get_extent_t *get_extent,
3081 struct extent_map **em_cached,
3082 struct bio **bio, int mirror_num,
3083 unsigned long *bio_flags, int rw)
3084{
3085 struct inode *inode;
3086 struct btrfs_ordered_extent *ordered;
3087 int index;
3088
3089 inode = pages[0]->mapping->host;
3090 while (1) {
3091 lock_extent(tree, start, end);
3092 ordered = btrfs_lookup_ordered_range(inode, start,
3093 end - start + 1);
3094 if (!ordered)
3095 break;
3096 unlock_extent(tree, start, end);
3097 btrfs_start_ordered_extent(inode, ordered, 1);
3098 btrfs_put_ordered_extent(ordered);
3099 }
3100
3101 for (index = 0; index < nr_pages; index++) {
3102 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3103 mirror_num, bio_flags, rw);
3104 page_cache_release(pages[index]);
3105 }
3106}
3107
3108static void __extent_readpages(struct extent_io_tree *tree,
3109 struct page *pages[],
3110 int nr_pages, get_extent_t *get_extent,
3111 struct extent_map **em_cached,
3112 struct bio **bio, int mirror_num,
3113 unsigned long *bio_flags, int rw)
3114{
3115 u64 start = 0;
3116 u64 end = 0;
3117 u64 page_start;
3118 int index;
3119 int first_index = 0;
3120
3121 for (index = 0; index < nr_pages; index++) {
3122 page_start = page_offset(pages[index]);
3123 if (!end) {
3124 start = page_start;
3125 end = start + PAGE_CACHE_SIZE - 1;
3126 first_index = index;
3127 } else if (end + 1 == page_start) {
3128 end += PAGE_CACHE_SIZE;
3129 } else {
3130 __do_contiguous_readpages(tree, &pages[first_index],
3131 index - first_index, start,
3132 end, get_extent, em_cached,
3133 bio, mirror_num, bio_flags,
3134 rw);
3135 start = page_start;
3136 end = start + PAGE_CACHE_SIZE - 1;
3137 first_index = index;
3138 }
3139 }
3140
3141 if (end)
3142 __do_contiguous_readpages(tree, &pages[first_index],
3143 index - first_index, start,
3144 end, get_extent, em_cached, bio,
3145 mirror_num, bio_flags, rw);
3146}
3147
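/*
 * Lock the extent range covered by @page, waiting for any ordered
 * extent that overlaps it to finish, then read the page with
 * __do_readpage().
 */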
3148static int __extent_read_full_page(struct extent_io_tree *tree,
3149 struct page *page,
3150 get_extent_t *get_extent,
3151 struct bio **bio, int mirror_num,
3152 unsigned long *bio_flags, int rw)
3153{
3154 struct inode *inode = page->mapping->host;
3155 struct btrfs_ordered_extent *ordered;
3156 u64 start = page_offset(page);
3157 u64 end = start + PAGE_CACHE_SIZE - 1;
3158 int ret;
3159
3160 while (1) {
3161 lock_extent(tree, start, end);
3162 ordered = btrfs_lookup_ordered_extent(inode, start);
3163 if (!ordered)
3164 break;
3165 unlock_extent(tree, start, end);
3166 btrfs_start_ordered_extent(inode, ordered, 1);
3167 btrfs_put_ordered_extent(ordered);
3168 }
3169
3170 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3171 bio_flags, rw);
3172 return ret;
3173}
3174
3175int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3176 get_extent_t *get_extent, int mirror_num)
3177{
3178 struct bio *bio = NULL;
3179 unsigned long bio_flags = 0;
3180 int ret;
3181
3182 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3183 &bio_flags, READ);
3184 if (bio)
3185 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3186 return ret;
3187}
3188
3189int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3190 get_extent_t *get_extent, int mirror_num)
3191{
3192 struct bio *bio = NULL;
3193 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3194 int ret;
3195
3196 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3197 &bio_flags, READ);
3198 if (bio)
3199 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3200 return ret;
3201}
3202
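/*
 * Charge @nr_written pages against wbc->nr_to_write and, for
 * range_cyclic or whole-file writeback, advance the mapping's
 * writeback_index so the next pass resumes after this page.
 */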
3203static noinline void update_nr_written(struct page *page,
3204 struct writeback_control *wbc,
3205 unsigned long nr_written)
3206{
3207 wbc->nr_to_write -= nr_written;
3208 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
3209 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
3210 page->mapping->writeback_index = page->index + nr_written;
3211}
3212
3213 /*
3214 * Helper for __extent_writepage: finds the delalloc ranges that touch
3215 * this page and hands them to the fill_delalloc hook so space gets
3216 * allocated (or compression kicked off) before the page is written.
3217 *
3218 * Returns 1 if fill_delalloc did all the work and the page has already
3219 * been unlocked, 0 if the caller should go on and write the page
3220 * (still locked), and < 0 on error (page still locked).
3221 */
3223static noinline_for_stack int writepage_delalloc(struct inode *inode,
3224 struct page *page, struct writeback_control *wbc,
3225 struct extent_page_data *epd,
3226 u64 delalloc_start,
3227 unsigned long *nr_written)
3228{
3229 struct extent_io_tree *tree = epd->tree;
3230 u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
3231 u64 nr_delalloc;
3232 u64 delalloc_to_write = 0;
3233 u64 delalloc_end = 0;
3234 int ret;
3235 int page_started = 0;
3236
3237 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3238 return 0;
3239
3240 while (delalloc_end < page_end) {
3241 nr_delalloc = find_lock_delalloc_range(inode, tree,
3242 page,
3243 &delalloc_start,
3244 &delalloc_end,
3245 BTRFS_MAX_EXTENT_SIZE);
3246 if (nr_delalloc == 0) {
3247 delalloc_start = delalloc_end + 1;
3248 continue;
3249 }
3250 ret = tree->ops->fill_delalloc(inode, page,
3251 delalloc_start,
3252 delalloc_end,
3253 &page_started,
3254 nr_written);
3255
3256 if (ret) {
3257 SetPageError(page);
3258 /*
3259 * fill_delalloc should return < 0 on error, but guard against a
3260 * positive return here: only 0 means we may continue writing the
3261 * page.
3262 */
3263 ret = ret < 0 ? ret : -EIO;
3264 goto done;
3265 }
3266
3267 /*
3268 * delalloc_end is inclusive, so adding a full PAGE_CACHE_SIZE before
3269 * the shift rounds the byte count up to whole pages.
3270 */
3271 delalloc_to_write += (delalloc_end - delalloc_start +
3272 PAGE_CACHE_SIZE) >>
3273 PAGE_CACHE_SHIFT;
3274 delalloc_start = delalloc_end + 1;
3275 }
3276 if (wbc->nr_to_write < delalloc_to_write) {
3277 int thresh = 8192;
3278
3279 if (delalloc_to_write < thresh * 2)
3280 thresh = delalloc_to_write;
3281 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3282 thresh);
3283 }
3284
3285
3286
3287
3288 if (page_started) {
3289 /*
3290 * The page is unlocked now, so we cannot go through
3291 * update_nr_written(); just charge the written pages against
3292 * nr_to_write and tell the caller we are done with this page.
3293 */
3294 wbc->nr_to_write -= *nr_written;
3295 return 1;
3296 }
3297
3298 ret = 0;
3299
3300done:
3301 return ret;
3302}
3303
3304 /*
3305 * Helper for __extent_writepage: runs the writepage_start hook, then
3306 * walks the page block by block, mapping each range with get_extent
3307 * and submitting write bios for everything that needs real IO.
3308 *
3309 * Returns 1 if the start hook took the page back (it is redirtied or
3310 * skipped and already unlocked), 0 on success, < 0 on error.
3311 */
3312static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3313 struct page *page,
3314 struct writeback_control *wbc,
3315 struct extent_page_data *epd,
3316 loff_t i_size,
3317 unsigned long nr_written,
3318 int write_flags, int *nr_ret)
3319{
3320 struct extent_io_tree *tree = epd->tree;
3321 u64 start = page_offset(page);
3322 u64 page_end = start + PAGE_CACHE_SIZE - 1;
3323 u64 end;
3324 u64 cur = start;
3325 u64 extent_offset;
3326 u64 block_start;
3327 u64 iosize;
3328 sector_t sector;
3329 struct extent_state *cached_state = NULL;
3330 struct extent_map *em;
3331 struct block_device *bdev;
3332 size_t pg_offset = 0;
3333 size_t blocksize;
3334 int ret = 0;
3335 int nr = 0;
3336 bool compressed;
3337
3338 if (tree->ops && tree->ops->writepage_start_hook) {
3339 ret = tree->ops->writepage_start_hook(page, start,
3340 page_end);
3341 if (ret) {
3342
3343 if (ret == -EBUSY)
3344 wbc->pages_skipped++;
3345 else
3346 redirty_page_for_writepage(wbc, page);
3347
3348 update_nr_written(page, wbc, nr_written);
3349 unlock_page(page);
3350 ret = 1;
3351 goto done_unlocked;
3352 }
3353 }
3354
3355 /*
3356 * Account this page in wbc (and bump writeback_index) up front;
3357 * the IO paths below may end up unlocking the page behind us.
3358 */
3359 update_nr_written(page, wbc, nr_written + 1);
3360
3361 end = page_end;
3362 if (i_size <= start) {
3363 if (tree->ops && tree->ops->writepage_end_io_hook)
3364 tree->ops->writepage_end_io_hook(page, start,
3365 page_end, NULL, 1);
3366 goto done;
3367 }
3368
3369 blocksize = inode->i_sb->s_blocksize;
3370
3371 while (cur <= end) {
3372 u64 em_end;
3373 if (cur >= i_size) {
3374 if (tree->ops && tree->ops->writepage_end_io_hook)
3375 tree->ops->writepage_end_io_hook(page, cur,
3376 page_end, NULL, 1);
3377 break;
3378 }
3379 em = epd->get_extent(inode, page, pg_offset, cur,
3380 end - cur + 1, 1);
3381 if (IS_ERR_OR_NULL(em)) {
3382 SetPageError(page);
3383 ret = PTR_ERR_OR_ZERO(em);
3384 break;
3385 }
3386
3387 extent_offset = cur - em->start;
3388 em_end = extent_map_end(em);
3389 BUG_ON(em_end <= cur);
3390 BUG_ON(end < cur);
3391 iosize = min(em_end - cur, end - cur + 1);
3392 iosize = ALIGN(iosize, blocksize);
3393 sector = (em->block_start + extent_offset) >> 9;
3394 bdev = em->bdev;
3395 block_start = em->block_start;
3396 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3397 free_extent_map(em);
3398 em = NULL;
3399
3400 /*
3401 * Compressed extents, inline extents and holes are not written here;
3402 * compression has its own submission path, the others need no data IO.
3403 */
3404 if (compressed || block_start == EXTENT_MAP_HOLE ||
3405 block_start == EXTENT_MAP_INLINE) {
3406
3407
3408
3409
3410 if (!compressed && tree->ops &&
3411 tree->ops->writepage_end_io_hook)
3412 tree->ops->writepage_end_io_hook(page, cur,
3413 cur + iosize - 1,
3414 NULL, 1);
3415 else if (compressed) {
3416 /*
3417 * Do not end page writeback on a compressed extent here; the
3418 * compression code takes care of that elsewhere.
3419 */
3420 nr++;
3421 }
3422
3423 cur += iosize;
3424 pg_offset += iosize;
3425 continue;
3426 }
3427
3428 if (tree->ops && tree->ops->writepage_io_hook) {
3429 ret = tree->ops->writepage_io_hook(page, cur,
3430 cur + iosize - 1);
3431 } else {
3432 ret = 0;
3433 }
3434 if (ret) {
3435 SetPageError(page);
3436 } else {
3437 unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
3438
3439 set_range_writeback(tree, cur, cur + iosize - 1);
3440 if (!PageWriteback(page)) {
3441 btrfs_err(BTRFS_I(inode)->root->fs_info,
3442 "page %lu not writeback, cur %llu end %llu",
3443 page->index, cur, end);
3444 }
3445
3446 ret = submit_extent_page(write_flags, tree, page,
3447 sector, iosize, pg_offset,
3448 bdev, &epd->bio, max_nr,
3449 end_bio_extent_writepage,
3450 0, 0, 0);
3451 if (ret)
3452 SetPageError(page);
3453 }
3454 cur = cur + iosize;
3455 pg_offset += iosize;
3456 nr++;
3457 }
3458done:
3459 *nr_ret = nr;
3460
3461done_unlocked:
3462
3463
3464 free_extent_state(cached_state);
3465 return ret;
3466}
3467
3468 /*
3469 * Write one dirty page: run delalloc setup for the ranges touching
3470 * it, zero anything past i_size, then hand the block-level IO to
3471 * __extent_writepage_io().  Errors are recorded on the page and its
3472 * mapping via end_extent_writepage().
3473 */
3474static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3475 void *data)
3476{
3477 struct inode *inode = page->mapping->host;
3478 struct extent_page_data *epd = data;
3479 u64 start = page_offset(page);
3480 u64 page_end = start + PAGE_CACHE_SIZE - 1;
3481 int ret;
3482 int nr = 0;
3483 size_t pg_offset = 0;
3484 loff_t i_size = i_size_read(inode);
3485 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
3486 int write_flags;
3487 unsigned long nr_written = 0;
3488
3489 if (wbc->sync_mode == WB_SYNC_ALL)
3490 write_flags = WRITE_SYNC;
3491 else
3492 write_flags = WRITE;
3493
3494 trace___extent_writepage(page, inode, wbc);
3495
3496 WARN_ON(!PageLocked(page));
3497
3498 ClearPageError(page);
3499
3500 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
3501 if (page->index > end_index ||
3502 (page->index == end_index && !pg_offset)) {
3503 page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
3504 unlock_page(page);
3505 return 0;
3506 }
3507
3508 if (page->index == end_index) {
3509 char *userpage;
3510
3511 userpage = kmap_atomic(page);
3512 memset(userpage + pg_offset, 0,
3513 PAGE_CACHE_SIZE - pg_offset);
3514 kunmap_atomic(userpage);
3515 flush_dcache_page(page);
3516 }
3517
3518 pg_offset = 0;
3519
3520 set_page_extent_mapped(page);
3521
3522 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3523 if (ret == 1)
3524 goto done_unlocked;
3525 if (ret)
3526 goto done;
3527
3528 ret = __extent_writepage_io(inode, page, wbc, epd,
3529 i_size, nr_written, write_flags, &nr);
3530 if (ret == 1)
3531 goto done_unlocked;
3532
3533done:
3534 if (nr == 0) {
3535
3536 set_page_writeback(page);
3537 end_page_writeback(page);
3538 }
3539 if (PageError(page)) {
3540 ret = ret < 0 ? ret : -EIO;
3541 end_extent_writepage(page, ret, start, page_end);
3542 }
3543 unlock_page(page);
3544 return ret;
3545
3546done_unlocked:
3547 return 0;
3548}
3549
3550void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3551{
3552 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3553 TASK_UNINTERRUPTIBLE);
3554}
3555
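/*
 * Take the tree write lock on @eb (flushing our pending bio if we have
 * to block), wait for any writeback in progress when this is a
 * synchronous writeout, and transfer the buffer's dirty bit into the
 * writeback bit.  Returns 1 and locks all of the eb's pages if the
 * buffer needs to be written, 0 if it was already clean.
 */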
3556static noinline_for_stack int
3557lock_extent_buffer_for_io(struct extent_buffer *eb,
3558 struct btrfs_fs_info *fs_info,
3559 struct extent_page_data *epd)
3560{
3561 unsigned long i, num_pages;
3562 int flush = 0;
3563 int ret = 0;
3564
3565 if (!btrfs_try_tree_write_lock(eb)) {
3566 flush = 1;
3567 flush_write_bio(epd);
3568 btrfs_tree_lock(eb);
3569 }
3570
3571 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3572 btrfs_tree_unlock(eb);
3573 if (!epd->sync_io)
3574 return 0;
3575 if (!flush) {
3576 flush_write_bio(epd);
3577 flush = 1;
3578 }
3579 while (1) {
3580 wait_on_extent_buffer_writeback(eb);
3581 btrfs_tree_lock(eb);
3582 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3583 break;
3584 btrfs_tree_unlock(eb);
3585 }
3586 }
3587
3588
3589
3590
3591
3592
3593 spin_lock(&eb->refs_lock);
3594 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3595 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3596 spin_unlock(&eb->refs_lock);
3597 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3598 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
3599 -eb->len,
3600 fs_info->dirty_metadata_batch);
3601 ret = 1;
3602 } else {
3603 spin_unlock(&eb->refs_lock);
3604 }
3605
3606 btrfs_tree_unlock(eb);
3607
3608 if (!ret)
3609 return ret;
3610
3611 num_pages = num_extent_pages(eb->start, eb->len);
3612 for (i = 0; i < num_pages; i++) {
3613 struct page *p = eb->pages[i];
3614
3615 if (!trylock_page(p)) {
3616 if (!flush) {
3617 flush_write_bio(epd);
3618 flush = 1;
3619 }
3620 lock_page(p);
3621 }
3622 }
3623
3624 return ret;
3625}
3626
3627static void end_extent_buffer_writeback(struct extent_buffer *eb)
3628{
3629 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3630 smp_mb__after_atomic();
3631 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3632}
3633
3634static void set_btree_ioerr(struct page *page)
3635{
3636 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3637 struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
3638
3639 SetPageError(page);
3640 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3641 return;
3642 /*
3643 * Record a metadata writeback error somewhere more durable than the
3644 * page itself.  The pages of this eb (and the mapping's AS_EIO /
3645 * AS_ENOSPC bits) may be cleaned up or cleared long before the
3646 * transaction commit or log sync that must learn about the failure,
3647 * so the error is also stored in the btree inode's runtime flags.
3648 *
3649 * eb->log_index selects the flag: -1 for ordinary btree blocks, 0 or
3650 * 1 for blocks of one of the two log trees, so a failed log write
3651 * only fails the fsync that owns that log tree.
3652 */
3681 switch (eb->log_index) {
3682 case -1:
3683 set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
3684 break;
3685 case 0:
3686 set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
3687 break;
3688 case 1:
3689 set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
3690 break;
3691 default:
3692 BUG();
3693 }
3694}
3695
3696static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3697{
3698 struct bio_vec *bvec;
3699 struct extent_buffer *eb;
3700 int i, done;
3701
3702 bio_for_each_segment_all(bvec, bio, i) {
3703 struct page *page = bvec->bv_page;
3704
3705 eb = (struct extent_buffer *)page->private;
3706 BUG_ON(!eb);
3707 done = atomic_dec_and_test(&eb->io_pages);
3708
3709 if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3710 ClearPageUptodate(page);
3711 set_btree_ioerr(page);
3712 }
3713
3714 end_page_writeback(page);
3715
3716 if (!done)
3717 continue;
3718
3719 end_extent_buffer_writeback(eb);
3720 }
3721
3722 bio_put(bio);
3723}
3724
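/*
 * Write every page of @eb: mark them writeback, submit them through
 * submit_extent_page() with the metadata request flags, and on failure
 * record a btree IO error and finish writeback for the pages that were
 * never submitted.
 */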
3725static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3726 struct btrfs_fs_info *fs_info,
3727 struct writeback_control *wbc,
3728 struct extent_page_data *epd)
3729{
3730 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3731 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3732 u64 offset = eb->start;
3733 unsigned long i, num_pages;
3734 unsigned long bio_flags = 0;
3735 int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
3736 int ret = 0;
3737
3738 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3739 num_pages = num_extent_pages(eb->start, eb->len);
3740 atomic_set(&eb->io_pages, num_pages);
3741 if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
3742 bio_flags = EXTENT_BIO_TREE_LOG;
3743
3744 for (i = 0; i < num_pages; i++) {
3745 struct page *p = eb->pages[i];
3746
3747 clear_page_dirty_for_io(p);
3748 set_page_writeback(p);
3749 ret = submit_extent_page(rw, tree, p, offset >> 9,
3750 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3751 -1, end_bio_extent_buffer_writepage,
3752 0, epd->bio_flags, bio_flags);
3753 epd->bio_flags = bio_flags;
3754 if (ret) {
3755 set_btree_ioerr(p);
3756 end_page_writeback(p);
3757 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3758 end_extent_buffer_writeback(eb);
3759 ret = -EIO;
3760 break;
3761 }
3762 offset += PAGE_CACHE_SIZE;
3763 update_nr_written(p, wbc, 1);
3764 unlock_page(p);
3765 }
3766
3767 if (unlikely(ret)) {
3768 for (; i < num_pages; i++) {
3769 struct page *p = eb->pages[i];
3770 clear_page_dirty_for_io(p);
3771 unlock_page(p);
3772 }
3773 }
3774
3775 return ret;
3776}
3777
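/*
 * Writeback for the btree inode.  Walks the tagged dirty pages of the
 * btree mapping, resolves each page back to its extent buffer, and
 * writes whole extent buffers via lock_extent_buffer_for_io() plus
 * write_one_eb(), honoring wbc->nr_to_write between buffers.
 */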
3778int btree_write_cache_pages(struct address_space *mapping,
3779 struct writeback_control *wbc)
3780{
3781 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3782 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3783 struct extent_buffer *eb, *prev_eb = NULL;
3784 struct extent_page_data epd = {
3785 .bio = NULL,
3786 .tree = tree,
3787 .extent_locked = 0,
3788 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3789 .bio_flags = 0,
3790 };
3791 int ret = 0;
3792 int done = 0;
3793 int nr_to_write_done = 0;
3794 struct pagevec pvec;
3795 int nr_pages;
3796 pgoff_t index;
3797 pgoff_t end;
3798 int scanned = 0;
3799 int tag;
3800
3801 pagevec_init(&pvec, 0);
3802 if (wbc->range_cyclic) {
3803 index = mapping->writeback_index;
3804 end = -1;
3805 } else {
3806 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3807 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3808 scanned = 1;
3809 }
3810 if (wbc->sync_mode == WB_SYNC_ALL)
3811 tag = PAGECACHE_TAG_TOWRITE;
3812 else
3813 tag = PAGECACHE_TAG_DIRTY;
3814retry:
3815 if (wbc->sync_mode == WB_SYNC_ALL)
3816 tag_pages_for_writeback(mapping, index, end);
3817 while (!done && !nr_to_write_done && (index <= end) &&
3818 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3819 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3820 unsigned i;
3821
3822 scanned = 1;
3823 for (i = 0; i < nr_pages; i++) {
3824 struct page *page = pvec.pages[i];
3825
3826 if (!PagePrivate(page))
3827 continue;
3828
3829 if (!wbc->range_cyclic && page->index > end) {
3830 done = 1;
3831 break;
3832 }
3833
3834 spin_lock(&mapping->private_lock);
3835 if (!PagePrivate(page)) {
3836 spin_unlock(&mapping->private_lock);
3837 continue;
3838 }
3839
3840 eb = (struct extent_buffer *)page->private;
3841
3842
3843
3844
3845
3846
3847 if (WARN_ON(!eb)) {
3848 spin_unlock(&mapping->private_lock);
3849 continue;
3850 }
3851
3852 if (eb == prev_eb) {
3853 spin_unlock(&mapping->private_lock);
3854 continue;
3855 }
3856
3857 ret = atomic_inc_not_zero(&eb->refs);
3858 spin_unlock(&mapping->private_lock);
3859 if (!ret)
3860 continue;
3861
3862 prev_eb = eb;
3863 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3864 if (!ret) {
3865 free_extent_buffer(eb);
3866 continue;
3867 }
3868
3869 ret = write_one_eb(eb, fs_info, wbc, &epd);
3870 if (ret) {
3871 done = 1;
3872 free_extent_buffer(eb);
3873 break;
3874 }
3875 free_extent_buffer(eb);
3876
3877 /*
3878 * The writeout paths can adjust nr_to_write while we run, so
3879 * re-evaluate it after every extent buffer to honor the latest
3880 * value.
3881 */
3882 nr_to_write_done = wbc->nr_to_write <= 0;
3883 }
3884 pagevec_release(&pvec);
3885 cond_resched();
3886 }
3887 if (!scanned && !done) {
3888
3889
3890
3891
3892 scanned = 1;
3893 index = 0;
3894 goto retry;
3895 }
3896 flush_write_bio(&epd);
3897 return ret;
3898}
3899 /*
3900 * Walk the dirty pages of @mapping that fall inside the writeback
3901 * range in @wbc and call @writepage on each of them, flushing the
3902 * pending bio through @flush_fn whenever we would otherwise block on
3903 * a page lock or on writeback.
3904 *
3905 * For WB_SYNC_ALL writeback the pages are tagged up front with
3906 * PAGECACHE_TAG_TOWRITE so that pages dirtied while we run do not
3907 * make the loop live-lock, and pages already under writeback are
3908 * waited for rather than skipped.
3909 *
3910 * The walk stops early once wbc->nr_to_write is exhausted, and for
3911 * range_cyclic writeback it wraps around to the start of the file
3912 * when the end is reached with work still to do.
3913 */
3915static int extent_write_cache_pages(struct extent_io_tree *tree,
3916 struct address_space *mapping,
3917 struct writeback_control *wbc,
3918 writepage_t writepage, void *data,
3919 void (*flush_fn)(void *))
3920{
3921 struct inode *inode = mapping->host;
3922 int ret = 0;
3923 int done = 0;
3924 int err = 0;
3925 int nr_to_write_done = 0;
3926 struct pagevec pvec;
3927 int nr_pages;
3928 pgoff_t index;
3929 pgoff_t end;
3930 int scanned = 0;
3931 int tag;
3932
3933 /*
3934 * Hold a reference on the inode for the whole walk so ordered
3935 * extents can still do their work when the IO finishes.  If igrab()
3936 * fails the inode is already on its way out and there is no point
3937 * writing back data that is about to be dropped anyway.  The
3938 * reference is released through btrfs_add_delayed_iput() at the end
3939 * of the function.
3940 */
3942 if (!igrab(inode))
3943 return 0;
3944
3945 pagevec_init(&pvec, 0);
3946 if (wbc->range_cyclic) {
3947 index = mapping->writeback_index;
3948 end = -1;
3949 } else {
3950 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3951 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3952 scanned = 1;
3953 }
3954 if (wbc->sync_mode == WB_SYNC_ALL)
3955 tag = PAGECACHE_TAG_TOWRITE;
3956 else
3957 tag = PAGECACHE_TAG_DIRTY;
3958retry:
3959 if (wbc->sync_mode == WB_SYNC_ALL)
3960 tag_pages_for_writeback(mapping, index, end);
3961 while (!done && !nr_to_write_done && (index <= end) &&
3962 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3963 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3964 unsigned i;
3965
3966 scanned = 1;
3967 for (i = 0; i < nr_pages; i++) {
3968 struct page *page = pvec.pages[i];
3969
3970 /*
3971 * At this point we hold neither the page lock nor the mapping's
3972 * tree lock, so the page may be truncated or invalidated under us
3973 * (page->mapping going NULL); every check below has to cope with
3974 * that, and the page is rechecked once it is locked.
3975 */
3977 if (!trylock_page(page)) {
3978 flush_fn(data);
3979 lock_page(page);
3980 }
3981
3982 if (unlikely(page->mapping != mapping)) {
3983 unlock_page(page);
3984 continue;
3985 }
3986
3987 if (!wbc->range_cyclic && page->index > end) {
3988 done = 1;
3989 unlock_page(page);
3990 continue;
3991 }
3992
3993 if (wbc->sync_mode != WB_SYNC_NONE) {
3994 if (PageWriteback(page))
3995 flush_fn(data);
3996 wait_on_page_writeback(page);
3997 }
3998
3999 if (PageWriteback(page) ||
4000 !clear_page_dirty_for_io(page)) {
4001 unlock_page(page);
4002 continue;
4003 }
4004
4005 ret = (*writepage)(page, wbc, data);
4006
4007 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
4008 unlock_page(page);
4009 ret = 0;
4010 }
4011 if (!err && ret < 0)
4012 err = ret;
4013
4014
4015
4016
4017
4018
4019 nr_to_write_done = wbc->nr_to_write <= 0;
4020 }
4021 pagevec_release(&pvec);
4022 cond_resched();
4023 }
4024 if (!scanned && !done && !err) {
4025
4026
4027
4028
4029 scanned = 1;
4030 index = 0;
4031 goto retry;
4032 }
4033 btrfs_add_delayed_iput(inode);
4034 return err;
4035}
4036
4037static void flush_epd_write_bio(struct extent_page_data *epd)
4038{
4039 if (epd->bio) {
4040 int rw = WRITE;
4041 int ret;
4042
4043 if (epd->sync_io)
4044 rw = WRITE_SYNC;
4045
4046 ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
4047 BUG_ON(ret < 0);
4048 epd->bio = NULL;
4049 }
4050}
4051
4052static noinline void flush_write_bio(void *data)
4053{
4054 struct extent_page_data *epd = data;
4055 flush_epd_write_bio(epd);
4056}
4057
4058int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
4059 get_extent_t *get_extent,
4060 struct writeback_control *wbc)
4061{
4062 int ret;
4063 struct extent_page_data epd = {
4064 .bio = NULL,
4065 .tree = tree,
4066 .get_extent = get_extent,
4067 .extent_locked = 0,
4068 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4069 .bio_flags = 0,
4070 };
4071
4072 ret = __extent_writepage(page, wbc, &epd);
4073
4074 flush_epd_write_bio(&epd);
4075 return ret;
4076}
4077
4078int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
4079 u64 start, u64 end, get_extent_t *get_extent,
4080 int mode)
4081{
4082 int ret = 0;
4083 struct address_space *mapping = inode->i_mapping;
4084 struct page *page;
4085 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
4086 PAGE_CACHE_SHIFT;
4087
4088 struct extent_page_data epd = {
4089 .bio = NULL,
4090 .tree = tree,
4091 .get_extent = get_extent,
4092 .extent_locked = 1,
4093 .sync_io = mode == WB_SYNC_ALL,
4094 .bio_flags = 0,
4095 };
4096 struct writeback_control wbc_writepages = {
4097 .sync_mode = mode,
4098 .nr_to_write = nr_pages * 2,
4099 .range_start = start,
4100 .range_end = end + 1,
4101 };
4102
4103 while (start <= end) {
4104 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
4105 if (clear_page_dirty_for_io(page))
4106 ret = __extent_writepage(page, &wbc_writepages, &epd);
4107 else {
4108 if (tree->ops && tree->ops->writepage_end_io_hook)
4109 tree->ops->writepage_end_io_hook(page, start,
4110 start + PAGE_CACHE_SIZE - 1,
4111 NULL, 1);
4112 unlock_page(page);
4113 }
4114 page_cache_release(page);
4115 start += PAGE_CACHE_SIZE;
4116 }
4117
4118 flush_epd_write_bio(&epd);
4119 return ret;
4120}
4121
4122int extent_writepages(struct extent_io_tree *tree,
4123 struct address_space *mapping,
4124 get_extent_t *get_extent,
4125 struct writeback_control *wbc)
4126{
4127 int ret = 0;
4128 struct extent_page_data epd = {
4129 .bio = NULL,
4130 .tree = tree,
4131 .get_extent = get_extent,
4132 .extent_locked = 0,
4133 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4134 .bio_flags = 0,
4135 };
4136
4137 ret = extent_write_cache_pages(tree, mapping, wbc,
4138 __extent_writepage, &epd,
4139 flush_write_bio);
4140 flush_epd_write_bio(&epd);
4141 return ret;
4142}
4143
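/*
 * Readahead entry point: pull pages off the readahead list, add them to
 * the page cache, and read them in batches of up to 16 so contiguous
 * ranges can share one extent map lookup and one bio.
 */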
4144int extent_readpages(struct extent_io_tree *tree,
4145 struct address_space *mapping,
4146 struct list_head *pages, unsigned nr_pages,
4147 get_extent_t get_extent)
4148{
4149 struct bio *bio = NULL;
4150 unsigned page_idx;
4151 unsigned long bio_flags = 0;
4152 struct page *pagepool[16];
4153 struct page *page;
4154 struct extent_map *em_cached = NULL;
4155 int nr = 0;
4156
4157 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4158 page = list_entry(pages->prev, struct page, lru);
4159
4160 prefetchw(&page->flags);
4161 list_del(&page->lru);
4162 if (add_to_page_cache_lru(page, mapping,
4163 page->index, GFP_NOFS)) {
4164 page_cache_release(page);
4165 continue;
4166 }
4167
4168 pagepool[nr++] = page;
4169 if (nr < ARRAY_SIZE(pagepool))
4170 continue;
4171 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4172 &bio, 0, &bio_flags, READ);
4173 nr = 0;
4174 }
4175 if (nr)
4176 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4177 &bio, 0, &bio_flags, READ);
4178
4179 if (em_cached)
4180 free_extent_map(em_cached);
4181
4182 BUG_ON(!list_empty(pages));
4183 if (bio)
4184 return submit_one_bio(READ, bio, 0, bio_flags);
4185 return 0;
4186}
4187
4188 /*
4189 * basic invalidatepage code: waits for any locked or writeback range
4190 * overlapping the page, then drops the related extent state records
4191 * from the io tree.
4192 */
4193int extent_invalidatepage(struct extent_io_tree *tree,
4194 struct page *page, unsigned long offset)
4195{
4196 struct extent_state *cached_state = NULL;
4197 u64 start = page_offset(page);
4198 u64 end = start + PAGE_CACHE_SIZE - 1;
4199 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4200
4201 start += ALIGN(offset, blocksize);
4202 if (start > end)
4203 return 0;
4204
4205 lock_extent_bits(tree, start, end, 0, &cached_state);
4206 wait_on_page_writeback(page);
4207 clear_extent_bit(tree, start, end,
4208 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4209 EXTENT_DO_ACCOUNTING,
4210 1, 1, &cached_state, GFP_NOFS);
4211 return 0;
4212}
4213
4214 /*
4215 * Helper for releasepage: the page can only be released if no part of
4216 * it is locked or under IO, in which case the remaining extent state
4217 * bits (except LOCKED and NODATASUM) are cleared.
4218 */
4219static int try_release_extent_state(struct extent_map_tree *map,
4220 struct extent_io_tree *tree,
4221 struct page *page, gfp_t mask)
4222{
4223 u64 start = page_offset(page);
4224 u64 end = start + PAGE_CACHE_SIZE - 1;
4225 int ret = 1;
4226
4227 if (test_range_bit(tree, start, end,
4228 EXTENT_IOBITS, 0, NULL))
4229 ret = 0;
4230 else {
4231 if ((mask & GFP_NOFS) == GFP_NOFS)
4232 mask = GFP_NOFS;
4233
4234
4235
4236
4237 ret = clear_extent_bit(tree, start, end,
4238 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4239 0, 0, NULL, mask);
4240
4241
4242
4243
4244 if (ret < 0)
4245 ret = 0;
4246 else
4247 ret = 1;
4248 }
4249 return ret;
4250}
4251
4252 /*
4253 * Helper for releasepage: as long as there are no locked or writeback
4254 * extents overlapping the page, drop the extent map records for the
4255 * range as well before falling back to try_release_extent_state().
4256 */
4257int try_release_extent_mapping(struct extent_map_tree *map,
4258 struct extent_io_tree *tree, struct page *page,
4259 gfp_t mask)
4260{
4261 struct extent_map *em;
4262 u64 start = page_offset(page);
4263 u64 end = start + PAGE_CACHE_SIZE - 1;
4264
4265 if ((mask & __GFP_WAIT) &&
4266 page->mapping->host->i_size > 16 * 1024 * 1024) {
4267 u64 len;
4268 while (start <= end) {
4269 len = end - start + 1;
4270 write_lock(&map->lock);
4271 em = lookup_extent_mapping(map, start, len);
4272 if (!em) {
4273 write_unlock(&map->lock);
4274 break;
4275 }
4276 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4277 em->start != start) {
4278 write_unlock(&map->lock);
4279 free_extent_map(em);
4280 break;
4281 }
4282 if (!test_range_bit(tree, em->start,
4283 extent_map_end(em) - 1,
4284 EXTENT_LOCKED | EXTENT_WRITEBACK,
4285 0, NULL)) {
4286 remove_extent_mapping(map, em);
4287
4288 free_extent_map(em);
4289 }
4290 start = extent_map_end(em);
4291 write_unlock(&map->lock);
4292
4293
4294 free_extent_map(em);
4295 }
4296 }
4297 return try_release_extent_state(map, tree, page, mask);
4298}
4299
4300 /*
4301 * helper for fiemap: keep calling get_extent until something other
4302 * than a hole comes back, or until we move past @last.
4303 */
4304static struct extent_map *get_extent_skip_holes(struct inode *inode,
4305 u64 offset,
4306 u64 last,
4307 get_extent_t *get_extent)
4308{
4309 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
4310 struct extent_map *em;
4311 u64 len;
4312
4313 if (offset >= last)
4314 return NULL;
4315
4316 while (1) {
4317 len = last - offset;
4318 if (len == 0)
4319 break;
4320 len = ALIGN(len, sectorsize);
4321 em = get_extent(inode, NULL, 0, offset, len, 0);
4322 if (IS_ERR_OR_NULL(em))
4323 return em;
4324
4325
4326 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
4327 em->block_start != EXTENT_MAP_HOLE) {
4328 return em;
4329 }
4330
4331
4332 offset = extent_map_end(em);
4333 free_extent_map(em);
4334 if (offset >= last)
4335 break;
4336 }
4337 return NULL;
4338}
4339
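/*
 * FIEMAP implementation.  The requested range is rounded to sector
 * boundaries and locked, then walked one extent map at a time via
 * get_extent_skip_holes(); each mapping is translated into fiemap
 * flags (inline, delalloc, encoded, shared, last) and reported with
 * fiemap_fill_next_extent().
 */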
4340int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4341 __u64 start, __u64 len, get_extent_t *get_extent)
4342{
4343 int ret = 0;
4344 u64 off = start;
4345 u64 max = start + len;
4346 u32 flags = 0;
4347 u32 found_type;
4348 u64 last;
4349 u64 last_for_get_extent = 0;
4350 u64 disko = 0;
4351 u64 isize = i_size_read(inode);
4352 struct btrfs_key found_key;
4353 struct extent_map *em = NULL;
4354 struct extent_state *cached_state = NULL;
4355 struct btrfs_path *path;
4356 struct btrfs_root *root = BTRFS_I(inode)->root;
4357 int end = 0;
4358 u64 em_start = 0;
4359 u64 em_len = 0;
4360 u64 em_end = 0;
4361
4362 if (len == 0)
4363 return -EINVAL;
4364
4365 path = btrfs_alloc_path();
4366 if (!path)
4367 return -ENOMEM;
4368 path->leave_spinning = 1;
4369
4370 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4371 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4372
4373 /*
4374 * Look up the last file extent item: i_size alone is not enough as
4375 * there may be preallocated extents beyond it.
4376 */
4377 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
4378 0);
4379 if (ret < 0) {
4380 btrfs_free_path(path);
4381 return ret;
4382 }
4383 WARN_ON(!ret);
4384 path->slots[0]--;
4385 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4386 found_type = found_key.type;
4387
4388
4389 if (found_key.objectid != btrfs_ino(inode) ||
4390 found_type != BTRFS_EXTENT_DATA_KEY) {
4391
4392 last = (u64)-1;
4393 last_for_get_extent = isize;
4394 } else {
4395
4396
4397
4398
4399
4400 last = found_key.offset;
4401 last_for_get_extent = last + 1;
4402 }
4403 btrfs_release_path(path);
4404
4405
4406
4407
4408
4409
4410 if (last < isize) {
4411 last = (u64)-1;
4412 last_for_get_extent = isize;
4413 }
4414
4415 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
4416 &cached_state);
4417
4418 em = get_extent_skip_holes(inode, start, last_for_get_extent,
4419 get_extent);
4420 if (!em)
4421 goto out;
4422 if (IS_ERR(em)) {
4423 ret = PTR_ERR(em);
4424 goto out;
4425 }
4426
4427 while (!end) {
4428 u64 offset_in_extent = 0;
4429
4430
4431 if (em->start >= max || extent_map_end(em) < off)
4432 break;
4433
4434 /*
4435 * get_extent may hand back a mapping that starts before the range we
4436 * asked for; clamp em_start so the extents reported to fiemap always
4437 * move forward and never overlap the previous one.
4438 */
4440 em_start = max(em->start, off);
4441
4442 /*
4443 * Remember the offset into the extent so the disk byte number can be
4444 * adjusted below.  Skip this for compressed extents: their in-memory
4445 * offset can be past what is actually allocated on disk.
4446 */
4448 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4449 offset_in_extent = em_start - em->start;
4450 em_end = extent_map_end(em);
4451 em_len = em_end - em_start;
4452 disko = 0;
4453 flags = 0;
4454
4455
4456
4457
4458 off = extent_map_end(em);
4459 if (off >= max)
4460 end = 1;
4461
4462 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4463 end = 1;
4464 flags |= FIEMAP_EXTENT_LAST;
4465 } else if (em->block_start == EXTENT_MAP_INLINE) {
4466 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4467 FIEMAP_EXTENT_NOT_ALIGNED);
4468 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4469 flags |= (FIEMAP_EXTENT_DELALLOC |
4470 FIEMAP_EXTENT_UNKNOWN);
4471 } else if (fieinfo->fi_extents_max) {
4472 u64 bytenr = em->block_start -
4473 (em->start - em->orig_start);
4474
4475 disko = em->block_start + offset_in_extent;
4476
4477 /*
4478 * Walk the backrefs for this extent to find out whether it is shared
4479 * with other files or snapshots, so FIEMAP_EXTENT_SHARED can be
4480 * reported.  bytenr is the logical start of the on-disk extent the
4481 * mapping points into.
4482 */
4484 ret = btrfs_check_shared(NULL, root->fs_info,
4485 root->objectid,
4486 btrfs_ino(inode), bytenr);
4487 if (ret < 0)
4488 goto out_free;
4489 if (ret)
4490 flags |= FIEMAP_EXTENT_SHARED;
4491 ret = 0;
4492 }
4493 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4494 flags |= FIEMAP_EXTENT_ENCODED;
4495
4496 free_extent_map(em);
4497 em = NULL;
4498 if ((em_start >= last) || em_len == (u64)-1 ||
4499 (last == (u64)-1 && isize <= em_end)) {
4500 flags |= FIEMAP_EXTENT_LAST;
4501 end = 1;
4502 }
4503
4504
4505 em = get_extent_skip_holes(inode, off, last_for_get_extent,
4506 get_extent);
4507 if (IS_ERR(em)) {
4508 ret = PTR_ERR(em);
4509 goto out;
4510 }
4511 if (!em) {
4512 flags |= FIEMAP_EXTENT_LAST;
4513 end = 1;
4514 }
4515 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
4516 em_len, flags);
4517 if (ret)
4518 goto out_free;
4519 }
4520out_free:
4521 free_extent_map(em);
4522out:
4523 btrfs_free_path(path);
4524 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4525 &cached_state, GFP_NOFS);
4526 return ret;
4527}
4528
4529static void __free_extent_buffer(struct extent_buffer *eb)
4530{
4531 btrfs_leak_debug_del(&eb->leak_list);
4532 kmem_cache_free(extent_buffer_cache, eb);
4533}
4534
4535int extent_buffer_under_io(struct extent_buffer *eb)
4536{
4537 return (atomic_read(&eb->io_pages) ||
4538 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4539 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4540}
4541
4542
4543
4544
4545static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4546{
4547 unsigned long index;
4548 struct page *page;
4549 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4550
4551 BUG_ON(extent_buffer_under_io(eb));
4552
4553 index = num_extent_pages(eb->start, eb->len);
4554 if (index == 0)
4555 return;
4556
4557 do {
4558 index--;
4559 page = eb->pages[index];
4560 if (page && mapped) {
4561 spin_lock(&page->mapping->private_lock);
4562 /*
4563 * The eb is removed from the radix tree before its pages are
4564 * released, so another eb may already have claimed this page.  Only
4565 * clear page->private if it still points at us.
4566 */
4569 if (PagePrivate(page) &&
4570 page->private == (unsigned long)eb) {
4571 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4572 BUG_ON(PageDirty(page));
4573 BUG_ON(PageWriteback(page));
4574
4575
4576
4577
4578 ClearPagePrivate(page);
4579 set_page_private(page, 0);
4580
4581 page_cache_release(page);
4582 }
4583 spin_unlock(&page->mapping->private_lock);
4584
4585 }
4586 if (page) {
4587
4588 page_cache_release(page);
4589 }
4590 } while (index != 0);
4591}
4592
4593
4594
4595
4596static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4597{
4598 btrfs_release_extent_buffer_page(eb);
4599 __free_extent_buffer(eb);
4600}
4601
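/*
 * Allocate and initialize a bare extent_buffer for the given range: no
 * pages are attached here, the locks and waitqueues are set up and the
 * buffer starts with a single reference.
 */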
4602static struct extent_buffer *
4603__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4604 unsigned long len)
4605{
4606 struct extent_buffer *eb = NULL;
4607
4608 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS);
4609 if (eb == NULL)
4610 return NULL;
4611 eb->start = start;
4612 eb->len = len;
4613 eb->fs_info = fs_info;
4614 eb->bflags = 0;
4615 rwlock_init(&eb->lock);
4616 atomic_set(&eb->write_locks, 0);
4617 atomic_set(&eb->read_locks, 0);
4618 atomic_set(&eb->blocking_readers, 0);
4619 atomic_set(&eb->blocking_writers, 0);
4620 atomic_set(&eb->spinning_readers, 0);
4621 atomic_set(&eb->spinning_writers, 0);
4622 eb->lock_nested = 0;
4623 init_waitqueue_head(&eb->write_lock_wq);
4624 init_waitqueue_head(&eb->read_lock_wq);
4625
4626 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4627
4628 spin_lock_init(&eb->refs_lock);
4629 atomic_set(&eb->refs, 1);
4630 atomic_set(&eb->io_pages, 0);
4631
4632
4633
4634
4635 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4636 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4637 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4638
4639 return eb;
4640}
4641
4642struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4643{
4644 unsigned long i;
4645 struct page *p;
4646 struct extent_buffer *new;
4647 unsigned long num_pages = num_extent_pages(src->start, src->len);
4648
4649 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4650 if (new == NULL)
4651 return NULL;
4652
4653 for (i = 0; i < num_pages; i++) {
4654 p = alloc_page(GFP_NOFS);
4655 if (!p) {
4656 btrfs_release_extent_buffer(new);
4657 return NULL;
4658 }
4659 attach_extent_buffer_page(new, p);
4660 WARN_ON(PageDirty(p));
4661 SetPageUptodate(p);
4662 new->pages[i] = p;
4663 }
4664
4665 copy_extent_buffer(new, src, 0, 0, src->len);
4666 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4667 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4668
4669 return new;
4670}
4671
4672struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4673 u64 start)
4674{
4675 struct extent_buffer *eb;
4676 unsigned long len;
4677 unsigned long num_pages;
4678 unsigned long i;
4679
4680 if (!fs_info) {
4681
4682
4683
4684
4685 len = 4096;
4686 } else {
4687 len = fs_info->tree_root->nodesize;
4688 }
4689 num_pages = num_extent_pages(0, len);
4690
4691 eb = __alloc_extent_buffer(fs_info, start, len);
4692 if (!eb)
4693 return NULL;
4694
4695 for (i = 0; i < num_pages; i++) {
4696 eb->pages[i] = alloc_page(GFP_NOFS);
4697 if (!eb->pages[i])
4698 goto err;
4699 }
4700 set_extent_buffer_uptodate(eb);
4701 btrfs_set_header_nritems(eb, 0);
4702 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4703
4704 return eb;
4705err:
4706 for (; i > 0; i--)
4707 __free_page(eb->pages[i - 1]);
4708 __free_extent_buffer(eb);
4709 return NULL;
4710}
4711
4712static void check_buffer_tree_ref(struct extent_buffer *eb)
4713{
4714 int refs;
4715
4716 /*
4717 * Make sure the buffer holds its "tree ref": EXTENT_BUFFER_TREE_REF
4718 * plus one reference count, which keeps the eb alive while it is
4719 * reachable through the radix tree and being dirtied or written.
4720 * If the bit is already set and at least two references exist we can
4721 * skip the refs_lock entirely; otherwise take the lock, set the bit
4722 * and add the extra reference only the first time.
4723 */
4735 refs = atomic_read(&eb->refs);
4736 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4737 return;
4738
4739 spin_lock(&eb->refs_lock);
4740 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4741 atomic_inc(&eb->refs);
4742 spin_unlock(&eb->refs_lock);
4743}
4744
4745static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4746 struct page *accessed)
4747{
4748 unsigned long num_pages, i;
4749
4750 check_buffer_tree_ref(eb);
4751
4752 num_pages = num_extent_pages(eb->start, eb->len);
4753 for (i = 0; i < num_pages; i++) {
4754 struct page *p = eb->pages[i];
4755
4756 if (p != accessed)
4757 mark_page_accessed(p);
4758 }
4759}
4760
4761struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4762 u64 start)
4763{
4764 struct extent_buffer *eb;
4765
4766 rcu_read_lock();
4767 eb = radix_tree_lookup(&fs_info->buffer_radix,
4768 start >> PAGE_CACHE_SHIFT);
4769 if (eb && atomic_inc_not_zero(&eb->refs)) {
4770 rcu_read_unlock();
4771 mark_extent_buffer_accessed(eb, NULL);
4772 return eb;
4773 }
4774 rcu_read_unlock();
4775
4776 return NULL;
4777}
4778
4779#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4780struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4781 u64 start)
4782{
4783 struct extent_buffer *eb, *exists = NULL;
4784 int ret;
4785
4786 eb = find_extent_buffer(fs_info, start);
4787 if (eb)
4788 return eb;
4789 eb = alloc_dummy_extent_buffer(fs_info, start);
4790 if (!eb)
4791 return NULL;
4792 eb->fs_info = fs_info;
4793again:
4794 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
4795 if (ret)
4796 goto free_eb;
4797 spin_lock(&fs_info->buffer_lock);
4798 ret = radix_tree_insert(&fs_info->buffer_radix,
4799 start >> PAGE_CACHE_SHIFT, eb);
4800 spin_unlock(&fs_info->buffer_lock);
4801 radix_tree_preload_end();
4802 if (ret == -EEXIST) {
4803 exists = find_extent_buffer(fs_info, start);
4804 if (exists)
4805 goto free_eb;
4806 else
4807 goto again;
4808 }
4809 check_buffer_tree_ref(eb);
4810 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4811
4812
4813
4814
4815
4816
4817
4818 atomic_inc(&eb->refs);
4819 return eb;
4820free_eb:
4821 btrfs_release_extent_buffer(eb);
4822 return exists;
4823}
4824#endif
4825
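/*
 * Find or create the extent buffer for a tree block starting at @start.
 * Pages come from the btree inode's mapping; if another live buffer
 * already owns a page, that buffer is returned instead.  The new
 * buffer is inserted into fs_info->buffer_radix, and an -EEXIST from
 * the radix tree means someone beat us to it, in which case theirs is
 * returned.
 */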
4826struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4827 u64 start)
4828{
4829 unsigned long len = fs_info->tree_root->nodesize;
4830 unsigned long num_pages = num_extent_pages(start, len);
4831 unsigned long i;
4832 unsigned long index = start >> PAGE_CACHE_SHIFT;
4833 struct extent_buffer *eb;
4834 struct extent_buffer *exists = NULL;
4835 struct page *p;
4836 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4837 int uptodate = 1;
4838 int ret;
4839
4840 eb = find_extent_buffer(fs_info, start);
4841 if (eb)
4842 return eb;
4843
4844 eb = __alloc_extent_buffer(fs_info, start, len);
4845 if (!eb)
4846 return NULL;
4847
4848 for (i = 0; i < num_pages; i++, index++) {
4849 p = find_or_create_page(mapping, index, GFP_NOFS);
4850 if (!p)
4851 goto free_eb;
4852
4853 spin_lock(&mapping->private_lock);
4854 if (PagePrivate(p)) {
4855
4856
4857
4858
4859
4860
4861
4862 exists = (struct extent_buffer *)p->private;
4863 if (atomic_inc_not_zero(&exists->refs)) {
4864 spin_unlock(&mapping->private_lock);
4865 unlock_page(p);
4866 page_cache_release(p);
4867 mark_extent_buffer_accessed(exists, p);
4868 goto free_eb;
4869 }
4870
 /*
 * The old buffer is being freed.  Clear page private and drop the
 * page reference it held, so that attach_extent_buffer_page() below
 * does not complain about the page already having an owner.
 */
4875 ClearPagePrivate(p);
4876 WARN_ON(PageDirty(p));
4877 page_cache_release(p);
4878 }
4879 attach_extent_buffer_page(eb, p);
4880 spin_unlock(&mapping->private_lock);
4881 WARN_ON(PageDirty(p));
4882 eb->pages[i] = p;
4883 if (!PageUptodate(p))
4884 uptodate = 0;
4885
 /*
 * The pages stay locked here; see the comment below for how this
 * avoids a race with releasepage and why they are unlocked later.
 */
4890 }
4891 if (uptodate)
4892 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4893again:
4894 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
4895 if (ret)
4896 goto free_eb;
4897
4898 spin_lock(&fs_info->buffer_lock);
4899 ret = radix_tree_insert(&fs_info->buffer_radix,
4900 start >> PAGE_CACHE_SHIFT, eb);
4901 spin_unlock(&fs_info->buffer_lock);
4902 radix_tree_preload_end();
4903 if (ret == -EEXIST) {
4904 exists = find_extent_buffer(fs_info, start);
4905 if (exists)
4906 goto free_eb;
4907 else
4908 goto again;
4909 }
4910
4911 check_buffer_tree_ref(eb);
4912 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4913
 /*
 * There is a race where releasepage may have looked for this extent
 * buffer in the radix tree and not found it.  It will then tell the
 * VM it is safe to reclaim the page and clear the page private bit.
 * We must make sure the page private bit is set up properly once the
 * extent buffer is in the radix tree so the buffer does not get lost;
 * that is why the pages are only unlocked below, after the insertion
 * has succeeded.
 */
4923 SetPageChecked(eb->pages[0]);
4924 for (i = 1; i < num_pages; i++) {
4925 p = eb->pages[i];
4926 ClearPageChecked(p);
4927 unlock_page(p);
4928 }
4929 unlock_page(eb->pages[0]);
4930 return eb;
4931
4932free_eb:
4933 for (i = 0; i < num_pages; i++) {
4934 if (eb->pages[i])
4935 unlock_page(eb->pages[i]);
4936 }
4937
4938 WARN_ON(!atomic_dec_and_test(&eb->refs));
4939 btrfs_release_extent_buffer(eb);
4940 return exists;
4941}
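
/*
 * Illustrative usage sketch, not part of the original file: in this version
 * alloc_extent_buffer() returns NULL on failure, otherwise a referenced
 * buffer (either freshly inserted or already cached) that the caller
 * releases with free_extent_buffer().  The helper name and "bytenr" are
 * made up; marking the pages uptodate first only serves to satisfy the
 * WARN_ON in the write helpers.
 *
 *	static int example_new_eb(struct btrfs_fs_info *fs_info, u64 bytenr)
 *	{
 *		struct extent_buffer *eb;
 *
 *		eb = alloc_extent_buffer(fs_info, bytenr);
 *		if (!eb)
 *			return -ENOMEM;
 *		set_extent_buffer_uptodate(eb);
 *		memset_extent_buffer(eb, 0, 0, eb->len);
 *		free_extent_buffer(eb);
 *		return 0;
 *	}
 */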
4942
4943static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4944{
4945 struct extent_buffer *eb =
4946 container_of(head, struct extent_buffer, rcu_head);
4947
4948 __free_extent_buffer(eb);
4949}
4950
/* Expects to be called with eb->refs_lock already held */
4952static int release_extent_buffer(struct extent_buffer *eb)
4953{
4954 WARN_ON(atomic_read(&eb->refs) == 0);
4955 if (atomic_dec_and_test(&eb->refs)) {
4956 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
4957 struct btrfs_fs_info *fs_info = eb->fs_info;
4958
4959 spin_unlock(&eb->refs_lock);
4960
4961 spin_lock(&fs_info->buffer_lock);
4962 radix_tree_delete(&fs_info->buffer_radix,
4963 eb->start >> PAGE_CACHE_SHIFT);
4964 spin_unlock(&fs_info->buffer_lock);
4965 } else {
4966 spin_unlock(&eb->refs_lock);
4967 }
4968
 /* Should be safe to release our pages at this point */
4970 btrfs_release_extent_buffer_page(eb);
4971#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4972 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4973 __free_extent_buffer(eb);
4974 return 1;
4975 }
4976#endif
4977 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4978 return 1;
4979 }
4980 spin_unlock(&eb->refs_lock);
4981
4982 return 0;
4983}
4984
4985void free_extent_buffer(struct extent_buffer *eb)
4986{
4987 int refs;
4988 int old;
4989 if (!eb)
4990 return;
4991
4992 while (1) {
4993 refs = atomic_read(&eb->refs);
4994 if (refs <= 3)
4995 break;
4996 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
4997 if (old == refs)
4998 return;
4999 }
5000
5001 spin_lock(&eb->refs_lock);
5002 if (atomic_read(&eb->refs) == 2 &&
5003 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5004 atomic_dec(&eb->refs);
5005
5006 if (atomic_read(&eb->refs) == 2 &&
5007 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5008 !extent_buffer_under_io(eb) &&
5009 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5010 atomic_dec(&eb->refs);
5011
 /*
 * Drop our reference; release_extent_buffer() is entered with
 * eb->refs_lock held and drops the lock on every path before
 * freeing anything.
 */
5016 release_extent_buffer(eb);
5017}
5018
5019void free_extent_buffer_stale(struct extent_buffer *eb)
5020{
5021 if (!eb)
5022 return;
5023
5024 spin_lock(&eb->refs_lock);
5025 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5026
5027 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5028 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5029 atomic_dec(&eb->refs);
5030 release_extent_buffer(eb);
5031}
5032
5033void clear_extent_buffer_dirty(struct extent_buffer *eb)
5034{
5035 unsigned long i;
5036 unsigned long num_pages;
5037 struct page *page;
5038
5039 num_pages = num_extent_pages(eb->start, eb->len);
5040
5041 for (i = 0; i < num_pages; i++) {
5042 page = eb->pages[i];
5043 if (!PageDirty(page))
5044 continue;
5045
5046 lock_page(page);
5047 WARN_ON(!PagePrivate(page));
5048
5049 clear_page_dirty_for_io(page);
5050 spin_lock_irq(&page->mapping->tree_lock);
5051 if (!PageDirty(page)) {
5052 radix_tree_tag_clear(&page->mapping->page_tree,
5053 page_index(page),
5054 PAGECACHE_TAG_DIRTY);
5055 }
5056 spin_unlock_irq(&page->mapping->tree_lock);
5057 ClearPageError(page);
5058 unlock_page(page);
5059 }
5060 WARN_ON(atomic_read(&eb->refs) == 0);
5061}
5062
5063int set_extent_buffer_dirty(struct extent_buffer *eb)
5064{
5065 unsigned long i;
5066 unsigned long num_pages;
5067 int was_dirty = 0;
5068
5069 check_buffer_tree_ref(eb);
5070
5071 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5072
5073 num_pages = num_extent_pages(eb->start, eb->len);
5074 WARN_ON(atomic_read(&eb->refs) == 0);
5075 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5076
5077 for (i = 0; i < num_pages; i++)
5078 set_page_dirty(eb->pages[i]);
5079 return was_dirty;
5080}
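
/*
 * Illustrative note, not from the original file: set_extent_buffer_dirty()
 * returns whether the buffer was already dirty, which a caller can use to
 * avoid accounting the same buffer twice.  A made-up sketch, where
 * example_account_dirty_metadata() is a hypothetical helper:
 *
 *	if (!set_extent_buffer_dirty(eb))
 *		example_account_dirty_metadata(fs_info, eb->len);
 */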
5081
5082int clear_extent_buffer_uptodate(struct extent_buffer *eb)
5083{
5084 unsigned long i;
5085 struct page *page;
5086 unsigned long num_pages;
5087
5088 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5089 num_pages = num_extent_pages(eb->start, eb->len);
5090 for (i = 0; i < num_pages; i++) {
5091 page = eb->pages[i];
5092 if (page)
5093 ClearPageUptodate(page);
5094 }
5095 return 0;
5096}
5097
5098int set_extent_buffer_uptodate(struct extent_buffer *eb)
5099{
5100 unsigned long i;
5101 struct page *page;
5102 unsigned long num_pages;
5103
5104 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5105 num_pages = num_extent_pages(eb->start, eb->len);
5106 for (i = 0; i < num_pages; i++) {
5107 page = eb->pages[i];
5108 SetPageUptodate(page);
5109 }
5110 return 0;
5111}
5112
5113int extent_buffer_uptodate(struct extent_buffer *eb)
5114{
5115 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5116}
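
/*
 * Illustrative note, not from the original file: the three helpers above
 * only track the buffer-wide EXTENT_BUFFER_UPTODATE bit and the per-page
 * Uptodate flags; they never start any IO.  A made-up sketch of the usual
 * check before trusting the contents ("item" and "offset" are hypothetical):
 *
 *	if (!extent_buffer_uptodate(eb))
 *		return -EIO;
 *	read_extent_buffer(eb, &item, offset, sizeof(item));
 */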
5117
5118int read_extent_buffer_pages(struct extent_io_tree *tree,
5119 struct extent_buffer *eb, u64 start, int wait,
5120 get_extent_t *get_extent, int mirror_num)
5121{
5122 unsigned long i;
5123 unsigned long start_i;
5124 struct page *page;
5125 int err;
5126 int ret = 0;
5127 int locked_pages = 0;
5128 int all_uptodate = 1;
5129 unsigned long num_pages;
5130 unsigned long num_reads = 0;
5131 struct bio *bio = NULL;
5132 unsigned long bio_flags = 0;
5133
5134 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5135 return 0;
5136
5137 if (start) {
5138 WARN_ON(start < eb->start);
5139 start_i = (start >> PAGE_CACHE_SHIFT) -
5140 (eb->start >> PAGE_CACHE_SHIFT);
5141 } else {
5142 start_i = 0;
5143 }
5144
5145 num_pages = num_extent_pages(eb->start, eb->len);
5146 for (i = start_i; i < num_pages; i++) {
5147 page = eb->pages[i];
5148 if (wait == WAIT_NONE) {
5149 if (!trylock_page(page))
5150 goto unlock_exit;
5151 } else {
5152 lock_page(page);
5153 }
5154 locked_pages++;
5155 if (!PageUptodate(page)) {
5156 num_reads++;
5157 all_uptodate = 0;
5158 }
5159 }
5160 if (all_uptodate) {
5161 if (start_i == 0)
5162 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5163 goto unlock_exit;
5164 }
5165
5166 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5167 eb->read_mirror = 0;
5168 atomic_set(&eb->io_pages, num_reads);
5169 for (i = start_i; i < num_pages; i++) {
5170 page = eb->pages[i];
5171 if (!PageUptodate(page)) {
5172 ClearPageError(page);
5173 err = __extent_read_full_page(tree, page,
5174 get_extent, &bio,
5175 mirror_num, &bio_flags,
5176 READ | REQ_META);
5177 if (err)
5178 ret = err;
5179 } else {
5180 unlock_page(page);
5181 }
5182 }
5183
5184 if (bio) {
5185 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
5186 bio_flags);
5187 if (err)
5188 return err;
5189 }
5190
5191 if (ret || wait != WAIT_COMPLETE)
5192 return ret;
5193
5194 for (i = start_i; i < num_pages; i++) {
5195 page = eb->pages[i];
5196 wait_on_page_locked(page);
5197 if (!PageUptodate(page))
5198 ret = -EIO;
5199 }
5200
5201 return ret;
5202
5203unlock_exit:
5204 i = start_i;
5205 while (locked_pages > 0) {
5206 page = eb->pages[i];
5207 i++;
5208 unlock_page(page);
5209 locked_pages--;
5210 }
5211 return ret;
5212}
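
/*
 * Illustrative usage sketch, not part of the original file: a synchronous
 * read of every page of a buffer.  "io_tree" and the get_extent_t callback
 * stand in for whatever the caller uses; in btrfs proper this is driven
 * from disk-io.c with the btree inode's io_tree and btree_get_extent().
 *
 *	ret = read_extent_buffer_pages(io_tree, eb, 0, WAIT_COMPLETE,
 *				       btree_get_extent, mirror_num);
 *	if (!ret && !extent_buffer_uptodate(eb))
 *		ret = -EIO;
 */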
5213
5214void read_extent_buffer(struct extent_buffer *eb, void *dstv,
5215 unsigned long start,
5216 unsigned long len)
5217{
5218 size_t cur;
5219 size_t offset;
5220 struct page *page;
5221 char *kaddr;
5222 char *dst = (char *)dstv;
5223 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5224 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5225
5226 WARN_ON(start > eb->len);
5227 WARN_ON(start + len > eb->start + eb->len);
5228
5229 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5230
5231 while (len > 0) {
5232 page = eb->pages[i];
5233
5234 cur = min(len, (PAGE_CACHE_SIZE - offset));
5235 kaddr = page_address(page);
5236 memcpy(dst, kaddr + offset, cur);
5237
5238 dst += cur;
5239 len -= cur;
5240 offset = 0;
5241 i++;
5242 }
5243}
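
/*
 * Illustrative usage sketch, not part of the original file: copy a small,
 * possibly page-crossing range out of the buffer into an on-stack object.
 * "offset" is made up for the example.
 *
 *	struct btrfs_disk_key disk_key;
 *
 *	read_extent_buffer(eb, &disk_key, offset, sizeof(disk_key));
 */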
5244
5245int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
5246 unsigned long start,
5247 unsigned long len)
5248{
5249 size_t cur;
5250 size_t offset;
5251 struct page *page;
5252 char *kaddr;
5253 char __user *dst = (char __user *)dstv;
5254 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5255 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5256 int ret = 0;
5257
5258 WARN_ON(start > eb->len);
5259 WARN_ON(start + len > eb->start + eb->len);
5260
5261 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5262
5263 while (len > 0) {
5264 page = eb->pages[i];
5265
5266 cur = min(len, (PAGE_CACHE_SIZE - offset));
5267 kaddr = page_address(page);
5268 if (copy_to_user(dst, kaddr + offset, cur)) {
5269 ret = -EFAULT;
5270 break;
5271 }
5272
5273 dst += cur;
5274 len -= cur;
5275 offset = 0;
5276 i++;
5277 }
5278
5279 return ret;
5280}
5281
5282int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
5283 unsigned long min_len, char **map,
5284 unsigned long *map_start,
5285 unsigned long *map_len)
5286{
5287 size_t offset = start & (PAGE_CACHE_SIZE - 1);
5288 char *kaddr;
5289 struct page *p;
5290 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5291 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5292 unsigned long end_i = (start_offset + start + min_len - 1) >>
5293 PAGE_CACHE_SHIFT;
5294
5295 if (i != end_i)
5296 return -EINVAL;
5297
5298 if (i == 0) {
5299 offset = start_offset;
5300 *map_start = 0;
5301 } else {
5302 offset = 0;
5303 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
5304 }
5305
5306 if (start + min_len > eb->len) {
5307 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
5308 "wanted %lu %lu\n",
5309 eb->start, eb->len, start, min_len);
5310 return -EINVAL;
5311 }
5312
5313 p = eb->pages[i];
5314 kaddr = page_address(p);
5315 *map = kaddr + offset;
5316 *map_len = PAGE_CACHE_SIZE - offset;
5317 return 0;
5318}
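
/*
 * Illustrative usage sketch, not part of the original file: the mapping
 * only succeeds when the requested range fits inside one page, so callers
 * fall back to read_extent_buffer() on -EINVAL.  "offset" and "res" are
 * made up for the example.
 *
 *	char *kaddr;
 *	unsigned long map_start, map_len;
 *	u64 res;
 *
 *	if (!map_private_extent_buffer(eb, offset, sizeof(res), &kaddr,
 *				       &map_start, &map_len))
 *		res = get_unaligned_le64(kaddr + offset - map_start);
 *	else
 *		read_extent_buffer(eb, &res, offset, sizeof(res));
 */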
5319
5320int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
5321 unsigned long start,
5322 unsigned long len)
5323{
5324 size_t cur;
5325 size_t offset;
5326 struct page *page;
5327 char *kaddr;
5328 char *ptr = (char *)ptrv;
5329 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5330 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5331 int ret = 0;
5332
5333 WARN_ON(start > eb->len);
5334 WARN_ON(start + len > eb->start + eb->len);
5335
5336 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5337
5338 while (len > 0) {
5339 page = eb->pages[i];
5340
5341 cur = min(len, (PAGE_CACHE_SIZE - offset));
5342
5343 kaddr = page_address(page);
5344 ret = memcmp(ptr, kaddr + offset, cur);
5345 if (ret)
5346 break;
5347
5348 ptr += cur;
5349 len -= cur;
5350 offset = 0;
5351 i++;
5352 }
5353 return ret;
5354}
5355
5356void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5357 unsigned long start, unsigned long len)
5358{
5359 size_t cur;
5360 size_t offset;
5361 struct page *page;
5362 char *kaddr;
5363 char *src = (char *)srcv;
5364 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5365 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5366
5367 WARN_ON(start > eb->len);
5368 WARN_ON(start + len > eb->start + eb->len);
5369
5370 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5371
5372 while (len > 0) {
5373 page = eb->pages[i];
5374 WARN_ON(!PageUptodate(page));
5375
5376 cur = min(len, PAGE_CACHE_SIZE - offset);
5377 kaddr = page_address(page);
5378 memcpy(kaddr + offset, src, cur);
5379
5380 src += cur;
5381 len -= cur;
5382 offset = 0;
5383 i++;
5384 }
5385}
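
/*
 * Illustrative usage sketch, not part of the original file: the write
 * helpers above expect the pages to already be uptodate (note the WARN_ON),
 * and the buffer has to be marked dirty afterwards so the change reaches
 * disk.  "offset" is made up for the example.
 *
 *	struct btrfs_disk_key disk_key = { 0 };
 *
 *	write_extent_buffer(eb, &disk_key, offset, sizeof(disk_key));
 *	set_extent_buffer_dirty(eb);
 */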
5386
5387void memset_extent_buffer(struct extent_buffer *eb, char c,
5388 unsigned long start, unsigned long len)
5389{
5390 size_t cur;
5391 size_t offset;
5392 struct page *page;
5393 char *kaddr;
5394 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5395 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5396
5397 WARN_ON(start > eb->len);
5398 WARN_ON(start + len > eb->start + eb->len);
5399
5400 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5401
5402 while (len > 0) {
5403 page = eb->pages[i];
5404 WARN_ON(!PageUptodate(page));
5405
5406 cur = min(len, PAGE_CACHE_SIZE - offset);
5407 kaddr = page_address(page);
5408 memset(kaddr + offset, c, cur);
5409
5410 len -= cur;
5411 offset = 0;
5412 i++;
5413 }
5414}
5415
5416void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5417 unsigned long dst_offset, unsigned long src_offset,
5418 unsigned long len)
5419{
5420 u64 dst_len = dst->len;
5421 size_t cur;
5422 size_t offset;
5423 struct page *page;
5424 char *kaddr;
5425 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5426 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5427
5428 WARN_ON(src->len != dst_len);
5429
5430 offset = (start_offset + dst_offset) &
5431 (PAGE_CACHE_SIZE - 1);
5432
5433 while (len > 0) {
5434 page = dst->pages[i];
5435 WARN_ON(!PageUptodate(page));
5436
5437 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
5438
5439 kaddr = page_address(page);
5440 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5441
5442 src_offset += cur;
5443 len -= cur;
5444 offset = 0;
5445 i++;
5446 }
5447}
5448
5449static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5450{
5451 unsigned long distance = (src > dst) ? src - dst : dst - src;
5452 return distance < len;
5453}
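
/*
 * Worked example, not part of the original file: for src = 100, dst = 160
 * and len = 64 the distance is 60, which is smaller than len, so the byte
 * ranges [100,163] and [160,223] overlap and copy_pages() below must use
 * memmove() instead of memcpy().
 */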
5454
5455static void copy_pages(struct page *dst_page, struct page *src_page,
5456 unsigned long dst_off, unsigned long src_off,
5457 unsigned long len)
5458{
5459 char *dst_kaddr = page_address(dst_page);
5460 char *src_kaddr;
5461 int must_memmove = 0;
5462
5463 if (dst_page != src_page) {
5464 src_kaddr = page_address(src_page);
5465 } else {
5466 src_kaddr = dst_kaddr;
5467 if (areas_overlap(src_off, dst_off, len))
5468 must_memmove = 1;
5469 }
5470
5471 if (must_memmove)
5472 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5473 else
5474 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5475}
5476
5477void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5478 unsigned long src_offset, unsigned long len)
5479{
5480 size_t cur;
5481 size_t dst_off_in_page;
5482 size_t src_off_in_page;
5483 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5484 unsigned long dst_i;
5485 unsigned long src_i;
5486
 if (src_offset + len > dst->len) {
 printk(KERN_ERR "BTRFS: memcpy bogus src_offset %lu move "
        "len %lu dst len %lu\n", src_offset, len, dst->len);
 BUG_ON(1);
 }
 if (dst_offset + len > dst->len) {
 printk(KERN_ERR "BTRFS: memcpy bogus dst_offset %lu move "
        "len %lu dst len %lu\n", dst_offset, len, dst->len);
 BUG_ON(1);
 }
5497
5498 while (len > 0) {
5499 dst_off_in_page = (start_offset + dst_offset) &
5500 (PAGE_CACHE_SIZE - 1);
5501 src_off_in_page = (start_offset + src_offset) &
5502 (PAGE_CACHE_SIZE - 1);
5503
5504 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5505 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
5506
5507 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
5508 src_off_in_page));
5509 cur = min_t(unsigned long, cur,
5510 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
5511
5512 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5513 dst_off_in_page, src_off_in_page, cur);
5514
5515 src_offset += cur;
5516 dst_offset += cur;
5517 len -= cur;
5518 }
5519}
5520
5521void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5522 unsigned long src_offset, unsigned long len)
5523{
5524 size_t cur;
5525 size_t dst_off_in_page;
5526 size_t src_off_in_page;
5527 unsigned long dst_end = dst_offset + len - 1;
5528 unsigned long src_end = src_offset + len - 1;
5529 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5530 unsigned long dst_i;
5531 unsigned long src_i;
5532
 if (src_offset + len > dst->len) {
 printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
        "len %lu dst len %lu\n", src_offset, len, dst->len);
 BUG_ON(1);
 }
 if (dst_offset + len > dst->len) {
 printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
        "len %lu dst len %lu\n", dst_offset, len, dst->len);
 BUG_ON(1);
 }
5543 if (dst_offset < src_offset) {
5544 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5545 return;
5546 }
5547 while (len > 0) {
5548 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
5549 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5550
5551 dst_off_in_page = (start_offset + dst_end) &
5552 (PAGE_CACHE_SIZE - 1);
5553 src_off_in_page = (start_offset + src_end) &
5554 (PAGE_CACHE_SIZE - 1);
5555
5556 cur = min_t(unsigned long, len, src_off_in_page + 1);
5557 cur = min(cur, dst_off_in_page + 1);
5558 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5559 dst_off_in_page - cur + 1,
5560 src_off_in_page - cur + 1, cur);
5561
5562 dst_end -= cur;
5563 src_end -= cur;
5564 len -= cur;
5565 }
5566}
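
/*
 * Illustrative note, not from the original file: memmove_extent_buffer()
 * copies back to front when dst_offset > src_offset, so overlapping ranges
 * inside one buffer are safe, while memcpy_extent_buffer() is the cheaper
 * choice when the ranges cannot overlap (or when dst_offset < src_offset,
 * as the delegation above shows).  A made-up sketch that shifts "nr" bytes
 * up by one key pointer ("eb", "off" and "nr" are hypothetical):
 *
 *	memmove_extent_buffer(eb, off + sizeof(struct btrfs_key_ptr),
 *			      off, nr);
 */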
5567
5568int try_release_extent_buffer(struct page *page)
5569{
5570 struct extent_buffer *eb;
5571
 /*
 * Take the mapping's private_lock so that nobody can attach this page
 * to an extent buffer while we are looking at page->private.
 */
5576 spin_lock(&page->mapping->private_lock);
5577 if (!PagePrivate(page)) {
5578 spin_unlock(&page->mapping->private_lock);
5579 return 1;
5580 }
5581
5582 eb = (struct extent_buffer *)page->private;
5583 BUG_ON(!eb);
5584
 /*
 * Acquire eb->refs_lock while still holding the mapping's
 * private_lock, so the extent buffer cannot be freed underneath us.
 * If anybody else holds a reference, or the buffer is under IO, it is
 * not safe to release it and we bail out.
 */
5590 spin_lock(&eb->refs_lock);
5591 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5592 spin_unlock(&eb->refs_lock);
5593 spin_unlock(&page->mapping->private_lock);
5594 return 0;
5595 }
5596 spin_unlock(&page->mapping->private_lock);
5597
 /*
 * If the tree ref isn't set then we know the ref on this eb is a
 * real ref, so just return; the page will likely be freed soon anyway.
 */
5602 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
5603 spin_unlock(&eb->refs_lock);
5604 return 0;
5605 }
5606
5607 return release_extent_buffer(eb);
5608}
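
/*
 * Illustrative note, not from the original file: this helper is meant to be
 * called from the btree address space's releasepage callback (the btrfs one
 * lives in disk-io.c).  A minimal sketch of such a callback:
 *
 *	static int example_releasepage(struct page *page, gfp_t gfp_flags)
 *	{
 *		if (PageWriteback(page) || PageDirty(page))
 *			return 0;
 *		return try_release_extent_buffer(page);
 *	}
 */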
5609