1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/pagemap.h>
6#include <linux/page-flags.h>
7#include <linux/spinlock.h>
8#include <linux/blkdev.h>
9#include <linux/swap.h>
10#include <linux/writeback.h>
11#include <linux/pagevec.h>
12#include <linux/prefetch.h>
13#include <linux/cleancache.h>
14#include "extent_io.h"
15#include "extent_map.h"
16#include "ctree.h"
17#include "btrfs_inode.h"
18#include "volumes.h"
19#include "check-integrity.h"
20#include "locking.h"
21#include "rcu-string.h"
22#include "backref.h"
23
24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache;
26static struct bio_set *btrfs_bioset;
27
28static inline bool extent_state_in_tree(const struct extent_state *state)
29{
30 return !RB_EMPTY_NODE(&state->rb_node);
31}
32
33#ifdef CONFIG_BTRFS_DEBUG
34static LIST_HEAD(buffers);
35static LIST_HEAD(states);
36
37static DEFINE_SPINLOCK(leak_lock);
38
39static inline
40void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
41{
42 unsigned long flags;
43
44 spin_lock_irqsave(&leak_lock, flags);
45 list_add(new, head);
46 spin_unlock_irqrestore(&leak_lock, flags);
47}
48
49static inline
50void btrfs_leak_debug_del(struct list_head *entry)
51{
52 unsigned long flags;
53
54 spin_lock_irqsave(&leak_lock, flags);
55 list_del(entry);
56 spin_unlock_irqrestore(&leak_lock, flags);
57}
58
59static inline
60void btrfs_leak_debug_check(void)
61{
62 struct extent_state *state;
63 struct extent_buffer *eb;
64
65 while (!list_empty(&states)) {
66 state = list_entry(states.next, struct extent_state, leak_list);
67 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
68 state->start, state->end, state->state,
69 extent_state_in_tree(state),
70 atomic_read(&state->refs));
71 list_del(&state->leak_list);
72 kmem_cache_free(extent_state_cache, state);
73 }
74
75 while (!list_empty(&buffers)) {
76 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
77 printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
78 "refs %d\n",
79 eb->start, eb->len, atomic_read(&eb->refs));
80 list_del(&eb->leak_list);
81 kmem_cache_free(extent_buffer_cache, eb);
82 }
83}
84
85#define btrfs_debug_check_extent_io_range(tree, start, end) \
86 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
87static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 struct extent_io_tree *tree, u64 start, u64 end)
89{
90 struct inode *inode;
91 u64 isize;
92
93 if (!tree->mapping)
94 return;
95
96 inode = tree->mapping->host;
97 isize = i_size_read(inode);
98 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
99 printk_ratelimited(KERN_DEBUG
100 "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
101 caller, btrfs_ino(inode), isize, start, end);
102 }
103}
104#else
105#define btrfs_leak_debug_add(new, head) do {} while (0)
106#define btrfs_leak_debug_del(entry) do {} while (0)
107#define btrfs_leak_debug_check() do {} while (0)
108#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
109#endif
110
111#define BUFFER_LRU_MAX 64
112
113struct tree_entry {
114 u64 start;
115 u64 end;
116 struct rb_node rb_node;
117};
118
119struct extent_page_data {
120 struct bio *bio;
121 struct extent_io_tree *tree;
122 get_extent_t *get_extent;
123 unsigned long bio_flags;
124
125
126
127
128 unsigned int extent_locked:1;
129
130
131 unsigned int sync_io:1;
132};
133
134static noinline void flush_write_bio(void *data);
135static inline struct btrfs_fs_info *
136tree_fs_info(struct extent_io_tree *tree)
137{
138 if (!tree->mapping)
139 return NULL;
140 return btrfs_sb(tree->mapping->host->i_sb);
141}
142
143int __init extent_io_init(void)
144{
145 extent_state_cache = kmem_cache_create("btrfs_extent_state",
146 sizeof(struct extent_state), 0,
147 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
148 if (!extent_state_cache)
149 return -ENOMEM;
150
151 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
152 sizeof(struct extent_buffer), 0,
153 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
154 if (!extent_buffer_cache)
155 goto free_state_cache;
156
157 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
158 offsetof(struct btrfs_io_bio, bio));
159 if (!btrfs_bioset)
160 goto free_buffer_cache;
161
162 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
163 goto free_bioset;
164
165 return 0;
166
167free_bioset:
168 bioset_free(btrfs_bioset);
169 btrfs_bioset = NULL;
170
171free_buffer_cache:
172 kmem_cache_destroy(extent_buffer_cache);
173 extent_buffer_cache = NULL;
174
175free_state_cache:
176 kmem_cache_destroy(extent_state_cache);
177 extent_state_cache = NULL;
178 return -ENOMEM;
179}
180
181void extent_io_exit(void)
182{
183 btrfs_leak_debug_check();
184
185
186
187
188
189 rcu_barrier();
190 if (extent_state_cache)
191 kmem_cache_destroy(extent_state_cache);
192 if (extent_buffer_cache)
193 kmem_cache_destroy(extent_buffer_cache);
194 if (btrfs_bioset)
195 bioset_free(btrfs_bioset);
196}
197
198void extent_io_tree_init(struct extent_io_tree *tree,
199 struct address_space *mapping)
200{
201 tree->state = RB_ROOT;
202 tree->ops = NULL;
203 tree->dirty_bytes = 0;
204 spin_lock_init(&tree->lock);
205 tree->mapping = mapping;
206}
207
208static struct extent_state *alloc_extent_state(gfp_t mask)
209{
210 struct extent_state *state;
211
212 state = kmem_cache_alloc(extent_state_cache, mask);
213 if (!state)
214 return state;
215 state->state = 0;
216 state->private = 0;
217 RB_CLEAR_NODE(&state->rb_node);
218 btrfs_leak_debug_add(&state->leak_list, &states);
219 atomic_set(&state->refs, 1);
220 init_waitqueue_head(&state->wq);
221 trace_alloc_extent_state(state, mask, _RET_IP_);
222 return state;
223}
224
225void free_extent_state(struct extent_state *state)
226{
227 if (!state)
228 return;
229 if (atomic_dec_and_test(&state->refs)) {
230 WARN_ON(extent_state_in_tree(state));
231 btrfs_leak_debug_del(&state->leak_list);
232 trace_free_extent_state(state, _RET_IP_);
233 kmem_cache_free(extent_state_cache, state);
234 }
235}
236
237static struct rb_node *tree_insert(struct rb_root *root,
238 struct rb_node *search_start,
239 u64 offset,
240 struct rb_node *node,
241 struct rb_node ***p_in,
242 struct rb_node **parent_in)
243{
244 struct rb_node **p;
245 struct rb_node *parent = NULL;
246 struct tree_entry *entry;
247
248 if (p_in && parent_in) {
249 p = *p_in;
250 parent = *parent_in;
251 goto do_insert;
252 }
253
254 p = search_start ? &search_start : &root->rb_node;
255 while (*p) {
256 parent = *p;
257 entry = rb_entry(parent, struct tree_entry, rb_node);
258
259 if (offset < entry->start)
260 p = &(*p)->rb_left;
261 else if (offset > entry->end)
262 p = &(*p)->rb_right;
263 else
264 return parent;
265 }
266
267do_insert:
268 rb_link_node(node, parent, p);
269 rb_insert_color(node, root);
270 return NULL;
271}
272
273static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
274 struct rb_node **prev_ret,
275 struct rb_node **next_ret,
276 struct rb_node ***p_ret,
277 struct rb_node **parent_ret)
278{
279 struct rb_root *root = &tree->state;
280 struct rb_node **n = &root->rb_node;
281 struct rb_node *prev = NULL;
282 struct rb_node *orig_prev = NULL;
283 struct tree_entry *entry;
284 struct tree_entry *prev_entry = NULL;
285
286 while (*n) {
287 prev = *n;
288 entry = rb_entry(prev, struct tree_entry, rb_node);
289 prev_entry = entry;
290
291 if (offset < entry->start)
292 n = &(*n)->rb_left;
293 else if (offset > entry->end)
294 n = &(*n)->rb_right;
295 else
296 return *n;
297 }
298
299 if (p_ret)
300 *p_ret = n;
301 if (parent_ret)
302 *parent_ret = prev;
303
304 if (prev_ret) {
305 orig_prev = prev;
306 while (prev && offset > prev_entry->end) {
307 prev = rb_next(prev);
308 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
309 }
310 *prev_ret = prev;
311 prev = orig_prev;
312 }
313
314 if (next_ret) {
315 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
316 while (prev && offset < prev_entry->start) {
317 prev = rb_prev(prev);
318 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
319 }
320 *next_ret = prev;
321 }
322 return NULL;
323}
324
325static inline struct rb_node *
326tree_search_for_insert(struct extent_io_tree *tree,
327 u64 offset,
328 struct rb_node ***p_ret,
329 struct rb_node **parent_ret)
330{
331 struct rb_node *prev = NULL;
332 struct rb_node *ret;
333
334 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
335 if (!ret)
336 return prev;
337 return ret;
338}
339
340static inline struct rb_node *tree_search(struct extent_io_tree *tree,
341 u64 offset)
342{
343 return tree_search_for_insert(tree, offset, NULL, NULL);
344}
345
346static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
347 struct extent_state *other)
348{
349 if (tree->ops && tree->ops->merge_extent_hook)
350 tree->ops->merge_extent_hook(tree->mapping->host, new,
351 other);
352}
353
354
355
356
357
358
359
360
361
362
363static void merge_state(struct extent_io_tree *tree,
364 struct extent_state *state)
365{
366 struct extent_state *other;
367 struct rb_node *other_node;
368
369 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
370 return;
371
372 other_node = rb_prev(&state->rb_node);
373 if (other_node) {
374 other = rb_entry(other_node, struct extent_state, rb_node);
375 if (other->end == state->start - 1 &&
376 other->state == state->state) {
377 merge_cb(tree, state, other);
378 state->start = other->start;
379 rb_erase(&other->rb_node, &tree->state);
380 RB_CLEAR_NODE(&other->rb_node);
381 free_extent_state(other);
382 }
383 }
384 other_node = rb_next(&state->rb_node);
385 if (other_node) {
386 other = rb_entry(other_node, struct extent_state, rb_node);
387 if (other->start == state->end + 1 &&
388 other->state == state->state) {
389 merge_cb(tree, state, other);
390 state->end = other->end;
391 rb_erase(&other->rb_node, &tree->state);
392 RB_CLEAR_NODE(&other->rb_node);
393 free_extent_state(other);
394 }
395 }
396}
397
398static void set_state_cb(struct extent_io_tree *tree,
399 struct extent_state *state, unsigned *bits)
400{
401 if (tree->ops && tree->ops->set_bit_hook)
402 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
403}
404
405static void clear_state_cb(struct extent_io_tree *tree,
406 struct extent_state *state, unsigned *bits)
407{
408 if (tree->ops && tree->ops->clear_bit_hook)
409 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
410}
411
412static void set_state_bits(struct extent_io_tree *tree,
413 struct extent_state *state, unsigned *bits);
414
415
416
417
418
419
420
421
422
423
424
425static int insert_state(struct extent_io_tree *tree,
426 struct extent_state *state, u64 start, u64 end,
427 struct rb_node ***p,
428 struct rb_node **parent,
429 unsigned *bits)
430{
431 struct rb_node *node;
432
433 if (end < start)
434 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
435 end, start);
436 state->start = start;
437 state->end = end;
438
439 set_state_bits(tree, state, bits);
440
441 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
442 if (node) {
443 struct extent_state *found;
444 found = rb_entry(node, struct extent_state, rb_node);
445 printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
446 "%llu %llu\n",
447 found->start, found->end, start, end);
448 return -EEXIST;
449 }
450 merge_state(tree, state);
451 return 0;
452}
453
454static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
455 u64 split)
456{
457 if (tree->ops && tree->ops->split_extent_hook)
458 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
459}
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
476 struct extent_state *prealloc, u64 split)
477{
478 struct rb_node *node;
479
480 split_cb(tree, orig, split);
481
482 prealloc->start = orig->start;
483 prealloc->end = split - 1;
484 prealloc->state = orig->state;
485 orig->start = split;
486
487 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
488 &prealloc->rb_node, NULL, NULL);
489 if (node) {
490 free_extent_state(prealloc);
491 return -EEXIST;
492 }
493 return 0;
494}
495
496static struct extent_state *next_state(struct extent_state *state)
497{
498 struct rb_node *next = rb_next(&state->rb_node);
499 if (next)
500 return rb_entry(next, struct extent_state, rb_node);
501 else
502 return NULL;
503}
504
505
506
507
508
509
510
511
512static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
513 struct extent_state *state,
514 unsigned *bits, int wake)
515{
516 struct extent_state *next;
517 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
518
519 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
520 u64 range = state->end - state->start + 1;
521 WARN_ON(range > tree->dirty_bytes);
522 tree->dirty_bytes -= range;
523 }
524 clear_state_cb(tree, state, bits);
525 state->state &= ~bits_to_clear;
526 if (wake)
527 wake_up(&state->wq);
528 if (state->state == 0) {
529 next = next_state(state);
530 if (extent_state_in_tree(state)) {
531 rb_erase(&state->rb_node, &tree->state);
532 RB_CLEAR_NODE(&state->rb_node);
533 free_extent_state(state);
534 } else {
535 WARN_ON(1);
536 }
537 } else {
538 merge_state(tree, state);
539 next = next_state(state);
540 }
541 return next;
542}
543
544static struct extent_state *
545alloc_extent_state_atomic(struct extent_state *prealloc)
546{
547 if (!prealloc)
548 prealloc = alloc_extent_state(GFP_ATOMIC);
549
550 return prealloc;
551}
552
553static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
554{
555 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
556 "Extent tree was modified by another "
557 "thread while locked.");
558}
559
560
561
562
563
564
565
566
567
568
569
570
571
572int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
573 unsigned bits, int wake, int delete,
574 struct extent_state **cached_state,
575 gfp_t mask)
576{
577 struct extent_state *state;
578 struct extent_state *cached;
579 struct extent_state *prealloc = NULL;
580 struct rb_node *node;
581 u64 last_end;
582 int err;
583 int clear = 0;
584
585 btrfs_debug_check_extent_io_range(tree, start, end);
586
587 if (bits & EXTENT_DELALLOC)
588 bits |= EXTENT_NORESERVE;
589
590 if (delete)
591 bits |= ~EXTENT_CTLBITS;
592 bits |= EXTENT_FIRST_DELALLOC;
593
594 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
595 clear = 1;
596again:
597 if (!prealloc && (mask & __GFP_WAIT)) {
598
599
600
601
602
603
604
605 prealloc = alloc_extent_state(mask);
606 }
607
608 spin_lock(&tree->lock);
609 if (cached_state) {
610 cached = *cached_state;
611
612 if (clear) {
613 *cached_state = NULL;
614 cached_state = NULL;
615 }
616
617 if (cached && extent_state_in_tree(cached) &&
618 cached->start <= start && cached->end > start) {
619 if (clear)
620 atomic_dec(&cached->refs);
621 state = cached;
622 goto hit_next;
623 }
624 if (clear)
625 free_extent_state(cached);
626 }
627
628
629
630
631 node = tree_search(tree, start);
632 if (!node)
633 goto out;
634 state = rb_entry(node, struct extent_state, rb_node);
635hit_next:
636 if (state->start > end)
637 goto out;
638 WARN_ON(state->end < start);
639 last_end = state->end;
640
641
642 if (!(state->state & bits)) {
643 state = next_state(state);
644 goto next;
645 }
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663 if (state->start < start) {
664 prealloc = alloc_extent_state_atomic(prealloc);
665 BUG_ON(!prealloc);
666 err = split_state(tree, state, prealloc, start);
667 if (err)
668 extent_io_tree_panic(tree, err);
669
670 prealloc = NULL;
671 if (err)
672 goto out;
673 if (state->end <= end) {
674 state = clear_state_bit(tree, state, &bits, wake);
675 goto next;
676 }
677 goto search_again;
678 }
679
680
681
682
683
684
685 if (state->start <= end && state->end > end) {
686 prealloc = alloc_extent_state_atomic(prealloc);
687 BUG_ON(!prealloc);
688 err = split_state(tree, state, prealloc, end + 1);
689 if (err)
690 extent_io_tree_panic(tree, err);
691
692 if (wake)
693 wake_up(&state->wq);
694
695 clear_state_bit(tree, prealloc, &bits, wake);
696
697 prealloc = NULL;
698 goto out;
699 }
700
701 state = clear_state_bit(tree, state, &bits, wake);
702next:
703 if (last_end == (u64)-1)
704 goto out;
705 start = last_end + 1;
706 if (start <= end && state && !need_resched())
707 goto hit_next;
708 goto search_again;
709
710out:
711 spin_unlock(&tree->lock);
712 if (prealloc)
713 free_extent_state(prealloc);
714
715 return 0;
716
717search_again:
718 if (start > end)
719 goto out;
720 spin_unlock(&tree->lock);
721 if (mask & __GFP_WAIT)
722 cond_resched();
723 goto again;
724}
725
726static void wait_on_state(struct extent_io_tree *tree,
727 struct extent_state *state)
728 __releases(tree->lock)
729 __acquires(tree->lock)
730{
731 DEFINE_WAIT(wait);
732 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
733 spin_unlock(&tree->lock);
734 schedule();
735 spin_lock(&tree->lock);
736 finish_wait(&state->wq, &wait);
737}
738
739
740
741
742
743
744static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
745 unsigned long bits)
746{
747 struct extent_state *state;
748 struct rb_node *node;
749
750 btrfs_debug_check_extent_io_range(tree, start, end);
751
752 spin_lock(&tree->lock);
753again:
754 while (1) {
755
756
757
758
759 node = tree_search(tree, start);
760process_node:
761 if (!node)
762 break;
763
764 state = rb_entry(node, struct extent_state, rb_node);
765
766 if (state->start > end)
767 goto out;
768
769 if (state->state & bits) {
770 start = state->start;
771 atomic_inc(&state->refs);
772 wait_on_state(tree, state);
773 free_extent_state(state);
774 goto again;
775 }
776 start = state->end + 1;
777
778 if (start > end)
779 break;
780
781 if (!cond_resched_lock(&tree->lock)) {
782 node = rb_next(node);
783 goto process_node;
784 }
785 }
786out:
787 spin_unlock(&tree->lock);
788}
789
790static void set_state_bits(struct extent_io_tree *tree,
791 struct extent_state *state,
792 unsigned *bits)
793{
794 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
795
796 set_state_cb(tree, state, bits);
797 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
798 u64 range = state->end - state->start + 1;
799 tree->dirty_bytes += range;
800 }
801 state->state |= bits_to_set;
802}
803
804static void cache_state_if_flags(struct extent_state *state,
805 struct extent_state **cached_ptr,
806 unsigned flags)
807{
808 if (cached_ptr && !(*cached_ptr)) {
809 if (!flags || (state->state & flags)) {
810 *cached_ptr = state;
811 atomic_inc(&state->refs);
812 }
813 }
814}
815
816static void cache_state(struct extent_state *state,
817 struct extent_state **cached_ptr)
818{
819 return cache_state_if_flags(state, cached_ptr,
820 EXTENT_IOBITS | EXTENT_BOUNDARY);
821}
822
823
824
825
826
827
828
829
830
831
832
833
834static int __must_check
835__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
836 unsigned bits, unsigned exclusive_bits,
837 u64 *failed_start, struct extent_state **cached_state,
838 gfp_t mask)
839{
840 struct extent_state *state;
841 struct extent_state *prealloc = NULL;
842 struct rb_node *node;
843 struct rb_node **p;
844 struct rb_node *parent;
845 int err = 0;
846 u64 last_start;
847 u64 last_end;
848
849 btrfs_debug_check_extent_io_range(tree, start, end);
850
851 bits |= EXTENT_FIRST_DELALLOC;
852again:
853 if (!prealloc && (mask & __GFP_WAIT)) {
854 prealloc = alloc_extent_state(mask);
855 BUG_ON(!prealloc);
856 }
857
858 spin_lock(&tree->lock);
859 if (cached_state && *cached_state) {
860 state = *cached_state;
861 if (state->start <= start && state->end > start &&
862 extent_state_in_tree(state)) {
863 node = &state->rb_node;
864 goto hit_next;
865 }
866 }
867
868
869
870
871 node = tree_search_for_insert(tree, start, &p, &parent);
872 if (!node) {
873 prealloc = alloc_extent_state_atomic(prealloc);
874 BUG_ON(!prealloc);
875 err = insert_state(tree, prealloc, start, end,
876 &p, &parent, &bits);
877 if (err)
878 extent_io_tree_panic(tree, err);
879
880 cache_state(prealloc, cached_state);
881 prealloc = NULL;
882 goto out;
883 }
884 state = rb_entry(node, struct extent_state, rb_node);
885hit_next:
886 last_start = state->start;
887 last_end = state->end;
888
889
890
891
892
893
894
895 if (state->start == start && state->end <= end) {
896 if (state->state & exclusive_bits) {
897 *failed_start = state->start;
898 err = -EEXIST;
899 goto out;
900 }
901
902 set_state_bits(tree, state, &bits);
903 cache_state(state, cached_state);
904 merge_state(tree, state);
905 if (last_end == (u64)-1)
906 goto out;
907 start = last_end + 1;
908 state = next_state(state);
909 if (start < end && state && state->start == start &&
910 !need_resched())
911 goto hit_next;
912 goto search_again;
913 }
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931 if (state->start < start) {
932 if (state->state & exclusive_bits) {
933 *failed_start = start;
934 err = -EEXIST;
935 goto out;
936 }
937
938 prealloc = alloc_extent_state_atomic(prealloc);
939 BUG_ON(!prealloc);
940 err = split_state(tree, state, prealloc, start);
941 if (err)
942 extent_io_tree_panic(tree, err);
943
944 prealloc = NULL;
945 if (err)
946 goto out;
947 if (state->end <= end) {
948 set_state_bits(tree, state, &bits);
949 cache_state(state, cached_state);
950 merge_state(tree, state);
951 if (last_end == (u64)-1)
952 goto out;
953 start = last_end + 1;
954 state = next_state(state);
955 if (start < end && state && state->start == start &&
956 !need_resched())
957 goto hit_next;
958 }
959 goto search_again;
960 }
961
962
963
964
965
966
967
968 if (state->start > start) {
969 u64 this_end;
970 if (end < last_start)
971 this_end = end;
972 else
973 this_end = last_start - 1;
974
975 prealloc = alloc_extent_state_atomic(prealloc);
976 BUG_ON(!prealloc);
977
978
979
980
981
982 err = insert_state(tree, prealloc, start, this_end,
983 NULL, NULL, &bits);
984 if (err)
985 extent_io_tree_panic(tree, err);
986
987 cache_state(prealloc, cached_state);
988 prealloc = NULL;
989 start = this_end + 1;
990 goto search_again;
991 }
992
993
994
995
996
997
998 if (state->start <= end && state->end > end) {
999 if (state->state & exclusive_bits) {
1000 *failed_start = start;
1001 err = -EEXIST;
1002 goto out;
1003 }
1004
1005 prealloc = alloc_extent_state_atomic(prealloc);
1006 BUG_ON(!prealloc);
1007 err = split_state(tree, state, prealloc, end + 1);
1008 if (err)
1009 extent_io_tree_panic(tree, err);
1010
1011 set_state_bits(tree, prealloc, &bits);
1012 cache_state(prealloc, cached_state);
1013 merge_state(tree, prealloc);
1014 prealloc = NULL;
1015 goto out;
1016 }
1017
1018 goto search_again;
1019
1020out:
1021 spin_unlock(&tree->lock);
1022 if (prealloc)
1023 free_extent_state(prealloc);
1024
1025 return err;
1026
1027search_again:
1028 if (start > end)
1029 goto out;
1030 spin_unlock(&tree->lock);
1031 if (mask & __GFP_WAIT)
1032 cond_resched();
1033 goto again;
1034}
1035
1036int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1037 unsigned bits, u64 * failed_start,
1038 struct extent_state **cached_state, gfp_t mask)
1039{
1040 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1041 cached_state, mask);
1042}
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1063 unsigned bits, unsigned clear_bits,
1064 struct extent_state **cached_state, gfp_t mask)
1065{
1066 struct extent_state *state;
1067 struct extent_state *prealloc = NULL;
1068 struct rb_node *node;
1069 struct rb_node **p;
1070 struct rb_node *parent;
1071 int err = 0;
1072 u64 last_start;
1073 u64 last_end;
1074 bool first_iteration = true;
1075
1076 btrfs_debug_check_extent_io_range(tree, start, end);
1077
1078again:
1079 if (!prealloc && (mask & __GFP_WAIT)) {
1080
1081
1082
1083
1084
1085
1086
1087 prealloc = alloc_extent_state(mask);
1088 if (!prealloc && !first_iteration)
1089 return -ENOMEM;
1090 }
1091
1092 spin_lock(&tree->lock);
1093 if (cached_state && *cached_state) {
1094 state = *cached_state;
1095 if (state->start <= start && state->end > start &&
1096 extent_state_in_tree(state)) {
1097 node = &state->rb_node;
1098 goto hit_next;
1099 }
1100 }
1101
1102
1103
1104
1105
1106 node = tree_search_for_insert(tree, start, &p, &parent);
1107 if (!node) {
1108 prealloc = alloc_extent_state_atomic(prealloc);
1109 if (!prealloc) {
1110 err = -ENOMEM;
1111 goto out;
1112 }
1113 err = insert_state(tree, prealloc, start, end,
1114 &p, &parent, &bits);
1115 if (err)
1116 extent_io_tree_panic(tree, err);
1117 cache_state(prealloc, cached_state);
1118 prealloc = NULL;
1119 goto out;
1120 }
1121 state = rb_entry(node, struct extent_state, rb_node);
1122hit_next:
1123 last_start = state->start;
1124 last_end = state->end;
1125
1126
1127
1128
1129
1130
1131
1132 if (state->start == start && state->end <= end) {
1133 set_state_bits(tree, state, &bits);
1134 cache_state(state, cached_state);
1135 state = clear_state_bit(tree, state, &clear_bits, 0);
1136 if (last_end == (u64)-1)
1137 goto out;
1138 start = last_end + 1;
1139 if (start < end && state && state->start == start &&
1140 !need_resched())
1141 goto hit_next;
1142 goto search_again;
1143 }
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161 if (state->start < start) {
1162 prealloc = alloc_extent_state_atomic(prealloc);
1163 if (!prealloc) {
1164 err = -ENOMEM;
1165 goto out;
1166 }
1167 err = split_state(tree, state, prealloc, start);
1168 if (err)
1169 extent_io_tree_panic(tree, err);
1170 prealloc = NULL;
1171 if (err)
1172 goto out;
1173 if (state->end <= end) {
1174 set_state_bits(tree, state, &bits);
1175 cache_state(state, cached_state);
1176 state = clear_state_bit(tree, state, &clear_bits, 0);
1177 if (last_end == (u64)-1)
1178 goto out;
1179 start = last_end + 1;
1180 if (start < end && state && state->start == start &&
1181 !need_resched())
1182 goto hit_next;
1183 }
1184 goto search_again;
1185 }
1186
1187
1188
1189
1190
1191
1192
1193 if (state->start > start) {
1194 u64 this_end;
1195 if (end < last_start)
1196 this_end = end;
1197 else
1198 this_end = last_start - 1;
1199
1200 prealloc = alloc_extent_state_atomic(prealloc);
1201 if (!prealloc) {
1202 err = -ENOMEM;
1203 goto out;
1204 }
1205
1206
1207
1208
1209
1210 err = insert_state(tree, prealloc, start, this_end,
1211 NULL, NULL, &bits);
1212 if (err)
1213 extent_io_tree_panic(tree, err);
1214 cache_state(prealloc, cached_state);
1215 prealloc = NULL;
1216 start = this_end + 1;
1217 goto search_again;
1218 }
1219
1220
1221
1222
1223
1224
1225 if (state->start <= end && state->end > end) {
1226 prealloc = alloc_extent_state_atomic(prealloc);
1227 if (!prealloc) {
1228 err = -ENOMEM;
1229 goto out;
1230 }
1231
1232 err = split_state(tree, state, prealloc, end + 1);
1233 if (err)
1234 extent_io_tree_panic(tree, err);
1235
1236 set_state_bits(tree, prealloc, &bits);
1237 cache_state(prealloc, cached_state);
1238 clear_state_bit(tree, prealloc, &clear_bits, 0);
1239 prealloc = NULL;
1240 goto out;
1241 }
1242
1243 goto search_again;
1244
1245out:
1246 spin_unlock(&tree->lock);
1247 if (prealloc)
1248 free_extent_state(prealloc);
1249
1250 return err;
1251
1252search_again:
1253 if (start > end)
1254 goto out;
1255 spin_unlock(&tree->lock);
1256 if (mask & __GFP_WAIT)
1257 cond_resched();
1258 first_iteration = false;
1259 goto again;
1260}
1261
1262
1263int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1264 gfp_t mask)
1265{
1266 return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
1267 NULL, mask);
1268}
1269
1270int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1271 unsigned bits, gfp_t mask)
1272{
1273 return set_extent_bit(tree, start, end, bits, NULL,
1274 NULL, mask);
1275}
1276
1277int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1278 unsigned bits, gfp_t mask)
1279{
1280 int wake = 0;
1281
1282 if (bits & EXTENT_LOCKED)
1283 wake = 1;
1284
1285 return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
1286}
1287
1288int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
1289 struct extent_state **cached_state, gfp_t mask)
1290{
1291 return set_extent_bit(tree, start, end,
1292 EXTENT_DELALLOC | EXTENT_UPTODATE,
1293 NULL, cached_state, mask);
1294}
1295
1296int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
1297 struct extent_state **cached_state, gfp_t mask)
1298{
1299 return set_extent_bit(tree, start, end,
1300 EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
1301 NULL, cached_state, mask);
1302}
1303
1304int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1305 gfp_t mask)
1306{
1307 return clear_extent_bit(tree, start, end,
1308 EXTENT_DIRTY | EXTENT_DELALLOC |
1309 EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
1310}
1311
1312int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
1313 gfp_t mask)
1314{
1315 return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
1316 NULL, mask);
1317}
1318
1319int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1320 struct extent_state **cached_state, gfp_t mask)
1321{
1322 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
1323 cached_state, mask);
1324}
1325
1326int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1327 struct extent_state **cached_state, gfp_t mask)
1328{
1329 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
1330 cached_state, mask);
1331}
1332
1333
1334
1335
1336
1337int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1338 unsigned bits, struct extent_state **cached_state)
1339{
1340 int err;
1341 u64 failed_start;
1342
1343 while (1) {
1344 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
1345 EXTENT_LOCKED, &failed_start,
1346 cached_state, GFP_NOFS);
1347 if (err == -EEXIST) {
1348 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1349 start = failed_start;
1350 } else
1351 break;
1352 WARN_ON(start > end);
1353 }
1354 return err;
1355}
1356
1357int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1358{
1359 return lock_extent_bits(tree, start, end, 0, NULL);
1360}
1361
1362int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1363{
1364 int err;
1365 u64 failed_start;
1366
1367 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1368 &failed_start, NULL, GFP_NOFS);
1369 if (err == -EEXIST) {
1370 if (failed_start > start)
1371 clear_extent_bit(tree, start, failed_start - 1,
1372 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1373 return 0;
1374 }
1375 return 1;
1376}
1377
1378int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1379 struct extent_state **cached, gfp_t mask)
1380{
1381 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
1382 mask);
1383}
1384
1385int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1386{
1387 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1388 GFP_NOFS);
1389}
1390
1391int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1392{
1393 unsigned long index = start >> PAGE_CACHE_SHIFT;
1394 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1395 struct page *page;
1396
1397 while (index <= end_index) {
1398 page = find_get_page(inode->i_mapping, index);
1399 BUG_ON(!page);
1400 clear_page_dirty_for_io(page);
1401 page_cache_release(page);
1402 index++;
1403 }
1404 return 0;
1405}
1406
1407int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1408{
1409 unsigned long index = start >> PAGE_CACHE_SHIFT;
1410 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1411 struct page *page;
1412
1413 while (index <= end_index) {
1414 page = find_get_page(inode->i_mapping, index);
1415 BUG_ON(!page);
1416 __set_page_dirty_nobuffers(page);
1417 account_page_redirty(page);
1418 page_cache_release(page);
1419 index++;
1420 }
1421 return 0;
1422}
1423
1424
1425
1426
1427static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1428{
1429 unsigned long index = start >> PAGE_CACHE_SHIFT;
1430 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1431 struct page *page;
1432
1433 while (index <= end_index) {
1434 page = find_get_page(tree->mapping, index);
1435 BUG_ON(!page);
1436 set_page_writeback(page);
1437 page_cache_release(page);
1438 index++;
1439 }
1440 return 0;
1441}
1442
1443
1444
1445
1446
1447static struct extent_state *
1448find_first_extent_bit_state(struct extent_io_tree *tree,
1449 u64 start, unsigned bits)
1450{
1451 struct rb_node *node;
1452 struct extent_state *state;
1453
1454
1455
1456
1457
1458 node = tree_search(tree, start);
1459 if (!node)
1460 goto out;
1461
1462 while (1) {
1463 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->end >= start && (state->state & bits))
1465 return state;
1466
1467 node = rb_next(node);
1468 if (!node)
1469 break;
1470 }
1471out:
1472 return NULL;
1473}
1474
1475
1476
1477
1478
1479
1480
1481
1482int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1483 u64 *start_ret, u64 *end_ret, unsigned bits,
1484 struct extent_state **cached_state)
1485{
1486 struct extent_state *state;
1487 struct rb_node *n;
1488 int ret = 1;
1489
1490 spin_lock(&tree->lock);
1491 if (cached_state && *cached_state) {
1492 state = *cached_state;
1493 if (state->end == start - 1 && extent_state_in_tree(state)) {
1494 n = rb_next(&state->rb_node);
1495 while (n) {
1496 state = rb_entry(n, struct extent_state,
1497 rb_node);
1498 if (state->state & bits)
1499 goto got_it;
1500 n = rb_next(n);
1501 }
1502 free_extent_state(*cached_state);
1503 *cached_state = NULL;
1504 goto out;
1505 }
1506 free_extent_state(*cached_state);
1507 *cached_state = NULL;
1508 }
1509
1510 state = find_first_extent_bit_state(tree, start, bits);
1511got_it:
1512 if (state) {
1513 cache_state_if_flags(state, cached_state, 0);
1514 *start_ret = state->start;
1515 *end_ret = state->end;
1516 ret = 0;
1517 }
1518out:
1519 spin_unlock(&tree->lock);
1520 return ret;
1521}
1522
1523
1524
1525
1526
1527
1528
1529static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1530 u64 *start, u64 *end, u64 max_bytes,
1531 struct extent_state **cached_state)
1532{
1533 struct rb_node *node;
1534 struct extent_state *state;
1535 u64 cur_start = *start;
1536 u64 found = 0;
1537 u64 total_bytes = 0;
1538
1539 spin_lock(&tree->lock);
1540
1541
1542
1543
1544
1545 node = tree_search(tree, cur_start);
1546 if (!node) {
1547 if (!found)
1548 *end = (u64)-1;
1549 goto out;
1550 }
1551
1552 while (1) {
1553 state = rb_entry(node, struct extent_state, rb_node);
1554 if (found && (state->start != cur_start ||
1555 (state->state & EXTENT_BOUNDARY))) {
1556 goto out;
1557 }
1558 if (!(state->state & EXTENT_DELALLOC)) {
1559 if (!found)
1560 *end = state->end;
1561 goto out;
1562 }
1563 if (!found) {
1564 *start = state->start;
1565 *cached_state = state;
1566 atomic_inc(&state->refs);
1567 }
1568 found++;
1569 *end = state->end;
1570 cur_start = state->end + 1;
1571 node = rb_next(node);
1572 total_bytes += state->end - state->start + 1;
1573 if (total_bytes >= max_bytes)
1574 break;
1575 if (!node)
1576 break;
1577 }
1578out:
1579 spin_unlock(&tree->lock);
1580 return found;
1581}
1582
1583static noinline void __unlock_for_delalloc(struct inode *inode,
1584 struct page *locked_page,
1585 u64 start, u64 end)
1586{
1587 int ret;
1588 struct page *pages[16];
1589 unsigned long index = start >> PAGE_CACHE_SHIFT;
1590 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1591 unsigned long nr_pages = end_index - index + 1;
1592 int i;
1593
1594 if (index == locked_page->index && end_index == index)
1595 return;
1596
1597 while (nr_pages > 0) {
1598 ret = find_get_pages_contig(inode->i_mapping, index,
1599 min_t(unsigned long, nr_pages,
1600 ARRAY_SIZE(pages)), pages);
1601 for (i = 0; i < ret; i++) {
1602 if (pages[i] != locked_page)
1603 unlock_page(pages[i]);
1604 page_cache_release(pages[i]);
1605 }
1606 nr_pages -= ret;
1607 index += ret;
1608 cond_resched();
1609 }
1610}
1611
1612static noinline int lock_delalloc_pages(struct inode *inode,
1613 struct page *locked_page,
1614 u64 delalloc_start,
1615 u64 delalloc_end)
1616{
1617 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
1618 unsigned long start_index = index;
1619 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
1620 unsigned long pages_locked = 0;
1621 struct page *pages[16];
1622 unsigned long nrpages;
1623 int ret;
1624 int i;
1625
1626
1627 if (index == locked_page->index && index == end_index)
1628 return 0;
1629
1630
1631 nrpages = end_index - index + 1;
1632 while (nrpages > 0) {
1633 ret = find_get_pages_contig(inode->i_mapping, index,
1634 min_t(unsigned long,
1635 nrpages, ARRAY_SIZE(pages)), pages);
1636 if (ret == 0) {
1637 ret = -EAGAIN;
1638 goto done;
1639 }
1640
1641 for (i = 0; i < ret; i++) {
1642
1643
1644
1645
1646 if (pages[i] != locked_page) {
1647 lock_page(pages[i]);
1648 if (!PageDirty(pages[i]) ||
1649 pages[i]->mapping != inode->i_mapping) {
1650 ret = -EAGAIN;
1651 unlock_page(pages[i]);
1652 page_cache_release(pages[i]);
1653 goto done;
1654 }
1655 }
1656 page_cache_release(pages[i]);
1657 pages_locked++;
1658 }
1659 nrpages -= ret;
1660 index += ret;
1661 cond_resched();
1662 }
1663 ret = 0;
1664done:
1665 if (ret && pages_locked) {
1666 __unlock_for_delalloc(inode, locked_page,
1667 delalloc_start,
1668 ((u64)(start_index + pages_locked - 1)) <<
1669 PAGE_CACHE_SHIFT);
1670 }
1671 return ret;
1672}
1673
1674
1675
1676
1677
1678
1679
1680STATIC u64 find_lock_delalloc_range(struct inode *inode,
1681 struct extent_io_tree *tree,
1682 struct page *locked_page, u64 *start,
1683 u64 *end, u64 max_bytes)
1684{
1685 u64 delalloc_start;
1686 u64 delalloc_end;
1687 u64 found;
1688 struct extent_state *cached_state = NULL;
1689 int ret;
1690 int loops = 0;
1691
1692again:
1693
1694 delalloc_start = *start;
1695 delalloc_end = 0;
1696 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1697 max_bytes, &cached_state);
1698 if (!found || delalloc_end <= *start) {
1699 *start = delalloc_start;
1700 *end = delalloc_end;
1701 free_extent_state(cached_state);
1702 return 0;
1703 }
1704
1705
1706
1707
1708
1709
1710 if (delalloc_start < *start)
1711 delalloc_start = *start;
1712
1713
1714
1715
1716 if (delalloc_end + 1 - delalloc_start > max_bytes)
1717 delalloc_end = delalloc_start + max_bytes - 1;
1718
1719
1720 ret = lock_delalloc_pages(inode, locked_page,
1721 delalloc_start, delalloc_end);
1722 if (ret == -EAGAIN) {
1723
1724
1725
1726 free_extent_state(cached_state);
1727 cached_state = NULL;
1728 if (!loops) {
1729 max_bytes = PAGE_CACHE_SIZE;
1730 loops = 1;
1731 goto again;
1732 } else {
1733 found = 0;
1734 goto out_failed;
1735 }
1736 }
1737 BUG_ON(ret);
1738
1739
1740 lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
1741
1742
1743 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1744 EXTENT_DELALLOC, 1, cached_state);
1745 if (!ret) {
1746 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1747 &cached_state, GFP_NOFS);
1748 __unlock_for_delalloc(inode, locked_page,
1749 delalloc_start, delalloc_end);
1750 cond_resched();
1751 goto again;
1752 }
1753 free_extent_state(cached_state);
1754 *start = delalloc_start;
1755 *end = delalloc_end;
1756out_failed:
1757 return found;
1758}
1759
1760int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1761 struct page *locked_page,
1762 unsigned clear_bits,
1763 unsigned long page_ops)
1764{
1765 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1766 int ret;
1767 struct page *pages[16];
1768 unsigned long index = start >> PAGE_CACHE_SHIFT;
1769 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1770 unsigned long nr_pages = end_index - index + 1;
1771 int i;
1772
1773 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1774 if (page_ops == 0)
1775 return 0;
1776
1777 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1778 mapping_set_error(inode->i_mapping, -EIO);
1779
1780 while (nr_pages > 0) {
1781 ret = find_get_pages_contig(inode->i_mapping, index,
1782 min_t(unsigned long,
1783 nr_pages, ARRAY_SIZE(pages)), pages);
1784 for (i = 0; i < ret; i++) {
1785
1786 if (page_ops & PAGE_SET_PRIVATE2)
1787 SetPagePrivate2(pages[i]);
1788
1789 if (pages[i] == locked_page) {
1790 page_cache_release(pages[i]);
1791 continue;
1792 }
1793 if (page_ops & PAGE_CLEAR_DIRTY)
1794 clear_page_dirty_for_io(pages[i]);
1795 if (page_ops & PAGE_SET_WRITEBACK)
1796 set_page_writeback(pages[i]);
1797 if (page_ops & PAGE_SET_ERROR)
1798 SetPageError(pages[i]);
1799 if (page_ops & PAGE_END_WRITEBACK)
1800 end_page_writeback(pages[i]);
1801 if (page_ops & PAGE_UNLOCK)
1802 unlock_page(pages[i]);
1803 page_cache_release(pages[i]);
1804 }
1805 nr_pages -= ret;
1806 index += ret;
1807 cond_resched();
1808 }
1809 return 0;
1810}
1811
1812
1813
1814
1815
1816
1817u64 count_range_bits(struct extent_io_tree *tree,
1818 u64 *start, u64 search_end, u64 max_bytes,
1819 unsigned bits, int contig)
1820{
1821 struct rb_node *node;
1822 struct extent_state *state;
1823 u64 cur_start = *start;
1824 u64 total_bytes = 0;
1825 u64 last = 0;
1826 int found = 0;
1827
1828 if (WARN_ON(search_end <= cur_start))
1829 return 0;
1830
1831 spin_lock(&tree->lock);
1832 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1833 total_bytes = tree->dirty_bytes;
1834 goto out;
1835 }
1836
1837
1838
1839
1840 node = tree_search(tree, cur_start);
1841 if (!node)
1842 goto out;
1843
1844 while (1) {
1845 state = rb_entry(node, struct extent_state, rb_node);
1846 if (state->start > search_end)
1847 break;
1848 if (contig && found && state->start > last + 1)
1849 break;
1850 if (state->end >= cur_start && (state->state & bits) == bits) {
1851 total_bytes += min(search_end, state->end) + 1 -
1852 max(cur_start, state->start);
1853 if (total_bytes >= max_bytes)
1854 break;
1855 if (!found) {
1856 *start = max(cur_start, state->start);
1857 found = 1;
1858 }
1859 last = state->end;
1860 } else if (contig && found) {
1861 break;
1862 }
1863 node = rb_next(node);
1864 if (!node)
1865 break;
1866 }
1867out:
1868 spin_unlock(&tree->lock);
1869 return total_bytes;
1870}
1871
1872
1873
1874
1875
1876static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1877{
1878 struct rb_node *node;
1879 struct extent_state *state;
1880 int ret = 0;
1881
1882 spin_lock(&tree->lock);
1883
1884
1885
1886
1887 node = tree_search(tree, start);
1888 if (!node) {
1889 ret = -ENOENT;
1890 goto out;
1891 }
1892 state = rb_entry(node, struct extent_state, rb_node);
1893 if (state->start != start) {
1894 ret = -ENOENT;
1895 goto out;
1896 }
1897 state->private = private;
1898out:
1899 spin_unlock(&tree->lock);
1900 return ret;
1901}
1902
1903int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1904{
1905 struct rb_node *node;
1906 struct extent_state *state;
1907 int ret = 0;
1908
1909 spin_lock(&tree->lock);
1910
1911
1912
1913
1914 node = tree_search(tree, start);
1915 if (!node) {
1916 ret = -ENOENT;
1917 goto out;
1918 }
1919 state = rb_entry(node, struct extent_state, rb_node);
1920 if (state->start != start) {
1921 ret = -ENOENT;
1922 goto out;
1923 }
1924 *private = state->private;
1925out:
1926 spin_unlock(&tree->lock);
1927 return ret;
1928}
1929
1930
1931
1932
1933
1934
1935
1936int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1937 unsigned bits, int filled, struct extent_state *cached)
1938{
1939 struct extent_state *state = NULL;
1940 struct rb_node *node;
1941 int bitset = 0;
1942
1943 spin_lock(&tree->lock);
1944 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1945 cached->end > start)
1946 node = &cached->rb_node;
1947 else
1948 node = tree_search(tree, start);
1949 while (node && start <= end) {
1950 state = rb_entry(node, struct extent_state, rb_node);
1951
1952 if (filled && state->start > start) {
1953 bitset = 0;
1954 break;
1955 }
1956
1957 if (state->start > end)
1958 break;
1959
1960 if (state->state & bits) {
1961 bitset = 1;
1962 if (!filled)
1963 break;
1964 } else if (filled) {
1965 bitset = 0;
1966 break;
1967 }
1968
1969 if (state->end == (u64)-1)
1970 break;
1971
1972 start = state->end + 1;
1973 if (start > end)
1974 break;
1975 node = rb_next(node);
1976 if (!node) {
1977 if (filled)
1978 bitset = 0;
1979 break;
1980 }
1981 }
1982 spin_unlock(&tree->lock);
1983 return bitset;
1984}
1985
1986
1987
1988
1989
1990static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1991{
1992 u64 start = page_offset(page);
1993 u64 end = start + PAGE_CACHE_SIZE - 1;
1994 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1995 SetPageUptodate(page);
1996}
1997
1998int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1999{
2000 int ret;
2001 int err = 0;
2002 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2003
2004 set_state_private(failure_tree, rec->start, 0);
2005 ret = clear_extent_bits(failure_tree, rec->start,
2006 rec->start + rec->len - 1,
2007 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2008 if (ret)
2009 err = ret;
2010
2011 ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
2012 rec->start + rec->len - 1,
2013 EXTENT_DAMAGED, GFP_NOFS);
2014 if (ret && !err)
2015 err = ret;
2016
2017 kfree(rec);
2018 return err;
2019}
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
2032 struct page *page, unsigned int pg_offset, int mirror_num)
2033{
2034 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2035 struct bio *bio;
2036 struct btrfs_device *dev;
2037 u64 map_length = 0;
2038 u64 sector;
2039 struct btrfs_bio *bbio = NULL;
2040 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
2041 int ret;
2042
2043 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
2044 BUG_ON(!mirror_num);
2045
2046
2047 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
2048 return 0;
2049
2050 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2051 if (!bio)
2052 return -EIO;
2053 bio->bi_iter.bi_size = 0;
2054 map_length = length;
2055
2056 ret = btrfs_map_block(fs_info, WRITE, logical,
2057 &map_length, &bbio, mirror_num);
2058 if (ret) {
2059 bio_put(bio);
2060 return -EIO;
2061 }
2062 BUG_ON(mirror_num != bbio->mirror_num);
2063 sector = bbio->stripes[mirror_num-1].physical >> 9;
2064 bio->bi_iter.bi_sector = sector;
2065 dev = bbio->stripes[mirror_num-1].dev;
2066 btrfs_put_bbio(bbio);
2067 if (!dev || !dev->bdev || !dev->writeable) {
2068 bio_put(bio);
2069 return -EIO;
2070 }
2071 bio->bi_bdev = dev->bdev;
2072 bio_add_page(bio, page, length, pg_offset);
2073
2074 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
2075
2076 bio_put(bio);
2077 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2078 return -EIO;
2079 }
2080
2081 printk_ratelimited_in_rcu(KERN_INFO
2082 "BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n",
2083 btrfs_ino(inode), start,
2084 rcu_str_deref(dev->name), sector);
2085 bio_put(bio);
2086 return 0;
2087}
2088
2089int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
2090 int mirror_num)
2091{
2092 u64 start = eb->start;
2093 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2094 int ret = 0;
2095
2096 if (root->fs_info->sb->s_flags & MS_RDONLY)
2097 return -EROFS;
2098
2099 for (i = 0; i < num_pages; i++) {
2100 struct page *p = eb->pages[i];
2101
2102 ret = repair_io_failure(root->fs_info->btree_inode, start,
2103 PAGE_CACHE_SIZE, start, p,
2104 start - page_offset(p), mirror_num);
2105 if (ret)
2106 break;
2107 start += PAGE_CACHE_SIZE;
2108 }
2109
2110 return ret;
2111}
2112
2113
2114
2115
2116
2117int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2118 unsigned int pg_offset)
2119{
2120 u64 private;
2121 u64 private_failure;
2122 struct io_failure_record *failrec;
2123 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2124 struct extent_state *state;
2125 int num_copies;
2126 int ret;
2127
2128 private = 0;
2129 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
2130 (u64)-1, 1, EXTENT_DIRTY, 0);
2131 if (!ret)
2132 return 0;
2133
2134 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
2135 &private_failure);
2136 if (ret)
2137 return 0;
2138
2139 failrec = (struct io_failure_record *)(unsigned long) private_failure;
2140 BUG_ON(!failrec->this_mirror);
2141
2142 if (failrec->in_validation) {
2143
2144 pr_debug("clean_io_failure: freeing dummy error at %llu\n",
2145 failrec->start);
2146 goto out;
2147 }
2148 if (fs_info->sb->s_flags & MS_RDONLY)
2149 goto out;
2150
2151 spin_lock(&BTRFS_I(inode)->io_tree.lock);
2152 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
2153 failrec->start,
2154 EXTENT_LOCKED);
2155 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2156
2157 if (state && state->start <= failrec->start &&
2158 state->end >= failrec->start + failrec->len - 1) {
2159 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2160 failrec->len);
2161 if (num_copies > 1) {
2162 repair_io_failure(inode, start, failrec->len,
2163 failrec->logical, page,
2164 pg_offset, failrec->failed_mirror);
2165 }
2166 }
2167
2168out:
2169 free_io_failure(inode, failrec);
2170
2171 return 0;
2172}
2173
2174
2175
2176
2177
2178
2179
2180void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2181{
2182 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2183 struct io_failure_record *failrec;
2184 struct extent_state *state, *next;
2185
2186 if (RB_EMPTY_ROOT(&failure_tree->state))
2187 return;
2188
2189 spin_lock(&failure_tree->lock);
2190 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2191 while (state) {
2192 if (state->start > end)
2193 break;
2194
2195 ASSERT(state->end <= end);
2196
2197 next = next_state(state);
2198
2199 failrec = (struct io_failure_record *)(unsigned long)state->private;
2200 free_extent_state(state);
2201 kfree(failrec);
2202
2203 state = next;
2204 }
2205 spin_unlock(&failure_tree->lock);
2206}
2207
2208int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2209 struct io_failure_record **failrec_ret)
2210{
2211 struct io_failure_record *failrec;
2212 u64 private;
2213 struct extent_map *em;
2214 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2215 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2216 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2217 int ret;
2218 u64 logical;
2219
2220 ret = get_state_private(failure_tree, start, &private);
2221 if (ret) {
2222 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2223 if (!failrec)
2224 return -ENOMEM;
2225
2226 failrec->start = start;
2227 failrec->len = end - start + 1;
2228 failrec->this_mirror = 0;
2229 failrec->bio_flags = 0;
2230 failrec->in_validation = 0;
2231
2232 read_lock(&em_tree->lock);
2233 em = lookup_extent_mapping(em_tree, start, failrec->len);
2234 if (!em) {
2235 read_unlock(&em_tree->lock);
2236 kfree(failrec);
2237 return -EIO;
2238 }
2239
2240 if (em->start > start || em->start + em->len <= start) {
2241 free_extent_map(em);
2242 em = NULL;
2243 }
2244 read_unlock(&em_tree->lock);
2245 if (!em) {
2246 kfree(failrec);
2247 return -EIO;
2248 }
2249
2250 logical = start - em->start;
2251 logical = em->block_start + logical;
2252 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2253 logical = em->block_start;
2254 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2255 extent_set_compress_type(&failrec->bio_flags,
2256 em->compress_type);
2257 }
2258
2259 pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
2260 logical, start, failrec->len);
2261
2262 failrec->logical = logical;
2263 free_extent_map(em);
2264
2265
2266 ret = set_extent_bits(failure_tree, start, end,
2267 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2268 if (ret >= 0)
2269 ret = set_state_private(failure_tree, start,
2270 (u64)(unsigned long)failrec);
2271
2272 if (ret >= 0)
2273 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
2274 GFP_NOFS);
2275 if (ret < 0) {
2276 kfree(failrec);
2277 return ret;
2278 }
2279 } else {
2280 failrec = (struct io_failure_record *)(unsigned long)private;
2281 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
2282 failrec->logical, failrec->start, failrec->len,
2283 failrec->in_validation);
2284
2285
2286
2287
2288
2289 }
2290
2291 *failrec_ret = failrec;
2292
2293 return 0;
2294}
2295
2296int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2297 struct io_failure_record *failrec, int failed_mirror)
2298{
2299 int num_copies;
2300
2301 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
2302 failrec->logical, failrec->len);
2303 if (num_copies == 1) {
2304
2305
2306
2307
2308
2309 pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2310 num_copies, failrec->this_mirror, failed_mirror);
2311 return 0;
2312 }
2313
2314
2315
2316
2317
2318
2319 if (failed_bio->bi_vcnt > 1) {
2320
2321
2322
2323
2324
2325
2326
2327
2328 BUG_ON(failrec->in_validation);
2329 failrec->in_validation = 1;
2330 failrec->this_mirror = failed_mirror;
2331 } else {
2332
2333
2334
2335
2336
2337 if (failrec->in_validation) {
2338 BUG_ON(failrec->this_mirror != failed_mirror);
2339 failrec->in_validation = 0;
2340 failrec->this_mirror = 0;
2341 }
2342 failrec->failed_mirror = failed_mirror;
2343 failrec->this_mirror++;
2344 if (failrec->this_mirror == failed_mirror)
2345 failrec->this_mirror++;
2346 }
2347
2348 if (failrec->this_mirror > num_copies) {
2349 pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2350 num_copies, failrec->this_mirror, failed_mirror);
2351 return 0;
2352 }
2353
2354 return 1;
2355}
2356
2357
2358struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2359 struct io_failure_record *failrec,
2360 struct page *page, int pg_offset, int icsum,
2361 bio_end_io_t *endio_func, void *data)
2362{
2363 struct bio *bio;
2364 struct btrfs_io_bio *btrfs_failed_bio;
2365 struct btrfs_io_bio *btrfs_bio;
2366
2367 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2368 if (!bio)
2369 return NULL;
2370
2371 bio->bi_end_io = endio_func;
2372 bio->bi_iter.bi_sector = failrec->logical >> 9;
2373 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2374 bio->bi_iter.bi_size = 0;
2375 bio->bi_private = data;
2376
2377 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2378 if (btrfs_failed_bio->csum) {
2379 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2380 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2381
2382 btrfs_bio = btrfs_io_bio(bio);
2383 btrfs_bio->csum = btrfs_bio->csum_inline;
2384 icsum *= csum_size;
2385 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2386 csum_size);
2387 }
2388
2389 bio_add_page(bio, page, failrec->len, pg_offset);
2390
2391 return bio;
2392}
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2403 struct page *page, u64 start, u64 end,
2404 int failed_mirror)
2405{
2406 struct io_failure_record *failrec;
2407 struct inode *inode = page->mapping->host;
2408 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2409 struct bio *bio;
2410 int read_mode;
2411 int ret;
2412
2413 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2414
2415 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2416 if (ret)
2417 return ret;
2418
2419 ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
2420 if (!ret) {
2421 free_io_failure(inode, failrec);
2422 return -EIO;
2423 }
2424
2425 if (failed_bio->bi_vcnt > 1)
2426 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2427 else
2428 read_mode = READ_SYNC;
2429
2430 phy_offset >>= inode->i_sb->s_blocksize_bits;
2431 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2432 start - page_offset(page),
2433 (int)phy_offset, failed_bio->bi_end_io,
2434 NULL);
2435 if (!bio) {
2436 free_io_failure(inode, failrec);
2437 return -EIO;
2438 }
2439
2440 pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
2441 read_mode, failrec->this_mirror, failrec->in_validation);
2442
2443 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2444 failrec->this_mirror,
2445 failrec->bio_flags, 0);
2446 if (ret) {
2447 free_io_failure(inode, failrec);
2448 bio_put(bio);
2449 }
2450
2451 return ret;
2452}
2453
2454
2455
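/*
 * Finish the writeback of one page range: run the writepage_end_io hook
 * and, on error, mark the page and its mapping accordingly.
 */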
2456int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2457{
2458 int uptodate = (err == 0);
2459 struct extent_io_tree *tree;
2460 int ret = 0;
2461
2462 tree = &BTRFS_I(page->mapping->host)->io_tree;
2463
2464 if (tree->ops && tree->ops->writepage_end_io_hook) {
2465 ret = tree->ops->writepage_end_io_hook(page, start,
2466 end, NULL, uptodate);
2467 if (ret)
2468 uptodate = 0;
2469 }
2470
2471 if (!uptodate) {
2472 ClearPageUptodate(page);
2473 SetPageError(page);
2474 ret = ret < 0 ? ret : -EIO;
2475 mapping_set_error(page->mapping, ret);
2476 }
2477 return 0;
2478}

/*
 * Bio end_io routine for writepage: for every page in the bio run the
 * writepage_end_io hook via end_extent_writepage() (clearing the
 * uptodate bit and flagging the mapping on error) and then end the
 * page's writeback. Runs from bio completion, so it must not sleep.
 */
2489static void end_bio_extent_writepage(struct bio *bio)
2490{
2491 struct bio_vec *bvec;
2492 u64 start;
2493 u64 end;
2494 int i;
2495
2496 bio_for_each_segment_all(bvec, bio, i) {
2497 struct page *page = bvec->bv_page;
2498
2499
2500
2501
2502
2503
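 /*
 * We always issue full-page writes, but if a block in the page fails,
 * the block layer may advance bv_offset and shrink bv_len. Warn on a
 * nonzero offset and note lengths that do not add up to a full page.
 */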
2504 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2505 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2506 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2507 "partial page write in btrfs with offset %u and length %u",
2508 bvec->bv_offset, bvec->bv_len);
2509 else
2510 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2511 "incomplete page write in btrfs with offset %u and "
2512 "length %u",
2513 bvec->bv_offset, bvec->bv_len);
2514 }
2515
2516 start = page_offset(page);
2517 end = start + bvec->bv_offset + bvec->bv_len - 1;
2518
2519 if (end_extent_writepage(page, bio->bi_error, start, end))
2520 continue;
2521
2522 end_page_writeback(page);
2523 }
2524
2525 bio_put(bio);
2526}
2527
2528static void
2529endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2530 int uptodate)
2531{
2532 struct extent_state *cached = NULL;
2533 u64 end = start + len - 1;
2534
2535 if (uptodate && tree->track_uptodate)
2536 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2537 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2538}

/*
 * Bio end_io routine for readpage. For each page in the bio:
 * - run the readpage_end_io hook (csum check) and clear the uptodate
 *   bit if it fails, retrying other mirrors where possible
 * - on success, zero any tail beyond i_size and mark the page uptodate
 * - unlock the page and release the locked range in the io tree,
 *   merging contiguous ranges so the unlocks stay cheap
 */
2551static void end_bio_extent_readpage(struct bio *bio)
2552{
2553 struct bio_vec *bvec;
2554 int uptodate = !bio->bi_error;
2555 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2556 struct extent_io_tree *tree;
2557 u64 offset = 0;
2558 u64 start;
2559 u64 end;
2560 u64 len;
2561 u64 extent_start = 0;
2562 u64 extent_len = 0;
2563 int mirror;
2564 int ret;
2565 int i;
2566
2567 bio_for_each_segment_all(bvec, bio, i) {
2568 struct page *page = bvec->bv_page;
2569 struct inode *inode = page->mapping->host;
2570
2571 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2572 "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
2573 bio->bi_error, io_bio->mirror_num);
2574 tree = &BTRFS_I(inode)->io_tree;
2575
2576
2577
2578
2579
2580
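 /*
 * We always issue full-page reads; if a block in the page failed, the
 * block layer may have advanced bv_offset and adjusted bv_len. Report
 * partial and incomplete page reads just like the write path does.
 */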
2581 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2582 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2583 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2584 "partial page read in btrfs with offset %u and length %u",
2585 bvec->bv_offset, bvec->bv_len);
2586 else
2587 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2588 "incomplete page read in btrfs with offset %u and "
2589 "length %u",
2590 bvec->bv_offset, bvec->bv_len);
2591 }
2592
2593 start = page_offset(page);
2594 end = start + bvec->bv_offset + bvec->bv_len - 1;
2595 len = bvec->bv_len;
2596
2597 mirror = io_bio->mirror_num;
2598 if (likely(uptodate && tree->ops &&
2599 tree->ops->readpage_end_io_hook)) {
2600 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2601 page, start, end,
2602 mirror);
2603 if (ret)
2604 uptodate = 0;
2605 else
2606 clean_io_failure(inode, start, page, 0);
2607 }
2608
2609 if (likely(uptodate))
2610 goto readpage_ok;
2611
2612 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2613 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2614 if (!ret && !bio->bi_error)
2615 uptodate = 1;
2616 } else {
 /*
 * bio_readpage_error() will, when possible, submit a new read to
 * another mirror; that read completes back in this handler. A return
 * of 0 means the retry was submitted and we simply move on to the
 * next page of this bio. A negative return means the error could not
 * be handled and this page stays our responsibility.
 */
2627 ret = bio_readpage_error(bio, offset, page, start, end,
2628 mirror);
2629 if (ret == 0) {
2630 uptodate = !bio->bi_error;
2631 offset += len;
2632 continue;
2633 }
2634 }
2635readpage_ok:
2636 if (likely(uptodate)) {
2637 loff_t i_size = i_size_read(inode);
2638 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2639 unsigned off;
2640
2641
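 /* Zero out the tail of the page if it straddles i_size */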
2642 off = i_size & (PAGE_CACHE_SIZE-1);
2643 if (page->index == end_index && off)
2644 zero_user_segment(page, off, PAGE_CACHE_SIZE);
2645 SetPageUptodate(page);
2646 } else {
2647 ClearPageUptodate(page);
2648 SetPageError(page);
2649 }
2650 unlock_page(page);
2651 offset += len;
2652
2653 if (unlikely(!uptodate)) {
2654 if (extent_len) {
2655 endio_readpage_release_extent(tree,
2656 extent_start,
2657 extent_len, 1);
2658 extent_start = 0;
2659 extent_len = 0;
2660 }
2661 endio_readpage_release_extent(tree, start,
2662 end - start + 1, 0);
2663 } else if (!extent_len) {
2664 extent_start = start;
2665 extent_len = end + 1 - start;
2666 } else if (extent_start + extent_len == start) {
2667 extent_len += end + 1 - start;
2668 } else {
2669 endio_readpage_release_extent(tree, extent_start,
2670 extent_len, uptodate);
2671 extent_start = start;
2672 extent_len = end + 1 - start;
2673 }
2674 }
2675
2676 if (extent_len)
2677 endio_readpage_release_extent(tree, extent_start, extent_len,
2678 uptodate);
2679 if (io_bio->end_io)
2680 io_bio->end_io(io_bio, bio->bi_error);
2681 bio_put(bio);
2682}
2683
2684
2685
2686
2687
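/*
 * Allocate a bio from btrfs_bioset; the returned bio is embedded in a
 * struct btrfs_io_bio, reachable via btrfs_io_bio().
 */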
2688struct bio *
2689btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2690 gfp_t gfp_flags)
2691{
2692 struct btrfs_io_bio *btrfs_bio;
2693 struct bio *bio;
2694
2695 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
2696
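 /*
 * While in memory reclaim, retry with progressively fewer vecs rather
 * than failing the allocation outright.
 */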
2697 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2698 while (!bio && (nr_vecs /= 2)) {
2699 bio = bio_alloc_bioset(gfp_flags,
2700 nr_vecs, btrfs_bioset);
2701 }
2702 }
2703
2704 if (bio) {
2705 bio->bi_bdev = bdev;
2706 bio->bi_iter.bi_sector = first_sector;
2707 btrfs_bio = btrfs_io_bio(bio);
2708 btrfs_bio->csum = NULL;
2709 btrfs_bio->csum_allocated = NULL;
2710 btrfs_bio->end_io = NULL;
2711 }
2712 return bio;
2713}
2714
2715struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2716{
2717 struct btrfs_io_bio *btrfs_bio;
2718 struct bio *new;
2719
2720 new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
2721 if (new) {
2722 btrfs_bio = btrfs_io_bio(new);
2723 btrfs_bio->csum = NULL;
2724 btrfs_bio->csum_allocated = NULL;
2725 btrfs_bio->end_io = NULL;
2726
2727#ifdef CONFIG_BLK_CGROUP
2728
2729 if (bio->bi_css)
2730 bio_associate_blkcg(new, bio->bi_css);
2731#endif
2732 }
2733 return new;
2734}
2735
2736
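/* Allocate a bio from btrfs_bioset and reset its btrfs_io_bio fields. */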
2737struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2738{
2739 struct btrfs_io_bio *btrfs_bio;
2740 struct bio *bio;
2741
2742 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2743 if (bio) {
2744 btrfs_bio = btrfs_io_bio(bio);
2745 btrfs_bio->csum = NULL;
2746 btrfs_bio->csum_allocated = NULL;
2747 btrfs_bio->end_io = NULL;
2748 }
2749 return bio;
2750}
2751
2752
2753static int __must_check submit_one_bio(int rw, struct bio *bio,
2754 int mirror_num, unsigned long bio_flags)
2755{
2756 int ret = 0;
2757 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2758 struct page *page = bvec->bv_page;
2759 struct extent_io_tree *tree = bio->bi_private;
2760 u64 start;
2761
2762 start = page_offset(page) + bvec->bv_offset;
2763
2764 bio->bi_private = NULL;
2765
2766 bio_get(bio);
2767
2768 if (tree->ops && tree->ops->submit_bio_hook)
2769 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
2770 mirror_num, bio_flags, start);
2771 else
2772 btrfsic_submit_bio(rw, bio);
2773
2774 bio_put(bio);
2775 return ret;
2776}
2777
2778static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
2779 unsigned long offset, size_t size, struct bio *bio,
2780 unsigned long bio_flags)
2781{
2782 int ret = 0;
2783 if (tree->ops && tree->ops->merge_bio_hook)
2784 ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
2785 bio_flags);
2786 BUG_ON(ret < 0);
2787 return ret;
2788
2789}
2790
2791static int submit_extent_page(int rw, struct extent_io_tree *tree,
2792 struct writeback_control *wbc,
2793 struct page *page, sector_t sector,
2794 size_t size, unsigned long offset,
2795 struct block_device *bdev,
2796 struct bio **bio_ret,
2797 unsigned long max_pages,
2798 bio_end_io_t end_io_func,
2799 int mirror_num,
2800 unsigned long prev_bio_flags,
2801 unsigned long bio_flags,
2802 bool force_bio_submit)
2803{
2804 int ret = 0;
2805 struct bio *bio;
2806 int contig = 0;
2807 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
2808 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
2809
2810 if (bio_ret && *bio_ret) {
2811 bio = *bio_ret;
2812 if (old_compressed)
2813 contig = bio->bi_iter.bi_sector == sector;
2814 else
2815 contig = bio_end_sector(bio) == sector;
2816
2817 if (prev_bio_flags != bio_flags || !contig ||
2818 force_bio_submit ||
2819 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
2820 bio_add_page(bio, page, page_size, offset) < page_size) {
2821 ret = submit_one_bio(rw, bio, mirror_num,
2822 prev_bio_flags);
2823 if (ret < 0) {
2824 *bio_ret = NULL;
2825 return ret;
2826 }
2827 bio = NULL;
2828 } else {
2829 if (wbc)
2830 wbc_account_io(wbc, page, page_size);
2831 return 0;
2832 }
2833 }
2834
2835 bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
2836 GFP_NOFS | __GFP_HIGH);
2837 if (!bio)
2838 return -ENOMEM;
2839
2840 bio_add_page(bio, page, page_size, offset);
2841 bio->bi_end_io = end_io_func;
2842 bio->bi_private = tree;
2843 if (wbc) {
2844 wbc_init_bio(wbc, bio);
2845 wbc_account_io(wbc, page, page_size);
2846 }
2847
2848 if (bio_ret)
2849 *bio_ret = bio;
2850 else
2851 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
2852
2853 return ret;
2854}
2855
2856static void attach_extent_buffer_page(struct extent_buffer *eb,
2857 struct page *page)
2858{
2859 if (!PagePrivate(page)) {
2860 SetPagePrivate(page);
2861 page_cache_get(page);
2862 set_page_private(page, (unsigned long)eb);
2863 } else {
2864 WARN_ON(page->private != (unsigned long)eb);
2865 }
2866}
2867
2868void set_page_extent_mapped(struct page *page)
2869{
2870 if (!PagePrivate(page)) {
2871 SetPagePrivate(page);
2872 page_cache_get(page);
2873 set_page_private(page, EXTENT_PAGE_PRIVATE);
2874 }
2875}
2876
2877static struct extent_map *
2878__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2879 u64 start, u64 len, get_extent_t *get_extent,
2880 struct extent_map **em_cached)
2881{
2882 struct extent_map *em;
2883
2884 if (em_cached && *em_cached) {
2885 em = *em_cached;
2886 if (extent_map_in_tree(em) && start >= em->start &&
2887 start < extent_map_end(em)) {
2888 atomic_inc(&em->refs);
2889 return em;
2890 }
2891
2892 free_extent_map(em);
2893 *em_cached = NULL;
2894 }
2895
2896 em = get_extent(inode, page, pg_offset, start, len, 0);
2897 if (em_cached && !IS_ERR_OR_NULL(em)) {
2898 BUG_ON(*em_cached);
2899 atomic_inc(&em->refs);
2900 *em_cached = em;
2901 }
2902 return em;
2903}
2904
2905
2906
2907
2908
2909
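/*
 * Basic readpage implementation: map the page into extents with
 * get_extent and submit read bios for the ranges that are not already
 * uptodate. Locked extent ranges are released by the end_io handler
 * (unless the caller holds the lock, EXTENT_BIO_PARENT_LOCKED).
 */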
2910static int __do_readpage(struct extent_io_tree *tree,
2911 struct page *page,
2912 get_extent_t *get_extent,
2913 struct extent_map **em_cached,
2914 struct bio **bio, int mirror_num,
2915 unsigned long *bio_flags, int rw,
2916 u64 *prev_em_start)
2917{
2918 struct inode *inode = page->mapping->host;
2919 u64 start = page_offset(page);
2920 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2921 u64 end;
2922 u64 cur = start;
2923 u64 extent_offset;
2924 u64 last_byte = i_size_read(inode);
2925 u64 block_start;
2926 u64 cur_end;
2927 sector_t sector;
2928 struct extent_map *em;
2929 struct block_device *bdev;
2930 int ret;
2931 int nr = 0;
2932 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2933 size_t pg_offset = 0;
2934 size_t iosize;
2935 size_t disk_io_size;
2936 size_t blocksize = inode->i_sb->s_blocksize;
2937 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2938
2939 set_page_extent_mapped(page);
2940
2941 end = page_end;
2942 if (!PageUptodate(page)) {
2943 if (cleancache_get_page(page) == 0) {
2944 BUG_ON(blocksize != PAGE_SIZE);
2945 unlock_extent(tree, start, end);
2946 goto out;
2947 }
2948 }
2949
2950 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2951 char *userpage;
2952 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2953
2954 if (zero_offset) {
2955 iosize = PAGE_CACHE_SIZE - zero_offset;
2956 userpage = kmap_atomic(page);
2957 memset(userpage + zero_offset, 0, iosize);
2958 flush_dcache_page(page);
2959 kunmap_atomic(userpage);
2960 }
2961 }
2962 while (cur <= end) {
2963 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2964 bool force_bio_submit = false;
2965
2966 if (cur >= last_byte) {
2967 char *userpage;
2968 struct extent_state *cached = NULL;
2969
2970 iosize = PAGE_CACHE_SIZE - pg_offset;
2971 userpage = kmap_atomic(page);
2972 memset(userpage + pg_offset, 0, iosize);
2973 flush_dcache_page(page);
2974 kunmap_atomic(userpage);
2975 set_extent_uptodate(tree, cur, cur + iosize - 1,
2976 &cached, GFP_NOFS);
2977 if (!parent_locked)
2978 unlock_extent_cached(tree, cur,
2979 cur + iosize - 1,
2980 &cached, GFP_NOFS);
2981 break;
2982 }
2983 em = __get_extent_map(inode, page, pg_offset, cur,
2984 end - cur + 1, get_extent, em_cached);
2985 if (IS_ERR_OR_NULL(em)) {
2986 SetPageError(page);
2987 if (!parent_locked)
2988 unlock_extent(tree, cur, end);
2989 break;
2990 }
2991 extent_offset = cur - em->start;
2992 BUG_ON(extent_map_end(em) <= cur);
2993 BUG_ON(end < cur);
2994
2995 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2996 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2997 extent_set_compress_type(&this_bio_flag,
2998 em->compress_type);
2999 }
3000
3001 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3002 cur_end = min(extent_map_end(em) - 1, end);
3003 iosize = ALIGN(iosize, blocksize);
3004 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
3005 disk_io_size = em->block_len;
3006 sector = em->block_start >> 9;
3007 } else {
3008 sector = (em->block_start + extent_offset) >> 9;
3009 disk_io_size = iosize;
3010 }
3011 bdev = em->bdev;
3012 block_start = em->block_start;
3013 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3014 block_start = EXTENT_MAP_HOLE;

 /*
 * A compressed extent can back several consecutive file ranges (for
 * example when ranges were cloned). If pages for different ranges of
 * the same compressed extent, or for different compressed extents,
 * end up in one bio, the end_io code would decompress into the wrong
 * pages. Track the extent we last added pages for (prev_em_start) and
 * force the current bio to be submitted whenever we switch to a
 * different compressed extent.
 */
3050 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3051 prev_em_start && *prev_em_start != (u64)-1 &&
3052 *prev_em_start != em->orig_start)
3053 force_bio_submit = true;
3054
3055 if (prev_em_start)
3056 *prev_em_start = em->orig_start;
3057
3058 free_extent_map(em);
3059 em = NULL;
3060
3061
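 /* This range is a hole: just zero the page and move on. */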
3062 if (block_start == EXTENT_MAP_HOLE) {
3063 char *userpage;
3064 struct extent_state *cached = NULL;
3065
3066 userpage = kmap_atomic(page);
3067 memset(userpage + pg_offset, 0, iosize);
3068 flush_dcache_page(page);
3069 kunmap_atomic(userpage);
3070
3071 set_extent_uptodate(tree, cur, cur + iosize - 1,
3072 &cached, GFP_NOFS);
3073 unlock_extent_cached(tree, cur, cur + iosize - 1,
3074 &cached, GFP_NOFS);
3075 cur = cur + iosize;
3076 pg_offset += iosize;
3077 continue;
3078 }
3079
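 /* The range is already uptodate in the io tree; no read needed. */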
3080 if (test_range_bit(tree, cur, cur_end,
3081 EXTENT_UPTODATE, 1, NULL)) {
3082 check_page_uptodate(tree, page);
3083 if (!parent_locked)
3084 unlock_extent(tree, cur, cur + iosize - 1);
3085 cur = cur + iosize;
3086 pg_offset += iosize;
3087 continue;
3088 }
3089
3090
3091
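 /* An inline extent that did not end up uptodate is an error. */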
3092 if (block_start == EXTENT_MAP_INLINE) {
3093 SetPageError(page);
3094 if (!parent_locked)
3095 unlock_extent(tree, cur, cur + iosize - 1);
3096 cur = cur + iosize;
3097 pg_offset += iosize;
3098 continue;
3099 }
3100
3101 pnr -= page->index;
3102 ret = submit_extent_page(rw, tree, NULL, page,
3103 sector, disk_io_size, pg_offset,
3104 bdev, bio, pnr,
3105 end_bio_extent_readpage, mirror_num,
3106 *bio_flags,
3107 this_bio_flag,
3108 force_bio_submit);
3109 if (!ret) {
3110 nr++;
3111 *bio_flags = this_bio_flag;
3112 } else {
3113 SetPageError(page);
3114 if (!parent_locked)
3115 unlock_extent(tree, cur, cur + iosize - 1);
3116 }
3117 cur = cur + iosize;
3118 pg_offset += iosize;
3119 }
3120out:
3121 if (!nr) {
3122 if (!PageError(page))
3123 SetPageUptodate(page);
3124 unlock_page(page);
3125 }
3126 return 0;
3127}
3128
3129static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3130 struct page *pages[], int nr_pages,
3131 u64 start, u64 end,
3132 get_extent_t *get_extent,
3133 struct extent_map **em_cached,
3134 struct bio **bio, int mirror_num,
3135 unsigned long *bio_flags, int rw,
3136 u64 *prev_em_start)
3137{
3138 struct inode *inode;
3139 struct btrfs_ordered_extent *ordered;
3140 int index;
3141
3142 inode = pages[0]->mapping->host;
3143 while (1) {
3144 lock_extent(tree, start, end);
3145 ordered = btrfs_lookup_ordered_range(inode, start,
3146 end - start + 1);
3147 if (!ordered)
3148 break;
3149 unlock_extent(tree, start, end);
3150 btrfs_start_ordered_extent(inode, ordered, 1);
3151 btrfs_put_ordered_extent(ordered);
3152 }
3153
3154 for (index = 0; index < nr_pages; index++) {
3155 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3156 mirror_num, bio_flags, rw, prev_em_start);
3157 page_cache_release(pages[index]);
3158 }
3159}
3160
3161static void __extent_readpages(struct extent_io_tree *tree,
3162 struct page *pages[],
3163 int nr_pages, get_extent_t *get_extent,
3164 struct extent_map **em_cached,
3165 struct bio **bio, int mirror_num,
3166 unsigned long *bio_flags, int rw,
3167 u64 *prev_em_start)
3168{
3169 u64 start = 0;
3170 u64 end = 0;
3171 u64 page_start;
3172 int index;
3173 int first_index = 0;
3174
3175 for (index = 0; index < nr_pages; index++) {
3176 page_start = page_offset(pages[index]);
3177 if (!end) {
3178 start = page_start;
3179 end = start + PAGE_CACHE_SIZE - 1;
3180 first_index = index;
3181 } else if (end + 1 == page_start) {
3182 end += PAGE_CACHE_SIZE;
3183 } else {
3184 __do_contiguous_readpages(tree, &pages[first_index],
3185 index - first_index, start,
3186 end, get_extent, em_cached,
3187 bio, mirror_num, bio_flags,
3188 rw, prev_em_start);
3189 start = page_start;
3190 end = start + PAGE_CACHE_SIZE - 1;
3191 first_index = index;
3192 }
3193 }
3194
3195 if (end)
3196 __do_contiguous_readpages(tree, &pages[first_index],
3197 index - first_index, start,
3198 end, get_extent, em_cached, bio,
3199 mirror_num, bio_flags, rw,
3200 prev_em_start);
3201}
3202
3203static int __extent_read_full_page(struct extent_io_tree *tree,
3204 struct page *page,
3205 get_extent_t *get_extent,
3206 struct bio **bio, int mirror_num,
3207 unsigned long *bio_flags, int rw)
3208{
3209 struct inode *inode = page->mapping->host;
3210 struct btrfs_ordered_extent *ordered;
3211 u64 start = page_offset(page);
3212 u64 end = start + PAGE_CACHE_SIZE - 1;
3213 int ret;
3214
3215 while (1) {
3216 lock_extent(tree, start, end);
3217 ordered = btrfs_lookup_ordered_extent(inode, start);
3218 if (!ordered)
3219 break;
3220 unlock_extent(tree, start, end);
3221 btrfs_start_ordered_extent(inode, ordered, 1);
3222 btrfs_put_ordered_extent(ordered);
3223 }
3224
3225 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3226 bio_flags, rw, NULL);
3227 return ret;
3228}
3229
3230int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3231 get_extent_t *get_extent, int mirror_num)
3232{
3233 struct bio *bio = NULL;
3234 unsigned long bio_flags = 0;
3235 int ret;
3236
3237 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3238 &bio_flags, READ);
3239 if (bio)
3240 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3241 return ret;
3242}
3243
3244int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3245 get_extent_t *get_extent, int mirror_num)
3246{
3247 struct bio *bio = NULL;
3248 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3249 int ret;
3250
3251 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3252 &bio_flags, READ, NULL);
3253 if (bio)
3254 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3255 return ret;
3256}
3257
3258static noinline void update_nr_written(struct page *page,
3259 struct writeback_control *wbc,
3260 unsigned long nr_written)
3261{
3262 wbc->nr_to_write -= nr_written;
3263 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
3264 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
3265 page->mapping->writeback_index = page->index + nr_written;
3266}

/*
 * Helper for __extent_writepage(): run delayed allocation for the page
 * via the fill_delalloc hook.
 *
 * Returns 1 if fill_delalloc did all the work and the page was unlocked
 * (IO already started), 0 if all went well and the page is still locked,
 * or a negative error (page still locked).
 */
3278static noinline_for_stack int writepage_delalloc(struct inode *inode,
3279 struct page *page, struct writeback_control *wbc,
3280 struct extent_page_data *epd,
3281 u64 delalloc_start,
3282 unsigned long *nr_written)
3283{
3284 struct extent_io_tree *tree = epd->tree;
3285 u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
3286 u64 nr_delalloc;
3287 u64 delalloc_to_write = 0;
3288 u64 delalloc_end = 0;
3289 int ret;
3290 int page_started = 0;
3291
3292 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3293 return 0;
3294
3295 while (delalloc_end < page_end) {
3296 nr_delalloc = find_lock_delalloc_range(inode, tree,
3297 page,
3298 &delalloc_start,
3299 &delalloc_end,
3300 BTRFS_MAX_EXTENT_SIZE);
3301 if (nr_delalloc == 0) {
3302 delalloc_start = delalloc_end + 1;
3303 continue;
3304 }
3305 ret = tree->ops->fill_delalloc(inode, page,
3306 delalloc_start,
3307 delalloc_end,
3308 &page_started,
3309 nr_written);
3310
3311 if (ret) {
3312 SetPageError(page);
3313
3314
3315
3316
3317
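 /*
 * fill_delalloc should return a negative error; clamp any positive
 * value to -EIO so a bogus "IO started" result is not propagated.
 */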
3318 ret = ret < 0 ? ret : -EIO;
3319 goto done;
3320 }
3321
3322
3323
3324
3325
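 /*
 * delalloc_end is inclusive, so adding a full PAGE_CACHE_SIZE before
 * shifting yields the number of pages covered by this range.
 */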
3326 delalloc_to_write += (delalloc_end - delalloc_start +
3327 PAGE_CACHE_SIZE) >>
3328 PAGE_CACHE_SHIFT;
3329 delalloc_start = delalloc_end + 1;
3330 }
3331 if (wbc->nr_to_write < delalloc_to_write) {
3332 int thresh = 8192;
3333
3334 if (delalloc_to_write < thresh * 2)
3335 thresh = delalloc_to_write;
3336 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3337 thresh);
3338 }
3339
3340
3341
3342
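 /* Did fill_delalloc already unlock the page and start the IO? */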
3343 if (page_started) {
3344
3345
3346
3347
3348
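 /*
 * The page has been unlocked, so we must not touch the mapping's
 * writeback index; only account the pages that were written.
 */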
3349 wbc->nr_to_write -= *nr_written;
3350 return 1;
3351 }
3352
3353 ret = 0;
3354
3355done:
3356 return ret;
3357}

/*
 * Helper for __extent_writepage(): walk the page, map it into extents
 * and submit write bios for the dirty ranges.
 *
 * Returns 1 if the IO was started and the page unlocked by the start
 * hook, 0 on success with the page still locked, or a negative error.
 */
3367static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3368 struct page *page,
3369 struct writeback_control *wbc,
3370 struct extent_page_data *epd,
3371 loff_t i_size,
3372 unsigned long nr_written,
3373 int write_flags, int *nr_ret)
3374{
3375 struct extent_io_tree *tree = epd->tree;
3376 u64 start = page_offset(page);
3377 u64 page_end = start + PAGE_CACHE_SIZE - 1;
3378 u64 end;
3379 u64 cur = start;
3380 u64 extent_offset;
3381 u64 block_start;
3382 u64 iosize;
3383 sector_t sector;
3384 struct extent_state *cached_state = NULL;
3385 struct extent_map *em;
3386 struct block_device *bdev;
3387 size_t pg_offset = 0;
3388 size_t blocksize;
3389 int ret = 0;
3390 int nr = 0;
3391 bool compressed;
3392
3393 if (tree->ops && tree->ops->writepage_start_hook) {
3394 ret = tree->ops->writepage_start_hook(page, start,
3395 page_end);
3396 if (ret) {
3397
3398 if (ret == -EBUSY)
3399 wbc->pages_skipped++;
3400 else
3401 redirty_page_for_writepage(wbc, page);
3402
3403 update_nr_written(page, wbc, nr_written);
3404 unlock_page(page);
3405 ret = 1;
3406 goto done_unlocked;
3407 }
3408 }
3409
3410
3411
3412
3413
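 /*
 * We don't want to touch the inode after unlocking the page, so update
 * the mapping's writeback index now.
 */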
3414 update_nr_written(page, wbc, nr_written + 1);
3415
3416 end = page_end;
3417 if (i_size <= start) {
3418 if (tree->ops && tree->ops->writepage_end_io_hook)
3419 tree->ops->writepage_end_io_hook(page, start,
3420 page_end, NULL, 1);
3421 goto done;
3422 }
3423
3424 blocksize = inode->i_sb->s_blocksize;
3425
3426 while (cur <= end) {
3427 u64 em_end;
3428 if (cur >= i_size) {
3429 if (tree->ops && tree->ops->writepage_end_io_hook)
3430 tree->ops->writepage_end_io_hook(page, cur,
3431 page_end, NULL, 1);
3432 break;
3433 }
3434 em = epd->get_extent(inode, page, pg_offset, cur,
3435 end - cur + 1, 1);
3436 if (IS_ERR_OR_NULL(em)) {
3437 SetPageError(page);
3438 ret = PTR_ERR_OR_ZERO(em);
3439 break;
3440 }
3441
3442 extent_offset = cur - em->start;
3443 em_end = extent_map_end(em);
3444 BUG_ON(em_end <= cur);
3445 BUG_ON(end < cur);
3446 iosize = min(em_end - cur, end - cur + 1);
3447 iosize = ALIGN(iosize, blocksize);
3448 sector = (em->block_start + extent_offset) >> 9;
3449 bdev = em->bdev;
3450 block_start = em->block_start;
3451 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3452 free_extent_map(em);
3453 em = NULL;
3454
3455
3456
3457
3458
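 /*
 * Compressed, hole and inline extents are written through other paths
 * in the filesystem; nothing to submit from here.
 */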
3459 if (compressed || block_start == EXTENT_MAP_HOLE ||
3460 block_start == EXTENT_MAP_INLINE) {
3461
3462
3463
3464
3465 if (!compressed && tree->ops &&
3466 tree->ops->writepage_end_io_hook)
3467 tree->ops->writepage_end_io_hook(page, cur,
3468 cur + iosize - 1,
3469 NULL, 1);
3470 else if (compressed) {
3471
3472
3473
3474
3475 nr++;
3476 }
3477
3478 cur += iosize;
3479 pg_offset += iosize;
3480 continue;
3481 }
3482
3483 if (tree->ops && tree->ops->writepage_io_hook) {
3484 ret = tree->ops->writepage_io_hook(page, cur,
3485 cur + iosize - 1);
3486 } else {
3487 ret = 0;
3488 }
3489 if (ret) {
3490 SetPageError(page);
3491 } else {
3492 unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
3493
3494 set_range_writeback(tree, cur, cur + iosize - 1);
3495 if (!PageWriteback(page)) {
3496 btrfs_err(BTRFS_I(inode)->root->fs_info,
3497 "page %lu not writeback, cur %llu end %llu",
3498 page->index, cur, end);
3499 }
3500
3501 ret = submit_extent_page(write_flags, tree, wbc, page,
3502 sector, iosize, pg_offset,
3503 bdev, &epd->bio, max_nr,
3504 end_bio_extent_writepage,
3505 0, 0, 0, false);
3506 if (ret)
3507 SetPageError(page);
3508 }
3509 cur = cur + iosize;
3510 pg_offset += iosize;
3511 nr++;
3512 }
3513done:
3514 *nr_ret = nr;
3515
3516done_unlocked:
3517
3518
3519 free_extent_state(cached_state);
3520 return ret;
3521}
3522
3523
3524
3525
3526
3527
3528
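/*
 * The writepage semantics are similar to a regular writepage: run the
 * delalloc pass, then map the dirty ranges into extents and submit the
 * write bios. The caller must hand us a locked page.
 */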
3529static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3530 void *data)
3531{
3532 struct inode *inode = page->mapping->host;
3533 struct extent_page_data *epd = data;
3534 u64 start = page_offset(page);
3535 u64 page_end = start + PAGE_CACHE_SIZE - 1;
3536 int ret;
3537 int nr = 0;
3538 size_t pg_offset = 0;
3539 loff_t i_size = i_size_read(inode);
3540 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
3541 int write_flags;
3542 unsigned long nr_written = 0;
3543
3544 if (wbc->sync_mode == WB_SYNC_ALL)
3545 write_flags = WRITE_SYNC;
3546 else
3547 write_flags = WRITE;
3548
3549 trace___extent_writepage(page, inode, wbc);
3550
3551 WARN_ON(!PageLocked(page));
3552
3553 ClearPageError(page);
3554
3555 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
3556 if (page->index > end_index ||
3557 (page->index == end_index && !pg_offset)) {
3558 page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
3559 unlock_page(page);
3560 return 0;
3561 }
3562
3563 if (page->index == end_index) {
3564 char *userpage;
3565
3566 userpage = kmap_atomic(page);
3567 memset(userpage + pg_offset, 0,
3568 PAGE_CACHE_SIZE - pg_offset);
3569 kunmap_atomic(userpage);
3570 flush_dcache_page(page);
3571 }
3572
3573 pg_offset = 0;
3574
3575 set_page_extent_mapped(page);
3576
3577 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3578 if (ret == 1)
3579 goto done_unlocked;
3580 if (ret)
3581 goto done;
3582
3583 ret = __extent_writepage_io(inode, page, wbc, epd,
3584 i_size, nr_written, write_flags, &nr);
3585 if (ret == 1)
3586 goto done_unlocked;
3587
3588done:
3589 if (nr == 0) {
3590
3591 set_page_writeback(page);
3592 end_page_writeback(page);
3593 }
3594 if (PageError(page)) {
3595 ret = ret < 0 ? ret : -EIO;
3596 end_extent_writepage(page, ret, start, page_end);
3597 }
3598 unlock_page(page);
3599 return ret;
3600
3601done_unlocked:
3602 return 0;
3603}
3604
3605void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3606{
3607 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3608 TASK_UNINTERRUPTIBLE);
3609}
3610
3611static noinline_for_stack int
3612lock_extent_buffer_for_io(struct extent_buffer *eb,
3613 struct btrfs_fs_info *fs_info,
3614 struct extent_page_data *epd)
3615{
3616 unsigned long i, num_pages;
3617 int flush = 0;
3618 int ret = 0;
3619
3620 if (!btrfs_try_tree_write_lock(eb)) {
3621 flush = 1;
3622 flush_write_bio(epd);
3623 btrfs_tree_lock(eb);
3624 }
3625
3626 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3627 btrfs_tree_unlock(eb);
3628 if (!epd->sync_io)
3629 return 0;
3630 if (!flush) {
3631 flush_write_bio(epd);
3632 flush = 1;
3633 }
3634 while (1) {
3635 wait_on_extent_buffer_writeback(eb);
3636 btrfs_tree_lock(eb);
3637 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3638 break;
3639 btrfs_tree_unlock(eb);
3640 }
3641 }
3642
3643
3644
3645
3646
3647
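 /*
 * Flip DIRTY to WRITEBACK under refs_lock so that
 * extent_buffer_under_io() never observes the buffer with neither bit
 * set while a write is pending.
 */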
3648 spin_lock(&eb->refs_lock);
3649 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3650 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3651 spin_unlock(&eb->refs_lock);
3652 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3653 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
3654 -eb->len,
3655 fs_info->dirty_metadata_batch);
3656 ret = 1;
3657 } else {
3658 spin_unlock(&eb->refs_lock);
3659 }
3660
3661 btrfs_tree_unlock(eb);
3662
3663 if (!ret)
3664 return ret;
3665
3666 num_pages = num_extent_pages(eb->start, eb->len);
3667 for (i = 0; i < num_pages; i++) {
3668 struct page *p = eb->pages[i];
3669
3670 if (!trylock_page(p)) {
3671 if (!flush) {
3672 flush_write_bio(epd);
3673 flush = 1;
3674 }
3675 lock_page(p);
3676 }
3677 }
3678
3679 return ret;
3680}
3681
3682static void end_extent_buffer_writeback(struct extent_buffer *eb)
3683{
3684 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3685 smp_mb__after_atomic();
3686 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3687}
3688
3689static void set_btree_ioerr(struct page *page)
3690{
3691 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3692 struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
3693
3694 SetPageError(page);
3695 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3696 return;

 /*
 * A write error on a btree node must not be silently lost. The page
 * Error bits alone are not enough, since the pages can be released or
 * redirtied before a transaction commit or log sync gets to look at
 * them. Record the failure in the btree inode's runtime flags instead:
 * log_index tells us whether this buffer belongs to one of the two log
 * trees (0 or 1) or to any other tree (-1), and the corresponding flag
 * is what commit/log sync check to decide whether to return an error.
 */
3736 switch (eb->log_index) {
3737 case -1:
3738 set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
3739 break;
3740 case 0:
3741 set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
3742 break;
3743 case 1:
3744 set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
3745 break;
3746 default:
3747 BUG();
3748 }
3749}
3750
3751static void end_bio_extent_buffer_writepage(struct bio *bio)
3752{
3753 struct bio_vec *bvec;
3754 struct extent_buffer *eb;
3755 int i, done;
3756
3757 bio_for_each_segment_all(bvec, bio, i) {
3758 struct page *page = bvec->bv_page;
3759
3760 eb = (struct extent_buffer *)page->private;
3761 BUG_ON(!eb);
3762 done = atomic_dec_and_test(&eb->io_pages);
3763
3764 if (bio->bi_error ||
3765 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3766 ClearPageUptodate(page);
3767 set_btree_ioerr(page);
3768 }
3769
3770 end_page_writeback(page);
3771
3772 if (!done)
3773 continue;
3774
3775 end_extent_buffer_writeback(eb);
3776 }
3777
3778 bio_put(bio);
3779}
3780
3781static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3782 struct btrfs_fs_info *fs_info,
3783 struct writeback_control *wbc,
3784 struct extent_page_data *epd)
3785{
3786 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3787 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3788 u64 offset = eb->start;
3789 unsigned long i, num_pages;
3790 unsigned long bio_flags = 0;
3791 int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
3792 int ret = 0;
3793
3794 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3795 num_pages = num_extent_pages(eb->start, eb->len);
3796 atomic_set(&eb->io_pages, num_pages);
3797 if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
3798 bio_flags = EXTENT_BIO_TREE_LOG;
3799
3800 for (i = 0; i < num_pages; i++) {
3801 struct page *p = eb->pages[i];
3802
3803 clear_page_dirty_for_io(p);
3804 set_page_writeback(p);
3805 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
3806 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3807 -1, end_bio_extent_buffer_writepage,
3808 0, epd->bio_flags, bio_flags, false);
3809 epd->bio_flags = bio_flags;
3810 if (ret) {
3811 set_btree_ioerr(p);
3812 end_page_writeback(p);
3813 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3814 end_extent_buffer_writeback(eb);
3815 ret = -EIO;
3816 break;
3817 }
3818 offset += PAGE_CACHE_SIZE;
3819 update_nr_written(p, wbc, 1);
3820 unlock_page(p);
3821 }
3822
3823 if (unlikely(ret)) {
3824 for (; i < num_pages; i++) {
3825 struct page *p = eb->pages[i];
3826 clear_page_dirty_for_io(p);
3827 unlock_page(p);
3828 }
3829 }
3830
3831 return ret;
3832}
3833
3834int btree_write_cache_pages(struct address_space *mapping,
3835 struct writeback_control *wbc)
3836{
3837 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3838 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3839 struct extent_buffer *eb, *prev_eb = NULL;
3840 struct extent_page_data epd = {
3841 .bio = NULL,
3842 .tree = tree,
3843 .extent_locked = 0,
3844 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3845 .bio_flags = 0,
3846 };
3847 int ret = 0;
3848 int done = 0;
3849 int nr_to_write_done = 0;
3850 struct pagevec pvec;
3851 int nr_pages;
3852 pgoff_t index;
3853 pgoff_t end;
3854 int scanned = 0;
3855 int tag;
3856
3857 pagevec_init(&pvec, 0);
3858 if (wbc->range_cyclic) {
3859 index = mapping->writeback_index;
3860 end = -1;
3861 } else {
3862 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3863 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3864 scanned = 1;
3865 }
3866 if (wbc->sync_mode == WB_SYNC_ALL)
3867 tag = PAGECACHE_TAG_TOWRITE;
3868 else
3869 tag = PAGECACHE_TAG_DIRTY;
3870retry:
3871 if (wbc->sync_mode == WB_SYNC_ALL)
3872 tag_pages_for_writeback(mapping, index, end);
3873 while (!done && !nr_to_write_done && (index <= end) &&
3874 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3875 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3876 unsigned i;
3877
3878 scanned = 1;
3879 for (i = 0; i < nr_pages; i++) {
3880 struct page *page = pvec.pages[i];
3881
3882 if (!PagePrivate(page))
3883 continue;
3884
3885 if (!wbc->range_cyclic && page->index > end) {
3886 done = 1;
3887 break;
3888 }
3889
3890 spin_lock(&mapping->private_lock);
3891 if (!PagePrivate(page)) {
3892 spin_unlock(&mapping->private_lock);
3893 continue;
3894 }
3895
3896 eb = (struct extent_buffer *)page->private;
3897
3898
3899
3900
3901
3902
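 /*
 * This should not happen; warn and skip the page instead of crashing
 * the box with a BUG_ON.
 */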
3903 if (WARN_ON(!eb)) {
3904 spin_unlock(&mapping->private_lock);
3905 continue;
3906 }
3907
3908 if (eb == prev_eb) {
3909 spin_unlock(&mapping->private_lock);
3910 continue;
3911 }
3912
3913 ret = atomic_inc_not_zero(&eb->refs);
3914 spin_unlock(&mapping->private_lock);
3915 if (!ret)
3916 continue;
3917
3918 prev_eb = eb;
3919 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3920 if (!ret) {
3921 free_extent_buffer(eb);
3922 continue;
3923 }
3924
3925 ret = write_one_eb(eb, fs_info, wbc, &epd);
3926 if (ret) {
3927 done = 1;
3928 free_extent_buffer(eb);
3929 break;
3930 }
3931 free_extent_buffer(eb);
3932
3933
3934
3935
3936
3937
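 /*
 * The filesystem may bump nr_to_write while we work; re-check it here
 * so the new value is always honored.
 */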
3938 nr_to_write_done = wbc->nr_to_write <= 0;
3939 }
3940 pagevec_release(&pvec);
3941 cond_resched();
3942 }
3943 if (!scanned && !done) {
3944
3945
3946
3947
3948 scanned = 1;
3949 index = 0;
3950 goto retry;
3951 }
3952 flush_write_bio(&epd);
3953 return ret;
3954}

/*
 * Walk the list of dirty pages of the given address space and write all
 * of them via @writepage. Pages already under IO are skipped for
 * WB_SYNC_NONE (memory-cleaning) writeback; for WB_SYNC_ALL we flush the
 * pending bio and wait, since data-integrity callers (fsync/msync) need
 * every page that was dirty at call time to reach disk.
 */
3971static int extent_write_cache_pages(struct extent_io_tree *tree,
3972 struct address_space *mapping,
3973 struct writeback_control *wbc,
3974 writepage_t writepage, void *data,
3975 void (*flush_fn)(void *))
3976{
3977 struct inode *inode = mapping->host;
3978 int ret = 0;
3979 int done = 0;
3980 int err = 0;
3981 int nr_to_write_done = 0;
3982 struct pagevec pvec;
3983 int nr_pages;
3984 pgoff_t index;
3985 pgoff_t end;
3986 int scanned = 0;
3987 int tag;
3988

 /*
 * Hold a reference on the inode so that ordered extents can finish
 * their work when the IO completes. If igrab() fails the inode is
 * being freed anyway, so there is nothing useful left to write out.
 */
3998 if (!igrab(inode))
3999 return 0;
4000
4001 pagevec_init(&pvec, 0);
4002 if (wbc->range_cyclic) {
4003 index = mapping->writeback_index;
4004 end = -1;
4005 } else {
4006 index = wbc->range_start >> PAGE_CACHE_SHIFT;
4007 end = wbc->range_end >> PAGE_CACHE_SHIFT;
4008 scanned = 1;
4009 }
4010 if (wbc->sync_mode == WB_SYNC_ALL)
4011 tag = PAGECACHE_TAG_TOWRITE;
4012 else
4013 tag = PAGECACHE_TAG_DIRTY;
4014retry:
4015 if (wbc->sync_mode == WB_SYNC_ALL)
4016 tag_pages_for_writeback(mapping, index, end);
4017 while (!done && !nr_to_write_done && (index <= end) &&
4018 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
4019 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
4020 unsigned i;
4021
4022 scanned = 1;
4023 for (i = 0; i < nr_pages; i++) {
4024 struct page *page = pvec.pages[i];
4025
4026
4027
4028
4029
4030
4031
4032
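 /*
 * At this point we hold neither the mapping's tree_lock nor the page
 * lock: the page may be truncated or invalidated (page->mapping set
 * to NULL) underneath us, which the checks below have to tolerate.
 */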
4033 if (!trylock_page(page)) {
4034 flush_fn(data);
4035 lock_page(page);
4036 }
4037
4038 if (unlikely(page->mapping != mapping)) {
4039 unlock_page(page);
4040 continue;
4041 }
4042
4043 if (!wbc->range_cyclic && page->index > end) {
4044 done = 1;
4045 unlock_page(page);
4046 continue;
4047 }
4048
4049 if (wbc->sync_mode != WB_SYNC_NONE) {
4050 if (PageWriteback(page))
4051 flush_fn(data);
4052 wait_on_page_writeback(page);
4053 }
4054
4055 if (PageWriteback(page) ||
4056 !clear_page_dirty_for_io(page)) {
4057 unlock_page(page);
4058 continue;
4059 }
4060
4061 ret = (*writepage)(page, wbc, data);
4062
4063 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
4064 unlock_page(page);
4065 ret = 0;
4066 }
4067 if (!err && ret < 0)
4068 err = ret;
4069
4070
4071
4072
4073
4074
4075 nr_to_write_done = wbc->nr_to_write <= 0;
4076 }
4077 pagevec_release(&pvec);
4078 cond_resched();
4079 }
4080 if (!scanned && !done && !err) {
4081
4082
4083
4084
4085 scanned = 1;
4086 index = 0;
4087 goto retry;
4088 }
4089 btrfs_add_delayed_iput(inode);
4090 return err;
4091}
4092
4093static void flush_epd_write_bio(struct extent_page_data *epd)
4094{
4095 if (epd->bio) {
4096 int rw = WRITE;
4097 int ret;
4098
4099 if (epd->sync_io)
4100 rw = WRITE_SYNC;
4101
4102 ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
4103 BUG_ON(ret < 0);
4104 epd->bio = NULL;
4105 }
4106}
4107
4108static noinline void flush_write_bio(void *data)
4109{
4110 struct extent_page_data *epd = data;
4111 flush_epd_write_bio(epd);
4112}
4113
4114int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
4115 get_extent_t *get_extent,
4116 struct writeback_control *wbc)
4117{
4118 int ret;
4119 struct extent_page_data epd = {
4120 .bio = NULL,
4121 .tree = tree,
4122 .get_extent = get_extent,
4123 .extent_locked = 0,
4124 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4125 .bio_flags = 0,
4126 };
4127
4128 ret = __extent_writepage(page, wbc, &epd);
4129
4130 flush_epd_write_bio(&epd);
4131 return ret;
4132}
4133
4134int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
4135 u64 start, u64 end, get_extent_t *get_extent,
4136 int mode)
4137{
4138 int ret = 0;
4139 struct address_space *mapping = inode->i_mapping;
4140 struct page *page;
4141 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
4142 PAGE_CACHE_SHIFT;
4143
4144 struct extent_page_data epd = {
4145 .bio = NULL,
4146 .tree = tree,
4147 .get_extent = get_extent,
4148 .extent_locked = 1,
4149 .sync_io = mode == WB_SYNC_ALL,
4150 .bio_flags = 0,
4151 };
4152 struct writeback_control wbc_writepages = {
4153 .sync_mode = mode,
4154 .nr_to_write = nr_pages * 2,
4155 .range_start = start,
4156 .range_end = end + 1,
4157 };
4158
4159 while (start <= end) {
4160 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
4161 if (clear_page_dirty_for_io(page))
4162 ret = __extent_writepage(page, &wbc_writepages, &epd);
4163 else {
4164 if (tree->ops && tree->ops->writepage_end_io_hook)
4165 tree->ops->writepage_end_io_hook(page, start,
4166 start + PAGE_CACHE_SIZE - 1,
4167 NULL, 1);
4168 unlock_page(page);
4169 }
4170 page_cache_release(page);
4171 start += PAGE_CACHE_SIZE;
4172 }
4173
4174 flush_epd_write_bio(&epd);
4175 return ret;
4176}
4177
4178int extent_writepages(struct extent_io_tree *tree,
4179 struct address_space *mapping,
4180 get_extent_t *get_extent,
4181 struct writeback_control *wbc)
4182{
4183 int ret = 0;
4184 struct extent_page_data epd = {
4185 .bio = NULL,
4186 .tree = tree,
4187 .get_extent = get_extent,
4188 .extent_locked = 0,
4189 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4190 .bio_flags = 0,
4191 };
4192
4193 ret = extent_write_cache_pages(tree, mapping, wbc,
4194 __extent_writepage, &epd,
4195 flush_write_bio);
4196 flush_epd_write_bio(&epd);
4197 return ret;
4198}
4199
4200int extent_readpages(struct extent_io_tree *tree,
4201 struct address_space *mapping,
4202 struct list_head *pages, unsigned nr_pages,
4203 get_extent_t get_extent)
4204{
4205 struct bio *bio = NULL;
4206 unsigned page_idx;
4207 unsigned long bio_flags = 0;
4208 struct page *pagepool[16];
4209 struct page *page;
4210 struct extent_map *em_cached = NULL;
4211 int nr = 0;
4212 u64 prev_em_start = (u64)-1;
4213
4214 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4215 page = list_entry(pages->prev, struct page, lru);
4216
4217 prefetchw(&page->flags);
4218 list_del(&page->lru);
4219 if (add_to_page_cache_lru(page, mapping,
4220 page->index, GFP_NOFS)) {
4221 page_cache_release(page);
4222 continue;
4223 }
4224
4225 pagepool[nr++] = page;
4226 if (nr < ARRAY_SIZE(pagepool))
4227 continue;
4228 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4229 &bio, 0, &bio_flags, READ, &prev_em_start);
4230 nr = 0;
4231 }
4232 if (nr)
4233 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4234 &bio, 0, &bio_flags, READ, &prev_em_start);
4235
4236 if (em_cached)
4237 free_extent_map(em_cached);
4238
4239 BUG_ON(!list_empty(pages));
4240 if (bio)
4241 return submit_one_bio(READ, bio, 0, bio_flags);
4242 return 0;
4243}
4244
4245
4246
4247
4248
4249
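/*
 * Basic invalidatepage code: wait for any locked or writeback ranges
 * covering the page, then clear the related extent state.
 */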
4250int extent_invalidatepage(struct extent_io_tree *tree,
4251 struct page *page, unsigned long offset)
4252{
4253 struct extent_state *cached_state = NULL;
4254 u64 start = page_offset(page);
4255 u64 end = start + PAGE_CACHE_SIZE - 1;
4256 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4257
4258 start += ALIGN(offset, blocksize);
4259 if (start > end)
4260 return 0;
4261
4262 lock_extent_bits(tree, start, end, 0, &cached_state);
4263 wait_on_page_writeback(page);
4264 clear_extent_bit(tree, start, end,
4265 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4266 EXTENT_DO_ACCOUNTING,
4267 1, 1, &cached_state, GFP_NOFS);
4268 return 0;
4269}
4270
4271
4272
4273
4274
4275
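/*
 * Helper for releasepage: check whether the range covering the page is
 * locked or under IO, and drop the remaining state bits if it is safe
 * to let the page go.
 */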
4276static int try_release_extent_state(struct extent_map_tree *map,
4277 struct extent_io_tree *tree,
4278 struct page *page, gfp_t mask)
4279{
4280 u64 start = page_offset(page);
4281 u64 end = start + PAGE_CACHE_SIZE - 1;
4282 int ret = 1;
4283
4284 if (test_range_bit(tree, start, end,
4285 EXTENT_IOBITS, 0, NULL))
4286 ret = 0;
4287 else {
4288 if ((mask & GFP_NOFS) == GFP_NOFS)
4289 mask = GFP_NOFS;
4290
4291
4292
4293
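 /*
 * At this point everything except the locked and nodatasum bits can
 * safely be cleared.
 */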
4294 ret = clear_extent_bit(tree, start, end,
4295 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4296 0, 0, NULL, mask);
4297
4298
4299
4300
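 /*
 * If clear_extent_bit failed (e.g. ENOMEM) the state is still there,
 * so the page must not be released.
 */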
4301 if (ret < 0)
4302 ret = 0;
4303 else
4304 ret = 1;
4305 }
4306 return ret;
4307}
4308
4309
4310
4311
4312
4313
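/*
 * Helper for releasepage: as long as there are no locked or writeback
 * extents in the range covering the page, drop both the extent map
 * records and the extent state records for it.
 */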
4314int try_release_extent_mapping(struct extent_map_tree *map,
4315 struct extent_io_tree *tree, struct page *page,
4316 gfp_t mask)
4317{
4318 struct extent_map *em;
4319 u64 start = page_offset(page);
4320 u64 end = start + PAGE_CACHE_SIZE - 1;
4321
4322 if ((mask & __GFP_WAIT) &&
4323 page->mapping->host->i_size > 16 * 1024 * 1024) {
4324 u64 len;
4325 while (start <= end) {
4326 len = end - start + 1;
4327 write_lock(&map->lock);
4328 em = lookup_extent_mapping(map, start, len);
4329 if (!em) {
4330 write_unlock(&map->lock);
4331 break;
4332 }
4333 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4334 em->start != start) {
4335 write_unlock(&map->lock);
4336 free_extent_map(em);
4337 break;
4338 }
4339 if (!test_range_bit(tree, em->start,
4340 extent_map_end(em) - 1,
4341 EXTENT_LOCKED | EXTENT_WRITEBACK,
4342 0, NULL)) {
4343 remove_extent_mapping(map, em);
4344
4345 free_extent_map(em);
4346 }
4347 start = extent_map_end(em);
4348 write_unlock(&map->lock);
4349
4350
4351 free_extent_map(em);
4352 }
4353 }
4354 return try_release_extent_state(map, tree, page, mask);
4355}
4356
4357
4358
4359
4360
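/*
 * Helper for fiemap, which does not want to see holes: keep mapping
 * forward until something other than a hole is found or 'last' is
 * reached.
 */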
4361static struct extent_map *get_extent_skip_holes(struct inode *inode,
4362 u64 offset,
4363 u64 last,
4364 get_extent_t *get_extent)
4365{
4366 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
4367 struct extent_map *em;
4368 u64 len;
4369
4370 if (offset >= last)
4371 return NULL;
4372
4373 while (1) {
4374 len = last - offset;
4375 if (len == 0)
4376 break;
4377 len = ALIGN(len, sectorsize);
4378 em = get_extent(inode, NULL, 0, offset, len, 0);
4379 if (IS_ERR_OR_NULL(em))
4380 return em;
4381
4382
4383 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
4384 em->block_start != EXTENT_MAP_HOLE) {
4385 return em;
4386 }
4387
4388
4389 offset = extent_map_end(em);
4390 free_extent_map(em);
4391 if (offset >= last)
4392 break;
4393 }
4394 return NULL;
4395}
4396
4397int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4398 __u64 start, __u64 len, get_extent_t *get_extent)
4399{
4400 int ret = 0;
4401 u64 off = start;
4402 u64 max = start + len;
4403 u32 flags = 0;
4404 u32 found_type;
4405 u64 last;
4406 u64 last_for_get_extent = 0;
4407 u64 disko = 0;
4408 u64 isize = i_size_read(inode);
4409 struct btrfs_key found_key;
4410 struct extent_map *em = NULL;
4411 struct extent_state *cached_state = NULL;
4412 struct btrfs_path *path;
4413 struct btrfs_root *root = BTRFS_I(inode)->root;
4414 int end = 0;
4415 u64 em_start = 0;
4416 u64 em_len = 0;
4417 u64 em_end = 0;
4418
4419 if (len == 0)
4420 return -EINVAL;
4421
4422 path = btrfs_alloc_path();
4423 if (!path)
4424 return -ENOMEM;
4425 path->leave_spinning = 1;
4426
4427 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4428 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4429
4430
4431
4432
4433
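 /*
 * Look up the last file extent; i_size is not used here because there
 * may be preallocated extents past it.
 */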
4434 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
4435 0);
4436 if (ret < 0) {
4437 btrfs_free_path(path);
4438 return ret;
4439 }
4440 WARN_ON(!ret);
4441 path->slots[0]--;
4442 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4443 found_type = found_key.type;
4444
4445
4446 if (found_key.objectid != btrfs_ino(inode) ||
4447 found_type != BTRFS_EXTENT_DATA_KEY) {
4448
4449 last = (u64)-1;
4450 last_for_get_extent = isize;
4451 } else {
4452
4453
4454
4455
4456
4457 last = found_key.offset;
4458 last_for_get_extent = last + 1;
4459 }
4460 btrfs_release_path(path);
4461
4462
4463
4464
4465
4466
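 /*
 * There may be delalloc past the last on-disk extent, so trust isize
 * unless the last extent starts beyond it.
 */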
4467 if (last < isize) {
4468 last = (u64)-1;
4469 last_for_get_extent = isize;
4470 }
4471
4472 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
4473 &cached_state);
4474
4475 em = get_extent_skip_holes(inode, start, last_for_get_extent,
4476 get_extent);
4477 if (!em)
4478 goto out;
4479 if (IS_ERR(em)) {
4480 ret = PTR_ERR(em);
4481 goto out;
4482 }
4483
4484 while (!end) {
4485 u64 offset_in_extent = 0;
4486
4487
4488 if (em->start >= max || extent_map_end(em) < off)
4489 break;
4490
4491
4492
4493
4494
4495
4496
4497 em_start = max(em->start, off);
4498
4499
4500
4501
4502
4503
4504
4505 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4506 offset_in_extent = em_start - em->start;
4507 em_end = extent_map_end(em);
4508 em_len = em_end - em_start;
4509 disko = 0;
4510 flags = 0;
4511
4512
4513
4514
4515 off = extent_map_end(em);
4516 if (off >= max)
4517 end = 1;
4518
4519 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4520 end = 1;
4521 flags |= FIEMAP_EXTENT_LAST;
4522 } else if (em->block_start == EXTENT_MAP_INLINE) {
4523 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4524 FIEMAP_EXTENT_NOT_ALIGNED);
4525 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4526 flags |= (FIEMAP_EXTENT_DELALLOC |
4527 FIEMAP_EXTENT_UNKNOWN);
4528 } else if (fieinfo->fi_extents_max) {
4529 u64 bytenr = em->block_start -
4530 (em->start - em->orig_start);
4531
4532 disko = em->block_start + offset_in_extent;
4533
4534
4535
4536
4537
4538
4539
4540
4541 ret = btrfs_check_shared(NULL, root->fs_info,
4542 root->objectid,
4543 btrfs_ino(inode), bytenr);
4544 if (ret < 0)
4545 goto out_free;
4546 if (ret)
4547 flags |= FIEMAP_EXTENT_SHARED;
4548 ret = 0;
4549 }
4550 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4551 flags |= FIEMAP_EXTENT_ENCODED;
4552 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4553 flags |= FIEMAP_EXTENT_UNWRITTEN;
4554
4555 free_extent_map(em);
4556 em = NULL;
4557 if ((em_start >= last) || em_len == (u64)-1 ||
4558 (last == (u64)-1 && isize <= em_end)) {
4559 flags |= FIEMAP_EXTENT_LAST;
4560 end = 1;
4561 }
4562
4563
4564 em = get_extent_skip_holes(inode, off, last_for_get_extent,
4565 get_extent);
4566 if (IS_ERR(em)) {
4567 ret = PTR_ERR(em);
4568 goto out;
4569 }
4570 if (!em) {
4571 flags |= FIEMAP_EXTENT_LAST;
4572 end = 1;
4573 }
4574 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
4575 em_len, flags);
4576 if (ret) {
4577 if (ret == 1)
4578 ret = 0;
4579 goto out_free;
4580 }
4581 }
4582out_free:
4583 free_extent_map(em);
4584out:
4585 btrfs_free_path(path);
4586 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4587 &cached_state, GFP_NOFS);
4588 return ret;
4589}
4590
4591static void __free_extent_buffer(struct extent_buffer *eb)
4592{
4593 btrfs_leak_debug_del(&eb->leak_list);
4594 kmem_cache_free(extent_buffer_cache, eb);
4595}
4596
4597int extent_buffer_under_io(struct extent_buffer *eb)
4598{
4599 return (atomic_read(&eb->io_pages) ||
4600 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4601 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4602}
4603
4604
4605
4606
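/*
 * Release all pages attached to an extent buffer, detaching them from
 * the buffer first when they are still privately attached to it.
 */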
4607static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4608{
4609 unsigned long index;
4610 struct page *page;
4611 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4612
4613 BUG_ON(extent_buffer_under_io(eb));
4614
4615 index = num_extent_pages(eb->start, eb->len);
4616 if (index == 0)
4617 return;
4618
4619 do {
4620 index--;
4621 page = eb->pages[index];
4622 if (!page)
4623 continue;
4624 if (mapped)
4625 spin_lock(&page->mapping->private_lock);
4626
4627
4628
4629
4630
4631
4632
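 /*
 * The pages are released after the eb has been removed from the radix
 * tree, so another eb may already have claimed this page; only clear
 * page->private if it still points at us.
 */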
4633 if (PagePrivate(page) &&
4634 page->private == (unsigned long)eb) {
4635 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4636 BUG_ON(PageDirty(page));
4637 BUG_ON(PageWriteback(page));
4638
4639
4640
4641
4642 ClearPagePrivate(page);
4643 set_page_private(page, 0);
4644
4645 page_cache_release(page);
4646 }
4647
4648 if (mapped)
4649 spin_unlock(&page->mapping->private_lock);
4650
4651
4652 page_cache_release(page);
4653 } while (index != 0);
4654}
4655
4656
4657
4658
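/* Release the pages and then free the extent buffer itself. */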
4659static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4660{
4661 btrfs_release_extent_buffer_page(eb);
4662 __free_extent_buffer(eb);
4663}
4664
4665static struct extent_buffer *
4666__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4667 unsigned long len)
4668{
4669 struct extent_buffer *eb = NULL;
4670
4671 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4672 eb->start = start;
4673 eb->len = len;
4674 eb->fs_info = fs_info;
4675 eb->bflags = 0;
4676 rwlock_init(&eb->lock);
4677 atomic_set(&eb->write_locks, 0);
4678 atomic_set(&eb->read_locks, 0);
4679 atomic_set(&eb->blocking_readers, 0);
4680 atomic_set(&eb->blocking_writers, 0);
4681 atomic_set(&eb->spinning_readers, 0);
4682 atomic_set(&eb->spinning_writers, 0);
4683 eb->lock_nested = 0;
4684 init_waitqueue_head(&eb->write_lock_wq);
4685 init_waitqueue_head(&eb->read_lock_wq);
4686
4687 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4688
4689 spin_lock_init(&eb->refs_lock);
4690 atomic_set(&eb->refs, 1);
4691 atomic_set(&eb->io_pages, 0);
4692
4693
4694
4695
4696 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4697 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4698 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4699
4700 return eb;
4701}
4702
4703struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4704{
4705 unsigned long i;
4706 struct page *p;
4707 struct extent_buffer *new;
4708 unsigned long num_pages = num_extent_pages(src->start, src->len);
4709
4710 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4711 if (new == NULL)
4712 return NULL;
4713
4714 for (i = 0; i < num_pages; i++) {
4715 p = alloc_page(GFP_NOFS);
4716 if (!p) {
4717 btrfs_release_extent_buffer(new);
4718 return NULL;
4719 }
4720 attach_extent_buffer_page(new, p);
4721 WARN_ON(PageDirty(p));
4722 SetPageUptodate(p);
4723 new->pages[i] = p;
4724 }
4725
4726 copy_extent_buffer(new, src, 0, 0, src->len);
4727 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4728 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4729
4730 return new;
4731}
4732
4733struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4734 u64 start)
4735{
4736 struct extent_buffer *eb;
4737 unsigned long len;
4738 unsigned long num_pages;
4739 unsigned long i;
4740
4741 if (!fs_info) {
4742
4743
4744
4745
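 /*
 * Called from tests that do not always have an fs_info around; fall
 * back to a 4K nodesize.
 */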
4746 len = 4096;
4747 } else {
4748 len = fs_info->tree_root->nodesize;
4749 }
4750 num_pages = num_extent_pages(0, len);
4751
4752 eb = __alloc_extent_buffer(fs_info, start, len);
4753 if (!eb)
4754 return NULL;
4755
4756 for (i = 0; i < num_pages; i++) {
4757 eb->pages[i] = alloc_page(GFP_NOFS);
4758 if (!eb->pages[i])
4759 goto err;
4760 }
4761 set_extent_buffer_uptodate(eb);
4762 btrfs_set_header_nritems(eb, 0);
4763 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4764
4765 return eb;
4766err:
4767 for (; i > 0; i--)
4768 __free_page(eb->pages[i - 1]);
4769 __free_extent_buffer(eb);
4770 return NULL;
4771}
4772
4773static void check_buffer_tree_ref(struct extent_buffer *eb)
4774{
4775 int refs;

 /*
 * The TREE_REF bit is the reference held on behalf of the radix tree.
 * If it is already set and at least one other reference exists, the
 * buffer cannot go away under us and there is nothing to do. Otherwise
 * take refs_lock and set TREE_REF, bumping the refcount, so a
 * concurrent release_extent_buffer() cannot free the buffer while the
 * tree still expects to find it.
 */
4796 refs = atomic_read(&eb->refs);
4797 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4798 return;
4799
4800 spin_lock(&eb->refs_lock);
4801 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4802 atomic_inc(&eb->refs);
4803 spin_unlock(&eb->refs_lock);
4804}
4805
4806static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4807 struct page *accessed)
4808{
4809 unsigned long num_pages, i;
4810
4811 check_buffer_tree_ref(eb);
4812
4813 num_pages = num_extent_pages(eb->start, eb->len);
4814 for (i = 0; i < num_pages; i++) {
4815 struct page *p = eb->pages[i];
4816
4817 if (p != accessed)
4818 mark_page_accessed(p);
4819 }
4820}
4821
4822struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4823 u64 start)
4824{
4825 struct extent_buffer *eb;
4826
4827 rcu_read_lock();
4828 eb = radix_tree_lookup(&fs_info->buffer_radix,
4829 start >> PAGE_CACHE_SHIFT);
4830 if (eb && atomic_inc_not_zero(&eb->refs)) {
4831 rcu_read_unlock();

 /*
 * If the buffer was marked stale, a concurrent free_extent_buffer()
 * may be about to drop the final references. Taking and immediately
 * releasing refs_lock serializes against that path, so the reference
 * we just acquired is seen before the buffer can be torn down.
 */
4847 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4848 spin_lock(&eb->refs_lock);
4849 spin_unlock(&eb->refs_lock);
4850 }
4851 mark_extent_buffer_accessed(eb, NULL);
4852 return eb;
4853 }
4854 rcu_read_unlock();
4855
4856 return NULL;
4857}
4858
4859#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4860struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4861 u64 start)
4862{
4863 struct extent_buffer *eb, *exists = NULL;
4864 int ret;
4865
4866 eb = find_extent_buffer(fs_info, start);
4867 if (eb)
4868 return eb;
4869 eb = alloc_dummy_extent_buffer(fs_info, start);
4870 if (!eb)
4871 return NULL;
4872 eb->fs_info = fs_info;
4873again:
4874 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
4875 if (ret)
4876 goto free_eb;
4877 spin_lock(&fs_info->buffer_lock);
4878 ret = radix_tree_insert(&fs_info->buffer_radix,
4879 start >> PAGE_CACHE_SHIFT, eb);
4880 spin_unlock(&fs_info->buffer_lock);
4881 radix_tree_preload_end();
4882 if (ret == -EEXIST) {
4883 exists = find_extent_buffer(fs_info, start);
4884 if (exists)
4885 goto free_eb;
4886 else
4887 goto again;
4888 }
4889 check_buffer_tree_ref(eb);
4890 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4891
4892
4893
4894
4895
4896
4897
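 /*
 * Dummy buffers are freed once their refcount drops to two in
 * free_extent_buffer(); take an extra reference so this one stays
 * around until the caller is done with it.
 */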
4898 atomic_inc(&eb->refs);
4899 return eb;
4900free_eb:
4901 btrfs_release_extent_buffer(eb);
4902 return exists;
4903}
4904#endif
4905
4906struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4907 u64 start)
4908{
4909 unsigned long len = fs_info->tree_root->nodesize;
4910 unsigned long num_pages = num_extent_pages(start, len);
4911 unsigned long i;
4912 unsigned long index = start >> PAGE_CACHE_SHIFT;
4913 struct extent_buffer *eb;
4914 struct extent_buffer *exists = NULL;
4915 struct page *p;
4916 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4917 int uptodate = 1;
4918 int ret;
4919
4920 eb = find_extent_buffer(fs_info, start);
4921 if (eb)
4922 return eb;
4923
4924 eb = __alloc_extent_buffer(fs_info, start, len);
4925 if (!eb)
4926 return NULL;
4927
4928 for (i = 0; i < num_pages; i++, index++) {
4929 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4930 if (!p)
4931 goto free_eb;
4932
4933 spin_lock(&mapping->private_lock);
		if (PagePrivate(p)) {
			/*
			 * An extent buffer is already attached to this page.
			 * If we can grab a reference on it, it is still alive
			 * and we can return it instead of the one we just
			 * allocated; otherwise it is being torn down and we
			 * can safely take over page->private.
			 */
			exists = (struct extent_buffer *)p->private;
			if (atomic_inc_not_zero(&exists->refs)) {
				spin_unlock(&mapping->private_lock);
				unlock_page(p);
				page_cache_release(p);
				mark_extent_buffer_accessed(exists, p);
				goto free_eb;
			}
			exists = NULL;

			/*
			 * The old eb is dying; clear page->private so that
			 * attach_extent_buffer_page() doesn't complain, and
			 * drop the page reference the old eb held.
			 */
			ClearPagePrivate(p);
			WARN_ON(PageDirty(p));
			page_cache_release(p);
		}
4960 attach_extent_buffer_page(eb, p);
4961 spin_unlock(&mapping->private_lock);
4962 WARN_ON(PageDirty(p));
4963 eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;

		/*
		 * The pages stay locked here; see the comment below the
		 * radix tree insertion for how this avoids racing with
		 * releasepage and why they are only unlocked once the buffer
		 * is in the tree.
		 */
	}
4972 if (uptodate)
4973 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4974again:
4975 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
4976 if (ret)
4977 goto free_eb;
4978
4979 spin_lock(&fs_info->buffer_lock);
4980 ret = radix_tree_insert(&fs_info->buffer_radix,
4981 start >> PAGE_CACHE_SHIFT, eb);
4982 spin_unlock(&fs_info->buffer_lock);
4983 radix_tree_preload_end();
4984 if (ret == -EEXIST) {
4985 exists = find_extent_buffer(fs_info, start);
4986 if (exists)
4987 goto free_eb;
4988 else
4989 goto again;
4990 }
4991
4992 check_buffer_tree_ref(eb);
4993 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	/*
	 * There is a race where releasepage can look this extent buffer up
	 * in the radix tree, fail to find it, and tell the VM the pages are
	 * safe to reclaim, clearing page->private.  Now that the buffer is
	 * in the radix tree, set up the page state properly and only then
	 * unlock the pages.
	 */
	SetPageChecked(eb->pages[0]);
5005 for (i = 1; i < num_pages; i++) {
5006 p = eb->pages[i];
5007 ClearPageChecked(p);
5008 unlock_page(p);
5009 }
5010 unlock_page(eb->pages[0]);
5011 return eb;
5012
5013free_eb:
5014 WARN_ON(!atomic_dec_and_test(&eb->refs));
5015 for (i = 0; i < num_pages; i++) {
5016 if (eb->pages[i])
5017 unlock_page(eb->pages[i]);
5018 }
5019
5020 btrfs_release_extent_buffer(eb);
5021 return exists;
5022}
5023
5024static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5025{
5026 struct extent_buffer *eb =
5027 container_of(head, struct extent_buffer, rcu_head);
5028
5029 __free_extent_buffer(eb);
5030}
5031
/* Expects eb->refs_lock to be held; it is released before returning. */
static int release_extent_buffer(struct extent_buffer *eb)
5034{
5035 WARN_ON(atomic_read(&eb->refs) == 0);
5036 if (atomic_dec_and_test(&eb->refs)) {
5037 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5038 struct btrfs_fs_info *fs_info = eb->fs_info;
5039
5040 spin_unlock(&eb->refs_lock);
5041
5042 spin_lock(&fs_info->buffer_lock);
5043 radix_tree_delete(&fs_info->buffer_radix,
5044 eb->start >> PAGE_CACHE_SHIFT);
5045 spin_unlock(&fs_info->buffer_lock);
5046 } else {
5047 spin_unlock(&eb->refs_lock);
5048 }
5049
		/* Safe to release the backing pages now. */
		btrfs_release_extent_buffer_page(eb);
5052#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5053 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
5054 __free_extent_buffer(eb);
5055 return 1;
5056 }
5057#endif
5058 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5059 return 1;
5060 }
5061 spin_unlock(&eb->refs_lock);
5062
5063 return 0;
5064}
5065
5066void free_extent_buffer(struct extent_buffer *eb)
5067{
5068 int refs;
5069 int old;
5070 if (!eb)
5071 return;
5072
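	/*
	 * Lockless fast path: as long as more than three references remain,
	 * dropping one cannot be the point where the buffer has to be torn
	 * down, so a bare cmpxchg is enough and refs_lock can be avoided.
	 */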
5073 while (1) {
5074 refs = atomic_read(&eb->refs);
5075 if (refs <= 3)
5076 break;
5077 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5078 if (old == refs)
5079 return;
5080 }
5081
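	/*
	 * Slow path: with few references left we may need to drop the extra
	 * reference that keeps a dummy buffer alive, or the tree reference
	 * of a stale buffer that is no longer dirty or under IO, before
	 * handing the final put to release_extent_buffer().
	 */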
5082 spin_lock(&eb->refs_lock);
5083 if (atomic_read(&eb->refs) == 2 &&
5084 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5085 atomic_dec(&eb->refs);
5086
5087 if (atomic_read(&eb->refs) == 2 &&
5088 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5089 !extent_buffer_under_io(eb) &&
5090 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5091 atomic_dec(&eb->refs);
5092
	/*
	 * Drop the reference we hold; release_extent_buffer() frees the
	 * buffer if that was the last one and releases refs_lock either way.
	 */
	release_extent_buffer(eb);
5098}
5099
5100void free_extent_buffer_stale(struct extent_buffer *eb)
5101{
5102 if (!eb)
5103 return;
5104
5105 spin_lock(&eb->refs_lock);
5106 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5107
5108 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5109 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5110 atomic_dec(&eb->refs);
5111 release_extent_buffer(eb);
5112}
5113
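/*
 * Clear the dirty state of every page backing @eb: clear the per-page dirty
 * bit and drop the PAGECACHE_TAG_DIRTY tag in the mapping's radix tree so
 * writeback no longer considers these pages.
 */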
5114void clear_extent_buffer_dirty(struct extent_buffer *eb)
5115{
5116 unsigned long i;
5117 unsigned long num_pages;
5118 struct page *page;
5119
5120 num_pages = num_extent_pages(eb->start, eb->len);
5121
5122 for (i = 0; i < num_pages; i++) {
5123 page = eb->pages[i];
5124 if (!PageDirty(page))
5125 continue;
5126
5127 lock_page(page);
5128 WARN_ON(!PagePrivate(page));
5129
5130 clear_page_dirty_for_io(page);
5131 spin_lock_irq(&page->mapping->tree_lock);
5132 if (!PageDirty(page)) {
5133 radix_tree_tag_clear(&page->mapping->page_tree,
5134 page_index(page),
5135 PAGECACHE_TAG_DIRTY);
5136 }
5137 spin_unlock_irq(&page->mapping->tree_lock);
5138 ClearPageError(page);
5139 unlock_page(page);
5140 }
5141 WARN_ON(atomic_read(&eb->refs) == 0);
5142}
5143
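/*
 * Make sure the buffer holds its tree reference (check_buffer_tree_ref())
 * before dirtying it, then mark the buffer and all of its pages dirty.
 * Returns whether the buffer was already dirty.
 */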
5144int set_extent_buffer_dirty(struct extent_buffer *eb)
5145{
5146 unsigned long i;
5147 unsigned long num_pages;
5148 int was_dirty = 0;
5149
5150 check_buffer_tree_ref(eb);
5151
5152 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5153
5154 num_pages = num_extent_pages(eb->start, eb->len);
5155 WARN_ON(atomic_read(&eb->refs) == 0);
5156 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5157
5158 for (i = 0; i < num_pages; i++)
5159 set_page_dirty(eb->pages[i]);
5160 return was_dirty;
5161}
5162
5163int clear_extent_buffer_uptodate(struct extent_buffer *eb)
5164{
5165 unsigned long i;
5166 struct page *page;
5167 unsigned long num_pages;
5168
5169 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5170 num_pages = num_extent_pages(eb->start, eb->len);
5171 for (i = 0; i < num_pages; i++) {
5172 page = eb->pages[i];
5173 if (page)
5174 ClearPageUptodate(page);
5175 }
5176 return 0;
5177}
5178
5179int set_extent_buffer_uptodate(struct extent_buffer *eb)
5180{
5181 unsigned long i;
5182 struct page *page;
5183 unsigned long num_pages;
5184
5185 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5186 num_pages = num_extent_pages(eb->start, eb->len);
5187 for (i = 0; i < num_pages; i++) {
5188 page = eb->pages[i];
5189 SetPageUptodate(page);
5190 }
5191 return 0;
5192}
5193
5194int extent_buffer_uptodate(struct extent_buffer *eb)
5195{
5196 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5197}
5198
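/*
 * Read the pages backing @eb from disk.  With WAIT_NONE the reads are only
 * started (pages that cannot be locked without blocking are skipped); with
 * WAIT_COMPLETE the function waits for every page and returns -EIO if any
 * of them is not uptodate afterwards.
 */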
5199int read_extent_buffer_pages(struct extent_io_tree *tree,
5200 struct extent_buffer *eb, u64 start, int wait,
5201 get_extent_t *get_extent, int mirror_num)
5202{
5203 unsigned long i;
5204 unsigned long start_i;
5205 struct page *page;
5206 int err;
5207 int ret = 0;
5208 int locked_pages = 0;
5209 int all_uptodate = 1;
5210 unsigned long num_pages;
5211 unsigned long num_reads = 0;
5212 struct bio *bio = NULL;
5213 unsigned long bio_flags = 0;
5214
5215 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5216 return 0;
5217
5218 if (start) {
5219 WARN_ON(start < eb->start);
5220 start_i = (start >> PAGE_CACHE_SHIFT) -
5221 (eb->start >> PAGE_CACHE_SHIFT);
5222 } else {
5223 start_i = 0;
5224 }
5225
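	/*
	 * First pass: lock the pages from start_i onwards (trylock only for
	 * WAIT_NONE) and count how many are not yet uptodate.
	 */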
5226 num_pages = num_extent_pages(eb->start, eb->len);
5227 for (i = start_i; i < num_pages; i++) {
5228 page = eb->pages[i];
5229 if (wait == WAIT_NONE) {
5230 if (!trylock_page(page))
5231 goto unlock_exit;
5232 } else {
5233 lock_page(page);
5234 }
5235 locked_pages++;
5236 if (!PageUptodate(page)) {
5237 num_reads++;
5238 all_uptodate = 0;
5239 }
5240 }
5241 if (all_uptodate) {
5242 if (start_i == 0)
5243 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5244 goto unlock_exit;
5245 }
5246
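	/*
	 * Second pass: submit reads for the pages that are not yet uptodate;
	 * io_pages records how many page reads are outstanding for this
	 * buffer.
	 */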
5247 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5248 eb->read_mirror = 0;
5249 atomic_set(&eb->io_pages, num_reads);
5250 for (i = start_i; i < num_pages; i++) {
5251 page = eb->pages[i];
5252 if (!PageUptodate(page)) {
5253 ClearPageError(page);
5254 err = __extent_read_full_page(tree, page,
5255 get_extent, &bio,
5256 mirror_num, &bio_flags,
5257 READ | REQ_META);
5258 if (err)
5259 ret = err;
5260 } else {
5261 unlock_page(page);
5262 }
5263 }
5264
5265 if (bio) {
5266 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
5267 bio_flags);
5268 if (err)
5269 return err;
5270 }
5271
5272 if (ret || wait != WAIT_COMPLETE)
5273 return ret;
5274
5275 for (i = start_i; i < num_pages; i++) {
5276 page = eb->pages[i];
5277 wait_on_page_locked(page);
5278 if (!PageUptodate(page))
5279 ret = -EIO;
5280 }
5281
5282 return ret;
5283
5284unlock_exit:
5285 i = start_i;
5286 while (locked_pages > 0) {
5287 page = eb->pages[i];
5288 i++;
5289 unlock_page(page);
5290 locked_pages--;
5291 }
5292 return ret;
5293}
5294
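/*
 * Copy @len bytes starting at byte offset @start inside @eb into the linear
 * buffer @dstv, walking the backing pages and handling page boundaries.
 */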
5295void read_extent_buffer(struct extent_buffer *eb, void *dstv,
5296 unsigned long start,
5297 unsigned long len)
5298{
5299 size_t cur;
5300 size_t offset;
5301 struct page *page;
5302 char *kaddr;
5303 char *dst = (char *)dstv;
5304 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5305 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5306
5307 WARN_ON(start > eb->len);
5308 WARN_ON(start + len > eb->start + eb->len);
5309
5310 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5311
5312 while (len > 0) {
5313 page = eb->pages[i];
5314
5315 cur = min(len, (PAGE_CACHE_SIZE - offset));
5316 kaddr = page_address(page);
5317 memcpy(dst, kaddr + offset, cur);
5318
5319 dst += cur;
5320 len -= cur;
5321 offset = 0;
5322 i++;
5323 }
5324}
5325
5326int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
5327 unsigned long start,
5328 unsigned long len)
5329{
5330 size_t cur;
5331 size_t offset;
5332 struct page *page;
5333 char *kaddr;
5334 char __user *dst = (char __user *)dstv;
5335 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5336 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5337 int ret = 0;
5338
5339 WARN_ON(start > eb->len);
5340 WARN_ON(start + len > eb->start + eb->len);
5341
5342 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5343
5344 while (len > 0) {
5345 page = eb->pages[i];
5346
5347 cur = min(len, (PAGE_CACHE_SIZE - offset));
5348 kaddr = page_address(page);
5349 if (copy_to_user(dst, kaddr + offset, cur)) {
5350 ret = -EFAULT;
5351 break;
5352 }
5353
5354 dst += cur;
5355 len -= cur;
5356 offset = 0;
5357 i++;
5358 }
5359
5360 return ret;
5361}
5362
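/*
 * Map a region of @eb for direct access.  This only succeeds when the
 * requested [start, start + min_len) range lies within a single page;
 * otherwise -EINVAL is returned.  On success, *map points at the mapped
 * bytes and *map_start/*map_len describe the mapped range relative to the
 * start of the buffer.
 */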
5363int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
5364 unsigned long min_len, char **map,
5365 unsigned long *map_start,
5366 unsigned long *map_len)
5367{
5368 size_t offset = start & (PAGE_CACHE_SIZE - 1);
5369 char *kaddr;
5370 struct page *p;
5371 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5372 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5373 unsigned long end_i = (start_offset + start + min_len - 1) >>
5374 PAGE_CACHE_SHIFT;
5375
5376 if (i != end_i)
5377 return -EINVAL;
5378
5379 if (i == 0) {
5380 offset = start_offset;
5381 *map_start = 0;
5382 } else {
5383 offset = 0;
5384 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
5385 }
5386
5387 if (start + min_len > eb->len) {
5388 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
5389 "wanted %lu %lu\n",
5390 eb->start, eb->len, start, min_len);
5391 return -EINVAL;
5392 }
5393
5394 p = eb->pages[i];
5395 kaddr = page_address(p);
5396 *map = kaddr + offset;
5397 *map_len = PAGE_CACHE_SIZE - offset;
5398 return 0;
5399}
5400
5401int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
5402 unsigned long start,
5403 unsigned long len)
5404{
5405 size_t cur;
5406 size_t offset;
5407 struct page *page;
5408 char *kaddr;
5409 char *ptr = (char *)ptrv;
5410 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5411 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5412 int ret = 0;
5413
5414 WARN_ON(start > eb->len);
5415 WARN_ON(start + len > eb->start + eb->len);
5416
5417 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5418
5419 while (len > 0) {
5420 page = eb->pages[i];
5421
5422 cur = min(len, (PAGE_CACHE_SIZE - offset));
5423
5424 kaddr = page_address(page);
5425 ret = memcmp(ptr, kaddr + offset, cur);
5426 if (ret)
5427 break;
5428
5429 ptr += cur;
5430 len -= cur;
5431 offset = 0;
5432 i++;
5433 }
5434 return ret;
5435}
5436
5437void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5438 unsigned long start, unsigned long len)
5439{
5440 size_t cur;
5441 size_t offset;
5442 struct page *page;
5443 char *kaddr;
5444 char *src = (char *)srcv;
5445 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5446 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5447
5448 WARN_ON(start > eb->len);
5449 WARN_ON(start + len > eb->start + eb->len);
5450
5451 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5452
5453 while (len > 0) {
5454 page = eb->pages[i];
5455 WARN_ON(!PageUptodate(page));
5456
5457 cur = min(len, PAGE_CACHE_SIZE - offset);
5458 kaddr = page_address(page);
5459 memcpy(kaddr + offset, src, cur);
5460
5461 src += cur;
5462 len -= cur;
5463 offset = 0;
5464 i++;
5465 }
5466}
5467
5468void memset_extent_buffer(struct extent_buffer *eb, char c,
5469 unsigned long start, unsigned long len)
5470{
5471 size_t cur;
5472 size_t offset;
5473 struct page *page;
5474 char *kaddr;
5475 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5476 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5477
5478 WARN_ON(start > eb->len);
5479 WARN_ON(start + len > eb->start + eb->len);
5480
5481 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5482
5483 while (len > 0) {
5484 page = eb->pages[i];
5485 WARN_ON(!PageUptodate(page));
5486
5487 cur = min(len, PAGE_CACHE_SIZE - offset);
5488 kaddr = page_address(page);
5489 memset(kaddr + offset, c, cur);
5490
5491 len -= cur;
5492 offset = 0;
5493 i++;
5494 }
5495}
5496
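/*
 * Copy @len bytes from @src_offset in the @src buffer to @dst_offset in the
 * @dst buffer, one destination page at a time.
 */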
5497void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5498 unsigned long dst_offset, unsigned long src_offset,
5499 unsigned long len)
5500{
5501 u64 dst_len = dst->len;
5502 size_t cur;
5503 size_t offset;
5504 struct page *page;
5505 char *kaddr;
5506 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5507 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5508
5509 WARN_ON(src->len != dst_len);
5510
5511 offset = (start_offset + dst_offset) &
5512 (PAGE_CACHE_SIZE - 1);
5513
5514 while (len > 0) {
5515 page = dst->pages[i];
5516 WARN_ON(!PageUptodate(page));
5517
5518 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
5519
5520 kaddr = page_address(page);
5521 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5522
5523 src_offset += cur;
5524 len -= cur;
5525 offset = 0;
5526 i++;
5527 }
5528}
5529
5530static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5531{
5532 unsigned long distance = (src > dst) ? src - dst : dst - src;
5533 return distance < len;
5534}
5535
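/*
 * Copy @len bytes between two (possibly identical) pages, using memmove
 * when the source and destination ranges overlap within the same page.
 */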
5536static void copy_pages(struct page *dst_page, struct page *src_page,
5537 unsigned long dst_off, unsigned long src_off,
5538 unsigned long len)
5539{
5540 char *dst_kaddr = page_address(dst_page);
5541 char *src_kaddr;
5542 int must_memmove = 0;
5543
5544 if (dst_page != src_page) {
5545 src_kaddr = page_address(src_page);
5546 } else {
5547 src_kaddr = dst_kaddr;
5548 if (areas_overlap(src_off, dst_off, len))
5549 must_memmove = 1;
5550 }
5551
5552 if (must_memmove)
5553 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5554 else
5555 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5556}
5557
5558void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5559 unsigned long src_offset, unsigned long len)
5560{
5561 size_t cur;
5562 size_t dst_off_in_page;
5563 size_t src_off_in_page;
5564 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5565 unsigned long dst_i;
5566 unsigned long src_i;
5567
	if (src_offset + len > dst->len) {
		printk(KERN_ERR "BTRFS: memcpy bogus src_offset %lu move "
		       "len %lu dst len %lu\n", src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk(KERN_ERR "BTRFS: memcpy bogus dst_offset %lu move "
		       "len %lu dst len %lu\n", dst_offset, len, dst->len);
		BUG_ON(1);
	}
5578
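	/*
	 * Walk the range page by page, limiting each copy to what fits in
	 * both the source page and the destination page.
	 */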
5579 while (len > 0) {
5580 dst_off_in_page = (start_offset + dst_offset) &
5581 (PAGE_CACHE_SIZE - 1);
5582 src_off_in_page = (start_offset + src_offset) &
5583 (PAGE_CACHE_SIZE - 1);
5584
5585 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5586 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
5587
5588 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
5589 src_off_in_page));
5590 cur = min_t(unsigned long, cur,
5591 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
5592
5593 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5594 dst_off_in_page, src_off_in_page, cur);
5595
5596 src_offset += cur;
5597 dst_offset += cur;
5598 len -= cur;
5599 }
5600}
5601
5602void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5603 unsigned long src_offset, unsigned long len)
5604{
5605 size_t cur;
5606 size_t dst_off_in_page;
5607 size_t src_off_in_page;
5608 unsigned long dst_end = dst_offset + len - 1;
5609 unsigned long src_end = src_offset + len - 1;
5610 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5611 unsigned long dst_i;
5612 unsigned long src_i;
5613
	if (src_offset + len > dst->len) {
		printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
		       "len %lu dst len %lu\n", src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
		       "len %lu dst len %lu\n", dst_offset, len, dst->len);
		BUG_ON(1);
	}
5624 if (dst_offset < src_offset) {
5625 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5626 return;
5627 }
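	/*
	 * Destination is ahead of the source within the same buffer, so copy
	 * backwards from the end of the range to avoid clobbering bytes that
	 * still need to be read.
	 */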
5628 while (len > 0) {
5629 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
5630 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5631
5632 dst_off_in_page = (start_offset + dst_end) &
5633 (PAGE_CACHE_SIZE - 1);
5634 src_off_in_page = (start_offset + src_end) &
5635 (PAGE_CACHE_SIZE - 1);
5636
5637 cur = min_t(unsigned long, len, src_off_in_page + 1);
5638 cur = min(cur, dst_off_in_page + 1);
5639 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5640 dst_off_in_page - cur + 1,
5641 src_off_in_page - cur + 1, cur);
5642
5643 dst_end -= cur;
5644 src_end -= cur;
5645 len -= cur;
5646 }
5647}
5648
5649int try_release_extent_buffer(struct page *page)
5650{
5651 struct extent_buffer *eb;
5652
	/*
	 * Take the mapping's private_lock so nobody can attach this page to
	 * an extent buffer while we look at page->private.
	 */
	spin_lock(&page->mapping->private_lock);
5658 if (!PagePrivate(page)) {
5659 spin_unlock(&page->mapping->private_lock);
5660 return 1;
5661 }
5662
5663 eb = (struct extent_buffer *)page->private;
5664 BUG_ON(!eb);
5665
	/*
	 * A refcount of one means only the tree reference is left.  Take
	 * refs_lock so the count cannot change underneath us; if anyone else
	 * holds a reference, or the buffer is dirty or under writeback, we
	 * must not release it.
	 */
	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5673 spin_unlock(&eb->refs_lock);
5674 spin_unlock(&page->mapping->private_lock);
5675 return 0;
5676 }
5677 spin_unlock(&page->mapping->private_lock);
5678
	/*
	 * If TREE_REF is not set, the remaining reference is a real one held
	 * elsewhere, so just back off; the buffer will be freed soon enough.
	 */
	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
5684 spin_unlock(&eb->refs_lock);
5685 return 0;
5686 }
5687
5688 return release_extent_buffer(eb);
5689}
5690