1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/pagemap.h>
6#include <linux/page-flags.h>
7#include <linux/spinlock.h>
8#include <linux/blkdev.h>
9#include <linux/swap.h>
10#include <linux/writeback.h>
11#include <linux/pagevec.h>
12#include <linux/prefetch.h>
13#include <linux/cleancache.h>
14#include "extent_io.h"
15#include "extent_map.h"
16#include "ctree.h"
17#include "btrfs_inode.h"
18#include "volumes.h"
19#include "check-integrity.h"
20#include "locking.h"
21#include "rcu-string.h"
22#include "backref.h"
23
24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache;
26static struct bio_set *btrfs_bioset;
27
28static inline bool extent_state_in_tree(const struct extent_state *state)
29{
30 return !RB_EMPTY_NODE(&state->rb_node);
31}
32
33#ifdef CONFIG_BTRFS_DEBUG
34static LIST_HEAD(buffers);
35static LIST_HEAD(states);
36
37static DEFINE_SPINLOCK(leak_lock);
38
39static inline
40void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
41{
42 unsigned long flags;
43
44 spin_lock_irqsave(&leak_lock, flags);
45 list_add(new, head);
46 spin_unlock_irqrestore(&leak_lock, flags);
47}
48
49static inline
50void btrfs_leak_debug_del(struct list_head *entry)
51{
52 unsigned long flags;
53
54 spin_lock_irqsave(&leak_lock, flags);
55 list_del(entry);
56 spin_unlock_irqrestore(&leak_lock, flags);
57}
58
59static inline
60void btrfs_leak_debug_check(void)
61{
62 struct extent_state *state;
63 struct extent_buffer *eb;
64
65 while (!list_empty(&states)) {
66 state = list_entry(states.next, struct extent_state, leak_list);
67 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
68 state->start, state->end, state->state,
69 extent_state_in_tree(state),
70 atomic_read(&state->refs));
71 list_del(&state->leak_list);
72 kmem_cache_free(extent_state_cache, state);
73 }
74
75 while (!list_empty(&buffers)) {
76 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
		       eb->start, eb->len, atomic_read(&eb->refs));
80 list_del(&eb->leak_list);
81 kmem_cache_free(extent_buffer_cache, eb);
82 }
83}
84
85#define btrfs_debug_check_extent_io_range(tree, start, end) \
86 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
87static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 struct extent_io_tree *tree, u64 start, u64 end)
89{
90 struct inode *inode;
91 u64 isize;
92
93 if (!tree->mapping)
94 return;
95
96 inode = tree->mapping->host;
97 isize = i_size_read(inode);
98 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
99 btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
100 "%s: ino %llu isize %llu odd range [%llu,%llu]",
101 caller, btrfs_ino(inode), isize, start, end);
102 }
103}
104#else
105#define btrfs_leak_debug_add(new, head) do {} while (0)
106#define btrfs_leak_debug_del(entry) do {} while (0)
107#define btrfs_leak_debug_check() do {} while (0)
108#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
109#endif
110
111#define BUFFER_LRU_MAX 64
112
113struct tree_entry {
114 u64 start;
115 u64 end;
116 struct rb_node rb_node;
117};
118
119struct extent_page_data {
120 struct bio *bio;
121 struct extent_io_tree *tree;
122 get_extent_t *get_extent;
123 unsigned long bio_flags;
124

	/*
	 * Tells writepage not to lock the state bits for this range;
	 * it still does the unlocking.
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use WRITE_SYNC */
	unsigned int sync_io:1;
132};
133
134static void add_extent_changeset(struct extent_state *state, unsigned bits,
135 struct extent_changeset *changeset,
136 int set)
137{
138 int ret;
139
140 if (!changeset)
141 return;
142 if (set && (state->state & bits) == bits)
143 return;
144 if (!set && (state->state & bits) == 0)
145 return;
146 changeset->bytes_changed += state->end - state->start + 1;
147 ret = ulist_add(changeset->range_changed, state->start, state->end,
148 GFP_ATOMIC);
149
150 BUG_ON(ret < 0);
151}
152
153static noinline void flush_write_bio(void *data);
154static inline struct btrfs_fs_info *
155tree_fs_info(struct extent_io_tree *tree)
156{
157 if (!tree->mapping)
158 return NULL;
159 return btrfs_sb(tree->mapping->host->i_sb);
160}
161
162int __init extent_io_init(void)
163{
164 extent_state_cache = kmem_cache_create("btrfs_extent_state",
165 sizeof(struct extent_state), 0,
166 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
167 if (!extent_state_cache)
168 return -ENOMEM;
169
170 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
171 sizeof(struct extent_buffer), 0,
172 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
173 if (!extent_buffer_cache)
174 goto free_state_cache;
175
176 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
177 offsetof(struct btrfs_io_bio, bio));
178 if (!btrfs_bioset)
179 goto free_buffer_cache;
180
181 if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
182 goto free_bioset;
183
184 return 0;
185
186free_bioset:
187 bioset_free(btrfs_bioset);
188 btrfs_bioset = NULL;
189
190free_buffer_cache:
191 kmem_cache_destroy(extent_buffer_cache);
192 extent_buffer_cache = NULL;
193
194free_state_cache:
195 kmem_cache_destroy(extent_state_cache);
196 extent_state_cache = NULL;
197 return -ENOMEM;
198}
199
200void extent_io_exit(void)
201{
202 btrfs_leak_debug_check();
203
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy the caches.
	 */
	rcu_barrier();
209 if (extent_state_cache)
210 kmem_cache_destroy(extent_state_cache);
211 if (extent_buffer_cache)
212 kmem_cache_destroy(extent_buffer_cache);
213 if (btrfs_bioset)
214 bioset_free(btrfs_bioset);
215}
216
217void extent_io_tree_init(struct extent_io_tree *tree,
218 struct address_space *mapping)
219{
220 tree->state = RB_ROOT;
221 tree->ops = NULL;
222 tree->dirty_bytes = 0;
223 spin_lock_init(&tree->lock);
224 tree->mapping = mapping;
225}
226
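/*
 * Allocate a new extent_state with a reference count of one.  Returns
 * NULL if the slab allocation fails.
 */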
227static struct extent_state *alloc_extent_state(gfp_t mask)
228{
229 struct extent_state *state;
230
231 state = kmem_cache_alloc(extent_state_cache, mask);
232 if (!state)
233 return state;
234 state->state = 0;
235 state->private = 0;
236 RB_CLEAR_NODE(&state->rb_node);
237 btrfs_leak_debug_add(&state->leak_list, &states);
238 atomic_set(&state->refs, 1);
239 init_waitqueue_head(&state->wq);
240 trace_alloc_extent_state(state, mask, _RET_IP_);
241 return state;
242}
243
244void free_extent_state(struct extent_state *state)
245{
246 if (!state)
247 return;
248 if (atomic_dec_and_test(&state->refs)) {
249 WARN_ON(extent_state_in_tree(state));
250 btrfs_leak_debug_del(&state->leak_list);
251 trace_free_extent_state(state, _RET_IP_);
252 kmem_cache_free(extent_state_cache, state);
253 }
254}
255
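/*
 * Insert 'node' into the rb-tree keyed by [start, end] ranges.  When
 * p_in/parent_in are provided (from a previous search) the lookup is
 * skipped and the node is linked directly at that position.  Returns the
 * existing node if 'offset' is already covered, or NULL on success.
 */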
256static struct rb_node *tree_insert(struct rb_root *root,
257 struct rb_node *search_start,
258 u64 offset,
259 struct rb_node *node,
260 struct rb_node ***p_in,
261 struct rb_node **parent_in)
262{
263 struct rb_node **p;
264 struct rb_node *parent = NULL;
265 struct tree_entry *entry;
266
267 if (p_in && parent_in) {
268 p = *p_in;
269 parent = *parent_in;
270 goto do_insert;
271 }
272
273 p = search_start ? &search_start : &root->rb_node;
274 while (*p) {
275 parent = *p;
276 entry = rb_entry(parent, struct tree_entry, rb_node);
277
278 if (offset < entry->start)
279 p = &(*p)->rb_left;
280 else if (offset > entry->end)
281 p = &(*p)->rb_right;
282 else
283 return parent;
284 }
285
286do_insert:
287 rb_link_node(node, parent, p);
288 rb_insert_color(node, root);
289 return NULL;
290}
291
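/*
 * Search the tree for an entry that contains 'offset'.  If no exact match
 * is found, NULL is returned and, when requested, prev_ret/next_ret are
 * set to the nearest entries after/before the offset, and p_ret/parent_ret
 * to the insertion point for a new node.
 */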
292static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
293 struct rb_node **prev_ret,
294 struct rb_node **next_ret,
295 struct rb_node ***p_ret,
296 struct rb_node **parent_ret)
297{
298 struct rb_root *root = &tree->state;
299 struct rb_node **n = &root->rb_node;
300 struct rb_node *prev = NULL;
301 struct rb_node *orig_prev = NULL;
302 struct tree_entry *entry;
303 struct tree_entry *prev_entry = NULL;
304
305 while (*n) {
306 prev = *n;
307 entry = rb_entry(prev, struct tree_entry, rb_node);
308 prev_entry = entry;
309
310 if (offset < entry->start)
311 n = &(*n)->rb_left;
312 else if (offset > entry->end)
313 n = &(*n)->rb_right;
314 else
315 return *n;
316 }
317
318 if (p_ret)
319 *p_ret = n;
320 if (parent_ret)
321 *parent_ret = prev;
322
323 if (prev_ret) {
324 orig_prev = prev;
325 while (prev && offset > prev_entry->end) {
326 prev = rb_next(prev);
327 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
328 }
329 *prev_ret = prev;
330 prev = orig_prev;
331 }
332
333 if (next_ret) {
334 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
335 while (prev && offset < prev_entry->start) {
336 prev = rb_prev(prev);
337 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
338 }
339 *next_ret = prev;
340 }
341 return NULL;
342}
343
344static inline struct rb_node *
345tree_search_for_insert(struct extent_io_tree *tree,
346 u64 offset,
347 struct rb_node ***p_ret,
348 struct rb_node **parent_ret)
349{
350 struct rb_node *prev = NULL;
351 struct rb_node *ret;
352
353 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
354 if (!ret)
355 return prev;
356 return ret;
357}
358
359static inline struct rb_node *tree_search(struct extent_io_tree *tree,
360 u64 offset)
361{
362 return tree_search_for_insert(tree, offset, NULL, NULL);
363}
364
365static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
366 struct extent_state *other)
367{
368 if (tree->ops && tree->ops->merge_extent_hook)
369 tree->ops->merge_extent_hook(tree->mapping->host, new,
370 other);
371}
372
/*
 * Look for adjacent states with identical bits and merge them into the
 * given state.  Extents with EXTENT_IOBITS or EXTENT_BOUNDARY set are
 * never merged, because the end_io handlers need to operate on them
 * without sleeping.
 */
382static void merge_state(struct extent_io_tree *tree,
383 struct extent_state *state)
384{
385 struct extent_state *other;
386 struct rb_node *other_node;
387
388 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
389 return;
390
391 other_node = rb_prev(&state->rb_node);
392 if (other_node) {
393 other = rb_entry(other_node, struct extent_state, rb_node);
394 if (other->end == state->start - 1 &&
395 other->state == state->state) {
396 merge_cb(tree, state, other);
397 state->start = other->start;
398 rb_erase(&other->rb_node, &tree->state);
399 RB_CLEAR_NODE(&other->rb_node);
400 free_extent_state(other);
401 }
402 }
403 other_node = rb_next(&state->rb_node);
404 if (other_node) {
405 other = rb_entry(other_node, struct extent_state, rb_node);
406 if (other->start == state->end + 1 &&
407 other->state == state->state) {
408 merge_cb(tree, state, other);
409 state->end = other->end;
410 rb_erase(&other->rb_node, &tree->state);
411 RB_CLEAR_NODE(&other->rb_node);
412 free_extent_state(other);
413 }
414 }
415}
416
417static void set_state_cb(struct extent_io_tree *tree,
418 struct extent_state *state, unsigned *bits)
419{
420 if (tree->ops && tree->ops->set_bit_hook)
421 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
422}
423
424static void clear_state_cb(struct extent_io_tree *tree,
425 struct extent_state *state, unsigned *bits)
426{
427 if (tree->ops && tree->ops->clear_bit_hook)
428 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
429}
430
431static void set_state_bits(struct extent_io_tree *tree,
432 struct extent_state *state, unsigned *bits,
433 struct extent_changeset *changeset);
434
/*
 * Insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case
 * the caller is responsible for freeing the state struct.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
445static int insert_state(struct extent_io_tree *tree,
446 struct extent_state *state, u64 start, u64 end,
447 struct rb_node ***p,
448 struct rb_node **parent,
449 unsigned *bits, struct extent_changeset *changeset)
450{
451 struct rb_node *node;
452
453 if (end < start)
454 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
455 end, start);
456 state->start = start;
457 state->end = end;
458
459 set_state_bits(tree, state, bits, changeset);
460
461 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
462 if (node) {
463 struct extent_state *found;
464 found = rb_entry(node, struct extent_state, rb_node);
		pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
		       found->start, found->end, start, end);
468 return -EEXIST;
469 }
470 merge_state(tree, state);
471 return 0;
472}
473
474static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
475 u64 split)
476{
477 if (tree->ops && tree->ops->split_extent_hook)
478 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
479}
480
/*
 * Split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates
 * the offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].
 * After calling there are two extent state structs in the tree:
 *	prealloc: [orig->start, split - 1]
 *	orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function; they must be held by
 * the caller.
 */
495static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
496 struct extent_state *prealloc, u64 split)
497{
498 struct rb_node *node;
499
500 split_cb(tree, orig, split);
501
502 prealloc->start = orig->start;
503 prealloc->end = split - 1;
504 prealloc->state = orig->state;
505 orig->start = split;
506
507 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
508 &prealloc->rb_node, NULL, NULL);
509 if (node) {
510 free_extent_state(prealloc);
511 return -EEXIST;
512 }
513 return 0;
514}
515
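/* Return the extent_state that follows 'state' in the tree, or NULL. */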
516static struct extent_state *next_state(struct extent_state *state)
517{
518 struct rb_node *next = rb_next(&state->rb_node);
519 if (next)
520 return rb_entry(next, struct extent_state, rb_node);
521 else
522 return NULL;
523}
524
/*
 * Utility function to clear some bits in an extent state struct.  It will
 * optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree.
 */
532static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
533 struct extent_state *state,
534 unsigned *bits, int wake,
535 struct extent_changeset *changeset)
536{
537 struct extent_state *next;
538 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
539
540 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
541 u64 range = state->end - state->start + 1;
542 WARN_ON(range > tree->dirty_bytes);
543 tree->dirty_bytes -= range;
544 }
545 clear_state_cb(tree, state, bits);
546 add_extent_changeset(state, bits_to_clear, changeset, 0);
547 state->state &= ~bits_to_clear;
548 if (wake)
549 wake_up(&state->wq);
550 if (state->state == 0) {
551 next = next_state(state);
552 if (extent_state_in_tree(state)) {
553 rb_erase(&state->rb_node, &tree->state);
554 RB_CLEAR_NODE(&state->rb_node);
555 free_extent_state(state);
556 } else {
557 WARN_ON(1);
558 }
559 } else {
560 merge_state(tree, state);
561 next = next_state(state);
562 }
563 return next;
564}
565
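/*
 * Reuse an already preallocated extent_state if available, otherwise try
 * a GFP_ATOMIC allocation (which may fail and return NULL).
 */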
566static struct extent_state *
567alloc_extent_state_atomic(struct extent_state *prealloc)
568{
569 if (!prealloc)
570 prealloc = alloc_extent_state(GFP_ATOMIC);
571
572 return prealloc;
573}
574
575static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
576{
577 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
578 "Extent tree was modified by another "
579 "thread while locked.");
580}
581
/*
 * Clear some bits on a range in the tree.  This may require splitting or
 * inserting elements in the tree, so the gfp mask is used to indicate
 * which allocations or sleeping are allowed.
 *
 * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove the
 * given range from the tree regardless of state (i.e. for truncate).
 *
 * The range [start, end] is inclusive.
 *
 * This takes the tree lock.
 */
594static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
595 unsigned bits, int wake, int delete,
596 struct extent_state **cached_state,
597 gfp_t mask, struct extent_changeset *changeset)
598{
599 struct extent_state *state;
600 struct extent_state *cached;
601 struct extent_state *prealloc = NULL;
602 struct rb_node *node;
603 u64 last_end;
604 int err;
605 int clear = 0;
606
607 btrfs_debug_check_extent_io_range(tree, start, end);
608
609 if (bits & EXTENT_DELALLOC)
610 bits |= EXTENT_NORESERVE;
611
612 if (delete)
613 bits |= ~EXTENT_CTLBITS;
614 bits |= EXTENT_FIRST_DELALLOC;
615
616 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
617 clear = 1;
618again:
619 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care about allocation failure here, because we might
		 * end up not needing the pre-allocated extent state at all,
		 * which is the case if the tree only has extent states that
		 * cover our input range and nothing else.  If we end up
		 * needing a new extent state we allocate it later.
		 */
627 prealloc = alloc_extent_state(mask);
628 }
629
630 spin_lock(&tree->lock);
631 if (cached_state) {
632 cached = *cached_state;
633
634 if (clear) {
635 *cached_state = NULL;
636 cached_state = NULL;
637 }
638
639 if (cached && extent_state_in_tree(cached) &&
640 cached->start <= start && cached->end > start) {
641 if (clear)
642 atomic_dec(&cached->refs);
643 state = cached;
644 goto hit_next;
645 }
646 if (clear)
647 free_extent_state(cached);
648 }
649
	/*
	 * This search will find the extents that end after our range starts.
	 */
653 node = tree_search(tree, start);
654 if (!node)
655 goto out;
656 state = rb_entry(node, struct extent_state, rb_node);
657hit_next:
658 if (state->start > end)
659 goto out;
660 WARN_ON(state->end < start);
661 last_end = state->end;
662
663
664 if (!(state->state & bits)) {
665 state = next_state(state);
666 goto next;
667 }
668
	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we clear the desired
	 * bit on it.
	 */
685 if (state->start < start) {
686 prealloc = alloc_extent_state_atomic(prealloc);
687 BUG_ON(!prealloc);
688 err = split_state(tree, state, prealloc, start);
689 if (err)
690 extent_io_tree_panic(tree, err);
691
692 prealloc = NULL;
693 if (err)
694 goto out;
695 if (state->end <= end) {
696 state = clear_state_bit(tree, state, &bits, wake,
697 changeset);
698 goto next;
699 }
700 goto search_again;
701 }
702
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and clear the bit on the first half.
	 */
708 if (state->start <= end && state->end > end) {
709 prealloc = alloc_extent_state_atomic(prealloc);
710 BUG_ON(!prealloc);
711 err = split_state(tree, state, prealloc, end + 1);
712 if (err)
713 extent_io_tree_panic(tree, err);
714
715 if (wake)
716 wake_up(&state->wq);
717
718 clear_state_bit(tree, prealloc, &bits, wake, changeset);
719
720 prealloc = NULL;
721 goto out;
722 }
723
724 state = clear_state_bit(tree, state, &bits, wake, changeset);
725next:
726 if (last_end == (u64)-1)
727 goto out;
728 start = last_end + 1;
729 if (start <= end && state && !need_resched())
730 goto hit_next;
731 goto search_again;
732
733out:
734 spin_unlock(&tree->lock);
735 if (prealloc)
736 free_extent_state(prealloc);
737
738 return 0;
739
740search_again:
741 if (start > end)
742 goto out;
743 spin_unlock(&tree->lock);
744 if (gfpflags_allow_blocking(mask))
745 cond_resched();
746 goto again;
747}
748
749static void wait_on_state(struct extent_io_tree *tree,
750 struct extent_state *state)
751 __releases(tree->lock)
752 __acquires(tree->lock)
753{
754 DEFINE_WAIT(wait);
755 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
756 spin_unlock(&tree->lock);
757 schedule();
758 spin_lock(&tree->lock);
759 finish_wait(&state->wq, &wait);
760}
761
/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function.
 */
767static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
768 unsigned long bits)
769{
770 struct extent_state *state;
771 struct rb_node *node;
772
773 btrfs_debug_check_extent_io_range(tree, start, end);
774
775 spin_lock(&tree->lock);
776again:
777 while (1) {
778
779
780
781
782 node = tree_search(tree, start);
783process_node:
784 if (!node)
785 break;
786
787 state = rb_entry(node, struct extent_state, rb_node);
788
789 if (state->start > end)
790 goto out;
791
792 if (state->state & bits) {
793 start = state->start;
794 atomic_inc(&state->refs);
795 wait_on_state(tree, state);
796 free_extent_state(state);
797 goto again;
798 }
799 start = state->end + 1;
800
801 if (start > end)
802 break;
803
804 if (!cond_resched_lock(&tree->lock)) {
805 node = rb_next(node);
806 goto process_node;
807 }
808 }
809out:
810 spin_unlock(&tree->lock);
811}
812
813static void set_state_bits(struct extent_io_tree *tree,
814 struct extent_state *state,
815 unsigned *bits, struct extent_changeset *changeset)
816{
817 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
818
819 set_state_cb(tree, state, bits);
820 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
821 u64 range = state->end - state->start + 1;
822 tree->dirty_bytes += range;
823 }
824 add_extent_changeset(state, bits_to_set, changeset, 1);
825 state->state |= bits_to_set;
826}
827
828static void cache_state_if_flags(struct extent_state *state,
829 struct extent_state **cached_ptr,
830 unsigned flags)
831{
832 if (cached_ptr && !(*cached_ptr)) {
833 if (!flags || (state->state & flags)) {
834 *cached_ptr = state;
835 atomic_inc(&state->refs);
836 }
837 }
838}
839
840static void cache_state(struct extent_state *state,
841 struct extent_state **cached_ptr)
842{
843 return cache_state_if_flags(state, cached_ptr,
844 EXTENT_IOBITS | EXTENT_BOUNDARY);
845}
846
/*
 * Set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if
 * some part of the range already has the desired bits set.  The start of
 * the existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive.  This takes the tree lock.
 */
858static int __must_check
859__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
860 unsigned bits, unsigned exclusive_bits,
861 u64 *failed_start, struct extent_state **cached_state,
862 gfp_t mask, struct extent_changeset *changeset)
863{
864 struct extent_state *state;
865 struct extent_state *prealloc = NULL;
866 struct rb_node *node;
867 struct rb_node **p;
868 struct rb_node *parent;
869 int err = 0;
870 u64 last_start;
871 u64 last_end;
872
873 btrfs_debug_check_extent_io_range(tree, start, end);
874
875 bits |= EXTENT_FIRST_DELALLOC;
876again:
877 if (!prealloc && gfpflags_allow_blocking(mask)) {
878 prealloc = alloc_extent_state(mask);
879 BUG_ON(!prealloc);
880 }
881
882 spin_lock(&tree->lock);
883 if (cached_state && *cached_state) {
884 state = *cached_state;
885 if (state->start <= start && state->end > start &&
886 extent_state_in_tree(state)) {
887 node = &state->rb_node;
888 goto hit_next;
889 }
890 }
891
	/*
	 * This search will find all the extents that end after our range
	 * starts.
	 */
895 node = tree_search_for_insert(tree, start, &p, &parent);
896 if (!node) {
897 prealloc = alloc_extent_state_atomic(prealloc);
898 BUG_ON(!prealloc);
899 err = insert_state(tree, prealloc, start, end,
900 &p, &parent, &bits, changeset);
901 if (err)
902 extent_io_tree_panic(tree, err);
903
904 cache_state(prealloc, cached_state);
905 prealloc = NULL;
906 goto out;
907 }
908 state = rb_entry(node, struct extent_state, rb_node);
909hit_next:
910 last_start = state->start;
911 last_end = state->end;
912
	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going.
	 */
919 if (state->start == start && state->end <= end) {
920 if (state->state & exclusive_bits) {
921 *failed_start = state->start;
922 err = -EEXIST;
923 goto out;
924 }
925
926 set_state_bits(tree, state, &bits, changeset);
927 cache_state(state, cached_state);
928 merge_state(tree, state);
929 if (last_end == (u64)-1)
930 goto out;
931 start = last_end + 1;
932 state = next_state(state);
933 if (start < end && state && state->start == start &&
934 !need_resched())
935 goto hit_next;
936 goto search_again;
937 }
938
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired
	 * bit on it.
	 */
955 if (state->start < start) {
956 if (state->state & exclusive_bits) {
957 *failed_start = start;
958 err = -EEXIST;
959 goto out;
960 }
961
962 prealloc = alloc_extent_state_atomic(prealloc);
963 BUG_ON(!prealloc);
964 err = split_state(tree, state, prealloc, start);
965 if (err)
966 extent_io_tree_panic(tree, err);
967
968 prealloc = NULL;
969 if (err)
970 goto out;
971 if (state->end <= end) {
972 set_state_bits(tree, state, &bits, changeset);
973 cache_state(state, cached_state);
974 merge_state(tree, state);
975 if (last_end == (u64)-1)
976 goto out;
977 start = last_end + 1;
978 state = next_state(state);
979 if (start < end && state && state->start == start &&
980 !need_resched())
981 goto hit_next;
982 }
983 goto search_again;
984 }
985
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and ignore the
	 * extent we found.
	 */
992 if (state->start > start) {
993 u64 this_end;
994 if (end < last_start)
995 this_end = end;
996 else
997 this_end = last_start - 1;
998
999 prealloc = alloc_extent_state_atomic(prealloc);
1000 BUG_ON(!prealloc);
1001
1002
1003
1004
1005
1006 err = insert_state(tree, prealloc, start, this_end,
1007 NULL, NULL, &bits, changeset);
1008 if (err)
1009 extent_io_tree_panic(tree, err);
1010
1011 cache_state(prealloc, cached_state);
1012 prealloc = NULL;
1013 start = this_end + 1;
1014 goto search_again;
1015 }
1016
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 *
	 * We need to split the extent, and set the bit on the first half.
	 */
1022 if (state->start <= end && state->end > end) {
1023 if (state->state & exclusive_bits) {
1024 *failed_start = start;
1025 err = -EEXIST;
1026 goto out;
1027 }
1028
1029 prealloc = alloc_extent_state_atomic(prealloc);
1030 BUG_ON(!prealloc);
1031 err = split_state(tree, state, prealloc, end + 1);
1032 if (err)
1033 extent_io_tree_panic(tree, err);
1034
1035 set_state_bits(tree, prealloc, &bits, changeset);
1036 cache_state(prealloc, cached_state);
1037 merge_state(tree, prealloc);
1038 prealloc = NULL;
1039 goto out;
1040 }
1041
1042 goto search_again;
1043
1044out:
1045 spin_unlock(&tree->lock);
1046 if (prealloc)
1047 free_extent_state(prealloc);
1048
1049 return err;
1050
1051search_again:
1052 if (start > end)
1053 goto out;
1054 spin_unlock(&tree->lock);
1055 if (gfpflags_allow_blocking(mask))
1056 cond_resched();
1057 goto again;
1058}
1059
1060int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   unsigned bits, u64 *failed_start,
1062 struct extent_state **cached_state, gfp_t mask)
1063{
1064 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1065 cached_state, mask, NULL);
1066}
1067
/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 *			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state: state that we're going to cache
 * @mask:	the allocation mask
 *
 * This will go through and set bits for the given range.  If any states
 * exist already in this range they are set with the given bit and cleared
 * of the clear_bits.  This is only meant to be used by things that are
 * mergeable, ie. converting from say DELALLOC to DIRTY.  This is not meant
 * to be used with ranges that carry EXTENT_LOCKED.
 */
1086int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1087 unsigned bits, unsigned clear_bits,
1088 struct extent_state **cached_state, gfp_t mask)
1089{
1090 struct extent_state *state;
1091 struct extent_state *prealloc = NULL;
1092 struct rb_node *node;
1093 struct rb_node **p;
1094 struct rb_node *parent;
1095 int err = 0;
1096 u64 last_start;
1097 u64 last_end;
1098 bool first_iteration = true;
1099
1100 btrfs_debug_check_extent_io_range(tree, start, end);
1101
1102again:
1103 if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration.  We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed.  We'll only know this
		 * after locking the tree.
		 */
1111 prealloc = alloc_extent_state(mask);
1112 if (!prealloc && !first_iteration)
1113 return -ENOMEM;
1114 }
1115
1116 spin_lock(&tree->lock);
1117 if (cached_state && *cached_state) {
1118 state = *cached_state;
1119 if (state->start <= start && state->end > start &&
1120 extent_state_in_tree(state)) {
1121 node = &state->rb_node;
1122 goto hit_next;
1123 }
1124 }
1125
1126
1127
1128
1129
1130 node = tree_search_for_insert(tree, start, &p, &parent);
1131 if (!node) {
1132 prealloc = alloc_extent_state_atomic(prealloc);
1133 if (!prealloc) {
1134 err = -ENOMEM;
1135 goto out;
1136 }
1137 err = insert_state(tree, prealloc, start, end,
1138 &p, &parent, &bits, NULL);
1139 if (err)
1140 extent_io_tree_panic(tree, err);
1141 cache_state(prealloc, cached_state);
1142 prealloc = NULL;
1143 goto out;
1144 }
1145 state = rb_entry(node, struct extent_state, rb_node);
1146hit_next:
1147 last_start = state->start;
1148 last_end = state->end;
1149
1150
1151
1152
1153
1154
1155
1156 if (state->start == start && state->end <= end) {
1157 set_state_bits(tree, state, &bits, NULL);
1158 cache_state(state, cached_state);
1159 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1160 if (last_end == (u64)-1)
1161 goto out;
1162 start = last_end + 1;
1163 if (start < end && state && state->start == start &&
1164 !need_resched())
1165 goto hit_next;
1166 goto search_again;
1167 }
1168
	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on the
	 * second half.
	 *
	 * If the extent we found extends past our range, we just split and
	 * search again.  It'll get split again the next time though.
	 *
	 * If the extent we found is inside our range, we set the desired
	 * bit on it.
	 */
1185 if (state->start < start) {
1186 prealloc = alloc_extent_state_atomic(prealloc);
1187 if (!prealloc) {
1188 err = -ENOMEM;
1189 goto out;
1190 }
1191 err = split_state(tree, state, prealloc, start);
1192 if (err)
1193 extent_io_tree_panic(tree, err);
1194 prealloc = NULL;
1195 if (err)
1196 goto out;
1197 if (state->end <= end) {
1198 set_state_bits(tree, state, &bits, NULL);
1199 cache_state(state, cached_state);
1200 state = clear_state_bit(tree, state, &clear_bits, 0,
1201 NULL);
1202 if (last_end == (u64)-1)
1203 goto out;
1204 start = last_end + 1;
1205 if (start < end && state && state->start == start &&
1206 !need_resched())
1207 goto hit_next;
1208 }
1209 goto search_again;
1210 }
1211
1212
1213
1214
1215
1216
1217
1218 if (state->start > start) {
1219 u64 this_end;
1220 if (end < last_start)
1221 this_end = end;
1222 else
1223 this_end = last_start - 1;
1224
1225 prealloc = alloc_extent_state_atomic(prealloc);
1226 if (!prealloc) {
1227 err = -ENOMEM;
1228 goto out;
1229 }
1230
1231
1232
1233
1234
1235 err = insert_state(tree, prealloc, start, this_end,
1236 NULL, NULL, &bits, NULL);
1237 if (err)
1238 extent_io_tree_panic(tree, err);
1239 cache_state(prealloc, cached_state);
1240 prealloc = NULL;
1241 start = this_end + 1;
1242 goto search_again;
1243 }
1244
1245
1246
1247
1248
1249
1250 if (state->start <= end && state->end > end) {
1251 prealloc = alloc_extent_state_atomic(prealloc);
1252 if (!prealloc) {
1253 err = -ENOMEM;
1254 goto out;
1255 }
1256
1257 err = split_state(tree, state, prealloc, end + 1);
1258 if (err)
1259 extent_io_tree_panic(tree, err);
1260
1261 set_state_bits(tree, prealloc, &bits, NULL);
1262 cache_state(prealloc, cached_state);
1263 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1264 prealloc = NULL;
1265 goto out;
1266 }
1267
1268 goto search_again;
1269
1270out:
1271 spin_unlock(&tree->lock);
1272 if (prealloc)
1273 free_extent_state(prealloc);
1274
1275 return err;
1276
1277search_again:
1278 if (start > end)
1279 goto out;
1280 spin_unlock(&tree->lock);
1281 if (gfpflags_allow_blocking(mask))
1282 cond_resched();
1283 first_iteration = false;
1284 goto again;
1285}
1286
1287
1288int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1289 unsigned bits, gfp_t mask,
1290 struct extent_changeset *changeset)
1291{
	/*
	 * We don't support EXTENT_LOCKED yet, as the changeset records any
	 * bits changed, so for the EXTENT_LOCKED case it would either fail
	 * with -EEXIST or record the whole range.
	 */
1298 BUG_ON(bits & EXTENT_LOCKED);
1299
1300 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, mask,
1301 changeset);
1302}
1303
1304int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1305 unsigned bits, int wake, int delete,
1306 struct extent_state **cached, gfp_t mask)
1307{
1308 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1309 cached, mask, NULL);
1310}
1311
1312int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1313 unsigned bits, gfp_t mask,
1314 struct extent_changeset *changeset)
1315{
1316
1317
1318
1319
1320 BUG_ON(bits & EXTENT_LOCKED);
1321
1322 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask,
1323 changeset);
1324}
1325
/*
 * Either insert or lock the extent state struct(s) covering [start, end].
 * Waits for any conflicting EXTENT_LOCKED range to be unlocked before
 * returning with the range locked.
 */
1330int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1331 struct extent_state **cached_state)
1332{
1333 int err;
1334 u64 failed_start;
1335
1336 while (1) {
1337 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1338 EXTENT_LOCKED, &failed_start,
1339 cached_state, GFP_NOFS, NULL);
1340 if (err == -EEXIST) {
1341 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1342 start = failed_start;
1343 } else
1344 break;
1345 WARN_ON(start > end);
1346 }
1347 return err;
1348}
1349
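/*
 * Try to lock [start, end] without blocking.  Returns 1 if the whole
 * range was locked, 0 otherwise (any partially taken lock is rolled back).
 */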
1350int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1351{
1352 int err;
1353 u64 failed_start;
1354
1355 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1356 &failed_start, NULL, GFP_NOFS, NULL);
1357 if (err == -EEXIST) {
1358 if (failed_start > start)
1359 clear_extent_bit(tree, start, failed_start - 1,
1360 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
1361 return 0;
1362 }
1363 return 1;
1364}
1365
1366void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1367{
1368 unsigned long index = start >> PAGE_CACHE_SHIFT;
1369 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1370 struct page *page;
1371
1372 while (index <= end_index) {
1373 page = find_get_page(inode->i_mapping, index);
1374 BUG_ON(!page);
1375 clear_page_dirty_for_io(page);
1376 page_cache_release(page);
1377 index++;
1378 }
1379}
1380
1381void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1382{
1383 unsigned long index = start >> PAGE_CACHE_SHIFT;
1384 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1385 struct page *page;
1386
1387 while (index <= end_index) {
1388 page = find_get_page(inode->i_mapping, index);
1389 BUG_ON(!page);
1390 __set_page_dirty_nobuffers(page);
1391 account_page_redirty(page);
1392 page_cache_release(page);
1393 index++;
1394 }
1395}
1396
/*
 * Helper to mark every page in the byte range [start, end] as under
 * writeback.
 */
1400static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1401{
1402 unsigned long index = start >> PAGE_CACHE_SHIFT;
1403 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1404 struct page *page;
1405
1406 while (index <= end_index) {
1407 page = find_get_page(tree->mapping, index);
1408 BUG_ON(!page);
1409 set_page_writeback(page);
1410 page_cache_release(page);
1411 index++;
1412 }
1413}
1414
/*
 * Find the first state struct with 'bits' set after 'start', and return
 * it.  tree->lock must be held.  NULL is returned if nothing was found
 * after 'start'.
 */
1419static struct extent_state *
1420find_first_extent_bit_state(struct extent_io_tree *tree,
1421 u64 start, unsigned bits)
1422{
1423 struct rb_node *node;
1424 struct extent_state *state;
1425
1426
1427
1428
1429
1430 node = tree_search(tree, start);
1431 if (!node)
1432 goto out;
1433
1434 while (1) {
1435 state = rb_entry(node, struct extent_state, rb_node);
1436 if (state->end >= start && (state->state & bits))
1437 return state;
1438
1439 node = rb_next(node);
1440 if (!node)
1441 break;
1442 }
1443out:
1444 return NULL;
1445}
1446
/*
 * Find the first offset in the io tree with 'bits' set.  Zero is returned
 * if something was found, and *start_ret and *end_ret are set to reflect
 * the state struct that was found.
 *
 * If nothing was found, 1 is returned.  If something was found, return 0.
 */
1454int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1455 u64 *start_ret, u64 *end_ret, unsigned bits,
1456 struct extent_state **cached_state)
1457{
1458 struct extent_state *state;
1459 struct rb_node *n;
1460 int ret = 1;
1461
1462 spin_lock(&tree->lock);
1463 if (cached_state && *cached_state) {
1464 state = *cached_state;
1465 if (state->end == start - 1 && extent_state_in_tree(state)) {
1466 n = rb_next(&state->rb_node);
1467 while (n) {
1468 state = rb_entry(n, struct extent_state,
1469 rb_node);
1470 if (state->state & bits)
1471 goto got_it;
1472 n = rb_next(n);
1473 }
1474 free_extent_state(*cached_state);
1475 *cached_state = NULL;
1476 goto out;
1477 }
1478 free_extent_state(*cached_state);
1479 *cached_state = NULL;
1480 }
1481
1482 state = find_first_extent_bit_state(tree, start, bits);
1483got_it:
1484 if (state) {
1485 cache_state_if_flags(state, cached_state, 0);
1486 *start_ret = state->start;
1487 *end_ret = state->end;
1488 ret = 0;
1489 }
1490out:
1491 spin_unlock(&tree->lock);
1492 return ret;
1493}
1494
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range.
 *
 * Returns non-zero if something was found, zero if nothing was in the
 * tree.
 */
1501static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1502 u64 *start, u64 *end, u64 max_bytes,
1503 struct extent_state **cached_state)
1504{
1505 struct rb_node *node;
1506 struct extent_state *state;
1507 u64 cur_start = *start;
1508 u64 found = 0;
1509 u64 total_bytes = 0;
1510
1511 spin_lock(&tree->lock);
1512
1513
1514
1515
1516
1517 node = tree_search(tree, cur_start);
1518 if (!node) {
1519 if (!found)
1520 *end = (u64)-1;
1521 goto out;
1522 }
1523
1524 while (1) {
1525 state = rb_entry(node, struct extent_state, rb_node);
1526 if (found && (state->start != cur_start ||
1527 (state->state & EXTENT_BOUNDARY))) {
1528 goto out;
1529 }
1530 if (!(state->state & EXTENT_DELALLOC)) {
1531 if (!found)
1532 *end = state->end;
1533 goto out;
1534 }
1535 if (!found) {
1536 *start = state->start;
1537 *cached_state = state;
1538 atomic_inc(&state->refs);
1539 }
1540 found++;
1541 *end = state->end;
1542 cur_start = state->end + 1;
1543 node = rb_next(node);
1544 total_bytes += state->end - state->start + 1;
1545 if (total_bytes >= max_bytes)
1546 break;
1547 if (!node)
1548 break;
1549 }
1550out:
1551 spin_unlock(&tree->lock);
1552 return found;
1553}
1554
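/*
 * Unlock all pages in the byte range [start, end] except locked_page,
 * which the caller keeps locked.
 */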
1555static noinline void __unlock_for_delalloc(struct inode *inode,
1556 struct page *locked_page,
1557 u64 start, u64 end)
1558{
1559 int ret;
1560 struct page *pages[16];
1561 unsigned long index = start >> PAGE_CACHE_SHIFT;
1562 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1563 unsigned long nr_pages = end_index - index + 1;
1564 int i;
1565
1566 if (index == locked_page->index && end_index == index)
1567 return;
1568
1569 while (nr_pages > 0) {
1570 ret = find_get_pages_contig(inode->i_mapping, index,
1571 min_t(unsigned long, nr_pages,
1572 ARRAY_SIZE(pages)), pages);
1573 for (i = 0; i < ret; i++) {
1574 if (pages[i] != locked_page)
1575 unlock_page(pages[i]);
1576 page_cache_release(pages[i]);
1577 }
1578 nr_pages -= ret;
1579 index += ret;
1580 cond_resched();
1581 }
1582}
1583
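/*
 * Lock every page in the delalloc range except locked_page, which the
 * caller already holds.  Returns 0 on success, or -EAGAIN if a page has
 * been truncated or is no longer dirty, in which case all pages locked so
 * far are unlocked again.
 */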
1584static noinline int lock_delalloc_pages(struct inode *inode,
1585 struct page *locked_page,
1586 u64 delalloc_start,
1587 u64 delalloc_end)
1588{
1589 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
1590 unsigned long start_index = index;
1591 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
1592 unsigned long pages_locked = 0;
1593 struct page *pages[16];
1594 unsigned long nrpages;
1595 int ret;
1596 int i;
1597
1598
1599 if (index == locked_page->index && index == end_index)
1600 return 0;
1601
1602
1603 nrpages = end_index - index + 1;
1604 while (nrpages > 0) {
1605 ret = find_get_pages_contig(inode->i_mapping, index,
1606 min_t(unsigned long,
1607 nrpages, ARRAY_SIZE(pages)), pages);
1608 if (ret == 0) {
1609 ret = -EAGAIN;
1610 goto done;
1611 }
1612
1613 for (i = 0; i < ret; i++) {
1614
1615
1616
1617
1618 if (pages[i] != locked_page) {
1619 lock_page(pages[i]);
1620 if (!PageDirty(pages[i]) ||
1621 pages[i]->mapping != inode->i_mapping) {
1622 ret = -EAGAIN;
1623 unlock_page(pages[i]);
1624 page_cache_release(pages[i]);
1625 goto done;
1626 }
1627 }
1628 page_cache_release(pages[i]);
1629 pages_locked++;
1630 }
1631 nrpages -= ret;
1632 index += ret;
1633 cond_resched();
1634 }
1635 ret = 0;
1636done:
1637 if (ret && pages_locked) {
1638 __unlock_for_delalloc(inode, locked_page,
1639 delalloc_start,
1640 ((u64)(start_index + pages_locked - 1)) <<
1641 PAGE_CACHE_SHIFT);
1642 }
1643 return ret;
1644}
1645
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes', and lock both the pages and the corresponding
 * range in the io tree.  start and end are used to return the range found.
 *
 * Returns non-zero if delalloc bytes were found, zero otherwise.
 */
1652STATIC u64 find_lock_delalloc_range(struct inode *inode,
1653 struct extent_io_tree *tree,
1654 struct page *locked_page, u64 *start,
1655 u64 *end, u64 max_bytes)
1656{
1657 u64 delalloc_start;
1658 u64 delalloc_end;
1659 u64 found;
1660 struct extent_state *cached_state = NULL;
1661 int ret;
1662 int loops = 0;
1663
1664again:
1665
1666 delalloc_start = *start;
1667 delalloc_end = 0;
1668 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1669 max_bytes, &cached_state);
1670 if (!found || delalloc_end <= *start) {
1671 *start = delalloc_start;
1672 *end = delalloc_end;
1673 free_extent_state(cached_state);
1674 return 0;
1675 }
1676
	/*
	 * start comes from the offset of locked_page.  We have to lock
	 * pages in order, so we can't process delalloc bytes before
	 * locked_page.
	 */
1682 if (delalloc_start < *start)
1683 delalloc_start = *start;
1684
1685
1686
1687
1688 if (delalloc_end + 1 - delalloc_start > max_bytes)
1689 delalloc_end = delalloc_start + max_bytes - 1;
1690
1691
1692 ret = lock_delalloc_pages(inode, locked_page,
1693 delalloc_start, delalloc_end);
1694 if (ret == -EAGAIN) {
1695
1696
1697
1698 free_extent_state(cached_state);
1699 cached_state = NULL;
1700 if (!loops) {
1701 max_bytes = PAGE_CACHE_SIZE;
1702 loops = 1;
1703 goto again;
1704 } else {
1705 found = 0;
1706 goto out_failed;
1707 }
1708 }
1709 BUG_ON(ret);
1710
1711
1712 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1713
1714
1715 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1716 EXTENT_DELALLOC, 1, cached_state);
1717 if (!ret) {
1718 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1719 &cached_state, GFP_NOFS);
1720 __unlock_for_delalloc(inode, locked_page,
1721 delalloc_start, delalloc_end);
1722 cond_resched();
1723 goto again;
1724 }
1725 free_extent_state(cached_state);
1726 *start = delalloc_start;
1727 *end = delalloc_end;
1728out_failed:
1729 return found;
1730}
1731
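/*
 * Clear the given bits on [start, end] in the io tree and apply the
 * requested page operations (clear dirty, set/end writeback, set error,
 * unlock) to every page in the range except locked_page.
 */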
1732void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1733 struct page *locked_page,
1734 unsigned clear_bits,
1735 unsigned long page_ops)
1736{
1737 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1738 int ret;
1739 struct page *pages[16];
1740 unsigned long index = start >> PAGE_CACHE_SHIFT;
1741 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1742 unsigned long nr_pages = end_index - index + 1;
1743 int i;
1744
1745 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1746 if (page_ops == 0)
1747 return;
1748
1749 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1750 mapping_set_error(inode->i_mapping, -EIO);
1751
1752 while (nr_pages > 0) {
1753 ret = find_get_pages_contig(inode->i_mapping, index,
1754 min_t(unsigned long,
1755 nr_pages, ARRAY_SIZE(pages)), pages);
1756 for (i = 0; i < ret; i++) {
1757
1758 if (page_ops & PAGE_SET_PRIVATE2)
1759 SetPagePrivate2(pages[i]);
1760
1761 if (pages[i] == locked_page) {
1762 page_cache_release(pages[i]);
1763 continue;
1764 }
1765 if (page_ops & PAGE_CLEAR_DIRTY)
1766 clear_page_dirty_for_io(pages[i]);
1767 if (page_ops & PAGE_SET_WRITEBACK)
1768 set_page_writeback(pages[i]);
1769 if (page_ops & PAGE_SET_ERROR)
1770 SetPageError(pages[i]);
1771 if (page_ops & PAGE_END_WRITEBACK)
1772 end_page_writeback(pages[i]);
1773 if (page_ops & PAGE_UNLOCK)
1774 unlock_page(pages[i]);
1775 page_cache_release(pages[i]);
1776 }
1777 nr_pages -= ret;
1778 index += ret;
1779 cond_resched();
1780 }
1781}
1782
/*
 * Count the number of bytes in the tree that have the given bit(s) set.
 * This can be fairly slow, except for EXTENT_DIRTY which is cached.
 * The total number of bytes found is returned.
 */
1788u64 count_range_bits(struct extent_io_tree *tree,
1789 u64 *start, u64 search_end, u64 max_bytes,
1790 unsigned bits, int contig)
1791{
1792 struct rb_node *node;
1793 struct extent_state *state;
1794 u64 cur_start = *start;
1795 u64 total_bytes = 0;
1796 u64 last = 0;
1797 int found = 0;
1798
1799 if (WARN_ON(search_end <= cur_start))
1800 return 0;
1801
1802 spin_lock(&tree->lock);
1803 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1804 total_bytes = tree->dirty_bytes;
1805 goto out;
1806 }
1807
1808
1809
1810
1811 node = tree_search(tree, cur_start);
1812 if (!node)
1813 goto out;
1814
1815 while (1) {
1816 state = rb_entry(node, struct extent_state, rb_node);
1817 if (state->start > search_end)
1818 break;
1819 if (contig && found && state->start > last + 1)
1820 break;
1821 if (state->end >= cur_start && (state->state & bits) == bits) {
1822 total_bytes += min(search_end, state->end) + 1 -
1823 max(cur_start, state->start);
1824 if (total_bytes >= max_bytes)
1825 break;
1826 if (!found) {
1827 *start = max(cur_start, state->start);
1828 found = 1;
1829 }
1830 last = state->end;
1831 } else if (contig && found) {
1832 break;
1833 }
1834 node = rb_next(node);
1835 if (!node)
1836 break;
1837 }
1838out:
1839 spin_unlock(&tree->lock);
1840 return total_bytes;
1841}
1842
/*
 * Set the private field for a given byte offset in the tree.  If there
 * isn't an extent_state starting exactly at that offset, -ENOENT is
 * returned and nothing is changed.
 */
1847static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1848{
1849 struct rb_node *node;
1850 struct extent_state *state;
1851 int ret = 0;
1852
1853 spin_lock(&tree->lock);
1854
1855
1856
1857
1858 node = tree_search(tree, start);
1859 if (!node) {
1860 ret = -ENOENT;
1861 goto out;
1862 }
1863 state = rb_entry(node, struct extent_state, rb_node);
1864 if (state->start != start) {
1865 ret = -ENOENT;
1866 goto out;
1867 }
1868 state->private = private;
1869out:
1870 spin_unlock(&tree->lock);
1871 return ret;
1872}
1873
1874int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1875{
1876 struct rb_node *node;
1877 struct extent_state *state;
1878 int ret = 0;
1879
1880 spin_lock(&tree->lock);
1881
1882
1883
1884
1885 node = tree_search(tree, start);
1886 if (!node) {
1887 ret = -ENOENT;
1888 goto out;
1889 }
1890 state = rb_entry(node, struct extent_state, rb_node);
1891 if (state->start != start) {
1892 ret = -ENOENT;
1893 goto out;
1894 }
1895 *private = state->private;
1896out:
1897 spin_unlock(&tree->lock);
1898 return ret;
1899}
1900
/*
 * Search a range in the state tree for a given mask.  If 'filled' == 1,
 * this returns 1 only if every extent in the range has the bits set.
 * Otherwise, 1 is returned if any bit in the range is found set.
 */
1907int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1908 unsigned bits, int filled, struct extent_state *cached)
1909{
1910 struct extent_state *state = NULL;
1911 struct rb_node *node;
1912 int bitset = 0;
1913
1914 spin_lock(&tree->lock);
1915 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1916 cached->end > start)
1917 node = &cached->rb_node;
1918 else
1919 node = tree_search(tree, start);
1920 while (node && start <= end) {
1921 state = rb_entry(node, struct extent_state, rb_node);
1922
1923 if (filled && state->start > start) {
1924 bitset = 0;
1925 break;
1926 }
1927
1928 if (state->start > end)
1929 break;
1930
1931 if (state->state & bits) {
1932 bitset = 1;
1933 if (!filled)
1934 break;
1935 } else if (filled) {
1936 bitset = 0;
1937 break;
1938 }
1939
1940 if (state->end == (u64)-1)
1941 break;
1942
1943 start = state->end + 1;
1944 if (start > end)
1945 break;
1946 node = rb_next(node);
1947 if (!node) {
1948 if (filled)
1949 bitset = 0;
1950 break;
1951 }
1952 }
1953 spin_unlock(&tree->lock);
1954 return bitset;
1955}
1956
/*
 * Helper function to set a given page up to date if all the extents in
 * the tree for that page are up to date.
 */
1961static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1962{
1963 u64 start = page_offset(page);
1964 u64 end = start + PAGE_CACHE_SIZE - 1;
1965 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1966 SetPageUptodate(page);
1967}
1968
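/*
 * Drop the io_failure_record for rec->start: clear its bits in the
 * failure tree and in the inode's io tree, then free the record itself.
 */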
1969int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1970{
1971 int ret;
1972 int err = 0;
1973 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1974
1975 set_state_private(failure_tree, rec->start, 0);
1976 ret = clear_extent_bits(failure_tree, rec->start,
1977 rec->start + rec->len - 1,
1978 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
1979 if (ret)
1980 err = ret;
1981
1982 ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
1983 rec->start + rec->len - 1,
1984 EXTENT_DAMAGED, GFP_NOFS);
1985 if (ret && !err)
1986 err = ret;
1987
1988 kfree(rec);
1989 return err;
1990}
1991
/*
 * Repair a single read failure: write the good copy of the data in 'page'
 * (at 'pg_offset') back to the given logical offset on the specified
 * mirror, bypassing the normal write path.  The caller must ensure the
 * filesystem is not read-only.  Returns 0 on success (or if the block
 * lives on a RAID5/6 stripe, which cannot be repaired this way yet), and
 * -EIO otherwise.
 */
2002int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
2003 struct page *page, unsigned int pg_offset, int mirror_num)
2004{
2005 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2006 struct bio *bio;
2007 struct btrfs_device *dev;
2008 u64 map_length = 0;
2009 u64 sector;
2010 struct btrfs_bio *bbio = NULL;
2011 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
2012 int ret;
2013
2014 ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
2015 BUG_ON(!mirror_num);
2016
2017
2018 if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
2019 return 0;
2020
2021 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2022 if (!bio)
2023 return -EIO;
2024 bio->bi_iter.bi_size = 0;
2025 map_length = length;
2026
2027 ret = btrfs_map_block(fs_info, WRITE, logical,
2028 &map_length, &bbio, mirror_num);
2029 if (ret) {
2030 bio_put(bio);
2031 return -EIO;
2032 }
2033 BUG_ON(mirror_num != bbio->mirror_num);
2034 sector = bbio->stripes[mirror_num-1].physical >> 9;
2035 bio->bi_iter.bi_sector = sector;
2036 dev = bbio->stripes[mirror_num-1].dev;
2037 btrfs_put_bbio(bbio);
2038 if (!dev || !dev->bdev || !dev->writeable) {
2039 bio_put(bio);
2040 return -EIO;
2041 }
2042 bio->bi_bdev = dev->bdev;
2043 bio_add_page(bio, page, length, pg_offset);
2044
2045 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
2046
2047 bio_put(bio);
2048 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2049 return -EIO;
2050 }
2051
2052 btrfs_info_rl_in_rcu(fs_info,
2053 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2054 btrfs_ino(inode), start,
2055 rcu_str_deref(dev->name), sector);
2056 bio_put(bio);
2057 return 0;
2058}
2059
2060int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
2061 int mirror_num)
2062{
2063 u64 start = eb->start;
2064 unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
2065 int ret = 0;
2066
2067 if (root->fs_info->sb->s_flags & MS_RDONLY)
2068 return -EROFS;
2069
2070 for (i = 0; i < num_pages; i++) {
2071 struct page *p = eb->pages[i];
2072
2073 ret = repair_io_failure(root->fs_info->btree_inode, start,
2074 PAGE_CACHE_SIZE, start, p,
2075 start - page_offset(p), mirror_num);
2076 if (ret)
2077 break;
2078 start += PAGE_CACHE_SIZE;
2079 }
2080
2081 return ret;
2082}
2083
/*
 * Each time an IO finishes, we do a fast check in the IO failure tree
 * to see if we need to process or clean up an io_failure_record.
 */
2088int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2089 unsigned int pg_offset)
2090{
2091 u64 private;
2092 u64 private_failure;
2093 struct io_failure_record *failrec;
2094 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2095 struct extent_state *state;
2096 int num_copies;
2097 int ret;
2098
2099 private = 0;
2100 ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
2101 (u64)-1, 1, EXTENT_DIRTY, 0);
2102 if (!ret)
2103 return 0;
2104
2105 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
2106 &private_failure);
2107 if (ret)
2108 return 0;
2109
2110 failrec = (struct io_failure_record *)(unsigned long) private_failure;
2111 BUG_ON(!failrec->this_mirror);
2112
2113 if (failrec->in_validation) {
2114
2115 pr_debug("clean_io_failure: freeing dummy error at %llu\n",
2116 failrec->start);
2117 goto out;
2118 }
2119 if (fs_info->sb->s_flags & MS_RDONLY)
2120 goto out;
2121
2122 spin_lock(&BTRFS_I(inode)->io_tree.lock);
2123 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
2124 failrec->start,
2125 EXTENT_LOCKED);
2126 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2127
2128 if (state && state->start <= failrec->start &&
2129 state->end >= failrec->start + failrec->len - 1) {
2130 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2131 failrec->len);
2132 if (num_copies > 1) {
2133 repair_io_failure(inode, start, failrec->len,
2134 failrec->logical, page,
2135 pg_offset, failrec->failed_mirror);
2136 }
2137 }
2138
2139out:
2140 free_io_failure(inode, failrec);
2141
2142 return 0;
2143}
2144
/*
 * Can be called when
 * - the extent lock is held
 * - under an ordered extent
 * - the inode is being freed
 */
2151void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
2152{
2153 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2154 struct io_failure_record *failrec;
2155 struct extent_state *state, *next;
2156
2157 if (RB_EMPTY_ROOT(&failure_tree->state))
2158 return;
2159
2160 spin_lock(&failure_tree->lock);
2161 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2162 while (state) {
2163 if (state->start > end)
2164 break;
2165
2166 ASSERT(state->end <= end);
2167
2168 next = next_state(state);
2169
2170 failrec = (struct io_failure_record *)(unsigned long)state->private;
2171 free_extent_state(state);
2172 kfree(failrec);
2173
2174 state = next;
2175 }
2176 spin_unlock(&failure_tree->lock);
2177}
2178
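/*
 * Look up the io_failure_record for [start, end] in the inode's failure
 * tree, or create and insert a new one (resolving the logical address of
 * the extent and its compression type) if this is the first failure seen
 * for the range.
 */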
2179int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2180 struct io_failure_record **failrec_ret)
2181{
2182 struct io_failure_record *failrec;
2183 u64 private;
2184 struct extent_map *em;
2185 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2186 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2187 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2188 int ret;
2189 u64 logical;
2190
2191 ret = get_state_private(failure_tree, start, &private);
2192 if (ret) {
2193 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2194 if (!failrec)
2195 return -ENOMEM;
2196
2197 failrec->start = start;
2198 failrec->len = end - start + 1;
2199 failrec->this_mirror = 0;
2200 failrec->bio_flags = 0;
2201 failrec->in_validation = 0;
2202
2203 read_lock(&em_tree->lock);
2204 em = lookup_extent_mapping(em_tree, start, failrec->len);
2205 if (!em) {
2206 read_unlock(&em_tree->lock);
2207 kfree(failrec);
2208 return -EIO;
2209 }
2210
2211 if (em->start > start || em->start + em->len <= start) {
2212 free_extent_map(em);
2213 em = NULL;
2214 }
2215 read_unlock(&em_tree->lock);
2216 if (!em) {
2217 kfree(failrec);
2218 return -EIO;
2219 }
2220
2221 logical = start - em->start;
2222 logical = em->block_start + logical;
2223 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2224 logical = em->block_start;
2225 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2226 extent_set_compress_type(&failrec->bio_flags,
2227 em->compress_type);
2228 }
2229
2230 pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
2231 logical, start, failrec->len);
2232
2233 failrec->logical = logical;
2234 free_extent_map(em);
2235
2236
2237 ret = set_extent_bits(failure_tree, start, end,
2238 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
2239 if (ret >= 0)
2240 ret = set_state_private(failure_tree, start,
2241 (u64)(unsigned long)failrec);
2242
2243 if (ret >= 0)
2244 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
2245 GFP_NOFS);
2246 if (ret < 0) {
2247 kfree(failrec);
2248 return ret;
2249 }
2250 } else {
2251 failrec = (struct io_failure_record *)(unsigned long)private;
2252 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
2253 failrec->logical, failrec->start, failrec->len,
2254 failrec->in_validation);
2255
2256
2257
2258
2259
2260 }
2261
2262 *failrec_ret = failrec;
2263
2264 return 0;
2265}
2266
2267int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2268 struct io_failure_record *failrec, int failed_mirror)
2269{
2270 int num_copies;
2271
2272 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
2273 failrec->logical, failrec->len);
2274 if (num_copies == 1) {
		/*
		 * We have only a single copy of the data, so don't bother
		 * with retries or error correction; no matter what the error
		 * is, it is very likely to persist.
		 */
2280 pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2281 num_copies, failrec->this_mirror, failed_mirror);
2282 return 0;
2283 }
2284
	/*
	 * There are two premises:
	 *	a) deliver good data to the caller
	 *	b) correct the bad sectors on disk
	 */
2290 if (failed_bio->bi_vcnt > 1) {
2291
2292
2293
2294
2295
2296
2297
2298
2299 BUG_ON(failrec->in_validation);
2300 failrec->in_validation = 1;
2301 failrec->this_mirror = failed_mirror;
2302 } else {
2303
2304
2305
2306
2307
2308 if (failrec->in_validation) {
2309 BUG_ON(failrec->this_mirror != failed_mirror);
2310 failrec->in_validation = 0;
2311 failrec->this_mirror = 0;
2312 }
2313 failrec->failed_mirror = failed_mirror;
2314 failrec->this_mirror++;
2315 if (failrec->this_mirror == failed_mirror)
2316 failrec->this_mirror++;
2317 }
2318
2319 if (failrec->this_mirror > num_copies) {
2320 pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2321 num_copies, failrec->this_mirror, failed_mirror);
2322 return 0;
2323 }
2324
2325 return 1;
2326}
2327
2328
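/*
 * Build a single-page repair bio targeting the logical address recorded
 * in 'failrec', copying the relevant checksum from the failed bio so the
 * normal read completion path can verify the retried data.
 */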
2329struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2330 struct io_failure_record *failrec,
2331 struct page *page, int pg_offset, int icsum,
2332 bio_end_io_t *endio_func, void *data)
2333{
2334 struct bio *bio;
2335 struct btrfs_io_bio *btrfs_failed_bio;
2336 struct btrfs_io_bio *btrfs_bio;
2337
2338 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2339 if (!bio)
2340 return NULL;
2341
2342 bio->bi_end_io = endio_func;
2343 bio->bi_iter.bi_sector = failrec->logical >> 9;
2344 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2345 bio->bi_iter.bi_size = 0;
2346 bio->bi_private = data;
2347
2348 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2349 if (btrfs_failed_bio->csum) {
2350 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2351 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2352
2353 btrfs_bio = btrfs_io_bio(bio);
2354 btrfs_bio->csum = btrfs_bio->csum_inline;
2355 icsum *= csum_size;
2356 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2357 csum_size);
2358 }
2359
2360 bio_add_page(bio, page, failrec->len, pg_offset);
2361
2362 return bio;
2363}
2364
/*
 * This is a generic handler for readpage errors (the default
 * readpage_io_failed_hook).  If other copies exist, read those and write
 * back good data to the failed position.  It does not do any copy and
 * rewrite itself; it just resubmits the bio and the verification checks
 * run again after the resubmission.
 */
2373static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2374 struct page *page, u64 start, u64 end,
2375 int failed_mirror)
2376{
2377 struct io_failure_record *failrec;
2378 struct inode *inode = page->mapping->host;
2379 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2380 struct bio *bio;
2381 int read_mode;
2382 int ret;
2383
2384 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2385
2386 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2387 if (ret)
2388 return ret;
2389
2390 ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
2391 if (!ret) {
2392 free_io_failure(inode, failrec);
2393 return -EIO;
2394 }
2395
2396 if (failed_bio->bi_vcnt > 1)
2397 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2398 else
2399 read_mode = READ_SYNC;
2400
2401 phy_offset >>= inode->i_sb->s_blocksize_bits;
2402 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2403 start - page_offset(page),
2404 (int)phy_offset, failed_bio->bi_end_io,
2405 NULL);
2406 if (!bio) {
2407 free_io_failure(inode, failrec);
2408 return -EIO;
2409 }
2410
2411 pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
2412 read_mode, failrec->this_mirror, failrec->in_validation);
2413
2414 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2415 failrec->this_mirror,
2416 failrec->bio_flags, 0);
2417 if (ret) {
2418 free_io_failure(inode, failrec);
2419 bio_put(bio);
2420 }
2421
2422 return ret;
2423}
2424
2425
2426
2427void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2428{
2429 int uptodate = (err == 0);
2430 struct extent_io_tree *tree;
2431 int ret = 0;
2432
2433 tree = &BTRFS_I(page->mapping->host)->io_tree;
2434
2435 if (tree->ops && tree->ops->writepage_end_io_hook) {
2436 ret = tree->ops->writepage_end_io_hook(page, start,
2437 end, NULL, uptodate);
2438 if (ret)
2439 uptodate = 0;
2440 }
2441
2442 if (!uptodate) {
2443 ClearPageUptodate(page);
2444 SetPageError(page);
2445 ret = ret < 0 ? ret : -EIO;
2446 mapping_set_error(page->mapping, ret);
2447 }
2448}
2449
/*
 * After a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
2459static void end_bio_extent_writepage(struct bio *bio)
2460{
2461 struct bio_vec *bvec;
2462 u64 start;
2463 u64 end;
2464 int i;
2465
2466 bio_for_each_segment_all(bvec, bio, i) {
2467 struct page *page = bvec->bv_page;
2468
2469 /* We always issue full-page writes, but if some block
2470 * in a page fails to write, blk_update_request() will
2471 * advance bv_offset and adjust bv_len to compensate.
2472 * Print a warning for nonzero offsets, and an error
2473 * if they don't add up to a full page. */
2474 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2475 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2476 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2477 "partial page write in btrfs with offset %u and length %u",
2478 bvec->bv_offset, bvec->bv_len);
2479 else
2480 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2481 "incomplete page write in btrfs with offset %u and "
2482 "length %u",
2483 bvec->bv_offset, bvec->bv_len);
2484 }
2485
2486 start = page_offset(page);
2487 end = start + bvec->bv_offset + bvec->bv_len - 1;
2488
2489 end_extent_writepage(page, bio->bi_error, start, end);
2490 end_page_writeback(page);
2491 }
2492
2493 bio_put(bio);
2494}
2495
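/*
 * Mark the given range uptodate (when the tree tracks uptodate state)
 * and unlock it in the io tree once the read has completed.
 */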
2496static void
2497endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2498 int uptodate)
2499{
2500 struct extent_state *cached = NULL;
2501 u64 end = start + len - 1;
2502
2503 if (uptodate && tree->track_uptodate)
2504 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2505 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2506}
2507
2508/*
2509 * after a readpage IO is done, we need to:
2510 * clear the uptodate bits on error
2511 * set the uptodate bits if things worked
2512 * set the page up to date if all extents in the tree are uptodate
2513 * clear the lock bit in the extent tree
2514 * unlock the page if there are no other extents locked on it
2515 *
2516 * Scheduling is not allowed, so the extent state tree is expected
2517 * to have one and only one object corresponding to this IO.
2518 */
2519static void end_bio_extent_readpage(struct bio *bio)
2520{
2521 struct bio_vec *bvec;
2522 int uptodate = !bio->bi_error;
2523 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2524 struct extent_io_tree *tree;
2525 u64 offset = 0;
2526 u64 start;
2527 u64 end;
2528 u64 len;
2529 u64 extent_start = 0;
2530 u64 extent_len = 0;
2531 int mirror;
2532 int ret;
2533 int i;
2534
2535 bio_for_each_segment_all(bvec, bio, i) {
2536 struct page *page = bvec->bv_page;
2537 struct inode *inode = page->mapping->host;
2538
2539 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2540 "mirror=%u\n", (u64)bio->bi_iter.bi_sector,
2541 bio->bi_error, io_bio->mirror_num);
2542 tree = &BTRFS_I(inode)->io_tree;
2543
2544 /* We always issue full-page reads, but if some block
2545 * in a page fails to read, blk_update_request() will
2546 * advance bv_offset and adjust bv_len to compensate.
2547 * Print a warning for nonzero offsets, and an error
2548 * if they don't add up to a full page. */
2549 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2550 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2551 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2552 "partial page read in btrfs with offset %u and length %u",
2553 bvec->bv_offset, bvec->bv_len);
2554 else
2555 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2556 "incomplete page read in btrfs with offset %u and "
2557 "length %u",
2558 bvec->bv_offset, bvec->bv_len);
2559 }
2560
2561 start = page_offset(page);
2562 end = start + bvec->bv_offset + bvec->bv_len - 1;
2563 len = bvec->bv_len;
2564
2565 mirror = io_bio->mirror_num;
2566 if (likely(uptodate && tree->ops &&
2567 tree->ops->readpage_end_io_hook)) {
2568 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2569 page, start, end,
2570 mirror);
2571 if (ret)
2572 uptodate = 0;
2573 else
2574 clean_io_failure(inode, start, page, 0);
2575 }
2576
2577 if (likely(uptodate))
2578 goto readpage_ok;
2579
2580 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2581 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2582 if (!ret && !bio->bi_error)
2583 uptodate = 1;
2584 } else {
2585 /*
2586 * bio_readpage_error tries to repair the page by reading it
2587 * from another copy of the data.  When it returns 0 a repair
2588 * bio has been submitted and its completion handler will
2589 * validate the data and unlock the page, so all we do here is
2590 * advance the offset and continue with the next bvec.  A
2591 * non-zero return means no repair was possible and we fall
2592 * through to mark the page as having an error.
2593 */
2594
2595 ret = bio_readpage_error(bio, offset, page, start, end,
2596 mirror);
2597 if (ret == 0) {
2598 uptodate = !bio->bi_error;
2599 offset += len;
2600 continue;
2601 }
2602 }
2603readpage_ok:
2604 if (likely(uptodate)) {
2605 loff_t i_size = i_size_read(inode);
2606 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2607 unsigned off;
2608 /* zero out the tail of the page if it straddles i_size;
2609 * the rest of the page is about to be marked uptodate */
2610 off = i_size & (PAGE_CACHE_SIZE-1);
2611 if (page->index == end_index && off)
2612 zero_user_segment(page, off, PAGE_CACHE_SIZE);
2613 SetPageUptodate(page);
2614 } else {
2615 ClearPageUptodate(page);
2616 SetPageError(page);
2617 }
2618 unlock_page(page);
2619 offset += len;
2620
2621 if (unlikely(!uptodate)) {
2622 if (extent_len) {
2623 endio_readpage_release_extent(tree,
2624 extent_start,
2625 extent_len, 1);
2626 extent_start = 0;
2627 extent_len = 0;
2628 }
2629 endio_readpage_release_extent(tree, start,
2630 end - start + 1, 0);
2631 } else if (!extent_len) {
2632 extent_start = start;
2633 extent_len = end + 1 - start;
2634 } else if (extent_start + extent_len == start) {
2635 extent_len += end + 1 - start;
2636 } else {
2637 endio_readpage_release_extent(tree, extent_start,
2638 extent_len, uptodate);
2639 extent_start = start;
2640 extent_len = end + 1 - start;
2641 }
2642 }
2643
2644 if (extent_len)
2645 endio_readpage_release_extent(tree, extent_start, extent_len,
2646 uptodate);
2647 if (io_bio->end_io)
2648 io_bio->end_io(io_bio, bio->bi_error);
2649 bio_put(bio);
2650}
2651
2652/*
2653 * this allocates from the btrfs_bioset.  We're returning a bio right now
2654 * but you can call btrfs_io_bio for the appropriate container_of magic
2655 */
2656struct bio *
2657btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2658 gfp_t gfp_flags)
2659{
2660 struct btrfs_io_bio *btrfs_bio;
2661 struct bio *bio;
2662
2663 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
2664
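 /*
 * When called from a memory-allocation context, keep halving the
 * number of vectors until a smaller bio can be allocated.
 */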
2665 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
2666 while (!bio && (nr_vecs /= 2)) {
2667 bio = bio_alloc_bioset(gfp_flags,
2668 nr_vecs, btrfs_bioset);
2669 }
2670 }
2671
2672 if (bio) {
2673 bio->bi_bdev = bdev;
2674 bio->bi_iter.bi_sector = first_sector;
2675 btrfs_bio = btrfs_io_bio(bio);
2676 btrfs_bio->csum = NULL;
2677 btrfs_bio->csum_allocated = NULL;
2678 btrfs_bio->end_io = NULL;
2679 }
2680 return bio;
2681}
2682
2683struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2684{
2685 struct btrfs_io_bio *btrfs_bio;
2686 struct bio *new;
2687
2688 new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
2689 if (new) {
2690 btrfs_bio = btrfs_io_bio(new);
2691 btrfs_bio->csum = NULL;
2692 btrfs_bio->csum_allocated = NULL;
2693 btrfs_bio->end_io = NULL;
2694
2695#ifdef CONFIG_BLK_CGROUP
2696
2697 if (bio->bi_css)
2698 bio_associate_blkcg(new, bio->bi_css);
2699#endif
2700 }
2701 return new;
2702}
2703
2704/* this also allocates from the btrfs_bioset */
2705struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2706{
2707 struct btrfs_io_bio *btrfs_bio;
2708 struct bio *bio;
2709
2710 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2711 if (bio) {
2712 btrfs_bio = btrfs_io_bio(bio);
2713 btrfs_bio->csum = NULL;
2714 btrfs_bio->csum_allocated = NULL;
2715 btrfs_bio->end_io = NULL;
2716 }
2717 return bio;
2718}
2719
2720
2721static int __must_check submit_one_bio(int rw, struct bio *bio,
2722 int mirror_num, unsigned long bio_flags)
2723{
2724 int ret = 0;
2725 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2726 struct page *page = bvec->bv_page;
2727 struct extent_io_tree *tree = bio->bi_private;
2728 u64 start;
2729
2730 start = page_offset(page) + bvec->bv_offset;
2731
2732 bio->bi_private = NULL;
2733
2734 bio_get(bio);
2735
2736 if (tree->ops && tree->ops->submit_bio_hook)
2737 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
2738 mirror_num, bio_flags, start);
2739 else
2740 btrfsic_submit_bio(rw, bio);
2741
2742 bio_put(bio);
2743 return ret;
2744}
2745
2746static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
2747 unsigned long offset, size_t size, struct bio *bio,
2748 unsigned long bio_flags)
2749{
2750 int ret = 0;
2751 if (tree->ops && tree->ops->merge_bio_hook)
2752 ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
2753 bio_flags);
2754 BUG_ON(ret < 0);
2755 return ret;
2756
2757}
2758
2759static int submit_extent_page(int rw, struct extent_io_tree *tree,
2760 struct writeback_control *wbc,
2761 struct page *page, sector_t sector,
2762 size_t size, unsigned long offset,
2763 struct block_device *bdev,
2764 struct bio **bio_ret,
2765 unsigned long max_pages,
2766 bio_end_io_t end_io_func,
2767 int mirror_num,
2768 unsigned long prev_bio_flags,
2769 unsigned long bio_flags,
2770 bool force_bio_submit)
2771{
2772 int ret = 0;
2773 struct bio *bio;
2774 int contig = 0;
2775 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
2776 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
2777
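 /*
 * Try to add the page to the current bio if it is contiguous with it
 * and the merge hook allows it; otherwise submit the old bio first
 * and start a new one below.
 */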
2778 if (bio_ret && *bio_ret) {
2779 bio = *bio_ret;
2780 if (old_compressed)
2781 contig = bio->bi_iter.bi_sector == sector;
2782 else
2783 contig = bio_end_sector(bio) == sector;
2784
2785 if (prev_bio_flags != bio_flags || !contig ||
2786 force_bio_submit ||
2787 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
2788 bio_add_page(bio, page, page_size, offset) < page_size) {
2789 ret = submit_one_bio(rw, bio, mirror_num,
2790 prev_bio_flags);
2791 if (ret < 0) {
2792 *bio_ret = NULL;
2793 return ret;
2794 }
2795 bio = NULL;
2796 } else {
2797 if (wbc)
2798 wbc_account_io(wbc, page, page_size);
2799 return 0;
2800 }
2801 }
2802
2803 bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
2804 GFP_NOFS | __GFP_HIGH);
2805 if (!bio)
2806 return -ENOMEM;
2807
2808 bio_add_page(bio, page, page_size, offset);
2809 bio->bi_end_io = end_io_func;
2810 bio->bi_private = tree;
2811 if (wbc) {
2812 wbc_init_bio(wbc, bio);
2813 wbc_account_io(wbc, page, page_size);
2814 }
2815
2816 if (bio_ret)
2817 *bio_ret = bio;
2818 else
2819 ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
2820
2821 return ret;
2822}
2823
2824static void attach_extent_buffer_page(struct extent_buffer *eb,
2825 struct page *page)
2826{
2827 if (!PagePrivate(page)) {
2828 SetPagePrivate(page);
2829 page_cache_get(page);
2830 set_page_private(page, (unsigned long)eb);
2831 } else {
2832 WARN_ON(page->private != (unsigned long)eb);
2833 }
2834}
2835
2836void set_page_extent_mapped(struct page *page)
2837{
2838 if (!PagePrivate(page)) {
2839 SetPagePrivate(page);
2840 page_cache_get(page);
2841 set_page_private(page, EXTENT_PAGE_PRIVATE);
2842 }
2843}
2844
2845static struct extent_map *
2846__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2847 u64 start, u64 len, get_extent_t *get_extent,
2848 struct extent_map **em_cached)
2849{
2850 struct extent_map *em;
2851
2852 if (em_cached && *em_cached) {
2853 em = *em_cached;
2854 if (extent_map_in_tree(em) && start >= em->start &&
2855 start < extent_map_end(em)) {
2856 atomic_inc(&em->refs);
2857 return em;
2858 }
2859
2860 free_extent_map(em);
2861 *em_cached = NULL;
2862 }
2863
2864 em = get_extent(inode, page, pg_offset, start, len, 0);
2865 if (em_cached && !IS_ERR_OR_NULL(em)) {
2866 BUG_ON(*em_cached);
2867 atomic_inc(&em->refs);
2868 *em_cached = em;
2869 }
2870 return em;
2871}
2872
2873/*
2874 * basic readpage implementation.  Locked extent state structs are inserted
2875 * into the tree as needed and are removed again when the read IO completes
2876 * (by the end_io handlers).
2877 */
2878static int __do_readpage(struct extent_io_tree *tree,
2879 struct page *page,
2880 get_extent_t *get_extent,
2881 struct extent_map **em_cached,
2882 struct bio **bio, int mirror_num,
2883 unsigned long *bio_flags, int rw,
2884 u64 *prev_em_start)
2885{
2886 struct inode *inode = page->mapping->host;
2887 u64 start = page_offset(page);
2888 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2889 u64 end;
2890 u64 cur = start;
2891 u64 extent_offset;
2892 u64 last_byte = i_size_read(inode);
2893 u64 block_start;
2894 u64 cur_end;
2895 sector_t sector;
2896 struct extent_map *em;
2897 struct block_device *bdev;
2898 int ret;
2899 int nr = 0;
2900 size_t pg_offset = 0;
2901 size_t iosize;
2902 size_t disk_io_size;
2903 size_t blocksize = inode->i_sb->s_blocksize;
2904 unsigned long this_bio_flag = 0;
2905
2906 set_page_extent_mapped(page);
2907
2908 end = page_end;
2909 if (!PageUptodate(page)) {
2910 if (cleancache_get_page(page) == 0) {
2911 BUG_ON(blocksize != PAGE_SIZE);
2912 unlock_extent(tree, start, end);
2913 goto out;
2914 }
2915 }
2916
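 /* zero the part of the last page that lies beyond i_size */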
2917 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2918 char *userpage;
2919 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
2920
2921 if (zero_offset) {
2922 iosize = PAGE_CACHE_SIZE - zero_offset;
2923 userpage = kmap_atomic(page);
2924 memset(userpage + zero_offset, 0, iosize);
2925 flush_dcache_page(page);
2926 kunmap_atomic(userpage);
2927 }
2928 }
2929 while (cur <= end) {
2930 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2931 bool force_bio_submit = false;
2932
2933 if (cur >= last_byte) {
2934 char *userpage;
2935 struct extent_state *cached = NULL;
2936
2937 iosize = PAGE_CACHE_SIZE - pg_offset;
2938 userpage = kmap_atomic(page);
2939 memset(userpage + pg_offset, 0, iosize);
2940 flush_dcache_page(page);
2941 kunmap_atomic(userpage);
2942 set_extent_uptodate(tree, cur, cur + iosize - 1,
2943 &cached, GFP_NOFS);
2944 unlock_extent_cached(tree, cur,
2945 cur + iosize - 1,
2946 &cached, GFP_NOFS);
2947 break;
2948 }
2949 em = __get_extent_map(inode, page, pg_offset, cur,
2950 end - cur + 1, get_extent, em_cached);
2951 if (IS_ERR_OR_NULL(em)) {
2952 SetPageError(page);
2953 unlock_extent(tree, cur, end);
2954 break;
2955 }
2956 extent_offset = cur - em->start;
2957 BUG_ON(extent_map_end(em) <= cur);
2958 BUG_ON(end < cur);
2959
2960 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2961 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2962 extent_set_compress_type(&this_bio_flag,
2963 em->compress_type);
2964 }
2965
2966 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2967 cur_end = min(extent_map_end(em) - 1, end);
2968 iosize = ALIGN(iosize, blocksize);
2969 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2970 disk_io_size = em->block_len;
2971 sector = em->block_start >> 9;
2972 } else {
2973 sector = (em->block_start + extent_offset) >> 9;
2974 disk_io_size = iosize;
2975 }
2976 bdev = em->bdev;
2977 block_start = em->block_start;
2978 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2979 block_start = EXTENT_MAP_HOLE;
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997 /*
2998 * If we have a file range that points to a compressed extent
2999 * and it's followed by a consecutive file range that points to
3000 * the same compressed extent (possibly with a different offset
3001 * and/or length, so it either points to the whole extent or
3002 * only part of it), we must make sure we do not submit a
3003 * single bio to populate the pages for the 2 ranges, because
3004 * that makes the compressed extent read zero out the pages
3005 * belonging to the 2nd range: the bio is decompressed once,
3006 * into the pages of the range it was submitted for, and any
3007 * extra pages tacked on for the following range end up zeroed.
3008 *
3009 * So whenever the extent backing the current range differs
3010 * from the one the previous range was read from (tracked via
3011 * *prev_em_start and the extent map's orig_start), force the
3012 * current bio to be submitted before adding pages for this
3013 * compressed extent.
3014 */
3015 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3016 prev_em_start && *prev_em_start != (u64)-1 &&
3017 *prev_em_start != em->orig_start)
3018 force_bio_submit = true;
3019
3020 if (prev_em_start)
3021 *prev_em_start = em->orig_start;
3022
3023 free_extent_map(em);
3024 em = NULL;
3025
3026 /* we've found a hole, just zero and go on */
3027 if (block_start == EXTENT_MAP_HOLE) {
3028 char *userpage;
3029 struct extent_state *cached = NULL;
3030
3031 userpage = kmap_atomic(page);
3032 memset(userpage + pg_offset, 0, iosize);
3033 flush_dcache_page(page);
3034 kunmap_atomic(userpage);
3035
3036 set_extent_uptodate(tree, cur, cur + iosize - 1,
3037 &cached, GFP_NOFS);
3038 unlock_extent_cached(tree, cur,
3039 cur + iosize - 1,
3040 &cached, GFP_NOFS);
3041 cur = cur + iosize;
3042 pg_offset += iosize;
3043 continue;
3044 }
3045 /* the get_extent function already copied into the page */
3046 if (test_range_bit(tree, cur, cur_end,
3047 EXTENT_UPTODATE, 1, NULL)) {
3048 check_page_uptodate(tree, page);
3049 unlock_extent(tree, cur, cur + iosize - 1);
3050 cur = cur + iosize;
3051 pg_offset += iosize;
3052 continue;
3053 }
3054
3055 /* we have an inline extent but it didn't get marked up
3056 * to date.  Error out. */
3057 if (block_start == EXTENT_MAP_INLINE) {
3058 SetPageError(page);
3059 unlock_extent(tree, cur, cur + iosize - 1);
3060 cur = cur + iosize;
3061 pg_offset += iosize;
3062 continue;
3063 }
3064
3065 pnr -= page->index;
3066 ret = submit_extent_page(rw, tree, NULL, page,
3067 sector, disk_io_size, pg_offset,
3068 bdev, bio, pnr,
3069 end_bio_extent_readpage, mirror_num,
3070 *bio_flags,
3071 this_bio_flag,
3072 force_bio_submit);
3073 if (!ret) {
3074 nr++;
3075 *bio_flags = this_bio_flag;
3076 } else {
3077 SetPageError(page);
3078 unlock_extent(tree, cur, cur + iosize - 1);
3079 }
3080 cur = cur + iosize;
3081 pg_offset += iosize;
3082 }
3083out:
3084 if (!nr) {
3085 if (!PageError(page))
3086 SetPageUptodate(page);
3087 unlock_page(page);
3088 }
3089 return 0;
3090}
3091
3092static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3093 struct page *pages[], int nr_pages,
3094 u64 start, u64 end,
3095 get_extent_t *get_extent,
3096 struct extent_map **em_cached,
3097 struct bio **bio, int mirror_num,
3098 unsigned long *bio_flags, int rw,
3099 u64 *prev_em_start)
3100{
3101 struct inode *inode;
3102 struct btrfs_ordered_extent *ordered;
3103 int index;
3104
3105 inode = pages[0]->mapping->host;
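 /*
 * Lock the whole contiguous range, waiting for any ordered extents
 * that are still running against it to finish first.
 */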
3106 while (1) {
3107 lock_extent(tree, start, end);
3108 ordered = btrfs_lookup_ordered_range(inode, start,
3109 end - start + 1);
3110 if (!ordered)
3111 break;
3112 unlock_extent(tree, start, end);
3113 btrfs_start_ordered_extent(inode, ordered, 1);
3114 btrfs_put_ordered_extent(ordered);
3115 }
3116
3117 for (index = 0; index < nr_pages; index++) {
3118 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3119 mirror_num, bio_flags, rw, prev_em_start);
3120 page_cache_release(pages[index]);
3121 }
3122}
3123
3124static void __extent_readpages(struct extent_io_tree *tree,
3125 struct page *pages[],
3126 int nr_pages, get_extent_t *get_extent,
3127 struct extent_map **em_cached,
3128 struct bio **bio, int mirror_num,
3129 unsigned long *bio_flags, int rw,
3130 u64 *prev_em_start)
3131{
3132 u64 start = 0;
3133 u64 end = 0;
3134 u64 page_start;
3135 int index;
3136 int first_index = 0;
3137
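 /* batch pages that are contiguous in the file into a single locked-range read */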
3138 for (index = 0; index < nr_pages; index++) {
3139 page_start = page_offset(pages[index]);
3140 if (!end) {
3141 start = page_start;
3142 end = start + PAGE_CACHE_SIZE - 1;
3143 first_index = index;
3144 } else if (end + 1 == page_start) {
3145 end += PAGE_CACHE_SIZE;
3146 } else {
3147 __do_contiguous_readpages(tree, &pages[first_index],
3148 index - first_index, start,
3149 end, get_extent, em_cached,
3150 bio, mirror_num, bio_flags,
3151 rw, prev_em_start);
3152 start = page_start;
3153 end = start + PAGE_CACHE_SIZE - 1;
3154 first_index = index;
3155 }
3156 }
3157
3158 if (end)
3159 __do_contiguous_readpages(tree, &pages[first_index],
3160 index - first_index, start,
3161 end, get_extent, em_cached, bio,
3162 mirror_num, bio_flags, rw,
3163 prev_em_start);
3164}
3165
3166static int __extent_read_full_page(struct extent_io_tree *tree,
3167 struct page *page,
3168 get_extent_t *get_extent,
3169 struct bio **bio, int mirror_num,
3170 unsigned long *bio_flags, int rw)
3171{
3172 struct inode *inode = page->mapping->host;
3173 struct btrfs_ordered_extent *ordered;
3174 u64 start = page_offset(page);
3175 u64 end = start + PAGE_CACHE_SIZE - 1;
3176 int ret;
3177
3178 while (1) {
3179 lock_extent(tree, start, end);
3180 ordered = btrfs_lookup_ordered_extent(inode, start);
3181 if (!ordered)
3182 break;
3183 unlock_extent(tree, start, end);
3184 btrfs_start_ordered_extent(inode, ordered, 1);
3185 btrfs_put_ordered_extent(ordered);
3186 }
3187
3188 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3189 bio_flags, rw, NULL);
3190 return ret;
3191}
3192
3193int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3194 get_extent_t *get_extent, int mirror_num)
3195{
3196 struct bio *bio = NULL;
3197 unsigned long bio_flags = 0;
3198 int ret;
3199
3200 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3201 &bio_flags, READ);
3202 if (bio)
3203 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3204 return ret;
3205}
3206
3207static noinline void update_nr_written(struct page *page,
3208 struct writeback_control *wbc,
3209 unsigned long nr_written)
3210{
3211 wbc->nr_to_write -= nr_written;
3212 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
3213 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
3214 page->mapping->writeback_index = page->index + nr_written;
3215}
3216
3217/*
3218 * helper for __extent_writepage, doing all of the delayed allocation setup.
3219 *
3220 * This returns 1 if our fill_delalloc function did all the work required
3221 * to write the page (copy into inline extent).  In this case the IO has
3222 * been started and the page is already unlocked.
3223 *
3224 * This returns 0 if all went well (page still locked)
3225 * This returns < 0 if there were errors (page still locked)
3226 */
3227static noinline_for_stack int writepage_delalloc(struct inode *inode,
3228 struct page *page, struct writeback_control *wbc,
3229 struct extent_page_data *epd,
3230 u64 delalloc_start,
3231 unsigned long *nr_written)
3232{
3233 struct extent_io_tree *tree = epd->tree;
3234 u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
3235 u64 nr_delalloc;
3236 u64 delalloc_to_write = 0;
3237 u64 delalloc_end = 0;
3238 int ret;
3239 int page_started = 0;
3240
3241 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3242 return 0;
3243
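 /*
 * Find and lock every delalloc range that overlaps this page and
 * hand each one to the fill_delalloc hook.
 */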
3244 while (delalloc_end < page_end) {
3245 nr_delalloc = find_lock_delalloc_range(inode, tree,
3246 page,
3247 &delalloc_start,
3248 &delalloc_end,
3249 BTRFS_MAX_EXTENT_SIZE);
3250 if (nr_delalloc == 0) {
3251 delalloc_start = delalloc_end + 1;
3252 continue;
3253 }
3254 ret = tree->ops->fill_delalloc(inode, page,
3255 delalloc_start,
3256 delalloc_end,
3257 &page_started,
3258 nr_written);
3259
3260 if (ret) {
3261 SetPageError(page);
3262 /*
3263 * fill_delalloc should return < 0 on error.  A return > 0
3264 * from this function means the IO was started, so from this
3265 * error path report -EIO rather than any positive value.
3266 */
3267 ret = ret < 0 ? ret : -EIO;
3268 goto done;
3269 }
3270 /*
3271 * delalloc_end is already one less than the total
3272 * length, so we don't subtract one from
3273 * PAGE_CACHE_SIZE
3274 */
3275 delalloc_to_write += (delalloc_end - delalloc_start +
3276 PAGE_CACHE_SIZE) >>
3277 PAGE_CACHE_SHIFT;
3278 delalloc_start = delalloc_end + 1;
3279 }
3280 if (wbc->nr_to_write < delalloc_to_write) {
3281 int thresh = 8192;
3282
3283 if (delalloc_to_write < thresh * 2)
3284 thresh = delalloc_to_write;
3285 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3286 thresh);
3287 }
3288
3289 /* did the fill_delalloc function already unlock and start
3290 * the IO?
3291 */
3292 if (page_started) {
3293 /*
3294 * we've unlocked the page, so we can't update
3295 * the mapping's writeback index, just update
3296 * nr_to_write.
3297 */
3298 wbc->nr_to_write -= *nr_written;
3299 return 1;
3300 }
3301
3302 ret = 0;
3303
3304done:
3305 return ret;
3306}
3307
3308/*
3309 * helper for __extent_writepage.  This calls the writepage start hooks,
3310 * and does the loop to map the page into extents and bios.
3311 *
3312 * We return 1 if the IO is started and the page is unlocked,
3313 * 0 if all went well (page still locked)
3314 * < 0 if there were errors (page still locked)
3315 */
3316static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3317 struct page *page,
3318 struct writeback_control *wbc,
3319 struct extent_page_data *epd,
3320 loff_t i_size,
3321 unsigned long nr_written,
3322 int write_flags, int *nr_ret)
3323{
3324 struct extent_io_tree *tree = epd->tree;
3325 u64 start = page_offset(page);
3326 u64 page_end = start + PAGE_CACHE_SIZE - 1;
3327 u64 end;
3328 u64 cur = start;
3329 u64 extent_offset;
3330 u64 block_start;
3331 u64 iosize;
3332 sector_t sector;
3333 struct extent_state *cached_state = NULL;
3334 struct extent_map *em;
3335 struct block_device *bdev;
3336 size_t pg_offset = 0;
3337 size_t blocksize;
3338 int ret = 0;
3339 int nr = 0;
3340 bool compressed;
3341
3342 if (tree->ops && tree->ops->writepage_start_hook) {
3343 ret = tree->ops->writepage_start_hook(page, start,
3344 page_end);
3345 if (ret) {
3346 /* on -EBUSY the fixup worker has taken over the page, otherwise redirty it and try again later */
3347 if (ret == -EBUSY)
3348 wbc->pages_skipped++;
3349 else
3350 redirty_page_for_writepage(wbc, page);
3351
3352 update_nr_written(page, wbc, nr_written);
3353 unlock_page(page);
3354 ret = 1;
3355 goto done_unlocked;
3356 }
3357 }
3358
3359 /*
3360 * we don't want to touch the inode after unlocking the page,
3361 * so we update the mapping writeback index now
3362 */
3363 update_nr_written(page, wbc, nr_written + 1);
3364
3365 end = page_end;
3366 if (i_size <= start) {
3367 if (tree->ops && tree->ops->writepage_end_io_hook)
3368 tree->ops->writepage_end_io_hook(page, start,
3369 page_end, NULL, 1);
3370 goto done;
3371 }
3372
3373 blocksize = inode->i_sb->s_blocksize;
3374
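 /*
 * Walk the extents backing this page and submit a write bio for
 * every block that actually needs IO.
 */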
3375 while (cur <= end) {
3376 u64 em_end;
3377 if (cur >= i_size) {
3378 if (tree->ops && tree->ops->writepage_end_io_hook)
3379 tree->ops->writepage_end_io_hook(page, cur,
3380 page_end, NULL, 1);
3381 break;
3382 }
3383 em = epd->get_extent(inode, page, pg_offset, cur,
3384 end - cur + 1, 1);
3385 if (IS_ERR_OR_NULL(em)) {
3386 SetPageError(page);
3387 ret = PTR_ERR_OR_ZERO(em);
3388 break;
3389 }
3390
3391 extent_offset = cur - em->start;
3392 em_end = extent_map_end(em);
3393 BUG_ON(em_end <= cur);
3394 BUG_ON(end < cur);
3395 iosize = min(em_end - cur, end - cur + 1);
3396 iosize = ALIGN(iosize, blocksize);
3397 sector = (em->block_start + extent_offset) >> 9;
3398 bdev = em->bdev;
3399 block_start = em->block_start;
3400 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3401 free_extent_map(em);
3402 em = NULL;
3403
3404 /*
3405 * compressed and inline extents are written through other
3406 * paths in the FS
3407 */
3408 if (compressed || block_start == EXTENT_MAP_HOLE ||
3409 block_start == EXTENT_MAP_INLINE) {
3410 /*
3411 * end_io notification does not happen here for
3412 * compressed extents
3413 */
3414 if (!compressed && tree->ops &&
3415 tree->ops->writepage_end_io_hook)
3416 tree->ops->writepage_end_io_hook(page, cur,
3417 cur + iosize - 1,
3418 NULL, 1);
3419 else if (compressed) {
3420 /* we don't want to end_page_writeback on
3421 * a compressed extent.  this happens
3422 * elsewhere
3423 */
3424 nr++;
3425 }
3426
3427 cur += iosize;
3428 pg_offset += iosize;
3429 continue;
3430 }
3431
3432 if (tree->ops && tree->ops->writepage_io_hook) {
3433 ret = tree->ops->writepage_io_hook(page, cur,
3434 cur + iosize - 1);
3435 } else {
3436 ret = 0;
3437 }
3438 if (ret) {
3439 SetPageError(page);
3440 } else {
3441 unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
3442
3443 set_range_writeback(tree, cur, cur + iosize - 1);
3444 if (!PageWriteback(page)) {
3445 btrfs_err(BTRFS_I(inode)->root->fs_info,
3446 "page %lu not writeback, cur %llu end %llu",
3447 page->index, cur, end);
3448 }
3449
3450 ret = submit_extent_page(write_flags, tree, wbc, page,
3451 sector, iosize, pg_offset,
3452 bdev, &epd->bio, max_nr,
3453 end_bio_extent_writepage,
3454 0, 0, 0, false);
3455 if (ret)
3456 SetPageError(page);
3457 }
3458 cur = cur + iosize;
3459 pg_offset += iosize;
3460 nr++;
3461 }
3462done:
3463 *nr_ret = nr;
3464
3465done_unlocked:
3466
3467 /* drop our reference on any cached states */
3468 free_extent_state(cached_state);
3469 return ret;
3470}
3471
3472/*
3473 * the writepage semantics are similar to regular writepage.  extent
3474 * mapping is done.  we take the page lock and schedule the IO.
3475 * Returns 1 if the IO was started and the page unlocked, 0 if all went
3476 * well (page still locked) and < 0 if there were errors (page still locked).
3477 */
3478static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3479 void *data)
3480{
3481 struct inode *inode = page->mapping->host;
3482 struct extent_page_data *epd = data;
3483 u64 start = page_offset(page);
3484 u64 page_end = start + PAGE_CACHE_SIZE - 1;
3485 int ret;
3486 int nr = 0;
3487 size_t pg_offset = 0;
3488 loff_t i_size = i_size_read(inode);
3489 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
3490 int write_flags;
3491 unsigned long nr_written = 0;
3492
3493 if (wbc->sync_mode == WB_SYNC_ALL)
3494 write_flags = WRITE_SYNC;
3495 else
3496 write_flags = WRITE;
3497
3498 trace___extent_writepage(page, inode, wbc);
3499
3500 WARN_ON(!PageLocked(page));
3501
3502 ClearPageError(page);
3503
3504 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
3505 if (page->index > end_index ||
3506 (page->index == end_index && !pg_offset)) {
3507 page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
3508 unlock_page(page);
3509 return 0;
3510 }
3511
3512 if (page->index == end_index) {
3513 char *userpage;
3514
3515 userpage = kmap_atomic(page);
3516 memset(userpage + pg_offset, 0,
3517 PAGE_CACHE_SIZE - pg_offset);
3518 kunmap_atomic(userpage);
3519 flush_dcache_page(page);
3520 }
3521
3522 pg_offset = 0;
3523
3524 set_page_extent_mapped(page);
3525
3526 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3527 if (ret == 1)
3528 goto done_unlocked;
3529 if (ret)
3530 goto done;
3531
3532 ret = __extent_writepage_io(inode, page, wbc, epd,
3533 i_size, nr_written, write_flags, &nr);
3534 if (ret == 1)
3535 goto done_unlocked;
3536
3537done:
3538 if (nr == 0) {
3539 /* make sure the mapping tag for page dirty gets cleared */
3540 set_page_writeback(page);
3541 end_page_writeback(page);
3542 }
3543 if (PageError(page)) {
3544 ret = ret < 0 ? ret : -EIO;
3545 end_extent_writepage(page, ret, start, page_end);
3546 }
3547 unlock_page(page);
3548 return ret;
3549
3550done_unlocked:
3551 return 0;
3552}
3553
3554void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3555{
3556 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3557 TASK_UNINTERRUPTIBLE);
3558}
3559
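/*
 * Lock an extent buffer for writeback: wait for any running writeback,
 * clear the dirty bit and lock all of its pages.  Returns 1 if the buffer
 * was dirty and must be written (its pages are locked), 0 if there is
 * nothing to do.
 */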
3560static noinline_for_stack int
3561lock_extent_buffer_for_io(struct extent_buffer *eb,
3562 struct btrfs_fs_info *fs_info,
3563 struct extent_page_data *epd)
3564{
3565 unsigned long i, num_pages;
3566 int flush = 0;
3567 int ret = 0;
3568
3569 if (!btrfs_try_tree_write_lock(eb)) {
3570 flush = 1;
3571 flush_write_bio(epd);
3572 btrfs_tree_lock(eb);
3573 }
3574
3575 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3576 btrfs_tree_unlock(eb);
3577 if (!epd->sync_io)
3578 return 0;
3579 if (!flush) {
3580 flush_write_bio(epd);
3581 flush = 1;
3582 }
3583 while (1) {
3584 wait_on_extent_buffer_writeback(eb);
3585 btrfs_tree_lock(eb);
3586 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3587 break;
3588 btrfs_tree_unlock(eb);
3589 }
3590 }
3591
3592 /*
3593 * Clear the dirty bit and set the writeback bit while holding
3594 * eb->refs_lock, so that anybody checking extent_buffer_under_io()
3595 * under that lock never sees the buffer with neither bit set.
3596 */
3597 spin_lock(&eb->refs_lock);
3598 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3599 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3600 spin_unlock(&eb->refs_lock);
3601 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3602 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
3603 -eb->len,
3604 fs_info->dirty_metadata_batch);
3605 ret = 1;
3606 } else {
3607 spin_unlock(&eb->refs_lock);
3608 }
3609
3610 btrfs_tree_unlock(eb);
3611
3612 if (!ret)
3613 return ret;
3614
3615 num_pages = num_extent_pages(eb->start, eb->len);
3616 for (i = 0; i < num_pages; i++) {
3617 struct page *p = eb->pages[i];
3618
3619 if (!trylock_page(p)) {
3620 if (!flush) {
3621 flush_write_bio(epd);
3622 flush = 1;
3623 }
3624 lock_page(p);
3625 }
3626 }
3627
3628 return ret;
3629}
3630
3631static void end_extent_buffer_writeback(struct extent_buffer *eb)
3632{
3633 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3634 smp_mb__after_atomic();
3635 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3636}
3637
3638static void set_btree_ioerr(struct page *page)
3639{
3640 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3641 struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode);
3642
3643 SetPageError(page);
3644 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3645 return;
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673 /*
3674 * We can not rely only on the mapping's error flags (AS_EIO /
3675 * AS_ENOSPC) to detect that writeback of a btree extent buffer
3676 * failed, because those flags are cleared by the first
3677 * filemap_fdatawait_range() call that sees them, which may
3678 * happen before the transaction commit or log sync checks for
3679 * errors.  So also record the failure in the btree inode's
3680 * runtime flags, with one bit for buffers of non-log trees and
3681 * one bit for each of the two log trees, so that the commit
3682 * and log sync paths can notice the failed writeback and abort
3683 * instead of committing metadata that was never persisted.
3684 */
3685 switch (eb->log_index) {
3686 case -1:
3687 set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags);
3688 break;
3689 case 0:
3690 set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
3691 break;
3692 case 1:
3693 set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
3694 break;
3695 default:
3696 BUG();
3697 }
3698}
3699
3700static void end_bio_extent_buffer_writepage(struct bio *bio)
3701{
3702 struct bio_vec *bvec;
3703 struct extent_buffer *eb;
3704 int i, done;
3705
3706 bio_for_each_segment_all(bvec, bio, i) {
3707 struct page *page = bvec->bv_page;
3708
3709 eb = (struct extent_buffer *)page->private;
3710 BUG_ON(!eb);
3711 done = atomic_dec_and_test(&eb->io_pages);
3712
3713 if (bio->bi_error ||
3714 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3715 ClearPageUptodate(page);
3716 set_btree_ioerr(page);
3717 }
3718
3719 end_page_writeback(page);
3720
3721 if (!done)
3722 continue;
3723
3724 end_extent_buffer_writeback(eb);
3725 }
3726
3727 bio_put(bio);
3728}
3729
3730static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3731 struct btrfs_fs_info *fs_info,
3732 struct writeback_control *wbc,
3733 struct extent_page_data *epd)
3734{
3735 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3736 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3737 u64 offset = eb->start;
3738 unsigned long i, num_pages;
3739 unsigned long bio_flags = 0;
3740 int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
3741 int ret = 0;
3742
3743 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3744 num_pages = num_extent_pages(eb->start, eb->len);
3745 atomic_set(&eb->io_pages, num_pages);
3746 if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
3747 bio_flags = EXTENT_BIO_TREE_LOG;
3748
3749 for (i = 0; i < num_pages; i++) {
3750 struct page *p = eb->pages[i];
3751
3752 clear_page_dirty_for_io(p);
3753 set_page_writeback(p);
3754 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
3755 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3756 -1, end_bio_extent_buffer_writepage,
3757 0, epd->bio_flags, bio_flags, false);
3758 epd->bio_flags = bio_flags;
3759 if (ret) {
3760 set_btree_ioerr(p);
3761 end_page_writeback(p);
3762 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3763 end_extent_buffer_writeback(eb);
3764 ret = -EIO;
3765 break;
3766 }
3767 offset += PAGE_CACHE_SIZE;
3768 update_nr_written(p, wbc, 1);
3769 unlock_page(p);
3770 }
3771
3772 if (unlikely(ret)) {
3773 for (; i < num_pages; i++) {
3774 struct page *p = eb->pages[i];
3775 clear_page_dirty_for_io(p);
3776 unlock_page(p);
3777 }
3778 }
3779
3780 return ret;
3781}
3782
3783int btree_write_cache_pages(struct address_space *mapping,
3784 struct writeback_control *wbc)
3785{
3786 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3787 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3788 struct extent_buffer *eb, *prev_eb = NULL;
3789 struct extent_page_data epd = {
3790 .bio = NULL,
3791 .tree = tree,
3792 .extent_locked = 0,
3793 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3794 .bio_flags = 0,
3795 };
3796 int ret = 0;
3797 int done = 0;
3798 int nr_to_write_done = 0;
3799 struct pagevec pvec;
3800 int nr_pages;
3801 pgoff_t index;
3802 pgoff_t end;
3803 int scanned = 0;
3804 int tag;
3805
3806 pagevec_init(&pvec, 0);
3807 if (wbc->range_cyclic) {
3808 index = mapping->writeback_index;
3809 end = -1;
3810 } else {
3811 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3812 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3813 scanned = 1;
3814 }
3815 if (wbc->sync_mode == WB_SYNC_ALL)
3816 tag = PAGECACHE_TAG_TOWRITE;
3817 else
3818 tag = PAGECACHE_TAG_DIRTY;
3819retry:
3820 if (wbc->sync_mode == WB_SYNC_ALL)
3821 tag_pages_for_writeback(mapping, index, end);
3822 while (!done && !nr_to_write_done && (index <= end) &&
3823 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3824 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3825 unsigned i;
3826
3827 scanned = 1;
3828 for (i = 0; i < nr_pages; i++) {
3829 struct page *page = pvec.pages[i];
3830
3831 if (!PagePrivate(page))
3832 continue;
3833
3834 if (!wbc->range_cyclic && page->index > end) {
3835 done = 1;
3836 break;
3837 }
3838
3839 spin_lock(&mapping->private_lock);
3840 if (!PagePrivate(page)) {
3841 spin_unlock(&mapping->private_lock);
3842 continue;
3843 }
3844
3845 eb = (struct extent_buffer *)page->private;
3846
3847 /*
3848 * Shouldn't happen and normally this would be a BUG_ON
3849 * but no sense in crashing the users box for something
3850 * we can survive anyway.
3851 */
3852 if (WARN_ON(!eb)) {
3853 spin_unlock(&mapping->private_lock);
3854 continue;
3855 }
3856
3857 if (eb == prev_eb) {
3858 spin_unlock(&mapping->private_lock);
3859 continue;
3860 }
3861
3862 ret = atomic_inc_not_zero(&eb->refs);
3863 spin_unlock(&mapping->private_lock);
3864 if (!ret)
3865 continue;
3866
3867 prev_eb = eb;
3868 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3869 if (!ret) {
3870 free_extent_buffer(eb);
3871 continue;
3872 }
3873
3874 ret = write_one_eb(eb, fs_info, wbc, &epd);
3875 if (ret) {
3876 done = 1;
3877 free_extent_buffer(eb);
3878 break;
3879 }
3880 free_extent_buffer(eb);
3881
3882 /*
3883 * the filesystem may choose to bump up nr_to_write.
3884 * We have to make sure to honor the new nr_to_write
3885 * at any time
3886 */
3887 nr_to_write_done = wbc->nr_to_write <= 0;
3888 }
3889 pagevec_release(&pvec);
3890 cond_resched();
3891 }
3892 if (!scanned && !done) {
3893 /*
3894 * We hit the last page and there is more work to be done: wrap
3895 * around back to the start of the file
3896 */
3897 scanned = 1;
3898 index = 0;
3899 goto retry;
3900 }
3901 flush_write_bio(&epd);
3902 return ret;
3903}
3904
3905/**
3906 * extent_write_cache_pages - walk the list of dirty pages of the given
3907 * address space and write all of them.
3908 * @tree:      the extent io tree to use
3909 * @mapping:   address space structure to write
3910 * @wbc:       subtract the number of written pages from *@wbc->nr_to_write
3911 * @writepage: function called for each page
3912 * @data:      data passed to the writepage function
3913 * @flush_fn:  function called to flush any pending bio before waiting
3914 *
3915 * If a page is already under I/O it is skipped, which is fine for
3916 * memory-cleaning writeback but not for data-integrity calls such as
3917 * fsync().  If wbc->sync_mode is WB_SYNC_ALL we were called for data
3918 * integrity and must wait for any existing IO to complete.
3919 */
3920static int extent_write_cache_pages(struct extent_io_tree *tree,
3921 struct address_space *mapping,
3922 struct writeback_control *wbc,
3923 writepage_t writepage, void *data,
3924 void (*flush_fn)(void *))
3925{
3926 struct inode *inode = mapping->host;
3927 int ret = 0;
3928 int done = 0;
3929 int err = 0;
3930 int nr_to_write_done = 0;
3931 struct pagevec pvec;
3932 int nr_pages;
3933 pgoff_t index;
3934 pgoff_t end;
3935 int scanned = 0;
3936 int tag;
3937
3938 /*
3939 * We have to hold onto the inode so that ordered extents can do their
3940 * work when the IO finishes.  The alternative to this is failing to add
3941 * an ordered extent if the igrab() fails there and that is a huge pain
3942 * to deal with, so instead just hold onto the inode throughout the
3943 * writepages operation.  If it fails here we are freeing up the inode
3944 * anyway and we'd rather not waste our time writing out stuff that is
3945 * going to be truncated anyway.
3946 */
3947 if (!igrab(inode))
3948 return 0;
3949
3950 pagevec_init(&pvec, 0);
3951 if (wbc->range_cyclic) {
3952 index = mapping->writeback_index;
3953 end = -1;
3954 } else {
3955 index = wbc->range_start >> PAGE_CACHE_SHIFT;
3956 end = wbc->range_end >> PAGE_CACHE_SHIFT;
3957 scanned = 1;
3958 }
3959 if (wbc->sync_mode == WB_SYNC_ALL)
3960 tag = PAGECACHE_TAG_TOWRITE;
3961 else
3962 tag = PAGECACHE_TAG_DIRTY;
3963retry:
3964 if (wbc->sync_mode == WB_SYNC_ALL)
3965 tag_pages_for_writeback(mapping, index, end);
3966 while (!done && !nr_to_write_done && (index <= end) &&
3967 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
3968 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
3969 unsigned i;
3970
3971 scanned = 1;
3972 for (i = 0; i < nr_pages; i++) {
3973 struct page *page = pvec.pages[i];
3974
3975 /*
3976 * At this point we hold neither mapping->tree_lock nor
3977 * lock on the page itself: the page may be truncated or
3978 * invalidated (changing page->mapping to NULL), or even
3979 * swizzled back from swapper_space to tmpfs file
3980 * mapping
3981 */
3982 if (!trylock_page(page)) {
3983 flush_fn(data);
3984 lock_page(page);
3985 }
3986
3987 if (unlikely(page->mapping != mapping)) {
3988 unlock_page(page);
3989 continue;
3990 }
3991
3992 if (!wbc->range_cyclic && page->index > end) {
3993 done = 1;
3994 unlock_page(page);
3995 continue;
3996 }
3997
3998 if (wbc->sync_mode != WB_SYNC_NONE) {
3999 if (PageWriteback(page))
4000 flush_fn(data);
4001 wait_on_page_writeback(page);
4002 }
4003
4004 if (PageWriteback(page) ||
4005 !clear_page_dirty_for_io(page)) {
4006 unlock_page(page);
4007 continue;
4008 }
4009
4010 ret = (*writepage)(page, wbc, data);
4011
4012 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
4013 unlock_page(page);
4014 ret = 0;
4015 }
4016 if (!err && ret < 0)
4017 err = ret;
4018
4019 /*
4020 * the filesystem may choose to bump up nr_to_write.
4021 * We have to make sure to honor the new nr_to_write
4022 * at any time
4023 */
4024 nr_to_write_done = wbc->nr_to_write <= 0;
4025 }
4026 pagevec_release(&pvec);
4027 cond_resched();
4028 }
4029 if (!scanned && !done && !err) {
4030 /*
4031 * We hit the last page and there is more work to be done:
4032 * wrap around back to the start of the file
4033 */
4034 scanned = 1;
4035 index = 0;
4036 goto retry;
4037 }
4038 btrfs_add_delayed_iput(inode);
4039 return err;
4040}
4041
4042static void flush_epd_write_bio(struct extent_page_data *epd)
4043{
4044 if (epd->bio) {
4045 int rw = WRITE;
4046 int ret;
4047
4048 if (epd->sync_io)
4049 rw = WRITE_SYNC;
4050
4051 ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
4052 BUG_ON(ret < 0);
4053 epd->bio = NULL;
4054 }
4055}
4056
4057static noinline void flush_write_bio(void *data)
4058{
4059 struct extent_page_data *epd = data;
4060 flush_epd_write_bio(epd);
4061}
4062
4063int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
4064 get_extent_t *get_extent,
4065 struct writeback_control *wbc)
4066{
4067 int ret;
4068 struct extent_page_data epd = {
4069 .bio = NULL,
4070 .tree = tree,
4071 .get_extent = get_extent,
4072 .extent_locked = 0,
4073 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4074 .bio_flags = 0,
4075 };
4076
4077 ret = __extent_writepage(page, wbc, &epd);
4078
4079 flush_epd_write_bio(&epd);
4080 return ret;
4081}
4082
4083int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
4084 u64 start, u64 end, get_extent_t *get_extent,
4085 int mode)
4086{
4087 int ret = 0;
4088 struct address_space *mapping = inode->i_mapping;
4089 struct page *page;
4090 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
4091 PAGE_CACHE_SHIFT;
4092
4093 struct extent_page_data epd = {
4094 .bio = NULL,
4095 .tree = tree,
4096 .get_extent = get_extent,
4097 .extent_locked = 1,
4098 .sync_io = mode == WB_SYNC_ALL,
4099 .bio_flags = 0,
4100 };
4101 struct writeback_control wbc_writepages = {
4102 .sync_mode = mode,
4103 .nr_to_write = nr_pages * 2,
4104 .range_start = start,
4105 .range_end = end + 1,
4106 };
4107
4108 while (start <= end) {
4109 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
4110 if (clear_page_dirty_for_io(page))
4111 ret = __extent_writepage(page, &wbc_writepages, &epd);
4112 else {
4113 if (tree->ops && tree->ops->writepage_end_io_hook)
4114 tree->ops->writepage_end_io_hook(page, start,
4115 start + PAGE_CACHE_SIZE - 1,
4116 NULL, 1);
4117 unlock_page(page);
4118 }
4119 page_cache_release(page);
4120 start += PAGE_CACHE_SIZE;
4121 }
4122
4123 flush_epd_write_bio(&epd);
4124 return ret;
4125}
4126
4127int extent_writepages(struct extent_io_tree *tree,
4128 struct address_space *mapping,
4129 get_extent_t *get_extent,
4130 struct writeback_control *wbc)
4131{
4132 int ret = 0;
4133 struct extent_page_data epd = {
4134 .bio = NULL,
4135 .tree = tree,
4136 .get_extent = get_extent,
4137 .extent_locked = 0,
4138 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4139 .bio_flags = 0,
4140 };
4141
4142 ret = extent_write_cache_pages(tree, mapping, wbc,
4143 __extent_writepage, &epd,
4144 flush_write_bio);
4145 flush_epd_write_bio(&epd);
4146 return ret;
4147}
4148
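/*
 * Readahead entry point: pull pages off the list, add them to the page
 * cache and read them in batches of up to 16 pages.
 */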
4149int extent_readpages(struct extent_io_tree *tree,
4150 struct address_space *mapping,
4151 struct list_head *pages, unsigned nr_pages,
4152 get_extent_t get_extent)
4153{
4154 struct bio *bio = NULL;
4155 unsigned page_idx;
4156 unsigned long bio_flags = 0;
4157 struct page *pagepool[16];
4158 struct page *page;
4159 struct extent_map *em_cached = NULL;
4160 int nr = 0;
4161 u64 prev_em_start = (u64)-1;
4162
4163 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4164 page = list_entry(pages->prev, struct page, lru);
4165
4166 prefetchw(&page->flags);
4167 list_del(&page->lru);
4168 if (add_to_page_cache_lru(page, mapping,
4169 page->index, GFP_NOFS)) {
4170 page_cache_release(page);
4171 continue;
4172 }
4173
4174 pagepool[nr++] = page;
4175 if (nr < ARRAY_SIZE(pagepool))
4176 continue;
4177 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4178 &bio, 0, &bio_flags, READ, &prev_em_start);
4179 nr = 0;
4180 }
4181 if (nr)
4182 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4183 &bio, 0, &bio_flags, READ, &prev_em_start);
4184
4185 if (em_cached)
4186 free_extent_map(em_cached);
4187
4188 BUG_ON(!list_empty(pages));
4189 if (bio)
4190 return submit_one_bio(READ, bio, 0, bio_flags);
4191 return 0;
4192}
4193
4194/*
4195 * basic invalidatepage code, this waits on any locked or writeback
4196 * ranges corresponding to the page, and then deletes any extent state
4197 * records from the tree
4198 */
4199int extent_invalidatepage(struct extent_io_tree *tree,
4200 struct page *page, unsigned long offset)
4201{
4202 struct extent_state *cached_state = NULL;
4203 u64 start = page_offset(page);
4204 u64 end = start + PAGE_CACHE_SIZE - 1;
4205 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4206
4207 start += ALIGN(offset, blocksize);
4208 if (start > end)
4209 return 0;
4210
4211 lock_extent_bits(tree, start, end, &cached_state);
4212 wait_on_page_writeback(page);
4213 clear_extent_bit(tree, start, end,
4214 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4215 EXTENT_DO_ACCOUNTING,
4216 1, 1, &cached_state, GFP_NOFS);
4217 return 0;
4218}
4219
4220/*
4221 * a helper for releasepage, this tests for areas of the page that
4222 * are locked or under IO and drops the related state bits if it is safe
4223 * to drop the page
4224 */
4225static int try_release_extent_state(struct extent_map_tree *map,
4226 struct extent_io_tree *tree,
4227 struct page *page, gfp_t mask)
4228{
4229 u64 start = page_offset(page);
4230 u64 end = start + PAGE_CACHE_SIZE - 1;
4231 int ret = 1;
4232
4233 if (test_range_bit(tree, start, end,
4234 EXTENT_IOBITS, 0, NULL))
4235 ret = 0;
4236 else {
4237 if ((mask & GFP_NOFS) == GFP_NOFS)
4238 mask = GFP_NOFS;
4239 /*
4240 * at this point we can safely clear everything except the
4241 * locked bit and the nodatasum bit
4242 */
4243 ret = clear_extent_bit(tree, start, end,
4244 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4245 0, 0, NULL, mask);
4246
4247 /* if clear_extent_bit failed for enomem reasons,
4248 * we can't allow the release to continue.
4249 */
4250 if (ret < 0)
4251 ret = 0;
4252 else
4253 ret = 1;
4254 }
4255 return ret;
4256}
4257
4258/*
4259 * a helper for releasepage.  As long as there are no locked extents
4260 * in the range corresponding to the page, both state records and extent
4261 * map records are removed
4262 */
4263int try_release_extent_mapping(struct extent_map_tree *map,
4264 struct extent_io_tree *tree, struct page *page,
4265 gfp_t mask)
4266{
4267 struct extent_map *em;
4268 u64 start = page_offset(page);
4269 u64 end = start + PAGE_CACHE_SIZE - 1;
4270
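 /*
 * For larger files it is worth walking the extent map tree here and
 * dropping any maps that are not pinned and whose range is neither
 * locked nor under writeback.
 */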
4271 if (gfpflags_allow_blocking(mask) &&
4272 page->mapping->host->i_size > SZ_16M) {
4273 u64 len;
4274 while (start <= end) {
4275 len = end - start + 1;
4276 write_lock(&map->lock);
4277 em = lookup_extent_mapping(map, start, len);
4278 if (!em) {
4279 write_unlock(&map->lock);
4280 break;
4281 }
4282 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4283 em->start != start) {
4284 write_unlock(&map->lock);
4285 free_extent_map(em);
4286 break;
4287 }
4288 if (!test_range_bit(tree, em->start,
4289 extent_map_end(em) - 1,
4290 EXTENT_LOCKED | EXTENT_WRITEBACK,
4291 0, NULL)) {
4292 remove_extent_mapping(map, em);
4293 /* once for the rb tree */
4294 free_extent_map(em);
4295 }
4296 start = extent_map_end(em);
4297 write_unlock(&map->lock);
4298
4299 /* once for us */
4300 free_extent_map(em);
4301 }
4302 }
4303 return try_release_extent_state(map, tree, page, mask);
4304}
4305
4306/*
4307 * helper function for fiemap, which doesn't want to see any holes.
4308 * This maps until we find something past 'last'
4309 */
4310static struct extent_map *get_extent_skip_holes(struct inode *inode,
4311 u64 offset,
4312 u64 last,
4313 get_extent_t *get_extent)
4314{
4315 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
4316 struct extent_map *em;
4317 u64 len;
4318
4319 if (offset >= last)
4320 return NULL;
4321
4322 while (1) {
4323 len = last - offset;
4324 if (len == 0)
4325 break;
4326 len = ALIGN(len, sectorsize);
4327 em = get_extent(inode, NULL, 0, offset, len, 0);
4328 if (IS_ERR_OR_NULL(em))
4329 return em;
4330
4331 /* if this isn't a hole return it */
4332 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
4333 em->block_start != EXTENT_MAP_HOLE) {
4334 return em;
4335 }
4336
4337 /* this is a hole, advance to the next extent */
4338 offset = extent_map_end(em);
4339 free_extent_map(em);
4340 if (offset >= last)
4341 break;
4342 }
4343 return NULL;
4344}
4345
4346int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4347 __u64 start, __u64 len, get_extent_t *get_extent)
4348{
4349 int ret = 0;
4350 u64 off = start;
4351 u64 max = start + len;
4352 u32 flags = 0;
4353 u32 found_type;
4354 u64 last;
4355 u64 last_for_get_extent = 0;
4356 u64 disko = 0;
4357 u64 isize = i_size_read(inode);
4358 struct btrfs_key found_key;
4359 struct extent_map *em = NULL;
4360 struct extent_state *cached_state = NULL;
4361 struct btrfs_path *path;
4362 struct btrfs_root *root = BTRFS_I(inode)->root;
4363 int end = 0;
4364 u64 em_start = 0;
4365 u64 em_len = 0;
4366 u64 em_end = 0;
4367
4368 if (len == 0)
4369 return -EINVAL;
4370
4371 path = btrfs_alloc_path();
4372 if (!path)
4373 return -ENOMEM;
4374 path->leave_spinning = 1;
4375
4376 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4377 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4378
4379 /*
4380 * lookup the last file extent.  We're not using i_size here
4381 * because there might be preallocation past i_size
4382 */
4383 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
4384 0);
4385 if (ret < 0) {
4386 btrfs_free_path(path);
4387 return ret;
4388 }
4389 WARN_ON(!ret);
4390 path->slots[0]--;
4391 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4392 found_type = found_key.type;
4393
4394 /* No extents, but there might be delalloc bits */
4395 if (found_key.objectid != btrfs_ino(inode) ||
4396 found_type != BTRFS_EXTENT_DATA_KEY) {
4397 /* have to trust i_size as the end */
4398 last = (u64)-1;
4399 last_for_get_extent = isize;
4400 } else {
4401 /*
4402 * remember the start of the last extent.  There are a
4403 * bunch of different factors that go into the length of the
4404 * extent, so it's much less complex to remember where it started
4405 */
4406 last = found_key.offset;
4407 last_for_get_extent = last + 1;
4408 }
4409 btrfs_release_path(path);
4410
4411 /*
4412 * we might have some extents allocated but more delalloc past those
4413 * extents.  so, we trust isize unless the start of the last extent is
4414 * beyond isize
4415 */
4416 if (last < isize) {
4417 last = (u64)-1;
4418 last_for_get_extent = isize;
4419 }
4420
4421 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4422 &cached_state);
4423
4424 em = get_extent_skip_holes(inode, start, last_for_get_extent,
4425 get_extent);
4426 if (!em)
4427 goto out;
4428 if (IS_ERR(em)) {
4429 ret = PTR_ERR(em);
4430 goto out;
4431 }
4432
4433 while (!end) {
4434 u64 offset_in_extent = 0;
4435
4436 /* break if the extent we found is outside the range */
4437 if (em->start >= max || extent_map_end(em) < off)
4438 break;
4439
4440
4441 /*
4442 * get_extent may return an extent that starts before our
4443 * requested range.  We have to make sure the ranges we
4444 * return to fiemap always move forward and don't overlap.
4445 */
4446 em_start = max(em->start, off);
4447
4448
4449 /*
4450 * record the offset from the start of the extent for adjusting
4451 * the disk offset below.  Skip this for compressed extents,
4452 * whose in-ram offset may be past what is allocated on disk.
4453 */
4454 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4455 offset_in_extent = em_start - em->start;
4456 em_end = extent_map_end(em);
4457 em_len = em_end - em_start;
4458 disko = 0;
4459 flags = 0;
4460
4461 /*
4462 * bump off for our next call to get_extent
4463 */
4464 off = extent_map_end(em);
4465 if (off >= max)
4466 end = 1;
4467
4468 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4469 end = 1;
4470 flags |= FIEMAP_EXTENT_LAST;
4471 } else if (em->block_start == EXTENT_MAP_INLINE) {
4472 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4473 FIEMAP_EXTENT_NOT_ALIGNED);
4474 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4475 flags |= (FIEMAP_EXTENT_DELALLOC |
4476 FIEMAP_EXTENT_UNKNOWN);
4477 } else if (fieinfo->fi_extents_max) {
4478 u64 bytenr = em->block_start -
4479 (em->start - em->orig_start);
4480
4481 disko = em->block_start + offset_in_extent;
4482
4483 /*
4484 * As btrfs supports shared space, this information
4485 * can be exported to userspace tools via
4486 * flag FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
4487 * then we're just getting a count and we can skip the
4488 * lookup stuff.
4489 */
4490 ret = btrfs_check_shared(NULL, root->fs_info,
4491 root->objectid,
4492 btrfs_ino(inode), bytenr);
4493 if (ret < 0)
4494 goto out_free;
4495 if (ret)
4496 flags |= FIEMAP_EXTENT_SHARED;
4497 ret = 0;
4498 }
4499 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4500 flags |= FIEMAP_EXTENT_ENCODED;
4501 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4502 flags |= FIEMAP_EXTENT_UNWRITTEN;
4503
4504 free_extent_map(em);
4505 em = NULL;
4506 if ((em_start >= last) || em_len == (u64)-1 ||
4507 (last == (u64)-1 && isize <= em_end)) {
4508 flags |= FIEMAP_EXTENT_LAST;
4509 end = 1;
4510 }
4511
4512 /* now scan forward to see if this is really the last extent. */
4513 em = get_extent_skip_holes(inode, off, last_for_get_extent,
4514 get_extent);
4515 if (IS_ERR(em)) {
4516 ret = PTR_ERR(em);
4517 goto out;
4518 }
4519 if (!em) {
4520 flags |= FIEMAP_EXTENT_LAST;
4521 end = 1;
4522 }
4523 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
4524 em_len, flags);
4525 if (ret) {
4526 if (ret == 1)
4527 ret = 0;
4528 goto out_free;
4529 }
4530 }
4531out_free:
4532 free_extent_map(em);
4533out:
4534 btrfs_free_path(path);
4535 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4536 &cached_state, GFP_NOFS);
4537 return ret;
4538}
4539
4540static void __free_extent_buffer(struct extent_buffer *eb)
4541{
4542 btrfs_leak_debug_del(&eb->leak_list);
4543 kmem_cache_free(extent_buffer_cache, eb);
4544}
4545
4546int extent_buffer_under_io(struct extent_buffer *eb)
4547{
4548 return (atomic_read(&eb->io_pages) ||
4549 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4550 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4551}
4552
4553/*
4554 * Release all pages attached to the extent buffer.
4555 */
4556static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
4557{
4558 unsigned long index;
4559 struct page *page;
4560 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4561
4562 BUG_ON(extent_buffer_under_io(eb));
4563
4564 index = num_extent_pages(eb->start, eb->len);
4565 if (index == 0)
4566 return;
4567
4568 do {
4569 index--;
4570 page = eb->pages[index];
4571 if (!page)
4572 continue;
4573 if (mapped)
4574 spin_lock(&page->mapping->private_lock);
4575
4576 /*
4577 * We do this since we'll remove the pages after we've removed
4578 * the eb from the radix tree, so we could race and have this
4579 * page now attached to the new eb.  So only clear page_private
4580 * if it's still connected to this eb.
4581 */
4582 if (PagePrivate(page) &&
4583 page->private == (unsigned long)eb) {
4584 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4585 BUG_ON(PageDirty(page));
4586 BUG_ON(PageWriteback(page));
4587
4588 /* We need to make sure we haven't been
4589 * attached to a new eb.
4590 */
4591 ClearPagePrivate(page);
4592 set_page_private(page, 0);
4593 /* one for the page private */
4594 page_cache_release(page);
4595 }
4596
4597 if (mapped)
4598 spin_unlock(&page->mapping->private_lock);
4599
4600 /* one for when we allocated the page */
4601 page_cache_release(page);
4602 } while (index != 0);
4603}
4604
4605/*
4606 * Helper for releasing the extent buffer: drop all of its pages and free it.
4607 */
4608static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4609{
4610 btrfs_release_extent_buffer_page(eb);
4611 __free_extent_buffer(eb);
4612}
4613
4614static struct extent_buffer *
4615__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4616 unsigned long len)
4617{
4618 struct extent_buffer *eb = NULL;
4619
4620 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4621 eb->start = start;
4622 eb->len = len;
4623 eb->fs_info = fs_info;
4624 eb->bflags = 0;
4625 rwlock_init(&eb->lock);
4626 atomic_set(&eb->write_locks, 0);
4627 atomic_set(&eb->read_locks, 0);
4628 atomic_set(&eb->blocking_readers, 0);
4629 atomic_set(&eb->blocking_writers, 0);
4630 atomic_set(&eb->spinning_readers, 0);
4631 atomic_set(&eb->spinning_writers, 0);
4632 eb->lock_nested = 0;
4633 init_waitqueue_head(&eb->write_lock_wq);
4634 init_waitqueue_head(&eb->read_lock_wq);
4635
4636 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4637
4638 spin_lock_init(&eb->refs_lock);
4639 atomic_set(&eb->refs, 1);
4640 atomic_set(&eb->io_pages, 0);
4641
4642 /*
4643 * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
4644 */
4645 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4646 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4647 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4648
4649 return eb;
4650}
4651
4652struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4653{
4654 unsigned long i;
4655 struct page *p;
4656 struct extent_buffer *new;
4657 unsigned long num_pages = num_extent_pages(src->start, src->len);
4658
4659 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4660 if (new == NULL)
4661 return NULL;
4662
4663 for (i = 0; i < num_pages; i++) {
4664 p = alloc_page(GFP_NOFS);
4665 if (!p) {
4666 btrfs_release_extent_buffer(new);
4667 return NULL;
4668 }
4669 attach_extent_buffer_page(new, p);
4670 WARN_ON(PageDirty(p));
4671 SetPageUptodate(p);
4672 new->pages[i] = p;
4673 }
4674
4675 copy_extent_buffer(new, src, 0, 0, src->len);
4676 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4677 set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
4678
4679 return new;
4680}
4681
4682struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4683 u64 start, unsigned long len)
4684{
4685 struct extent_buffer *eb;
4686 unsigned long num_pages;
4687 unsigned long i;
4688
4689 num_pages = num_extent_pages(start, len);
4690
4691 eb = __alloc_extent_buffer(fs_info, start, len);
4692 if (!eb)
4693 return NULL;
4694
4695 for (i = 0; i < num_pages; i++) {
4696 eb->pages[i] = alloc_page(GFP_NOFS);
4697 if (!eb->pages[i])
4698 goto err;
4699 }
4700 set_extent_buffer_uptodate(eb);
4701 btrfs_set_header_nritems(eb, 0);
4702 set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4703
4704 return eb;
4705err:
4706 for (; i > 0; i--)
4707 __free_page(eb->pages[i - 1]);
4708 __free_extent_buffer(eb);
4709 return NULL;
4710}
4711
4712struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4713 u64 start)
4714{
4715 unsigned long len;
4716
4717 if (!fs_info) {
4718 /*
4719 * Called only from tests that don't always have a fs_info
4720 * available, but we know that nodesize is 4096
4721 */
4722 len = 4096;
4723 } else {
4724 len = fs_info->tree_root->nodesize;
4725 }
4726
4727 return __alloc_dummy_extent_buffer(fs_info, start, len);
4728}
4729
4730static void check_buffer_tree_ref(struct extent_buffer *eb)
4731{
4732 int refs;
4733
4734 /*
4735 * The ref bit is tricky.  We have to make sure it is set if we
4736 * have the buffer dirty, otherwise the code to free a buffer
4737 * can end up dropping a dirty page.
4738 *
4739 * Once the ref bit is set, it won't go away while the buffer is
4740 * dirty or in writeback, and it also won't go away while we
4741 * have the reference count on the eb bumped.
4742 *
4743 * We can't just set the ref bit without bumping the ref on the
4744 * eb because free_extent_buffer might see the ref bit and try
4745 * to clear it.  If this happens free_extent_buffer might end up
4746 * dropping our original ref by mistake and freeing the page
4747 * before we are able to add one more ref.
4748 *
4749 * So bump the ref count first, then set the bit.  If someone
4750 * else is racing we can safely update the bit because they
4751 * will have gotten the ref first.
4752 */
4753 refs = atomic_read(&eb->refs);
4754 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4755 return;
4756
4757 spin_lock(&eb->refs_lock);
4758 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4759 atomic_inc(&eb->refs);
4760 spin_unlock(&eb->refs_lock);
4761}
4762
4763static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4764 struct page *accessed)
4765{
4766 unsigned long num_pages, i;
4767
4768 check_buffer_tree_ref(eb);
4769
4770 num_pages = num_extent_pages(eb->start, eb->len);
4771 for (i = 0; i < num_pages; i++) {
4772 struct page *p = eb->pages[i];
4773
4774 if (p != accessed)
4775 mark_page_accessed(p);
4776 }
4777}
4778
4779struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4780 u64 start)
4781{
4782 struct extent_buffer *eb;
4783
4784 rcu_read_lock();
4785 eb = radix_tree_lookup(&fs_info->buffer_radix,
4786 start >> PAGE_CACHE_SHIFT);
4787 if (eb && atomic_inc_not_zero(&eb->refs)) {
4788 rcu_read_unlock();
4789
4790 /*
4791 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
4792 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE
4793 * and another task running free_extent_buffer() might have seen
4794 * that flag set, eb->refs == 2, that the buffer isn't under IO
4795 * (dirty and writeback flags not set) and it's still in the tree
4796 * (flag EXTENT_BUFFER_TREE_REF set), therefore being in the
4797 * process of decrementing the extent buffer's reference count
4798 * twice.  So here we could race and increment the eb's reference
4799 * count, clear its stale flag, mark it as dirty and drop our
4800 * reference before the other task finishes executing
4801 * free_extent_buffer(), which would later result in an attempt
4802 * to free an extent buffer that is dirty.
4803 */
4804 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4805 spin_lock(&eb->refs_lock);
4806 spin_unlock(&eb->refs_lock);
4807 }
4808 mark_extent_buffer_accessed(eb, NULL);
4809 return eb;
4810 }
4811 rcu_read_unlock();
4812
4813 return NULL;
4814}
4815
4816#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4817struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4818 u64 start)
4819{
4820 struct extent_buffer *eb, *exists = NULL;
4821 int ret;
4822
4823 eb = find_extent_buffer(fs_info, start);
4824 if (eb)
4825 return eb;
4826 eb = alloc_dummy_extent_buffer(fs_info, start);
4827 if (!eb)
4828 return NULL;
4829 eb->fs_info = fs_info;
4830again:
4831 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
4832 if (ret)
4833 goto free_eb;
4834 spin_lock(&fs_info->buffer_lock);
4835 ret = radix_tree_insert(&fs_info->buffer_radix,
4836 start >> PAGE_CACHE_SHIFT, eb);
4837 spin_unlock(&fs_info->buffer_lock);
4838 radix_tree_preload_end();
4839 if (ret == -EEXIST) {
4840 exists = find_extent_buffer(fs_info, start);
4841 if (exists)
4842 goto free_eb;
4843 else
4844 goto again;
4845 }
4846 check_buffer_tree_ref(eb);
4847 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4848
4849 /*
4850 * We will free dummy extent buffers if they come into
4851 * free_extent_buffer with a ref count of 2, but if we are using this
4852 * we want the buffers to stay in memory until we're done with them,
4853 * so bump the ref count again.
4854 */
4855 atomic_inc(&eb->refs);
4856 return eb;
4857free_eb:
4858 btrfs_release_extent_buffer(eb);
4859 return exists;
4860}
4861#endif
4862
4863struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4864 u64 start)
4865{
4866 unsigned long len = fs_info->tree_root->nodesize;
4867 unsigned long num_pages = num_extent_pages(start, len);
4868 unsigned long i;
4869 unsigned long index = start >> PAGE_CACHE_SHIFT;
4870 struct extent_buffer *eb;
4871 struct extent_buffer *exists = NULL;
4872 struct page *p;
4873 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4874 int uptodate = 1;
4875 int ret;
4876
4877 eb = find_extent_buffer(fs_info, start);
4878 if (eb)
4879 return eb;
4880
4881 eb = __alloc_extent_buffer(fs_info, start, len);
4882 if (!eb)
4883 return NULL;
4884
4885 for (i = 0; i < num_pages; i++, index++) {
4886 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4887 if (!p)
4888 goto free_eb;
4889
4890 spin_lock(&mapping->private_lock);
4891 if (PagePrivate(p)) {
4892 /*
4893 * We could have already allocated an eb for this page
4894 * and attached one so lets see if we can get a ref on
4895 * the existing eb, and if we can we know it's good and
4896 * we can just return that one, else we know we can just
4897 * overwrite page->private.
4898 */
4899 exists = (struct extent_buffer *)p->private;
4900 if (atomic_inc_not_zero(&exists->refs)) {
4901 spin_unlock(&mapping->private_lock);
4902 unlock_page(p);
4903 page_cache_release(p);
4904 mark_extent_buffer_accessed(exists, p);
4905 goto free_eb;
4906 }
4907 exists = NULL;
4908
/*
 * The old extent buffer is going away.  Clear the private bit so that
 * attach_extent_buffer_page() below does not complain, and drop the page
 * reference the old buffer was holding on this page.
 */
4913 ClearPagePrivate(p);
4914 WARN_ON(PageDirty(p));
4915 page_cache_release(p);
4916 }
4917 attach_extent_buffer_page(eb, p);
4918 spin_unlock(&mapping->private_lock);
4919 WARN_ON(PageDirty(p));
4920 eb->pages[i] = p;
4921 if (!PageUptodate(p))
4922 uptodate = 0;
4923
/*
 * The pages stay locked here on purpose; see the comment after the radix
 * tree insertion below for why unlocking them is deferred until the buffer
 * is fully set up.
 */
4928 }
4929 if (uptodate)
4930 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4931again:
4932 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
4933 if (ret)
4934 goto free_eb;
4935
4936 spin_lock(&fs_info->buffer_lock);
4937 ret = radix_tree_insert(&fs_info->buffer_radix,
4938 start >> PAGE_CACHE_SHIFT, eb);
4939 spin_unlock(&fs_info->buffer_lock);
4940 radix_tree_preload_end();
4941 if (ret == -EEXIST) {
4942 exists = find_extent_buffer(fs_info, start);
4943 if (exists)
4944 goto free_eb;
4945 else
4946 goto again;
4947 }
4948
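/* Take the reference that is owned by the radix tree (the "tree ref"). */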
4949 check_buffer_tree_ref(eb);
4950 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4951
/*
 * The buffer is now discoverable in the radix tree.  Mark the first page
 * checked, clear the checked bit on the remaining pages and unlock them all.
 * Unlocking is deferred until this point so that btree_releasepage() cannot
 * release the pages while the buffer is only partially set up.
 */
4961 SetPageChecked(eb->pages[0]);
4962 for (i = 1; i < num_pages; i++) {
4963 p = eb->pages[i];
4964 ClearPageChecked(p);
4965 unlock_page(p);
4966 }
4967 unlock_page(eb->pages[0]);
4968 return eb;
4969
4970free_eb:
4971 WARN_ON(!atomic_dec_and_test(&eb->refs));
4972 for (i = 0; i < num_pages; i++) {
4973 if (eb->pages[i])
4974 unlock_page(eb->pages[i]);
4975 }
4976
4977 btrfs_release_extent_buffer(eb);
4978 return exists;
4979}
4980
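/*
 * RCU callback: actually free the extent_buffer structure once all RCU
 * readers that might still be looking at it are done.
 */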
4981static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4982{
4983 struct extent_buffer *eb =
4984 container_of(head, struct extent_buffer, rcu_head);
4985
4986 __free_extent_buffer(eb);
4987}
4988
/* Expects eb->refs_lock to be held by the caller; releases it before returning. */
4990static int release_extent_buffer(struct extent_buffer *eb)
4991{
4992 WARN_ON(atomic_read(&eb->refs) == 0);
4993 if (atomic_dec_and_test(&eb->refs)) {
4994 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
4995 struct btrfs_fs_info *fs_info = eb->fs_info;
4996
4997 spin_unlock(&eb->refs_lock);
4998
4999 spin_lock(&fs_info->buffer_lock);
5000 radix_tree_delete(&fs_info->buffer_radix,
5001 eb->start >> PAGE_CACHE_SHIFT);
5002 spin_unlock(&fs_info->buffer_lock);
5003 } else {
5004 spin_unlock(&eb->refs_lock);
5005 }
5006
/* Nothing else holds a reference, so it is safe to release the pages now. */
5008 btrfs_release_extent_buffer_page(eb);
5009#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5010 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
5011 __free_extent_buffer(eb);
5012 return 1;
5013 }
5014#endif
5015 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5016 return 1;
5017 }
5018 spin_unlock(&eb->refs_lock);
5019
5020 return 0;
5021}
5022
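/*
 * Drop one reference on @eb, freeing it if that was the last reference.  For
 * reference counts above 3 this is a plain atomic decrement; lower counts
 * need eb->refs_lock so the tree and dummy references can be handled safely.
 */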
5023void free_extent_buffer(struct extent_buffer *eb)
5024{
5025 int refs;
5026 int old;
5027 if (!eb)
5028 return;
5029
5030 while (1) {
5031 refs = atomic_read(&eb->refs);
5032 if (refs <= 3)
5033 break;
5034 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5035 if (old == refs)
5036 return;
5037 }
5038
5039 spin_lock(&eb->refs_lock);
5040 if (atomic_read(&eb->refs) == 2 &&
5041 test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
5042 atomic_dec(&eb->refs);
5043
5044 if (atomic_read(&eb->refs) == 2 &&
5045 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5046 !extent_buffer_under_io(eb) &&
5047 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5048 atomic_dec(&eb->refs);
5049
/*
 * Finally drop the caller's reference; release_extent_buffer() will free the
 * buffer once the last reference is gone.
 */
5054 release_extent_buffer(eb);
5055}
5056
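/*
 * Like free_extent_buffer(), but also marks the buffer stale and drops the
 * radix tree reference if only the caller's and the tree's references remain
 * and no I/O is in flight, so the buffer goes away as soon as the caller's
 * reference is released.
 */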
5057void free_extent_buffer_stale(struct extent_buffer *eb)
5058{
5059 if (!eb)
5060 return;
5061
5062 spin_lock(&eb->refs_lock);
5063 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5064
5065 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5066 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5067 atomic_dec(&eb->refs);
5068 release_extent_buffer(eb);
5069}
5070
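/*
 * Clear the dirty state of every page backing @eb and remove the dirty tags
 * from the page cache radix tree so writeback will not pick the pages up.
 */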
5071void clear_extent_buffer_dirty(struct extent_buffer *eb)
5072{
5073 unsigned long i;
5074 unsigned long num_pages;
5075 struct page *page;
5076
5077 num_pages = num_extent_pages(eb->start, eb->len);
5078
5079 for (i = 0; i < num_pages; i++) {
5080 page = eb->pages[i];
5081 if (!PageDirty(page))
5082 continue;
5083
5084 lock_page(page);
5085 WARN_ON(!PagePrivate(page));
5086
5087 clear_page_dirty_for_io(page);
5088 spin_lock_irq(&page->mapping->tree_lock);
5089 if (!PageDirty(page)) {
5090 radix_tree_tag_clear(&page->mapping->page_tree,
5091 page_index(page),
5092 PAGECACHE_TAG_DIRTY);
5093 }
5094 spin_unlock_irq(&page->mapping->tree_lock);
5095 ClearPageError(page);
5096 unlock_page(page);
5097 }
5098 WARN_ON(atomic_read(&eb->refs) == 0);
5099}
5100
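/*
 * Mark @eb and all of its pages dirty.  Returns the previous dirty state of
 * the buffer.
 */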
5101int set_extent_buffer_dirty(struct extent_buffer *eb)
5102{
5103 unsigned long i;
5104 unsigned long num_pages;
5105 int was_dirty = 0;
5106
5107 check_buffer_tree_ref(eb);
5108
5109 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5110
5111 num_pages = num_extent_pages(eb->start, eb->len);
5112 WARN_ON(atomic_read(&eb->refs) == 0);
5113 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5114
5115 for (i = 0; i < num_pages; i++)
5116 set_page_dirty(eb->pages[i]);
5117 return was_dirty;
5118}
5119
5120void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5121{
5122 unsigned long i;
5123 struct page *page;
5124 unsigned long num_pages;
5125
5126 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5127 num_pages = num_extent_pages(eb->start, eb->len);
5128 for (i = 0; i < num_pages; i++) {
5129 page = eb->pages[i];
5130 if (page)
5131 ClearPageUptodate(page);
5132 }
5133}
5134
5135void set_extent_buffer_uptodate(struct extent_buffer *eb)
5136{
5137 unsigned long i;
5138 struct page *page;
5139 unsigned long num_pages;
5140
5141 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5142 num_pages = num_extent_pages(eb->start, eb->len);
5143 for (i = 0; i < num_pages; i++) {
5144 page = eb->pages[i];
5145 SetPageUptodate(page);
5146 }
5147}
5148
5149int extent_buffer_uptodate(struct extent_buffer *eb)
5150{
5151 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5152}
5153
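/*
 * Read the buffer's pages from disk, starting at absolute byte offset @start
 * (0 means read the whole buffer).  With WAIT_NONE the function backs off
 * instead of blocking on an already locked page; with WAIT_COMPLETE it waits
 * for the reads to finish and returns -EIO if any page failed to become
 * uptodate.
 */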
5154int read_extent_buffer_pages(struct extent_io_tree *tree,
5155 struct extent_buffer *eb, u64 start, int wait,
5156 get_extent_t *get_extent, int mirror_num)
5157{
5158 unsigned long i;
5159 unsigned long start_i;
5160 struct page *page;
5161 int err;
5162 int ret = 0;
5163 int locked_pages = 0;
5164 int all_uptodate = 1;
5165 unsigned long num_pages;
5166 unsigned long num_reads = 0;
5167 struct bio *bio = NULL;
5168 unsigned long bio_flags = 0;
5169
5170 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5171 return 0;
5172
5173 if (start) {
5174 WARN_ON(start < eb->start);
5175 start_i = (start >> PAGE_CACHE_SHIFT) -
5176 (eb->start >> PAGE_CACHE_SHIFT);
5177 } else {
5178 start_i = 0;
5179 }
5180
5181 num_pages = num_extent_pages(eb->start, eb->len);
5182 for (i = start_i; i < num_pages; i++) {
5183 page = eb->pages[i];
5184 if (wait == WAIT_NONE) {
5185 if (!trylock_page(page))
5186 goto unlock_exit;
5187 } else {
5188 lock_page(page);
5189 }
5190 locked_pages++;
5191 if (!PageUptodate(page)) {
5192 num_reads++;
5193 all_uptodate = 0;
5194 }
5195 }
5196 if (all_uptodate) {
5197 if (start_i == 0)
5198 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5199 goto unlock_exit;
5200 }
5201
5202 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5203 eb->read_mirror = 0;
5204 atomic_set(&eb->io_pages, num_reads);
5205 for (i = start_i; i < num_pages; i++) {
5206 page = eb->pages[i];
5207 if (!PageUptodate(page)) {
5208 ClearPageError(page);
5209 err = __extent_read_full_page(tree, page,
5210 get_extent, &bio,
5211 mirror_num, &bio_flags,
5212 READ | REQ_META);
5213 if (err)
5214 ret = err;
5215 } else {
5216 unlock_page(page);
5217 }
5218 }
5219
5220 if (bio) {
5221 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
5222 bio_flags);
5223 if (err)
5224 return err;
5225 }
5226
5227 if (ret || wait != WAIT_COMPLETE)
5228 return ret;
5229
5230 for (i = start_i; i < num_pages; i++) {
5231 page = eb->pages[i];
5232 wait_on_page_locked(page);
5233 if (!PageUptodate(page))
5234 ret = -EIO;
5235 }
5236
5237 return ret;
5238
5239unlock_exit:
5240 i = start_i;
5241 while (locked_pages > 0) {
5242 page = eb->pages[i];
5243 i++;
5244 unlock_page(page);
5245 locked_pages--;
5246 }
5247 return ret;
5248}
5249
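/*
 * Copy @len bytes, starting at byte offset @start within the buffer, from
 * @eb's pages into @dstv.
 */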
5250void read_extent_buffer(struct extent_buffer *eb, void *dstv,
5251 unsigned long start,
5252 unsigned long len)
5253{
5254 size_t cur;
5255 size_t offset;
5256 struct page *page;
5257 char *kaddr;
5258 char *dst = (char *)dstv;
5259 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5260 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5261
5262 WARN_ON(start > eb->len);
5263 WARN_ON(start + len > eb->start + eb->len);
5264
5265 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5266
5267 while (len > 0) {
5268 page = eb->pages[i];
5269
5270 cur = min(len, (PAGE_CACHE_SIZE - offset));
5271 kaddr = page_address(page);
5272 memcpy(dst, kaddr + offset, cur);
5273
5274 dst += cur;
5275 len -= cur;
5276 offset = 0;
5277 i++;
5278 }
5279}
5280
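/*
 * Same as read_extent_buffer() but copies into a user space buffer.  Returns
 * -EFAULT if the copy faults, 0 otherwise.
 */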
5281int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
5282 unsigned long start,
5283 unsigned long len)
5284{
5285 size_t cur;
5286 size_t offset;
5287 struct page *page;
5288 char *kaddr;
5289 char __user *dst = (char __user *)dstv;
5290 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5291 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5292 int ret = 0;
5293
5294 WARN_ON(start > eb->len);
5295 WARN_ON(start + len > eb->start + eb->len);
5296
5297 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5298
5299 while (len > 0) {
5300 page = eb->pages[i];
5301
5302 cur = min(len, (PAGE_CACHE_SIZE - offset));
5303 kaddr = page_address(page);
5304 if (copy_to_user(dst, kaddr + offset, cur)) {
5305 ret = -EFAULT;
5306 break;
5307 }
5308
5309 dst += cur;
5310 len -= cur;
5311 offset = 0;
5312 i++;
5313 }
5314
5315 return ret;
5316}
5317
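/*
 * Map @min_len bytes at offset @start of the buffer if they fit inside a
 * single page: *map points at the data, *map_start is the buffer offset at
 * which the mapping begins and *map_len is the number of mapped bytes.
 * Returns -EINVAL if the range straddles a page boundary or exceeds the
 * buffer length.
 */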
5318int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
5319 unsigned long min_len, char **map,
5320 unsigned long *map_start,
5321 unsigned long *map_len)
5322{
5323 size_t offset = start & (PAGE_CACHE_SIZE - 1);
5324 char *kaddr;
5325 struct page *p;
5326 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5327 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5328 unsigned long end_i = (start_offset + start + min_len - 1) >>
5329 PAGE_CACHE_SHIFT;
5330
5331 if (i != end_i)
5332 return -EINVAL;
5333
5334 if (i == 0) {
5335 offset = start_offset;
5336 *map_start = 0;
5337 } else {
5338 offset = 0;
5339 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
5340 }
5341
5342 if (start + min_len > eb->len) {
5343 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
5344 "wanted %lu %lu\n",
5345 eb->start, eb->len, start, min_len);
5346 return -EINVAL;
5347 }
5348
5349 p = eb->pages[i];
5350 kaddr = page_address(p);
5351 *map = kaddr + offset;
5352 *map_len = PAGE_CACHE_SIZE - offset;
5353 return 0;
5354}
5355
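/* memcmp() @len bytes at offset @start of the buffer against @ptrv. */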
5356int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
5357 unsigned long start,
5358 unsigned long len)
5359{
5360 size_t cur;
5361 size_t offset;
5362 struct page *page;
5363 char *kaddr;
5364 char *ptr = (char *)ptrv;
5365 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5366 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5367 int ret = 0;
5368
5369 WARN_ON(start > eb->len);
5370 WARN_ON(start + len > eb->start + eb->len);
5371
5372 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5373
5374 while (len > 0) {
5375 page = eb->pages[i];
5376
5377 cur = min(len, (PAGE_CACHE_SIZE - offset));
5378
5379 kaddr = page_address(page);
5380 ret = memcmp(ptr, kaddr + offset, cur);
5381 if (ret)
5382 break;
5383
5384 ptr += cur;
5385 len -= cur;
5386 offset = 0;
5387 i++;
5388 }
5389 return ret;
5390}
5391
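/* Copy @len bytes from @srcv into the buffer at offset @start. */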
5392void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5393 unsigned long start, unsigned long len)
5394{
5395 size_t cur;
5396 size_t offset;
5397 struct page *page;
5398 char *kaddr;
5399 char *src = (char *)srcv;
5400 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5401 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5402
5403 WARN_ON(start > eb->len);
5404 WARN_ON(start + len > eb->start + eb->len);
5405
5406 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5407
5408 while (len > 0) {
5409 page = eb->pages[i];
5410 WARN_ON(!PageUptodate(page));
5411
5412 cur = min(len, PAGE_CACHE_SIZE - offset);
5413 kaddr = page_address(page);
5414 memcpy(kaddr + offset, src, cur);
5415
5416 src += cur;
5417 len -= cur;
5418 offset = 0;
5419 i++;
5420 }
5421}
5422
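/* Fill @len bytes of the buffer, starting at offset @start, with byte @c. */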
5423void memset_extent_buffer(struct extent_buffer *eb, char c,
5424 unsigned long start, unsigned long len)
5425{
5426 size_t cur;
5427 size_t offset;
5428 struct page *page;
5429 char *kaddr;
5430 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5431 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
5432
5433 WARN_ON(start > eb->len);
5434 WARN_ON(start + len > eb->start + eb->len);
5435
5436 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
5437
5438 while (len > 0) {
5439 page = eb->pages[i];
5440 WARN_ON(!PageUptodate(page));
5441
5442 cur = min(len, PAGE_CACHE_SIZE - offset);
5443 kaddr = page_address(page);
5444 memset(kaddr + offset, c, cur);
5445
5446 len -= cur;
5447 offset = 0;
5448 i++;
5449 }
5450}
5451
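/*
 * Copy @len bytes at @src_offset in @src into @dst at @dst_offset.  The two
 * buffers are expected to have the same length.
 */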
5452void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5453 unsigned long dst_offset, unsigned long src_offset,
5454 unsigned long len)
5455{
5456 u64 dst_len = dst->len;
5457 size_t cur;
5458 size_t offset;
5459 struct page *page;
5460 char *kaddr;
5461 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5462 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5463
5464 WARN_ON(src->len != dst_len);
5465
5466 offset = (start_offset + dst_offset) &
5467 (PAGE_CACHE_SIZE - 1);
5468
5469 while (len > 0) {
5470 page = dst->pages[i];
5471 WARN_ON(!PageUptodate(page));
5472
5473 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
5474
5475 kaddr = page_address(page);
5476 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5477
5478 src_offset += cur;
5479 len -= cur;
5480 offset = 0;
5481 i++;
5482 }
5483}
5484
/*
 * The bitmap helpers below operate with byte granularity because bitmap items
 * in an extent buffer are not guaranteed to be word aligned, so a single word
 * of a bitmap may straddle two pages of the buffer.
 */
5490#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
5491#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
5492#define BITMAP_FIRST_BYTE_MASK(start) \
5493 ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
5494#define BITMAP_LAST_BYTE_MASK(nbits) \
5495 (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
5496
/*
 * eb_bitmap_offset - calculate the page index and byte offset of a given bit
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page that contains the byte holding bit @nr
 * @page_offset: return offset of that byte within the page
 */
5510static inline void eb_bitmap_offset(struct extent_buffer *eb,
5511 unsigned long start, unsigned long nr,
5512 unsigned long *page_index,
5513 size_t *page_offset)
5514{
5515 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
5516 size_t byte_offset = BIT_BYTE(nr);
5517 size_t offset;
5518
/*
 * The byte we want is the offset of the extent buffer within its first page,
 * plus the offset of the bitmap item in the buffer, plus the offset of the
 * byte within the bitmap item.
 */
5524 offset = start_offset + start + byte_offset;
5525
5526 *page_index = offset >> PAGE_CACHE_SHIFT;
5527 *page_offset = offset & (PAGE_CACHE_SIZE - 1);
5528}
5529
/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */
5536int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5537 unsigned long nr)
5538{
5539 char *kaddr;
5540 struct page *page;
5541 unsigned long i;
5542 size_t offset;
5543
5544 eb_bitmap_offset(eb, start, nr, &i, &offset);
5545 page = eb->pages[i];
5546 WARN_ON(!PageUptodate(page));
5547 kaddr = page_address(page);
5548 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5549}
5550
/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to set
 * @len: number of bits to set
 */
5558void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5559 unsigned long pos, unsigned long len)
5560{
5561 char *kaddr;
5562 struct page *page;
5563 unsigned long i;
5564 size_t offset;
5565 const unsigned int size = pos + len;
5566 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5567 unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5568
5569 eb_bitmap_offset(eb, start, pos, &i, &offset);
5570 page = eb->pages[i];
5571 WARN_ON(!PageUptodate(page));
5572 kaddr = page_address(page);
5573
5574 while (len >= bits_to_set) {
5575 kaddr[offset] |= mask_to_set;
5576 len -= bits_to_set;
5577 bits_to_set = BITS_PER_BYTE;
5578 mask_to_set = ~0U;
5579 if (++offset >= PAGE_CACHE_SIZE && len > 0) {
5580 offset = 0;
5581 page = eb->pages[++i];
5582 WARN_ON(!PageUptodate(page));
5583 kaddr = page_address(page);
5584 }
5585 }
5586 if (len) {
5587 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5588 kaddr[offset] |= mask_to_set;
5589 }
5590}
5591
/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to clear
 * @len: number of bits to clear
 */
5600void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5601 unsigned long pos, unsigned long len)
5602{
5603 char *kaddr;
5604 struct page *page;
5605 unsigned long i;
5606 size_t offset;
5607 const unsigned int size = pos + len;
5608 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5609 unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5610
5611 eb_bitmap_offset(eb, start, pos, &i, &offset);
5612 page = eb->pages[i];
5613 WARN_ON(!PageUptodate(page));
5614 kaddr = page_address(page);
5615
5616 while (len >= bits_to_clear) {
5617 kaddr[offset] &= ~mask_to_clear;
5618 len -= bits_to_clear;
5619 bits_to_clear = BITS_PER_BYTE;
5620 mask_to_clear = ~0U;
5621 if (++offset >= PAGE_CACHE_SIZE && len > 0) {
5622 offset = 0;
5623 page = eb->pages[++i];
5624 WARN_ON(!PageUptodate(page));
5625 kaddr = page_address(page);
5626 }
5627 }
5628 if (len) {
5629 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5630 kaddr[offset] &= ~mask_to_clear;
5631 }
5632}
5633
5634static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5635{
5636 unsigned long distance = (src > dst) ? src - dst : dst - src;
5637 return distance < len;
5638}
5639
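/*
 * Copy @len bytes between two extent buffer pages, falling back to memmove()
 * when source and destination are the same page and the ranges overlap.
 */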
5640static void copy_pages(struct page *dst_page, struct page *src_page,
5641 unsigned long dst_off, unsigned long src_off,
5642 unsigned long len)
5643{
5644 char *dst_kaddr = page_address(dst_page);
5645 char *src_kaddr;
5646 int must_memmove = 0;
5647
5648 if (dst_page != src_page) {
5649 src_kaddr = page_address(src_page);
5650 } else {
5651 src_kaddr = dst_kaddr;
5652 if (areas_overlap(src_off, dst_off, len))
5653 must_memmove = 1;
5654 }
5655
5656 if (must_memmove)
5657 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5658 else
5659 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5660}
5661
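/*
 * Copy @len bytes inside @dst from @src_offset to @dst_offset.  Callers that
 * may have overlapping ranges should use memmove_extent_buffer() instead.
 */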
5662void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5663 unsigned long src_offset, unsigned long len)
5664{
5665 size_t cur;
5666 size_t dst_off_in_page;
5667 size_t src_off_in_page;
5668 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5669 unsigned long dst_i;
5670 unsigned long src_i;
5671
5672 if (src_offset + len > dst->len) {
5673 btrfs_err(dst->fs_info,
5674 "memcpy bogus src_offset %lu move "
5675 "len %lu dst len %lu", src_offset, len, dst->len);
5676 BUG_ON(1);
5677 }
5678 if (dst_offset + len > dst->len) {
5679 btrfs_err(dst->fs_info,
5680 "memcpy bogus dst_offset %lu move "
5681 "len %lu dst len %lu", dst_offset, len, dst->len);
5682 BUG_ON(1);
5683 }
5684
5685 while (len > 0) {
5686 dst_off_in_page = (start_offset + dst_offset) &
5687 (PAGE_CACHE_SIZE - 1);
5688 src_off_in_page = (start_offset + src_offset) &
5689 (PAGE_CACHE_SIZE - 1);
5690
5691 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5692 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
5693
5694 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
5695 src_off_in_page));
5696 cur = min_t(unsigned long, cur,
5697 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
5698
5699 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5700 dst_off_in_page, src_off_in_page, cur);
5701
5702 src_offset += cur;
5703 dst_offset += cur;
5704 len -= cur;
5705 }
5706}
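/*
 * Like memcpy_extent_buffer() but safe for overlapping ranges: when the
 * destination is above the source the copy is done backwards, from the end
 * of the range towards the start.
 */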
5707
5708void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5709 unsigned long src_offset, unsigned long len)
5710{
5711 size_t cur;
5712 size_t dst_off_in_page;
5713 size_t src_off_in_page;
5714 unsigned long dst_end = dst_offset + len - 1;
5715 unsigned long src_end = src_offset + len - 1;
5716 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
5717 unsigned long dst_i;
5718 unsigned long src_i;
5719
5720 if (src_offset + len > dst->len) {
5721 btrfs_err(dst->fs_info, "memmove bogus src_offset %lu move "
5722 "len %lu dst len %lu", src_offset, len, dst->len);
5723 BUG_ON(1);
5724 }
5725 if (dst_offset + len > dst->len) {
5726 btrfs_err(dst->fs_info, "memmove bogus dst_offset %lu move "
5727 "len %lu dst len %lu", dst_offset, len, dst->len);
5728 BUG_ON(1);
5729 }
5730 if (dst_offset < src_offset) {
5731 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5732 return;
5733 }
5734 while (len > 0) {
5735 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
5736 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5737
5738 dst_off_in_page = (start_offset + dst_end) &
5739 (PAGE_CACHE_SIZE - 1);
5740 src_off_in_page = (start_offset + src_end) &
5741 (PAGE_CACHE_SIZE - 1);
5742
5743 cur = min_t(unsigned long, len, src_off_in_page + 1);
5744 cur = min(cur, dst_off_in_page + 1);
5745 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5746 dst_off_in_page - cur + 1,
5747 src_off_in_page - cur + 1, cur);
5748
5749 dst_end -= cur;
5750 src_end -= cur;
5751 len -= cur;
5752 }
5753}
5754
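/*
 * Called from the btree releasepage path when the VM wants to reclaim a
 * btree page.  Release the extent buffer attached to @page if nothing but
 * the radix tree holds a reference to it and it has no I/O in flight.
 * Returns non-zero if the buffer was released and the page can be freed.
 */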
5755int try_release_extent_buffer(struct page *page)
5756{
5757 struct extent_buffer *eb;
5758
/*
 * Take the mapping's private lock so that nobody can attach this page to an
 * extent buffer while we are looking at it.
 */
5763 spin_lock(&page->mapping->private_lock);
5764 if (!PagePrivate(page)) {
5765 spin_unlock(&page->mapping->private_lock);
5766 return 1;
5767 }
5768
5769 eb = (struct extent_buffer *)page->private;
5770 BUG_ON(!eb);
5771
/*
 * We need to take eb->refs_lock while still holding the page's private lock,
 * otherwise the buffer could be freed out from under us before we get a
 * chance to look at its reference count.
 */
5777 spin_lock(&eb->refs_lock);
5778 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5779 spin_unlock(&eb->refs_lock);
5780 spin_unlock(&page->mapping->private_lock);
5781 return 0;
5782 }
5783 spin_unlock(&page->mapping->private_lock);
5784
/*
 * If the tree ref bit is not set, the remaining reference is a real one held
 * by some other user, so just back off; the buffer will be freed once that
 * reference is dropped.
 */
5789 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
5790 spin_unlock(&eb->refs_lock);
5791 return 0;
5792 }
5793
5794 return release_extent_buffer(eb);
5795}
5796