1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/pagemap.h>
8#include <linux/writeback.h>
9#include <linux/blkdev.h>
10#include <linux/rbtree.h>
11#include <linux/slab.h>
12#include <linux/error-injection.h>
13#include "ctree.h"
14#include "disk-io.h"
15#include "transaction.h"
16#include "volumes.h"
17#include "locking.h"
18#include "btrfs_inode.h"
19#include "async-thread.h"
20#include "free-space-cache.h"
21#include "qgroup.h"
22#include "print-tree.h"
23#include "delalloc-space.h"
24#include "block-group.h"
25#include "backref.h"
26#include "misc.h"
27#include "subpage.h"
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
 * Number of tree nodes worth of metadata space reserved up front for
 * relocating a batch of tree blocks.
 */
#define RELOCATION_RESERVED_NODES 256
77
78
79
/*
 * Map address of a tree root block to the tree root (stored in @data).
 * The anonymous struct matches the { rb_node, bytenr } layout expected by
 * the rb_simple_* helpers.
 */
struct mapping_node {
	struct {
		struct rb_node rb_node;
		u64 bytenr;
	}; /* Use rb_node place holder */
	void *data;
};
87
/* Rbtree of mapping_node, protected by @lock. */
struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};
92
93
94
95
/*
 * Present a tree block to process, keyed by bytenr in an rbtree.
 * @key_ready indicates whether @key has been read from disk yet.
 */
struct tree_block {
	struct {
		struct rb_node rb_node;
		u64 bytenr;
	}; /* Use rb_node place holder */
	u64 owner;
	struct btrfs_key key;
	unsigned int level:8;
	unsigned int key_ready:1;
};
106
/* Max number of data extents batched into one relocation cluster. */
#define MAX_EXTENTS 128

/*
 * A run of adjacent data extents [start, end] being relocated together;
 * boundary[] records the start of each member extent.
 */
struct file_extent_cluster {
	u64 start;
	u64 end;
	u64 boundary[MAX_EXTENTS];
	unsigned int nr;
};
115
/* State for one running relocation pass, hung off fs_info->reloc_ctl. */
struct reloc_control {
	/* Block group we are relocating */
	struct btrfs_block_group *block_group;
	/* Extent tree */
	struct btrfs_root *extent_root;
	/* Inode for moving data */
	struct inode *data_inode;

	struct btrfs_block_rsv *block_rsv;

	struct btrfs_backref_cache backref_cache;

	struct file_extent_cluster cluster;
	/* Tree blocks that have been processed */
	struct extent_io_tree processed_blocks;
	/* Map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* List of reloc trees */
	struct list_head reloc_roots;
	/* List of subvolume trees that get relocated */
	struct list_head dirty_subvol_roots;
	/* Size of metadata reservation for merging reloc trees */
	u64 merging_rsv_size;
	/* Size of relocated tree nodes */
	u64 nodes_relocated;
	/* Reserved size for block group relocation */
	u64 reserved_bytes;

	u64 search_start;
	u64 extents_found;

	/* MOVE_DATA_EXTENTS or UPDATE_DATA_PTRS */
	unsigned int stage:8;
	unsigned int create_reloc_tree:1;
	unsigned int merge_reloc_tree:1;
	/* Found a file extent ref while scanning */
	unsigned int found_file_extent:1;
};
152
153
/* Stages of data relocation. */
#define MOVE_DATA_EXTENTS 0
#define UPDATE_DATA_PTRS 1
156
157static void mark_block_processed(struct reloc_control *rc,
158 struct btrfs_backref_node *node)
159{
160 u32 blocksize;
161
162 if (node->level == 0 ||
163 in_range(node->bytenr, rc->block_group->start,
164 rc->block_group->length)) {
165 blocksize = rc->extent_root->fs_info->nodesize;
166 set_extent_bits(&rc->processed_blocks, node->bytenr,
167 node->bytenr + blocksize - 1, EXTENT_DIRTY);
168 }
169 node->processed = 1;
170}
171
172
/* Initialize an empty mapping tree and its spinlock. */
static void mapping_tree_init(struct mapping_tree *tree)
{
	tree->rb_root = RB_ROOT;
	spin_lock_init(&tree->lock);
}
178
179
180
181
182static struct btrfs_backref_node *walk_up_backref(
183 struct btrfs_backref_node *node,
184 struct btrfs_backref_edge *edges[], int *index)
185{
186 struct btrfs_backref_edge *edge;
187 int idx = *index;
188
189 while (!list_empty(&node->upper)) {
190 edge = list_entry(node->upper.next,
191 struct btrfs_backref_edge, list[LOWER]);
192 edges[idx++] = edge;
193 node = edge->node[UPPER];
194 }
195 BUG_ON(node->detached);
196 *index = idx;
197 return node;
198}
199
200
201
202
203static struct btrfs_backref_node *walk_down_backref(
204 struct btrfs_backref_edge *edges[], int *index)
205{
206 struct btrfs_backref_edge *edge;
207 struct btrfs_backref_node *lower;
208 int idx = *index;
209
210 while (idx > 0) {
211 edge = edges[idx - 1];
212 lower = edge->node[LOWER];
213 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
214 idx--;
215 continue;
216 }
217 edge = list_entry(edge->list[LOWER].next,
218 struct btrfs_backref_edge, list[LOWER]);
219 edges[idx - 1] = edge;
220 *index = idx;
221 return edge->node[UPPER];
222 }
223 *index = 0;
224 return NULL;
225}
226
/*
 * Re-key a cached backref node to its new bytenr by removing and
 * re-inserting it in the cache rbtree.  A collision means the cache is
 * corrupted, which is fatal (btrfs_backref_panic).
 */
static void update_backref_node(struct btrfs_backref_cache *cache,
				struct btrfs_backref_node *node, u64 bytenr)
{
	struct rb_node *rb_node;
	rb_erase(&node->rb_node, &cache->rb_root);
	node->bytenr = bytenr;
	rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
	if (rb_node)
		btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
}
237
238
239
240
/*
 * Update the backref cache after a transaction commit.
 *
 * Returns 0 if the cache is still current for this transaction, 1 if
 * stale entries were flushed (node bytenrs re-keyed, detached nodes
 * dropped) and callers should re-lookup.
 */
static int update_backref_cache(struct btrfs_trans_handle *trans,
				struct btrfs_backref_cache *cache)
{
	struct btrfs_backref_node *node;
	int level = 0;

	if (cache->last_trans == 0) {
		cache->last_trans = trans->transid;
		return 0;
	}

	if (cache->last_trans == trans->transid)
		return 0;

	/*
	 * Detached nodes are used to avoid unnecessary backref lookup.
	 * Transaction commit changes the extent tree, so the detached
	 * nodes are no longer useful.
	 */
	while (!list_empty(&cache->detached)) {
		node = list_entry(cache->detached.next,
				  struct btrfs_backref_node, list);
		btrfs_backref_cleanup_node(cache, node);
	}

	/* Re-key nodes whose blocks were COWed to a new location. */
	while (!list_empty(&cache->changed)) {
		node = list_entry(cache->changed.next,
				  struct btrfs_backref_node, list);
		list_del_init(&node->list);
		BUG_ON(node->pending);
		update_backref_node(cache, node, node->new_bytenr);
	}

	/*
	 * Some nodes can be left in the pending list if there were
	 * errors during processing the pending nodes.
	 */
	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		list_for_each_entry(node, &cache->pending[level], list) {
			BUG_ON(!node->pending);
			if (node->bytenr == node->new_bytenr)
				continue;
			update_backref_node(cache, node, node->new_bytenr);
		}
	}

	cache->last_trans = 0;
	return 1;
}
290
291static bool reloc_root_is_dead(struct btrfs_root *root)
292{
293
294
295
296
297
298 smp_rmb();
299 if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
300 return true;
301 return false;
302}
303
304
305
306
307
308
309
310
311
312static bool have_reloc_root(struct btrfs_root *root)
313{
314 if (reloc_root_is_dead(root))
315 return false;
316 if (!root->reloc_root)
317 return false;
318 return true;
319}
320
/*
 * Decide whether backref walking should ignore @root's reloc root.
 * Returns 1 when the reloc root is irrelevant for relocation purposes,
 * 0 when it must be considered.
 */
int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;

	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		return 0;

	/* This root has been merged with its reloc tree, we can ignore it */
	if (reloc_root_is_dead(root))
		return 1;

	reloc_root = root->reloc_root;
	if (!reloc_root)
		return 0;

	if (btrfs_header_generation(reloc_root->commit_root) ==
	    root->fs_info->running_transaction->transid)
		return 0;

	/*
	 * If there is a reloc tree and it was created in a previous
	 * transaction, backref lookup can find the reloc tree, so the
	 * backref node for the fs tree root is useless for relocation.
	 */
	return 1;
}
347
348
349
350
/*
 * Find a reloc tree by the address of its (commit) root block.
 * Returns the root with a reference grabbed, or NULL if not found.
 */
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct reloc_control *rc = fs_info->reloc_ctl;
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct btrfs_root *root = NULL;

	ASSERT(rc);
	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		root = (struct btrfs_root *)node->data;
	}
	spin_unlock(&rc->reloc_root_tree.lock);
	/* btrfs_grab_root(NULL) returns NULL, so a miss is handled here too. */
	return btrfs_grab_root(root);
}
368
369
370
371
372
373
374
375
376
377
378
379
380
381
/*
 * Drain cache->useless_node, tearing down every node on it (these are
 * nodes found to be irrelevant for relocation, e.g. roots that should
 * not be tracked).
 *
 * Returns true if @node itself was among the useless nodes, meaning the
 * caller must not use it anymore.
 */
static bool handle_useless_nodes(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct list_head *useless_node = &cache->useless_node;
	bool ret = false;

	while (!list_empty(useless_node)) {
		struct btrfs_backref_node *cur;

		cur = list_first_entry(useless_node, struct btrfs_backref_node,
				 list);
		list_del_init(&cur->list);

		/* Only tree root nodes can be added to @useless_node */
		ASSERT(list_empty(&cur->upper));

		if (cur == node)
			ret = true;

		/* The node is the lowest node */
		if (cur->lowest) {
			list_del_init(&cur->lower);
			cur->lowest = 0;
		}

		/* Cleanup the lower edges */
		while (!list_empty(&cur->lower)) {
			struct btrfs_backref_edge *edge;
			struct btrfs_backref_node *lower;

			edge = list_entry(cur->lower.next,
					struct btrfs_backref_edge, list[UPPER]);
			list_del(&edge->list[UPPER]);
			list_del(&edge->list[LOWER]);
			lower = edge->node[LOWER];
			btrfs_backref_free_edge(cache, edge);

			/* Child node is also orphan, queue it for cleanup */
			if (list_empty(&lower->upper))
				list_add(&lower->list, useless_node);
		}

		/* Mark this block processed for relocation */
		mark_block_processed(rc, cur);

		/*
		 * Backref nodes for tree leaves are deleted from the cache.
		 * Backref nodes for upper level tree blocks are left in the
		 * cache to avoid unnecessary backref lookup.
		 */
		if (cur->level > 0) {
			list_add(&cur->list, &cache->detached);
			cur->detached = 1;
		} else {
			rb_erase(&cur->rb_node, &cache->rb_root);
			btrfs_backref_free_node(cache, cur);
		}
	}
	return ret;
}
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
/*
 * Build a backref tree for a given tree block.  The root of the backref
 * tree corresponds to the tree block; leaves of the backref tree
 * correspond to roots of b-trees that reference the block.
 *
 * The basic idea is to check backrefs of the given block to find upper
 * level blocks that reference it, then check backrefs of those upper
 * level blocks recursively.  The recursion stops when a tree root is
 * reached or backrefs for the block are already cached.
 *
 * Returns the backref node for @bytenr (NULL if it turned out to be
 * useless), or an ERR_PTR on failure.
 */
static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
			struct reloc_control *rc, struct btrfs_key *node_key,
			int level, u64 bytenr)
{
	struct btrfs_backref_iter *iter;
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	/* For searching parent of TREE_BLOCK_REF */
	struct btrfs_path *path;
	struct btrfs_backref_node *cur;
	struct btrfs_backref_node *node = NULL;
	struct btrfs_backref_edge *edge;
	int ret;
	int err = 0;

	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
	if (!iter)
		return ERR_PTR(-ENOMEM);
	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	node = btrfs_backref_alloc_node(cache, bytenr, level);
	if (!node) {
		err = -ENOMEM;
		goto out;
	}

	node->lowest = 1;
	cur = node;

	/* Breadth-first search to build backref cache */
	do {
		ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
						  cur);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		edge = list_first_entry_or_null(&cache->pending_edge,
				struct btrfs_backref_edge, list[UPPER]);
		/*
		 * The pending list isn't empty, take the first block to
		 * process
		 */
		if (edge) {
			list_del_init(&edge->list[UPPER]);
			cur = edge->node[UPPER];
		}
	} while (edge);

	/* Finish the upper linkage of newly added edges/nodes */
	ret = btrfs_backref_finish_upper_links(cache, node);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (handle_useless_nodes(rc, node))
		node = NULL;
out:
	btrfs_backref_iter_free(iter);
	btrfs_free_path(path);
	if (err) {
		btrfs_backref_error_cleanup(cache, node);
		return ERR_PTR(err);
	}
	ASSERT(!node || !node->detached);
	ASSERT(list_empty(&cache->useless_node) &&
	       list_empty(&cache->pending_edge));
	return node;
}
530
531
532
533
534
535
/*
 * Helper to add a backref node for the newly created snapshot @dest.
 * The backref node is created by cloning the backref node that
 * corresponds to the root of the source tree @src.
 *
 * Returns 0 on success (including when there is nothing to clone),
 * -ENOMEM on allocation failure.
 */
static int clone_backref_node(struct btrfs_trans_handle *trans,
			      struct reloc_control *rc,
			      struct btrfs_root *src,
			      struct btrfs_root *dest)
{
	struct btrfs_root *reloc_root = src->reloc_root;
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct btrfs_backref_node *node = NULL;
	struct btrfs_backref_node *new_node;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *new_edge;
	struct rb_node *rb_node;

	if (cache->last_trans > 0)
		update_backref_cache(trans, cache);

	/* Look up the node for the source root's commit root first. */
	rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
		if (node->detached)
			node = NULL;
		else
			BUG_ON(node->new_bytenr != reloc_root->node->start);
	}

	/* Fall back to the reloc root's commit root. */
	if (!node) {
		rb_node = rb_simple_search(&cache->rb_root,
					   reloc_root->commit_root->start);
		if (rb_node) {
			node = rb_entry(rb_node, struct btrfs_backref_node,
					rb_node);
			BUG_ON(node->detached);
		}
	}

	if (!node)
		return 0;

	new_node = btrfs_backref_alloc_node(cache, dest->node->start,
					    node->level);
	if (!new_node)
		return -ENOMEM;

	new_node->lowest = node->lowest;
	new_node->checked = 1;
	new_node->root = btrfs_grab_root(dest);
	ASSERT(new_node->root);

	/* Clone all lower edges; link them to UPPER first, LOWER later. */
	if (!node->lowest) {
		list_for_each_entry(edge, &node->lower, list[UPPER]) {
			new_edge = btrfs_backref_alloc_edge(cache);
			if (!new_edge)
				goto fail;

			btrfs_backref_link_edge(new_edge, edge->node[LOWER],
						new_node, LINK_UPPER);
		}
	} else {
		list_add_tail(&new_node->lower, &cache->leaves);
	}

	rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
				   &new_node->rb_node);
	if (rb_node)
		btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);

	/* Second phase: hook the cloned edges into their lower nodes. */
	if (!new_node->lowest) {
		list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
			list_add_tail(&new_edge->list[LOWER],
				      &new_edge->node[LOWER]->upper);
		}
	}
	return 0;
fail:
	while (!list_empty(&new_node->lower)) {
		new_edge = list_entry(new_node->lower.next,
				      struct btrfs_backref_edge, list[UPPER]);
		list_del(&new_edge->list[UPPER]);
		btrfs_backref_free_edge(cache, new_edge);
	}
	btrfs_backref_free_node(cache, new_node);
	return -ENOMEM;
}
619
620
621
622
623static int __must_check __add_reloc_root(struct btrfs_root *root)
624{
625 struct btrfs_fs_info *fs_info = root->fs_info;
626 struct rb_node *rb_node;
627 struct mapping_node *node;
628 struct reloc_control *rc = fs_info->reloc_ctl;
629
630 node = kmalloc(sizeof(*node), GFP_NOFS);
631 if (!node)
632 return -ENOMEM;
633
634 node->bytenr = root->commit_root->start;
635 node->data = root;
636
637 spin_lock(&rc->reloc_root_tree.lock);
638 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
639 node->bytenr, &node->rb_node);
640 spin_unlock(&rc->reloc_root_tree.lock);
641 if (rb_node) {
642 btrfs_err(fs_info,
643 "Duplicate root found for start=%llu while inserting into relocation tree",
644 node->bytenr);
645 return -EEXIST;
646 }
647
648 list_add_tail(&root->root_list, &rc->reloc_roots);
649 return 0;
650}
651
652
653
654
655
/*
 * Remove a reloc root from the rbtree and from rc->reloc_roots, freeing
 * the mapping node and dropping the list's reference to the root if it
 * was still on the list.
 */
static void __del_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = fs_info->reloc_ctl;
	bool put_ref = false;

	if (rc && root->node) {
		spin_lock(&rc->reloc_root_tree.lock);
		rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
					   root->commit_root->start);
		if (rb_node) {
			node = rb_entry(rb_node, struct mapping_node, rb_node);
			rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
			RB_CLEAR_NODE(&node->rb_node);
		}
		spin_unlock(&rc->reloc_root_tree.lock);
		ASSERT(!node || (struct btrfs_root *)node->data == root);
	}

	/*
	 * We only put the reloc root here if it's on the list.  There's a
	 * lot of places where the pattern is to splice rc->reloc_roots,
	 * process the reloc roots, and then add the reloc root back onto
	 * rc->reloc_roots.  If we call __del_reloc_root while it's off of
	 * the list we don't want the reference being dropped, because the
	 * code messing with the list is in charge of the reference.
	 */
	spin_lock(&fs_info->trans_lock);
	if (!list_empty(&root->root_list)) {
		put_ref = true;
		list_del_init(&root->root_list);
	}
	spin_unlock(&fs_info->trans_lock);
	if (put_ref)
		btrfs_put_root(root);
	kfree(node);
}
695
696
697
698
699
/*
 * Re-key a reloc root's mapping node from the commit root's bytenr to
 * the current root node's bytenr (after the root block was COWed).
 *
 * NOTE(review): the tree lock is dropped between the erase and the
 * re-insert, leaving a window where find_reloc_root() cannot find this
 * root — presumably callers serialize against lookups; confirm.
 */
static int __update_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = fs_info->reloc_ctl;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
				   root->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
	}
	spin_unlock(&rc->reloc_root_tree.lock);

	if (!node)
		return 0;
	BUG_ON((struct btrfs_root *)node->data != root);

	spin_lock(&rc->reloc_root_tree.lock);
	node->bytenr = root->node->start;
	rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
				   node->bytenr, &node->rb_node);
	spin_unlock(&rc->reloc_root_tree.lock);
	if (rb_node)
		btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
	return 0;
}
729
/*
 * Create a reloc tree (a snapshot of @root under
 * BTRFS_TREE_RELOC_OBJECTID) for relocation.  When @objectid equals
 * @root's own objectid the snapshot is taken from the commit root,
 * otherwise from the current root node.
 *
 * Returns the newly read reloc root, or an ERR_PTR on failure.  Once
 * references have been modified, any failure aborts the transaction.
 */
static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
					struct btrfs_root *root, u64 objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret = 0;
	bool must_abort = false;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	if (!root_item)
		return ERR_PTR(-ENOMEM);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = objectid;

	if (root->root_key.objectid == objectid) {
		u64 commit_root_gen;

		/* Called by btrfs_init_reloc_root(): snapshot the commit root */
		ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		if (ret)
			goto fail;

		/*
		 * Set the last_snapshot field to the generation of the
		 * commit root, so that shared tree blocks modified after
		 * this snapshot are properly COWed when the fs tree and
		 * the reloc tree are merged.
		 */
		commit_root_gen = btrfs_header_generation(root->commit_root);
		btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
	} else {
		/*
		 * Called by btrfs_reloc_post_snapshot_hook(): snapshot the
		 * current root node, since the source tree is a freshly
		 * created snapshot whose blocks won't change before the
		 * transaction commits.
		 */
		ret = btrfs_copy_root(trans, root, root->node, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		if (ret)
			goto fail;
	}

	/*
	 * We have changed references at this point, we must abort the
	 * transaction if anything fails.
	 */
	must_abort = true;

	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);

	if (root->root_key.objectid == objectid) {
		btrfs_set_root_refs(root_item, 0);
		memset(&root_item->drop_progress, 0,
		       sizeof(struct btrfs_disk_key));
		btrfs_set_root_drop_level(root_item, 0);
	}

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, fs_info->tree_root,
				&root_key, root_item);
	if (ret)
		goto fail;

	kfree(root_item);

	reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
	if (IS_ERR(reloc_root)) {
		ret = PTR_ERR(reloc_root);
		goto abort;
	}
	set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
	reloc_root->last_trans = trans->transid;
	return reloc_root;
fail:
	kfree(root_item);
abort:
	if (must_abort)
		btrfs_abort_transaction(trans, ret);
	return ERR_PTR(ret);
}
825
826
827
828
829
830
831
832
/*
 * Create a reloc tree for the given fs tree if one does not exist yet,
 * or refresh the existing one's last_trans.  No-op when relocation is
 * not running or the reloc tree is already dead.
 *
 * Returns 0 on success, a negative errno on failure.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct reloc_control *rc = fs_info->reloc_ctl;
	struct btrfs_block_rsv *rsv;
	int clear_rsv = 0;
	int ret;

	if (!rc)
		return 0;

	/*
	 * The subvolume has a reloc tree but the swap is finished; no need
	 * to create/update the dead reloc tree.
	 */
	if (reloc_root_is_dead(root))
		return 0;

	/*
	 * This is subtle but important.  We do not do
	 * record_root_in_transaction for reloc roots; instead we record
	 * their corresponding fs root, and then here we update last_trans
	 * for the reloc root.  This means we have to do this for the
	 * entire life of the reloc root, regardless of which stage of the
	 * relocation we are in.
	 */
	if (root->reloc_root) {
		reloc_root = root->reloc_root;
		reloc_root->last_trans = trans->transid;
		return 0;
	}

	/*
	 * We are merging reloc roots, so we do not need new reloc trees.
	 * Also, reloc trees never need their own reloc tree.
	 */
	if (!rc->create_reloc_tree ||
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		return 0;

	if (!trans->reloc_reserved) {
		rsv = trans->block_rsv;
		trans->block_rsv = rc->block_rsv;
		clear_rsv = 1;
	}
	reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
	if (clear_rsv)
		trans->block_rsv = rsv;
	if (IS_ERR(reloc_root))
		return PTR_ERR(reloc_root);

	ret = __add_reloc_root(reloc_root);
	ASSERT(ret != -EEXIST);
	if (ret) {
		/* Pairs with create_reloc_root */
		btrfs_put_root(reloc_root);
		return ret;
	}
	root->reloc_root = btrfs_grab_root(reloc_root);
	return 0;
}
896
897
898
899
/*
 * Update the root item of the reloc tree for @root, deleting the reloc
 * root first if we are merging and it has no remaining references.
 *
 * Returns 0 on success, a negative errno from btrfs_update_root().
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	int ret;

	if (!have_reloc_root(root))
		return 0;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	/*
	 * __del_reloc_root() may drop its reference to the root; hold our
	 * own reference while we are still using reloc_root below.
	 */
	btrfs_grab_root(reloc_root);

	/* root->reloc_root will stay until the current relocation finishes */
	if (fs_info->reloc_ctl->merge_reloc_tree &&
	    btrfs_root_refs(root_item) == 0) {
		set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
		/*
		 * Mark the tree as dead before we change reloc_root so
		 * have_reloc_root will not touch it from now on.
		 */
		smp_wmb();
		__del_reloc_root(reloc_root);
	}

	if (reloc_root->commit_root != reloc_root->node) {
		__update_reloc_root(reloc_root);
		btrfs_set_root_node(root_item, reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		reloc_root->commit_root = btrfs_root_node(reloc_root);
	}

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&reloc_root->root_key, root_item);
	btrfs_put_root(reloc_root);
	return ret;
}
945
946
947
948
949
/*
 * Helper to find the first cached inode with inode number >= @objectid
 * in a subvolume, returning it with an extra reference (igrab), or NULL
 * if none exists.
 */
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;

	spin_lock(&root->inode_lock);
again:
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	/* No exact match: step forward from the last visited node. */
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			return inode;
		}

		/* igrab failed (inode being evicted); try the next one. */
		objectid = btrfs_ino(entry) + 1;
		if (cond_resched_lock(&root->inode_lock))
			goto again;	/* lock was dropped, tree may have changed */

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
	return NULL;
}
999
1000
1001
1002
1003static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1004 u64 bytenr, u64 num_bytes)
1005{
1006 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1007 struct btrfs_path *path;
1008 struct btrfs_file_extent_item *fi;
1009 struct extent_buffer *leaf;
1010 int ret;
1011
1012 path = btrfs_alloc_path();
1013 if (!path)
1014 return -ENOMEM;
1015
1016 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1017 ret = btrfs_lookup_file_extent(NULL, root, path,
1018 btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
1019 if (ret < 0)
1020 goto out;
1021 if (ret > 0) {
1022 ret = -ENOENT;
1023 goto out;
1024 }
1025
1026 leaf = path->nodes[0];
1027 fi = btrfs_item_ptr(leaf, path->slots[0],
1028 struct btrfs_file_extent_item);
1029
1030 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1031 btrfs_file_extent_compression(leaf, fi) ||
1032 btrfs_file_extent_encryption(leaf, fi) ||
1033 btrfs_file_extent_other_encoding(leaf, fi));
1034
1035 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1036 ret = -EINVAL;
1037 goto out;
1038 }
1039
1040 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1041 ret = 0;
1042out:
1043 btrfs_free_path(path);
1044 return ret;
1045}
1046
1047
1048
1049
1050
/*
 * Update file extent items in the tree leaf to point to the new data
 * locations, adjusting extent references accordingly.  Only runs during
 * the UPDATE_DATA_PTRS stage.
 *
 * Returns 0 on success, a negative errno on failure.
 */
static noinline_for_stack
int replace_file_extents(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_root *root,
			 struct extent_buffer *leaf)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	u64 parent;
	u64 bytenr;
	u64 new_bytenr = 0;
	u64 num_bytes;
	u64 end;
	u32 nritems;
	u32 i;
	int ret = 0;
	int first = 1;
	int dirty = 0;

	if (rc->stage != UPDATE_DATA_PTRS)
		return 0;

	/* Reloc tree leaves are shared; reference them by parent block. */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		parent = leaf->start;
	else
		parent = 0;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		struct btrfs_ref ref = { 0 };

		cond_resched();
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;
		if (!in_range(bytenr, rc->block_group->start,
			      rc->block_group->length))
			continue;

		/*
		 * If we are modifying a block in the fs tree, lock the file
		 * range and drop the extent cache so stale mappings to the
		 * old location are invalidated.
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			if (first) {
				inode = find_next_inode(root, key.objectid);
				first = 0;
			} else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
				btrfs_add_delayed_iput(inode);
				inode = find_next_inode(root, key.objectid);
			}
			if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
				end = key.offset +
				      btrfs_file_extent_num_bytes(leaf, fi);
				WARN_ON(!IS_ALIGNED(key.offset,
						    fs_info->sectorsize));
				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
				end--;
				/* Skip the item if the range is contended. */
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						      key.offset, end);
				if (!ret)
					continue;

				btrfs_drop_extent_cache(BTRFS_I(inode),
						key.offset,	end, 1);
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      key.offset, end);
			}
		}

		ret = get_new_location(rc->data_inode, &new_bytenr,
				       bytenr, num_bytes);
		if (ret) {
			/*
			 * Don't have to abort since we've not changed anything
			 * in the file extent yet.
			 */
			break;
		}

		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
		dirty = 1;

		/* key.offset becomes the logical file offset of the extent. */
		key.offset -= btrfs_file_extent_offset(leaf, fi);
		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
				       num_bytes, parent);
		ref.real_root = root->root_key.objectid;
		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
				    key.objectid, key.offset);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
				       num_bytes, parent);
		ref.real_root = root->root_key.objectid;
		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
				    key.objectid, key.offset);
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}
	}
	if (dirty)
		btrfs_mark_buffer_dirty(leaf);
	if (inode)
		btrfs_add_delayed_iput(inode);
	return ret;
}
1174
1175static noinline_for_stack
1176int memcmp_node_keys(struct extent_buffer *eb, int slot,
1177 struct btrfs_path *path, int level)
1178{
1179 struct btrfs_disk_key key1;
1180 struct btrfs_disk_key key2;
1181 btrfs_node_key(eb, &key1, slot);
1182 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1183 return memcmp(&key1, &key2, sizeof(key1));
1184}
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
/*
 * Replace the path from the 'lowest' level of the fs tree @dest with the
 * corresponding path of the reloc tree @src by swapping block pointers,
 * and fix up the extent references of the swapped blocks.
 *
 * Returns a positive level number when a block pointer was swapped at
 * that level, 0 when there was nothing to do at or below @lowest_level,
 * or a negative errno on failure.  If @next_key is given it is set to
 * the first key after the processed subtree.
 */
static noinline_for_stack
int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
		 struct btrfs_root *dest, struct btrfs_root *src,
		 struct btrfs_path *path, struct btrfs_key *next_key,
		 int lowest_level, int max_level)
{
	struct btrfs_fs_info *fs_info = dest->fs_info;
	struct extent_buffer *eb;
	struct extent_buffer *parent;
	struct btrfs_ref ref = { 0 };
	struct btrfs_key key;
	u64 old_bytenr;
	u64 new_bytenr;
	u64 old_ptr_gen;
	u64 new_ptr_gen;
	u64 last_snapshot;
	u32 blocksize;
	int cow = 0;
	int level;
	int ret;
	int slot;

	ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
	ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
	slot = path->slots[lowest_level];
	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

	eb = btrfs_lock_root_node(dest);
	level = btrfs_header_level(eb);

	if (level < lowest_level) {
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
		return 0;
	}

	if (cow) {
		ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
				      BTRFS_NESTING_COW);
		if (ret) {
			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			return ret;
		}
	}

	if (next_key) {
		next_key->objectid = (u64)-1;
		next_key->type = (u8)-1;
		next_key->offset = (u64)-1;
	}

	parent = eb;
	while (1) {
		level = btrfs_header_level(parent);
		ASSERT(level >= lowest_level);

		ret = btrfs_bin_search(parent, &key, &slot);
		if (ret < 0)
			break;
		if (ret && slot > 0)
			slot--;

		if (next_key && slot + 1 < btrfs_header_nritems(parent))
			btrfs_node_key_to_cpu(parent, next_key, slot + 1);

		old_bytenr = btrfs_node_blockptr(parent, slot);
		blocksize = fs_info->nodesize;
		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);

		if (level <= max_level) {
			eb = path->nodes[level];
			new_bytenr = btrfs_node_blockptr(eb,
							path->slots[level]);
			new_ptr_gen = btrfs_node_ptr_generation(eb,
							path->slots[level]);
		} else {
			new_bytenr = 0;
			new_ptr_gen = 0;
		}

		if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
			ret = level;
			break;
		}

		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
		    memcmp_node_keys(parent, slot, path, level)) {
			if (level <= lowest_level) {
				ret = 0;
				break;
			}

			/* Descend into the child block and retry one level down. */
			eb = btrfs_read_node_slot(parent, slot);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				break;
			}
			btrfs_tree_lock(eb);
			if (cow) {
				ret = btrfs_cow_block(trans, dest, eb, parent,
						      slot, &eb,
						      BTRFS_NESTING_COW);
				if (ret) {
					btrfs_tree_unlock(eb);
					free_extent_buffer(eb);
					break;
				}
			}

			btrfs_tree_unlock(parent);
			free_extent_buffer(parent);

			parent = eb;
			continue;
		}

		/* Found a swappable slot but haven't COWed yet: restart with COW. */
		if (!cow) {
			btrfs_tree_unlock(parent);
			free_extent_buffer(parent);
			cow = 1;
			goto again;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &key,
				      path->slots[level]);
		btrfs_release_path(path);

		path->lowest_level = level;
		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
		path->lowest_level = 0;
		if (ret) {
			if (ret > 0)
				ret = -ENOENT;
			break;
		}

		/*
		 * Info qgroup to trace both subtrees.
		 *
		 * We must trace both trees:
		 * 1) Tree reloc subtree - if not traced, we will leak data
		 *    numbers
		 * 2) Fs subtree - if not traced, we will double count old
		 *    data
		 *
		 * We don't scan the subtree right now, but only record the
		 * swapped tree blocks; the subtree will be scanned before
		 * transaction commit.
		 */
		ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
				rc->block_group, parent, slot,
				path->nodes[level], path->slots[level],
				last_snapshot);
		if (ret < 0)
			break;

		/* Swap the block pointers between the two trees. */
		btrfs_set_node_blockptr(parent, slot, new_bytenr);
		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
		btrfs_mark_buffer_dirty(parent);

		btrfs_set_node_blockptr(path->nodes[level],
					path->slots[level], old_bytenr);
		btrfs_set_node_ptr_generation(path->nodes[level],
					      path->slots[level], old_ptr_gen);
		btrfs_mark_buffer_dirty(path->nodes[level]);

		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
				       blocksize, path->nodes[level]->start);
		ref.skip_qgroup = true;
		btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}
		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
				       blocksize, 0);
		ref.skip_qgroup = true;
		btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
				       blocksize, path->nodes[level]->start);
		btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
		ref.skip_qgroup = true;
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
				       blocksize, 0);
		btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
		ref.skip_qgroup = true;
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_unlock_up_safe(path, 0);

		ret = level;
		break;
	}
	btrfs_tree_unlock(parent);
	free_extent_buffer(parent);
	return ret;
}
1416
1417
1418
1419
/*
 * Advance the path to the next tree block pointer whose generation is newer
 * than the last snapshot of @root, i.e. a block the reloc tree actually
 * diverged on and that still needs merging.
 *
 * Returns 0 with *level set to the level holding the candidate slot, or 1
 * when the whole tree has been exhausted.
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
		       int *level)
{
	struct extent_buffer *eb;
	int i;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	/* Release everything below the level we just finished processing. */
	for (i = 0; i < *level; i++) {
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}

	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] + 1 < nritems) {
			path->slots[i]++;
			/* Pointers <= last_snapshot are shared, skip them. */
			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
			    last_snapshot)
				continue;

			*level = i;
			return 0;
		}
		/* This node is exhausted, drop it and move up one level. */
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}
	return 1;
}
1453
1454
1455
1456
/*
 * Walk down the reloc tree from *level, following only pointers whose
 * generation is newer than @root's last snapshot (blocks that differ from
 * the fs tree), reading in child nodes until reaching level 1.
 *
 * Returns 0 with *level updated when positioned on a candidate slot, 1 when
 * nothing newer than the last snapshot remains below, or a negative errno
 * if reading a child block fails.
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
			 int *level)
{
	struct extent_buffer *eb = NULL;
	int i;
	u64 ptr_gen = 0;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = *level; i > 0; i--) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		/* Skip pointers that predate the last snapshot (shared). */
		while (path->slots[i] < nritems) {
			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
			if (ptr_gen > last_snapshot)
				break;
			path->slots[i]++;
		}
		if (path->slots[i] >= nritems) {
			if (i == *level)
				break;
			/* Exhausted this node, report the parent level. */
			*level = i + 1;
			return 0;
		}
		if (i == 1) {
			*level = i;
			return 0;
		}

		eb = btrfs_read_node_slot(eb, path->slots[i]);
		if (IS_ERR(eb))
			return PTR_ERR(eb);
		BUG_ON(btrfs_header_level(eb) != i - 1);
		path->nodes[i - 1] = eb;
		path->slots[i - 1] = 0;
	}
	return 1;
}
1498
1499
1500
1501
1502
/*
 * Drop the cached extent maps of every regular-file inode of @root whose
 * objectid lies within [min_key->objectid, max_key->objectid], limited to
 * the file range the two keys describe.  Used after data pointers were
 * rewritten so no stale extent mapping survives.
 */
static int invalidate_extent_cache(struct btrfs_root *root,
				   struct btrfs_key *min_key,
				   struct btrfs_key *max_key)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct inode *inode = NULL;
	u64 objectid;
	u64 start, end;
	u64 ino;

	objectid = min_key->objectid;
	while (1) {
		cond_resched();
		iput(inode);	/* drop previous iteration's reference */

		if (objectid > max_key->objectid)
			break;

		inode = find_next_inode(root, objectid);
		if (!inode)
			break;
		ino = btrfs_ino(BTRFS_I(inode));

		if (ino > max_key->objectid) {
			iput(inode);
			break;
		}

		objectid = ino + 1;
		if (!S_ISREG(inode->i_mode))
			continue;

		/* The first matching inode may be bounded by min_key. */
		if (unlikely(min_key->objectid == ino)) {
			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
				continue;
			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
				start = 0;
			else {
				start = min_key->offset;
				WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
			}
		} else {
			start = 0;
		}

		/* Likewise the last matching inode may be bounded by max_key. */
		if (unlikely(max_key->objectid == ino)) {
			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
				continue;
			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
				end = (u64)-1;
			} else {
				if (max_key->offset == 0)
					continue;
				end = max_key->offset;
				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
				end--;
			}
		} else {
			end = (u64)-1;
		}

		/* The data in these cached extent maps is now stale. */
		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
	}
	return 0;
}
1571
1572static int find_next_key(struct btrfs_path *path, int level,
1573 struct btrfs_key *key)
1574
1575{
1576 while (level < BTRFS_MAX_LEVEL) {
1577 if (!path->nodes[level])
1578 break;
1579 if (path->slots[level] + 1 <
1580 btrfs_header_nritems(path->nodes[level])) {
1581 btrfs_node_key_to_cpu(path->nodes[level], key,
1582 path->slots[level] + 1);
1583 return 0;
1584 }
1585 level++;
1586 }
1587 return 1;
1588}
1589
1590
1591
1592
/*
 * Reset the reloc root's drop progress and refcount to zero, persist that
 * with btrfs_update_reloc_root(), and queue @root on rc->dirty_subvol_roots
 * so clean_dirty_subvols() can drop the merged reloc tree later.
 *
 * Returns 0 on success or the error from btrfs_update_reloc_root().
 */
static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct btrfs_root *root)
{
	struct btrfs_root *reloc_root = root->reloc_root;
	struct btrfs_root_item *reloc_root_item;
	int ret;

	/* @root must be a subvolume tree root with a valid reloc tree */
	ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
	ASSERT(reloc_root);

	reloc_root_item = &reloc_root->root_item;
	memset(&reloc_root_item->drop_progress, 0,
	       sizeof(reloc_root_item->drop_progress));
	btrfs_set_root_drop_level(reloc_root_item, 0);
	btrfs_set_root_refs(reloc_root_item, 0);
	ret = btrfs_update_reloc_root(trans, root);
	if (ret)
		return ret;

	if (list_empty(&root->reloc_dirty_list)) {
		/* The dirty list holds its own reference on the root. */
		btrfs_grab_root(root);
		list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
	}

	return 0;
}
1621
/*
 * Drop the reloc trees (and orphan reloc roots) queued on
 * rc->dirty_subvol_roots.  Keeps processing remaining entries on error and
 * returns the first error encountered.
 */
static int clean_dirty_subvols(struct reloc_control *rc)
{
	struct btrfs_root *root;
	struct btrfs_root *next;
	int ret = 0;
	int ret2;

	list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
				 reloc_dirty_list) {
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			/* Merged subvolume, clean up its reloc root */
			struct btrfs_root *reloc_root = root->reloc_root;

			list_del_init(&root->reloc_dirty_list);
			root->reloc_root = NULL;
			/*
			 * Barrier so clear_bit() only happens after
			 * root->reloc_root = NULL above is visible.
			 */
			smp_wmb();
			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
			if (reloc_root) {
				/*
				 * btrfs_drop_snapshot() normally consumes the
				 * reference we held via ->reloc_root; if it
				 * fails we must drop that reference ourselves.
				 */
				ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
				if (ret2 < 0) {
					btrfs_put_root(reloc_root);
					if (!ret)
						ret = ret2;
				}
			}
			btrfs_put_root(root);
		} else {
			/* Orphan reloc tree, just drop it */
			ret2 = btrfs_drop_snapshot(root, 0, 1);
			if (ret2 < 0) {
				btrfs_put_root(root);
				if (!ret)
					ret = ret2;
			}
		}
	}
	return ret;
}
1669
1670
1671
1672
1673
/*
 * Merge the relocated tree blocks of a reloc tree back into the
 * corresponding fs tree, recording the walk position in the reloc root's
 * drop_progress so an interrupted merge can resume.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
					       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_key key;
	struct btrfs_key next_key;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int reserve_level;
	int level;
	int max_level;
	int replaced = 0;
	int ret = 0;
	u32 min_reserved;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_FORWARD;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* No saved progress, start from the reloc root's top node. */
		level = btrfs_root_level(root_item);
		atomic_inc(&reloc_root->node->refs);
		path->nodes[level] = reloc_root->node;
		path->slots[level] = 0;
	} else {
		/* Resume from the recorded drop progress key and level. */
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

		level = btrfs_root_drop_level(root_item);
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			btrfs_free_path(path);
			return ret;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
				      path->slots[level]);
		WARN_ON(memcmp(&key, &next_key, sizeof(key)));

		btrfs_unlock_up_safe(path, 0);
	}

	/*
	 * We swap tree blocks between the reloc tree and the subvolume tree
	 * by modifying upper level pointers, so each step COWs at most from
	 * level 1 up to the root level in each of the two trees.  Hence the
	 * reservation of root_level * nodesize, times two for both trees.
	 */
	reserve_level = max_t(int, 1, btrfs_root_level(root_item));
	min_reserved = fs_info->nodesize * reserve_level * 2;
	memset(&next_key, 0, sizeof(next_key));

	while (1) {
		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
					     BTRFS_RESERVE_FLUSH_LIMIT);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}

		/*
		 * At this point we no longer have a reloc_control, so we can't
		 * depend on btrfs_init_reloc_root to update our last_trans.
		 *
		 * But that's ok, we started the trans handle on our
		 * corresponding fs_root, which means it's been added to the
		 * dirty list.  At commit time we'll still call
		 * btrfs_update_reloc_root() and update our root item
		 * appropriately.
		 */
		reloc_root->last_trans = trans->transid;
		trans->block_rsv = rc->block_rsv;

		replaced = 0;
		max_level = level;

		ret = walk_down_reloc_tree(reloc_root, path, &level);
		if (ret < 0)
			goto out;
		if (ret > 0)
			break;

		if (!find_next_key(path, level, &key) &&
		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
			ret = 0;
		} else {
			ret = replace_path(trans, rc, root, reloc_root, path,
					   &next_key, level, max_level);
		}
		if (ret < 0)
			goto out;
		if (ret > 0) {
			level = ret;
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			replaced = 1;
		}

		ret = walk_up_reloc_tree(reloc_root, path, &level);
		if (ret > 0)
			break;

		BUG_ON(level == 0);
		/*
		 * Save walking progress in the reloc root item so the merge
		 * can resume here after a mount or interruption.
		 */
		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
			       path->slots[level]);
		btrfs_set_root_drop_level(root_item, level);

		btrfs_end_transaction_throttle(trans);
		trans = NULL;

		btrfs_btree_balance_dirty(fs_info);

		if (replaced && rc->stage == UPDATE_DATA_PTRS)
			invalidate_extent_cache(root, &key, &next_key);
	}

	/*
	 * Handle the case where only one block in the fs tree needs to be
	 * relocated and that block is the tree root.
	 */
	leaf = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
			      BTRFS_NESTING_COW);
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
out:
	btrfs_free_path(path);

	if (ret == 0) {
		ret = insert_dirty_subvol(trans, rc, root);
		if (ret)
			btrfs_abort_transaction(trans, ret);
	}

	if (trans)
		btrfs_end_transaction_throttle(trans);

	btrfs_btree_balance_dirty(fs_info);

	if (replaced && rc->stage == UPDATE_DATA_PTRS)
		invalidate_extent_cache(root, &key, &next_key);

	return ret;
}
1837
/*
 * Prepare for merging reloc trees: reserve enough metadata space, mark the
 * reloc trees as mergeable (root refs = 1) and commit the transaction.
 *
 * @err carries any earlier error; the function still walks the reloc roots
 * so cleanup can happen properly, and returns the first error seen.
 */
static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	LIST_HEAD(reloc_roots);
	u64 num_bytes = 0;
	int ret;

	mutex_lock(&fs_info->reloc_mutex);
	rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
	rc->merging_rsv_size += rc->nodes_relocated * 2;
	mutex_unlock(&fs_info->reloc_mutex);

again:
	if (!err) {
		num_bytes = rc->merging_rsv_size;
		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
					  BTRFS_RESERVE_FLUSH_ALL);
		if (ret)
			err = ret;
	}

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		if (!err)
			btrfs_block_rsv_release(fs_info, rc->block_rsv,
						num_bytes, NULL);
		return PTR_ERR(trans);
	}

	if (!err) {
		/* merging_rsv_size grew while we reserved, redo the rsv. */
		if (num_bytes != rc->merging_rsv_size) {
			btrfs_end_transaction(trans);
			btrfs_block_rsv_release(fs_info, rc->block_rsv,
						num_bytes, NULL);
			goto again;
		}
	}

	rc->merge_reloc_tree = 1;

	while (!list_empty(&rc->reloc_roots)) {
		reloc_root = list_entry(rc->reloc_roots.next,
					struct btrfs_root, root_list);
		list_del_init(&reloc_root->root_list);

		root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
				false);
		if (IS_ERR(root)) {
			/*
			 * Even on error we need this reloc root back on our
			 * list so we can clean it up properly.
			 */
			list_add(&reloc_root->root_list, &reloc_roots);
			btrfs_abort_transaction(trans, (int)PTR_ERR(root));
			if (!err)
				err = PTR_ERR(root);
			break;
		}
		ASSERT(root->reloc_root == reloc_root);

		/*
		 * Set reference count to 1, so btrfs_recover_relocation()
		 * knows it should resume merging.
		 */
		if (!err)
			btrfs_set_root_refs(&reloc_root->root_item, 1);
		ret = btrfs_update_reloc_root(trans, root);

		/*
		 * Even on error we need this reloc root back on our list so
		 * we can clean it up properly.
		 */
		list_add(&reloc_root->root_list, &reloc_roots);
		btrfs_put_root(root);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (!err)
				err = ret;
			break;
		}
	}

	list_splice(&reloc_roots, &rc->reloc_roots);

	if (!err)
		err = btrfs_commit_transaction(trans);
	else
		btrfs_end_transaction(trans);
	return err;
}
1933
1934static noinline_for_stack
1935void free_reloc_roots(struct list_head *list)
1936{
1937 struct btrfs_root *reloc_root, *tmp;
1938
1939 list_for_each_entry_safe(reloc_root, tmp, list, root_list)
1940 __del_reloc_root(reloc_root);
1941}
1942
/*
 * Merge every queued reloc root back into its subvolume tree, or queue the
 * reloc root for dropping when the subvolume no longer references it.
 * Loops until no new reloc roots appear on rc->reloc_roots.
 */
static noinline_for_stack
void merge_reloc_roots(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_root *root;
	struct btrfs_root *reloc_root;
	LIST_HEAD(reloc_roots);
	int found = 0;
	int ret = 0;
again:
	root = rc->extent_root;

	/*
	 * The reloc_mutex serializes us with root recording in transaction
	 * commit; nobody may be adding roots to the list while we splice it.
	 */
	mutex_lock(&fs_info->reloc_mutex);
	list_splice_init(&rc->reloc_roots, &reloc_roots);
	mutex_unlock(&fs_info->reloc_mutex);

	while (!list_empty(&reloc_roots)) {
		found = 1;
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);

		root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
					 false);
		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			if (IS_ERR(root)) {
				/*
				 * For a reloc root with refs > 0 the fs root
				 * should exist in memory, so this is either
				 * corruption or a bug; handle the error
				 * gracefully anyway.
				 */
				ASSERT(0);
				ret = PTR_ERR(root);
				goto out;
			}
			if (root->reloc_root != reloc_root) {
				/*
				 * This should never happen: two reloc roots
				 * for one subvolume indicates a serious bug
				 * or corruption.
				 */
				ASSERT(0);
				ret = -EINVAL;
				goto out;
			}
			ret = merge_reloc_root(rc, root);
			btrfs_put_root(root);
			if (ret) {
				/* Requeue so cleanup below can find it. */
				if (list_empty(&reloc_root->root_list))
					list_add_tail(&reloc_root->root_list,
						      &reloc_roots);
				goto out;
			}
		} else {
			if (!IS_ERR(root)) {
				if (root->reloc_root == reloc_root) {
					root->reloc_root = NULL;
					btrfs_put_root(reloc_root);
				}
				clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
					  &root->state);
				btrfs_put_root(root);
			}

			list_del_init(&reloc_root->root_list);
			/* Queue this orphan reloc root for cleanup. */
			list_add_tail(&reloc_root->reloc_dirty_list,
				      &rc->dirty_subvol_roots);
		}
	}

	if (found) {
		found = 0;
		goto again;
	}
out:
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret, NULL);
		free_reloc_roots(&reloc_roots);

		/* New reloc roots may have been added meanwhile. */
		mutex_lock(&fs_info->reloc_mutex);
		list_splice_init(&rc->reloc_roots, &reloc_roots);
		mutex_unlock(&fs_info->reloc_mutex);
		free_reloc_roots(&reloc_roots);
	}

	/*
	 * NOTE(review): a BUG_ON(!RB_EMPTY_ROOT(...)) on reloc_root_tree here
	 * would be wrong: if starting the transaction in prepare_to_merge()
	 * failed, zero-ref reloc roots may remain in the rb tree.  We bail
	 * here and the references held on each reloc root free them properly.
	 */
}
2054
2055static void free_block_list(struct rb_root *blocks)
2056{
2057 struct tree_block *block;
2058 struct rb_node *rb_node;
2059 while ((rb_node = rb_first(blocks))) {
2060 block = rb_entry(rb_node, struct tree_block, rb_node);
2061 rb_erase(rb_node, blocks);
2062 kfree(block);
2063 }
2064}
2065
/*
 * Ensure the fs root owning @reloc_root is recorded in the current
 * transaction, so the reloc root gets its updates at commit time.  A
 * no-op when the reloc root was already seen in this transaction.
 */
static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{
	struct btrfs_fs_info *fs_info = reloc_root->fs_info;
	struct btrfs_root *root;
	int ret;

	if (reloc_root->last_trans == trans->transid)
		return 0;

	root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);

	/*
	 * This should succeed, since we can't have a reloc root without
	 * having already looked up the actual root and created the reloc
	 * root for it.
	 *
	 * However if there's some sort of corruption where we have a ref to
	 * a reloc root without a corresponding fs root, this can fail.
	 */
	if (IS_ERR(root)) {
		ASSERT(0);
		return PTR_ERR(root);
	}
	if (root->reloc_root != reloc_root) {
		ASSERT(0);
		btrfs_err(fs_info,
			  "root %llu has two reloc roots associated with it",
			  reloc_root->root_key.offset);
		btrfs_put_root(root);
		return -EUCLEAN;
	}
	ret = btrfs_record_root_in_trans(trans, root);
	btrfs_put_root(root);

	return ret;
}
2103
/*
 * Walk up the backref chain from @node, record the owning roots in the
 * transaction, and return the reloc root through which the block should be
 * COWed.  Also fills rc->backref_cache.path[] for the levels walked (used
 * by the reloc COW hook).  Returns an ERR_PTR on corruption or failure.
 */
static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct btrfs_backref_node *node,
				     struct btrfs_backref_edge *edges[])
{
	struct btrfs_backref_node *next;
	struct btrfs_root *root;
	int index = 0;
	int ret;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;

		/*
		 * If there is no root, then our references for this block are
		 * incomplete, as we should be able to walk all the way up to
		 * a block that is owned by a root.
		 *
		 * This path is only for SHAREABLE roots, so if we come upon a
		 * non-SHAREABLE root then we have backrefs that resolve
		 * improperly.
		 *
		 * Both of these cases indicate file system corruption, or a
		 * bug in the backref walking code.
		 */
		if (!root) {
			ASSERT(0);
			btrfs_err(trans->fs_info,
		"bytenr %llu doesn't have a backref path ending in a root",
				  node->bytenr);
			return ERR_PTR(-EUCLEAN);
		}
		if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
			ASSERT(0);
			btrfs_err(trans->fs_info,
	"bytenr %llu has multiple refs with one ending in a non-shareable root",
				  node->bytenr);
			return ERR_PTR(-EUCLEAN);
		}

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			ret = record_reloc_root_in_trans(trans, root);
			if (ret)
				return ERR_PTR(ret);
			break;
		}

		ret = btrfs_record_root_in_trans(trans, root);
		if (ret)
			return ERR_PTR(ret);
		root = root->reloc_root;

		/*
		 * We could have raced with another thread which failed, so
		 * root->reloc_root may not be set, handle that here.
		 */
		if (!root)
			return ERR_PTR(-ENOENT);

		if (next->new_bytenr != root->node->start) {
			/*
			 * We just created the reloc root, so we shouldn't have
			 * ->new_bytenr set yet and this node shouldn't be on
			 * the changed list.  If it is, multiple roots point at
			 * the same bytenr (corruption) or the backref walking
			 * code has a bug; bail out either way.
			 */
			ASSERT(next->new_bytenr == 0);
			ASSERT(list_empty(&next->list));
			if (next->new_bytenr || !list_empty(&next->list)) {
				btrfs_err(trans->fs_info,
	"bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
					  node->bytenr, next->bytenr);
				return ERR_PTR(-EUCLEAN);
			}

			next->new_bytenr = root->node->start;
			btrfs_put_root(next->root);
			next->root = btrfs_grab_root(root);
			ASSERT(next->root);
			list_add_tail(&next->list,
				      &rc->backref_cache.changed);
			mark_block_processed(rc, next);
			break;
		}

		WARN_ON(1);
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}
	if (!root) {
		/*
		 * This can happen if there's fs corruption or a bug in the
		 * backref lookup code.
		 */
		ASSERT(0);
		return ERR_PTR(-ENOENT);
	}

	next = node;
	/* Setup the backref node path for btrfs_reloc_cow_block(). */
	while (1) {
		rc->backref_cache.path[next->level] = next;
		if (--index < 0)
			break;
		next = edges[index]->node[UPPER];
	}
	return root;
}
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
/*
 * Select a tree root for relocating @node.
 *
 * Returns: a non-shareable root when the block can be handled in that root
 * directly; NULL when the block is referenced from upper-level nodes and
 * the caller must relink it via do_relocation(); ERR_PTR(-EUCLEAN) when a
 * backref path lacks a root; ERR_PTR(-ENOENT) when only reloc-tree roots
 * reference the block.
 */
static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *next;
	struct btrfs_root *root;
	struct btrfs_root *fs_root = NULL;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;

		/*
		 * This can occur if we have incomplete extent refs leading
		 * all the way up a particular path; in that case bail with
		 * -EUCLEAN.
		 */
		if (!root)
			return ERR_PTR(-EUCLEAN);

		/* No other choice for a non-shareable tree. */
		if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
			return root;

		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
			fs_root = root;

		if (next != node)
			return NULL;

		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}

	if (!fs_root)
		return ERR_PTR(-ENOENT);
	return fs_root;
}
2270
/*
 * Calculate the metadata size (one nodesize per block) of the backref
 * nodes reachable upwards from @node that still need processing.
 *
 * With @reserve set (reservation path), @node must not be processed yet
 * and processed ancestors terminate each upward walk; without it, @node
 * itself is always counted.
 */
static noinline_for_stack
u64 calcu_metadata_size(struct reloc_control *rc,
			struct btrfs_backref_node *node, int reserve)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_backref_node *next = node;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	u64 num_bytes = 0;
	int index = 0;

	BUG_ON(reserve && node->processed);

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed && (reserve || next != node))
				break;

			num_bytes += fs_info->nodesize;

			if (list_empty(&next->upper))
				break;

			/* Follow the first upper edge; siblings via edges[]. */
			edge = list_entry(next->upper.next,
					  struct btrfs_backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
	return num_bytes;
}
2304
/*
 * Reserve metadata space for relocating @node and the unprocessed blocks
 * above it (times two, covering COWs in both trees).
 *
 * Returns 0 on success, or -EAGAIN after enlarging rc->block_rsv->size so
 * the caller can end the transaction and retry with a bigger reserve.
 */
static int reserve_metadata_space(struct btrfs_trans_handle *trans,
				  struct reloc_control *rc,
				  struct btrfs_backref_node *node)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 num_bytes;
	int ret;
	u64 tmp;

	num_bytes = calcu_metadata_size(rc, node, 1) * 2;

	trans->block_rsv = rc->block_rsv;
	rc->reserved_bytes += num_bytes;

	/*
	 * We are under a transaction here so we can only do limited flushing.
	 * If we get an ENOSPC just kick back -EAGAIN so we know to drop the
	 * transaction and try to refill when we can flush all the things.
	 */
	ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
				     BTRFS_RESERVE_FLUSH_LIMIT);
	if (ret) {
		tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
		while (tmp <= rc->reserved_bytes)
			tmp <<= 1;
		/*
		 * Only one thread can access block_rsv at this point, so no
		 * lock is needed.  Grow the reservation target so the retry
		 * has enough headroom, and so a true ENOSPC surfaces earlier.
		 */
		rc->block_rsv->size = tmp + fs_info->nodesize *
				      RELOCATION_RESERVED_NODES;
		return -EAGAIN;
	}

	return 0;
}
2345
2346
2347
2348
2349
2350
2351
2352
/*
 * Relocate a tree block: COW the block through each upper-level parent via
 * the matching reloc root, or — when the block has already been relocated
 * (@lowest == 0) — update the parents' pointers to its new location and
 * drop the old subtree reference.
 *
 * @lowest is set when @node is the block being relocated itself; clear
 * when we are only relinking an already-relocated block to its parents.
 */
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{
	struct btrfs_backref_node *upper;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u32 blocksize;
	u64 bytenr;
	int slot;
	int ret = 0;

	/*
	 * If we are lowest then this is the first time we're processing this
	 * block, and thus it should not have an eb associated with it yet.
	 */
	ASSERT(!lowest || !node->eb);

	path->lowest_level = node->level + 1;
	rc->backref_cache.path[node->level] = node;
	list_for_each_entry(edge, &node->upper, list[LOWER]) {
		struct btrfs_ref ref = { 0 };

		cond_resched();

		upper = edge->node[UPPER];
		root = select_reloc_root(trans, rc, upper, edges);
		if (IS_ERR(root)) {
			ret = PTR_ERR(root);
			goto next;
		}

		if (upper->eb && !upper->locked) {
			if (!lowest) {
				ret = btrfs_bin_search(upper->eb, key, &slot);
				if (ret < 0)
					goto next;
				BUG_ON(ret);
				bytenr = btrfs_node_blockptr(upper->eb, slot);
				/* Pointer already updated, nothing to do. */
				if (node->eb->start == bytenr)
					goto next;
			}
			/* Drop so we re-read the parent locked below. */
			btrfs_backref_drop_node_buffer(upper);
		}

		if (!upper->eb) {
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			if (ret) {
				/* ret > 0 (key missing) is corruption here. */
				if (ret > 0)
					ret = -ENOENT;

				btrfs_release_path(path);
				break;
			}

			if (!upper->eb) {
				upper->eb = path->nodes[upper->level];
				path->nodes[upper->level] = NULL;
			} else {
				BUG_ON(upper->eb != path->nodes[upper->level]);
			}

			/* Keep the lock, transfer it out of the path. */
			upper->locked = 1;
			path->locks[upper->level] = 0;

			slot = path->slots[upper->level];
			btrfs_release_path(path);
		} else {
			ret = btrfs_bin_search(upper->eb, key, &slot);
			if (ret < 0)
				goto next;
			BUG_ON(ret);
		}

		bytenr = btrfs_node_blockptr(upper->eb, slot);
		if (lowest) {
			if (bytenr != node->bytenr) {
				btrfs_err(root->fs_info,
	"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
					  bytenr, node->bytenr, slot,
					  upper->eb->start);
				ret = -EIO;
				goto next;
			}
		} else {
			if (node->eb->start == bytenr)
				goto next;
		}

		blocksize = root->fs_info->nodesize;
		eb = btrfs_read_node_slot(upper->eb, slot);
		if (IS_ERR(eb)) {
			ret = PTR_ERR(eb);
			goto next;
		}
		btrfs_tree_lock(eb);

		if (!node->eb) {
			ret = btrfs_cow_block(trans, root, eb, upper->eb,
					      slot, &eb, BTRFS_NESTING_COW);
			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			if (ret < 0)
				goto next;
			/*
			 * The COW is expected to have attached the new buffer
			 * to node->eb (presumably via the reloc COW hook —
			 * NOTE(review): confirm against btrfs_reloc_cow_block).
			 */
			ASSERT(node->eb == eb);
		} else {
			/* Point the parent at the relocated block. */
			btrfs_set_node_blockptr(upper->eb, slot,
						node->eb->start);
			btrfs_set_node_ptr_generation(upper->eb, slot,
						      trans->transid);
			btrfs_mark_buffer_dirty(upper->eb);

			btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
					       node->eb->start, blocksize,
					       upper->eb->start);
			ref.real_root = root->root_key.objectid;
			btrfs_init_tree_ref(&ref, node->level,
					    btrfs_header_owner(upper->eb));
			ret = btrfs_inc_extent_ref(trans, &ref);
			if (!ret)
				ret = btrfs_drop_subtree(trans, root, eb,
							 upper->eb);
			if (ret)
				btrfs_abort_transaction(trans, ret);
		}
next:
		if (!upper->pending)
			btrfs_backref_drop_node_buffer(upper);
		else
			btrfs_backref_unlock_node_buffer(upper);
		if (ret)
			break;
	}

	if (!ret && node->pending) {
		btrfs_backref_drop_node_buffer(node);
		list_move_tail(&node->list, &rc->backref_cache.changed);
		node->pending = 0;
	}

	path->lowest_level = 0;

	/*
	 * We should have allocated all of our space in the block rsv and
	 * thus shouldn't ENOSPC here.
	 */
	ASSERT(ret != -ENOSPC);
	return ret;
}
2510
2511static int link_to_upper(struct btrfs_trans_handle *trans,
2512 struct reloc_control *rc,
2513 struct btrfs_backref_node *node,
2514 struct btrfs_path *path)
2515{
2516 struct btrfs_key key;
2517
2518 btrfs_node_key_to_cpu(node->eb, &key, 0);
2519 return do_relocation(trans, rc, node, &key, path, 0);
2520}
2521
/*
 * Relink all pending backref nodes, level by level, into their upper
 * blocks.  If @err is already set, pending nodes are only drained, not
 * relinked.  Returns @err or the first error encountered here.
 */
static int finish_pending_nodes(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_path *path, int err)
{
	LIST_HEAD(list);
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct btrfs_backref_node *node;
	int level;
	int ret;

	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		while (!list_empty(&cache->pending[level])) {
			node = list_entry(cache->pending[level].next,
					  struct btrfs_backref_node, list);
			list_move_tail(&node->list, &list);
			BUG_ON(!node->pending);

			if (!err) {
				ret = link_to_upper(trans, rc, node, path);
				if (ret < 0)
					err = ret;
			}
		}
		/* Put the drained nodes back on the pending list. */
		list_splice_init(&list, &cache->pending[level]);
	}
	return err;
}
2549
2550
2551
2552
2553
/*
 * Mark @node and every not-yet-processed backref node reachable upwards
 * from it as processed.
 */
static void update_processed_blocks(struct reloc_control *rc,
				    struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *next = node;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed)
				break;

			mark_block_processed(rc, next);

			if (list_empty(&next->upper))
				break;

			/* Follow the first upper edge; siblings via edges[]. */
			edge = list_entry(next->upper.next,
					  struct btrfs_backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
}
2581
2582static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2583{
2584 u32 blocksize = rc->extent_root->fs_info->nodesize;
2585
2586 if (test_range_bit(&rc->processed_blocks, bytenr,
2587 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2588 return 1;
2589 return 0;
2590}
2591
2592static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2593 struct tree_block *block)
2594{
2595 struct extent_buffer *eb;
2596
2597 eb = read_tree_block(fs_info, block->bytenr, block->owner,
2598 block->key.offset, block->level, NULL);
2599 if (IS_ERR(eb)) {
2600 return PTR_ERR(eb);
2601 } else if (!extent_buffer_uptodate(eb)) {
2602 free_extent_buffer(eb);
2603 return -EIO;
2604 }
2605 if (block->level == 0)
2606 btrfs_item_key_to_cpu(eb, &block->key, 0);
2607 else
2608 btrfs_node_key_to_cpu(eb, &block->key, 0);
2609 free_extent_buffer(eb);
2610 block->key_ready = 1;
2611 return 0;
2612}
2613
2614
2615
2616
/*
 * Relocate a single tree block described by @node: COW it through its
 * reloc root (shareable trees), touch it in place via a COWing search
 * (non-shareable trees), or relink it into its parents with
 * do_relocation() when no single root owns it.
 */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct btrfs_backref_node *node,
			       struct btrfs_key *key,
			       struct btrfs_path *path)
{
	struct btrfs_root *root;
	int ret = 0;

	if (!node)
		return 0;

	/*
	 * If we fail here we want to drop our backref_node, because we are
	 * going to start over and regenerate the tree for it.
	 */
	ret = reserve_metadata_space(trans, rc, node);
	if (ret)
		goto out;

	BUG_ON(node->processed);
	root = select_one_root(node);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);

		/* -ENOENT: only reloc roots reference it, treat as done. */
		ASSERT(ret == -ENOENT);
		if (ret == -ENOENT) {
			ret = 0;
			update_processed_blocks(rc, node);
		}
		goto out;
	}

	if (root) {
		if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
			/*
			 * This block is the root block of a tree, and this is
			 * the first time we're processing it, so it should
			 * not have ->new_bytenr set nor be on the changed
			 * list yet.
			 *
			 * With corruption, multiple refs could improperly
			 * point at the same block and trip these checks.
			 * ASSERT() for the development case (possible backref
			 * code bug), but error out on production so we never
			 * keep going in that state.
			 */
			ASSERT(node->new_bytenr == 0);
			ASSERT(list_empty(&node->list));
			if (node->new_bytenr || !list_empty(&node->list)) {
				btrfs_err(root->fs_info,
				  "bytenr %llu has improper references to it",
					  node->bytenr);
				ret = -EUCLEAN;
				goto out;
			}
			ret = btrfs_record_root_in_trans(trans, root);
			if (ret)
				goto out;
			/*
			 * Another thread could have failed, so check that the
			 * reloc root is actually set before using it.
			 */
			if (!root->reloc_root) {
				ret = -ENOENT;
				goto out;
			}
			root = root->reloc_root;
			node->new_bytenr = root->node->start;
			btrfs_put_root(node->root);
			node->root = btrfs_grab_root(root);
			ASSERT(node->root);
			list_add_tail(&node->list, &rc->backref_cache.changed);
		} else {
			/* Non-shareable tree: COW down to the block's level. */
			path->lowest_level = node->level;
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			btrfs_release_path(path);
			if (ret > 0)
				ret = 0;
		}
		if (!ret)
			update_processed_blocks(rc, node);
	} else {
		ret = do_relocation(trans, rc, node, key, path, 1);
	}
out:
	if (ret || node->level == 0 || node->cowonly)
		btrfs_backref_cleanup_node(&rc->backref_cache, node);
	return ret;
}
2709
2710
2711
2712
/*
 * Relocate a set of tree blocks: readahead and read first keys for all of
 * them, then build a backref tree for each and relocate it in turn.
 * Consumes and frees @blocks.
 */
static noinline_for_stack
int relocate_tree_blocks(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc, struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_backref_node *node;
	struct btrfs_path *path;
	struct tree_block *block;
	struct tree_block *next;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_free_blocks;
	}

	/* Kick in readahead for tree blocks with missing keys. */
	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
		if (!block->key_ready)
			btrfs_readahead_tree_block(fs_info, block->bytenr,
						   block->owner, 0,
						   block->level);
	}

	/* Get first keys. */
	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
		if (!block->key_ready) {
			err = get_tree_block_key(fs_info, block);
			if (err)
				goto out_free_path;
		}
	}

	/* Do tree relocation. */
	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
		node = build_backref_tree(rc, &block->key,
					  block->level, block->bytenr);
		if (IS_ERR(node)) {
			err = PTR_ERR(node);
			goto out;
		}

		ret = relocate_tree_block(trans, rc, node, &block->key,
					  path);
		if (ret < 0) {
			err = ret;
			break;
		}
	}
out:
	err = finish_pending_nodes(trans, rc, path, err);

out_free_path:
	btrfs_free_path(path);
out_free_blocks:
	free_block_list(blocks);
	return err;
}
2773
/*
 * Preallocate the file range covered by @cluster in the data relocation
 * inode.  On zoned filesystems no preallocation is possible; instead only
 * i_size is extended so the data can be written sequentially.
 */
static noinline_for_stack int prealloc_file_extent_cluster(
				struct btrfs_inode *inode,
				struct file_extent_cluster *cluster)
{
	u64 alloc_hint = 0;
	u64 start;
	u64 end;
	u64 offset = inode->index_cnt;
	u64 num_bytes;
	int nr;
	int ret = 0;
	u64 i_size = i_size_read(&inode->vfs_inode);
	u64 prealloc_start = cluster->start - offset;
	u64 prealloc_end = cluster->end - offset;
	u64 cur_offset = prealloc_start;

	/*
	 * For the subpage case, the previous i_size may not be aligned to
	 * PAGE_SIZE, meaning [i_size, page end) of the last page was padded
	 * with zeros when a previously relocated cluster was read.  If the
	 * new cluster lands in that range, those padding zeros could later
	 * be written back as data, corrupting it.  Invalidate the cached
	 * uptodate state of that tail range first.
	 */
	if (!IS_ALIGNED(i_size, PAGE_SIZE)) {
		struct address_space *mapping = inode->vfs_inode.i_mapping;
		struct btrfs_fs_info *fs_info = inode->root->fs_info;
		const u32 sectorsize = fs_info->sectorsize;
		struct page *page;

		/* Only subpage filesystems can have a non-page-aligned i_size. */
		ASSERT(sectorsize < PAGE_SIZE);
		ASSERT(IS_ALIGNED(i_size, sectorsize));

		/*
		 * Subpage can't handle a page that is DIRTY but not UPTODATE:
		 * the read path would try to flush the ordered range while we
		 * already hold the page lock, deadlocking.  Write back the
		 * whole data reloc inode first so no dirty range remains and
		 * it's safe to clear the uptodate bits.  The data has to be
		 * written back anyway, so this costs little extra.
		 */
		ret = filemap_write_and_wait(mapping);
		if (ret < 0)
			return ret;

		clear_extent_bits(&inode->io_tree, i_size,
				  round_up(i_size, PAGE_SIZE) - 1,
				  EXTENT_UPTODATE);
		page = find_lock_page(mapping, i_size >> PAGE_SHIFT);
		/*
		 * If the page was freed we don't need to do anything, as the
		 * whole page will be re-read later anyway.
		 */
		if (page) {
			btrfs_subpage_clear_uptodate(fs_info, page, i_size,
					round_up(i_size, PAGE_SIZE) - i_size);
			unlock_page(page);
			put_page(page);
		}
	}

	BUG_ON(cluster->start != cluster->boundary[0]);
	ret = btrfs_alloc_data_chunk_ondemand(inode,
					      prealloc_end + 1 - prealloc_start);
	if (ret)
		return ret;

	/*
	 * On a zoned filesystem we cannot preallocate the file region, so
	 * just extend i_size; the data is written sequentially later.
	 */
	if (btrfs_is_zoned(inode->root->fs_info)) {
		struct btrfs_root *root = inode->root;
		struct btrfs_trans_handle *trans;

		end = cluster->end - offset + 1;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
		i_size_write(&inode->vfs_inode, end);
		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			return ret;
		}

		return btrfs_end_transaction(trans);
	}

	btrfs_inode_lock(&inode->vfs_inode, 0);
	/* Preallocate one range per extent boundary in the cluster. */
	for (nr = 0; nr < cluster->nr; nr++) {
		start = cluster->boundary[nr] - offset;
		if (nr + 1 < cluster->nr)
			end = cluster->boundary[nr + 1] - 1 - offset;
		else
			end = cluster->end - offset;

		lock_extent(&inode->io_tree, start, end);
		num_bytes = end + 1 - start;
		ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
						num_bytes, num_bytes,
						end + 1, &alloc_hint);
		cur_offset = end + 1;
		unlock_extent(&inode->io_tree, start, end);
		if (ret)
			break;
	}
	btrfs_inode_unlock(&inode->vfs_inode, 0);

	/* Release any reserved space we did not end up preallocating. */
	if (cur_offset < prealloc_end)
		btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
					prealloc_end + 1 - cur_offset);
	return ret;
}
2905
2906static noinline_for_stack
2907int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2908 u64 block_start)
2909{
2910 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2911 struct extent_map *em;
2912 int ret = 0;
2913
2914 em = alloc_extent_map();
2915 if (!em)
2916 return -ENOMEM;
2917
2918 em->start = start;
2919 em->len = end + 1 - start;
2920 em->block_len = em->len;
2921 em->block_start = block_start;
2922 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2923
2924 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2925 while (1) {
2926 write_lock(&em_tree->lock);
2927 ret = add_extent_mapping(em_tree, em, 0);
2928 write_unlock(&em_tree->lock);
2929 if (ret != -EEXIST) {
2930 free_extent_map(em);
2931 break;
2932 }
2933 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
2934 }
2935 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2936 return ret;
2937}
2938
2939
2940
2941
2942noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2943{
2944 return atomic_read(&fs_info->balance_cancel_req) ||
2945 atomic_read(&fs_info->reloc_cancel_req) ||
2946 fatal_signal_pending(current);
2947}
2948ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2949
2950static u64 get_cluster_boundary_end(struct file_extent_cluster *cluster,
2951 int cluster_nr)
2952{
2953
2954 if (cluster_nr >= cluster->nr - 1)
2955 return cluster->end;
2956
2957
2958 return cluster->boundary[cluster_nr + 1] - 1;
2959}
2960
/*
 * Relocate the data covered by one page of the relocation inode: read the
 * page, mark the covered ranges delalloc and dirty, and set EXTENT_BOUNDARY
 * at each cluster extent start so writeback splits ordered extents there.
 *
 * @cluster_nr is both input (first cluster extent touching this page) and
 * output (advanced past extents fully processed by this page).
 */
static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
			     struct file_extent_cluster *cluster,
			     int *cluster_nr, unsigned long page_index)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 offset = BTRFS_I(inode)->index_cnt;
	const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	struct page *page;
	u64 page_start;
	u64 page_end;
	u64 cur;
	int ret;

	ASSERT(page_index <= last_index);
	page = find_lock_page(inode->i_mapping, page_index);
	if (!page) {
		/* Not cached yet; kick off readahead up to the cluster end. */
		page_cache_sync_readahead(inode->i_mapping, ra, NULL,
				page_index, last_index + 1 - page_index);
		page = find_or_create_page(inode->i_mapping, page_index, mask);
		if (!page)
			return -ENOMEM;
	}
	ret = set_page_extent_mapped(page);
	if (ret < 0)
		goto release_page;

	if (PageReadahead(page))
		page_cache_async_readahead(inode->i_mapping, ra, NULL, page,
				page_index, last_index + 1 - page_index);

	if (!PageUptodate(page)) {
		btrfs_readpage(NULL, page);
		lock_page(page);
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto release_page;
		}
	}

	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;

	/*
	 * Start from the cluster boundary if it is inside the page (subpage
	 * case: a cluster may start mid-page).
	 */
	cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
	while (cur <= page_end) {
		u64 extent_start = cluster->boundary[*cluster_nr] - offset;
		u64 extent_end = get_cluster_boundary_end(cluster,
						*cluster_nr) - offset;
		/* Clamp the current cluster extent to this page. */
		u64 clamped_start = max(page_start, extent_start);
		u64 clamped_end = min(page_end, extent_end);
		u32 clamped_len = clamped_end + 1 - clamped_start;

		/* Reserve metadata (and an outstanding extent) for the range. */
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
						      clamped_len);
		if (ret)
			goto release_page;

		/* Mark the clamped range delalloc so writeback handles it. */
		lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
		ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
						clamped_end, 0, NULL);
		if (ret) {
			/* Undo the reservation taken just above. */
			clear_extent_bits(&BTRFS_I(inode)->io_tree,
					clamped_start, clamped_end,
					EXTENT_LOCKED | EXTENT_BOUNDARY);
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
							clamped_len, true);
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       clamped_len);
			goto release_page;
		}
		btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);

		/*
		 * If the cluster extent boundary falls inside this page, set
		 * EXTENT_BOUNDARY on its first sector so ordered extents get
		 * split at extent boundaries during writeback.
		 */
		if (in_range(cluster->boundary[*cluster_nr] - offset,
			     page_start, PAGE_SIZE)) {
			u64 boundary_start = cluster->boundary[*cluster_nr] -
						offset;
			u64 boundary_end = boundary_start +
					   fs_info->sectorsize - 1;

			set_extent_bits(&BTRFS_I(inode)->io_tree,
					boundary_start, boundary_end,
					EXTENT_BOUNDARY);
		}
		unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
		btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
		cur += clamped_len;

		/* Crossed the extent end, move on to the next cluster extent. */
		if (cur >= extent_end) {
			(*cluster_nr)++;
			/* Just finished the last extent of the cluster. */
			if (*cluster_nr >= cluster->nr)
				break;
		}
	}
	unlock_page(page);
	put_page(page);

	balance_dirty_pages_ratelimited(inode->i_mapping);
	btrfs_throttle(fs_info);
	if (btrfs_should_cancel_balance(fs_info))
		ret = -ECANCELED;
	return ret;

release_page:
	unlock_page(page);
	put_page(page);
	return ret;
}
3083
3084static int relocate_file_extent_cluster(struct inode *inode,
3085 struct file_extent_cluster *cluster)
3086{
3087 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3088 u64 offset = BTRFS_I(inode)->index_cnt;
3089 unsigned long index;
3090 unsigned long last_index;
3091 struct file_ra_state *ra;
3092 int cluster_nr = 0;
3093 int ret = 0;
3094
3095 if (!cluster->nr)
3096 return 0;
3097
3098 ra = kzalloc(sizeof(*ra), GFP_NOFS);
3099 if (!ra)
3100 return -ENOMEM;
3101
3102 ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
3103 if (ret)
3104 goto out;
3105
3106 file_ra_state_init(ra, inode->i_mapping);
3107
3108 ret = setup_extent_mapping(inode, cluster->start - offset,
3109 cluster->end - offset, cluster->start);
3110 if (ret)
3111 goto out;
3112
3113 last_index = (cluster->end - offset) >> PAGE_SHIFT;
3114 for (index = (cluster->start - offset) >> PAGE_SHIFT;
3115 index <= last_index && !ret; index++)
3116 ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
3117 if (btrfs_is_zoned(fs_info) && !ret)
3118 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
3119 if (ret == 0)
3120 WARN_ON(cluster_nr != cluster->nr);
3121out:
3122 kfree(ra);
3123 return ret;
3124}
3125
3126static noinline_for_stack
3127int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3128 struct file_extent_cluster *cluster)
3129{
3130 int ret;
3131
3132 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3133 ret = relocate_file_extent_cluster(inode, cluster);
3134 if (ret)
3135 return ret;
3136 cluster->nr = 0;
3137 }
3138
3139 if (!cluster->nr)
3140 cluster->start = extent_key->objectid;
3141 else
3142 BUG_ON(cluster->nr >= MAX_EXTENTS);
3143 cluster->end = extent_key->objectid + extent_key->offset - 1;
3144 cluster->boundary[cluster->nr] = extent_key->objectid;
3145 cluster->nr++;
3146
3147 if (cluster->nr >= MAX_EXTENTS) {
3148 ret = relocate_file_extent_cluster(inode, cluster);
3149 if (ret)
3150 return ret;
3151 cluster->nr = 0;
3152 }
3153 return 0;
3154}
3155
3156
3157
3158
3159
/*
 * Helper to add a tree block to @blocks for later relocation.
 * The main work is extracting the block's level, generation and (when it
 * can be determined from a single inline ref) owner from the extent item
 * that @path currently points at.
 */
static int add_tree_block(struct reloc_control *rc,
			  struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	struct tree_block *block;
	struct rb_node *rb_node;
	u32 item_size;
	int level = -1;
	u64 generation;
	u64 owner = 0;

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
	    item_size >= sizeof(*ei) + sizeof(*bi)) {
		unsigned long ptr = 0, end;

		ei = btrfs_item_ptr(eb, path->slots[0],
				struct btrfs_extent_item);
		end = (unsigned long)ei + item_size;
		if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
			/* Non-skinny item: level lives in the block info. */
			bi = (struct btrfs_tree_block_info *)(ei + 1);
			level = btrfs_tree_block_level(eb, bi);
			ptr = (unsigned long)(bi + 1);
		} else {
			/* Skinny metadata item: level is the key offset. */
			level = (int)extent_key->offset;
			ptr = (unsigned long)(ei + 1);
		}
		generation = btrfs_extent_generation(eb, ei);

		/*
		 * We only read the owner when the extent has exactly one
		 * reference and is not FULL_BACKREF: in that case the single
		 * inline TREE_BLOCK_REF's offset is the owning root id.
		 * With shared or full-backref extents the owner cannot be
		 * derived from the inline refs, so it stays 0.
		 */
		if (btrfs_extent_refs(eb, ei) == 1 &&
		    !(btrfs_extent_flags(eb, ei) &
		      BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
		    ptr < end) {
			struct btrfs_extent_inline_ref *iref;
			int type;

			iref = (struct btrfs_extent_inline_ref *)ptr;
			type = btrfs_get_extent_inline_ref_type(eb, iref,
							BTRFS_REF_TYPE_BLOCK);
			if (type == BTRFS_REF_TYPE_INVALID)
				return -EINVAL;
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				owner = btrfs_extent_inline_ref_offset(eb, iref);
		}
	} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
		/* Pre-3.3 v0 extent items are no longer supported. */
		btrfs_print_v0_err(eb->fs_info);
		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
		return -EINVAL;
	} else {
		BUG();
	}

	btrfs_release_path(path);

	BUG_ON(level == -1);

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block)
		return -ENOMEM;

	block->bytenr = extent_key->objectid;
	/* key.objectid carries the blocksize, key.offset the generation. */
	block->key.objectid = rc->extent_root->fs_info->nodesize;
	block->key.offset = generation;
	block->level = level;
	block->key_ready = 0;
	block->owner = owner;

	rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
	if (rb_node)
		btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
				    -EEXIST);

	return 0;
}
3255
3256
3257
3258
/*
 * Look up the extent item of the tree block at @bytenr and add the block
 * to @blocks, unless it was already processed or queued.
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;
	bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	if (tree_block_processed(bytenr, rc))
		return 0;

	if (rb_simple_search(blocks, bytenr))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	key.objectid = bytenr;
	if (skinny) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = (u64)-1;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = blocksize;
	}

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (ret > 0 && skinny) {
		/*
		 * We searched for (bytenr, METADATA, -1), so the matching
		 * item (skinny or regular) sits in the previous slot.
		 */
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    (key.type == BTRFS_METADATA_ITEM_KEY ||
			     (key.type == BTRFS_EXTENT_ITEM_KEY &&
			      key.offset == blocksize)))
				ret = 0;
		}

		/* No skinny item found, retry with a regular extent item. */
		if (ret) {
			skinny = false;
			btrfs_release_path(path);
			goto again;
		}
	}
	if (ret) {
		ASSERT(ret == 1);
		btrfs_print_leaf(path->nodes[0]);
		btrfs_err(fs_info,
	     "tree block extent item (%llu) is not found in extent tree",
		     bytenr);
		WARN_ON(1);
		ret = -EINVAL;
		goto out;
	}

	ret = add_tree_block(rc, &key, path, blocks);
out:
	btrfs_free_path(path);
	return ret;
}
3328
3329static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3330 struct btrfs_block_group *block_group,
3331 struct inode *inode,
3332 u64 ino)
3333{
3334 struct btrfs_root *root = fs_info->tree_root;
3335 struct btrfs_trans_handle *trans;
3336 int ret = 0;
3337
3338 if (inode)
3339 goto truncate;
3340
3341 inode = btrfs_iget(fs_info->sb, ino, root);
3342 if (IS_ERR(inode))
3343 return -ENOENT;
3344
3345truncate:
3346 ret = btrfs_check_trunc_cache_free_space(fs_info,
3347 &fs_info->global_block_rsv);
3348 if (ret)
3349 goto out;
3350
3351 trans = btrfs_join_transaction(root);
3352 if (IS_ERR(trans)) {
3353 ret = PTR_ERR(trans);
3354 goto out;
3355 }
3356
3357 ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
3358
3359 btrfs_end_transaction(trans);
3360 btrfs_btree_balance_dirty(fs_info);
3361out:
3362 iput(inode);
3363 return ret;
3364}
3365
3366
3367
3368
3369
3370static int delete_v1_space_cache(struct extent_buffer *leaf,
3371 struct btrfs_block_group *block_group,
3372 u64 data_bytenr)
3373{
3374 u64 space_cache_ino;
3375 struct btrfs_file_extent_item *ei;
3376 struct btrfs_key key;
3377 bool found = false;
3378 int i;
3379 int ret;
3380
3381 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3382 return 0;
3383
3384 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3385 u8 type;
3386
3387 btrfs_item_key_to_cpu(leaf, &key, i);
3388 if (key.type != BTRFS_EXTENT_DATA_KEY)
3389 continue;
3390 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3391 type = btrfs_file_extent_type(leaf, ei);
3392
3393 if ((type == BTRFS_FILE_EXTENT_REG ||
3394 type == BTRFS_FILE_EXTENT_PREALLOC) &&
3395 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3396 found = true;
3397 space_cache_ino = key.objectid;
3398 break;
3399 }
3400 }
3401 if (!found)
3402 return -ENOENT;
3403 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3404 space_cache_ino);
3405 return ret;
3406}
3407
3408
3409
3410
/*
 * Find every tree leaf referencing the data extent @extent_key and queue
 * those leaves in @blocks, so their file extent pointers get rewritten in
 * the UPDATE_DATA_PTRS stage.  Any v1 space cache file owning the extent
 * is dropped along the way.
 */
static noinline_for_stack
int add_data_references(struct reloc_control *rc,
			struct btrfs_key *extent_key,
			struct btrfs_path *path,
			struct rb_root *blocks)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct ulist *leaves = NULL;
	struct ulist_iterator leaf_uiter;
	struct ulist_node *ref_node = NULL;
	const u32 blocksize = fs_info->nodesize;
	int ret = 0;

	btrfs_release_path(path);
	/* Resolve all leaves that reference this extent via backrefs. */
	ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
				   0, &leaves, NULL, true);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&leaf_uiter);
	while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
		struct extent_buffer *eb;

		eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
		if (IS_ERR(eb)) {
			ret = PTR_ERR(eb);
			break;
		}
		ret = delete_v1_space_cache(eb, rc->block_group,
					    extent_key->objectid);
		free_extent_buffer(eb);
		if (ret < 0)
			break;
		ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
		if (ret < 0)
			break;
	}
	/* On error drop everything queued so far. */
	if (ret < 0)
		free_block_list(blocks);
	ulist_free(leaves);
	return ret;
}
3453
3454
3455
3456
/*
 * Find the next unprocessed extent inside the block group being relocated,
 * starting at rc->search_start.
 *
 * Returns 0 and copies the found key into @extent_key, 1 when the block
 * group is exhausted, or a negative errno.  rc->search_start is advanced
 * past the returned (or skipped) extent.
 */
static noinline_for_stack
int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
		     struct btrfs_key *extent_key)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u64 start, end, last;
	int ret;

	last = rc->block_group->start + rc->block_group->length;
	while (1) {
		cond_resched();
		/* Past the end of the block group: we're done. */
		if (rc->search_start >= last) {
			ret = 1;
			break;
		}

		key.objectid = rc->search_start;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = 0;

		path->search_commit_root = 1;
		path->skip_locking = 1;
		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
					0, 0);
		if (ret < 0)
			break;
next:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret != 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid >= last) {
			ret = 1;
			break;
		}

		/* Only extent items and skinny metadata items matter. */
		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
		    key.type != BTRFS_METADATA_ITEM_KEY) {
			path->slots[0]++;
			goto next;
		}

		/* Skip extents that end at or before the search cursor. */
		if (key.type == BTRFS_EXTENT_ITEM_KEY &&
		    key.objectid + key.offset <= rc->search_start) {
			path->slots[0]++;
			goto next;
		}

		/* Same for skinny items, whose size is the nodesize. */
		if (key.type == BTRFS_METADATA_ITEM_KEY &&
		    key.objectid + fs_info->nodesize <=
		    rc->search_start) {
			path->slots[0]++;
			goto next;
		}

		/* Skip tree blocks we already relocated in this pass. */
		ret = find_first_extent_bit(&rc->processed_blocks,
					    key.objectid, &start, &end,
					    EXTENT_DIRTY, NULL);

		if (ret == 0 && start <= key.objectid) {
			btrfs_release_path(path);
			rc->search_start = end + 1;
		} else {
			if (key.type == BTRFS_EXTENT_ITEM_KEY)
				rc->search_start = key.objectid + key.offset;
			else
				rc->search_start = key.objectid +
					fs_info->nodesize;
			memcpy(extent_key, &key, sizeof(key));
			return 0;
		}
	}
	btrfs_release_path(path);
	return ret;
}
3539
/* Publish @rc as the running relocation control, under reloc_mutex. */
static void set_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->reloc_mutex);
	fs_info->reloc_ctl = rc;
	mutex_unlock(&fs_info->reloc_mutex);
}
3548
/* Clear the running relocation control, under reloc_mutex. */
static void unset_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->reloc_mutex);
	fs_info->reloc_ctl = NULL;
	mutex_unlock(&fs_info->reloc_mutex);
}
3557
/*
 * Prepare one relocation pass: allocate and fill the temporary block
 * reservation, reset per-pass state, publish @rc as the active relocation
 * and commit a transaction so subsequent commit-root searches see a
 * consistent view.
 */
static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
	struct btrfs_trans_handle *trans;
	int ret;

	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
					      BTRFS_BLOCK_RSV_TEMP);
	if (!rc->block_rsv)
		return -ENOMEM;

	memset(&rc->cluster, 0, sizeof(rc->cluster));
	rc->search_start = rc->block_group->start;
	rc->extents_found = 0;
	rc->nodes_relocated = 0;
	rc->merging_rsv_size = 0;
	rc->reserved_bytes = 0;
	/* Reserve metadata space for RELOCATION_RESERVED_NODES tree nodes. */
	rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
			      RELOCATION_RESERVED_NODES;
	ret = btrfs_block_rsv_refill(rc->extent_root,
				     rc->block_rsv, rc->block_rsv->size,
				     BTRFS_RESERVE_FLUSH_ALL);
	if (ret)
		return ret;

	rc->create_reloc_tree = 1;
	set_reloc_control(rc);

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		unset_reloc_control(rc);
		/*
		 * The extent tree is not a ref_cow tree and has no reloc_root
		 * to clean up.  Callers are responsible for freeing the
		 * block reservation allocated above.
		 */
		return PTR_ERR(trans);
	}
	return btrfs_commit_transaction(trans);
}
3598
/*
 * Core of one relocation pass over rc->block_group: walk all extents in
 * the group, relocate tree blocks inline and batch data extents into
 * rc->cluster, then merge the reloc trees back into their fs trees.
 */
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct rb_root blocks = RB_ROOT;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	u64 flags;
	int ret;
	int err = 0;
	int progress = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_FORWARD;

	ret = prepare_to_relocate(rc);
	if (ret) {
		err = ret;
		goto out_free;
	}

	while (1) {
		rc->reserved_bytes = 0;
		/* Top up the metadata reservation for this iteration. */
		ret = btrfs_block_rsv_refill(rc->extent_root,
					rc->block_rsv, rc->block_rsv->size,
					BTRFS_RESERVE_FLUSH_ALL);
		if (ret) {
			err = ret;
			break;
		}
		progress++;
		trans = btrfs_start_transaction(rc->extent_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			trans = NULL;
			break;
		}
restart:
		/* A stale backref cache forces a fresh transaction. */
		if (update_backref_cache(trans, &rc->backref_cache)) {
			btrfs_end_transaction(trans);
			trans = NULL;
			continue;
		}

		ret = find_next_extent(rc, path, &key);
		if (ret < 0)
			err = ret;
		if (ret != 0)
			break;

		rc->extents_found++;

		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_extent_item);
		flags = btrfs_extent_flags(path->nodes[0], ei);

		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			ret = add_tree_block(rc, &key, path, &blocks);
		} else if (rc->stage == UPDATE_DATA_PTRS &&
			   (flags & BTRFS_EXTENT_FLAG_DATA)) {
			/* Second stage: queue leaves referencing the data. */
			ret = add_data_references(rc, &key, path, &blocks);
		} else {
			btrfs_release_path(path);
			ret = 0;
		}
		if (ret < 0) {
			err = ret;
			break;
		}

		if (!RB_EMPTY_ROOT(&blocks)) {
			ret = relocate_tree_blocks(trans, rc, &blocks);
			if (ret < 0) {
				if (ret != -EAGAIN) {
					err = ret;
					break;
				}
				/* -EAGAIN: retry this extent next round. */
				rc->extents_found--;
				rc->search_start = key.objectid;
			}
		}

		btrfs_end_transaction_throttle(trans);
		btrfs_btree_balance_dirty(fs_info);
		trans = NULL;

		/* First stage: copy the data itself, outside a transaction. */
		if (rc->stage == MOVE_DATA_EXTENTS &&
		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
			rc->found_file_extent = 1;
			ret = relocate_data_extent(rc->data_inode,
						   &key, &rc->cluster);
			if (ret < 0) {
				err = ret;
				break;
			}
		}
		if (btrfs_should_cancel_balance(fs_info)) {
			err = -ECANCELED;
			break;
		}
	}
	/* On ENOSPC, try forcing a new chunk and restart once. */
	if (trans && progress && err == -ENOSPC) {
		ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
		if (ret == 1) {
			err = 0;
			progress = 0;
			goto restart;
		}
	}

	btrfs_release_path(path);
	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);

	if (trans) {
		btrfs_end_transaction_throttle(trans);
		btrfs_btree_balance_dirty(fs_info);
	}

	/* Relocate the last, possibly partial, data extent cluster. */
	if (!err) {
		ret = relocate_file_extent_cluster(rc->data_inode,
						   &rc->cluster);
		if (ret < 0)
			err = ret;
	}

	rc->create_reloc_tree = 0;
	set_reloc_control(rc);

	btrfs_backref_release_cache(&rc->backref_cache);
	btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);

	/*
	 * Even when the relocation failed or was cancelled we must still go
	 * through prepare_to_merge() and merge_reloc_roots(): on error,
	 * prepare_to_merge() marks all reloc trees orphan and they get
	 * queued for cleanup in merge_reloc_roots().
	 */
	err = prepare_to_merge(rc, err);

	merge_reloc_roots(rc);

	rc->merge_reloc_tree = 0;
	unset_reloc_control(rc);
	btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);

	/* Commit to get rid of pinned extents. */
	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}
	ret = btrfs_commit_transaction(trans);
	if (ret && !err)
		err = ret;
out_free:
	ret = clean_dirty_subvols(rc);
	if (ret < 0 && !err)
		err = ret;
	btrfs_free_block_rsv(fs_info, rc->block_rsv);
	btrfs_free_path(path);
	return err;
}
3766
/*
 * Insert a minimal inode item for inode @objectid in @root: a zero-size
 * regular file (mode 0600) with NOCOMPRESS and, on non-zoned filesystems,
 * PREALLOC flags set.  Used to create the data relocation inode.
 */
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, u64 objectid)
{
	struct btrfs_path *path;
	struct btrfs_inode_item *item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	/* Zoned filesystems do not get the PREALLOC flag. */
	if (btrfs_is_zoned(trans->fs_info))
		flags &= ~BTRFS_INODE_PREALLOC;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		goto out;

	/* Fill in the freshly inserted (zeroed) inode item. */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
	memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
	btrfs_set_inode_generation(leaf, item, 1);
	btrfs_set_inode_size(leaf, item, 0);
	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, item, flags);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}
3799
3800static void delete_orphan_inode(struct btrfs_trans_handle *trans,
3801 struct btrfs_root *root, u64 objectid)
3802{
3803 struct btrfs_path *path;
3804 struct btrfs_key key;
3805 int ret = 0;
3806
3807 path = btrfs_alloc_path();
3808 if (!path) {
3809 ret = -ENOMEM;
3810 goto out;
3811 }
3812
3813 key.objectid = objectid;
3814 key.type = BTRFS_INODE_ITEM_KEY;
3815 key.offset = 0;
3816 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3817 if (ret) {
3818 if (ret > 0)
3819 ret = -ENOENT;
3820 goto out;
3821 }
3822 ret = btrfs_del_item(trans, root, path);
3823out:
3824 if (ret)
3825 btrfs_abort_transaction(trans, ret);
3826 btrfs_free_path(path);
3827}
3828
3829
3830
3831
3832
/*
 * Create the data relocation inode for @group in the data relocation tree.
 * The inode has link count 0 and is tracked as an orphan so it gets cleaned
 * up automatically; its index_cnt stores the block group start to map
 * bytenrs to file offsets.
 */
static noinline_for_stack
struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_group *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	u64 objectid;
	int err = 0;

	root = btrfs_grab_root(fs_info->data_reloc_root);
	trans = btrfs_start_transaction(root, 6);
	if (IS_ERR(trans)) {
		btrfs_put_root(root);
		return ERR_CAST(trans);
	}

	err = btrfs_get_free_objectid(root, &objectid);
	if (err)
		goto out;

	err = __insert_orphan_inode(trans, root, objectid);
	if (err)
		goto out;

	inode = btrfs_iget(fs_info->sb, objectid, root);
	if (IS_ERR(inode)) {
		/* Undo the item insertion; nothing references it yet. */
		delete_orphan_inode(trans, root, objectid);
		err = PTR_ERR(inode);
		inode = NULL;
		goto out;
	}
	BTRFS_I(inode)->index_cnt = group->start;

	err = btrfs_orphan_add(trans, BTRFS_I(inode));
out:
	btrfs_put_root(root);
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
/*
 * Mark the start of a cancellable chunk relocation.
 *
 * Returns -EAGAIN while a send operation is in progress, -EINPROGRESS if
 * a relocation is already running, -ECANCELED when a cancel request is
 * already pending, 0 otherwise.
 */
static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
{
	spin_lock(&fs_info->send_reloc_lock);
	if (fs_info->send_in_progress) {
		btrfs_warn_rl(fs_info,
"cannot run relocation while send operations are in progress (%d in progress)",
			      fs_info->send_in_progress);
		spin_unlock(&fs_info->send_reloc_lock);
		return -EAGAIN;
	}
	if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
		/* This should not happen */
		spin_unlock(&fs_info->send_reloc_lock);
		btrfs_err(fs_info, "reloc already running, cannot start");
		return -EINPROGRESS;
	}
	spin_unlock(&fs_info->send_reloc_lock);

	if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
		btrfs_info(fs_info, "chunk relocation canceled on start");
		/*
		 * Reset the pending cancel request so later relocations are
		 * not affected by it.
		 */
		atomic_set(&fs_info->reloc_cancel_req, 0);
		return -ECANCELED;
	}
	return 0;
}
3919
3920
3921
3922
/*
 * Mark the end of a cancellable chunk relocation: clear the running bit,
 * wake anybody waiting on it and reset a pending cancel request.
 */
static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
{
	/* Requested after start, logged only; relocation already stopped. */
	if (atomic_read(&fs_info->reloc_cancel_req) > 0)
		btrfs_info(fs_info, "chunk relocation canceled during operation");
	spin_lock(&fs_info->send_reloc_lock);
	clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
	spin_unlock(&fs_info->send_reloc_lock);
	atomic_set(&fs_info->reloc_cancel_req, 0);
}
3933
/*
 * Allocate and initialize a reloc_control.  Returns NULL on allocation
 * failure; the caller frees it with free_reloc_control().
 */
static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
{
	struct reloc_control *rc;

	rc = kzalloc(sizeof(*rc), GFP_NOFS);
	if (!rc)
		return NULL;

	INIT_LIST_HEAD(&rc->reloc_roots);
	INIT_LIST_HEAD(&rc->dirty_subvol_roots);
	btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
	mapping_tree_init(&rc->reloc_root_tree);
	extent_io_tree_init(fs_info, &rc->processed_blocks,
			    IO_TREE_RELOC_BLOCKS, NULL);
	return rc;
}
3950
/*
 * Release all resources held by @rc: the reloc roots, every mapping_node
 * still in the reloc root tree, and @rc itself.
 */
static void free_reloc_control(struct reloc_control *rc)
{
	struct mapping_node *node, *tmp;

	free_reloc_roots(&rc->reloc_roots);
	/* Postorder walk lets us free nodes without rebalancing the tree. */
	rbtree_postorder_for_each_entry_safe(node, tmp,
			&rc->reloc_root_tree.rb_root, rb_node)
		kfree(node);

	kfree(rc);
}
3962
3963
3964
3965
/*
 * Log which block group is being relocated, with its flags rendered in
 * human-readable form.
 */
static void describe_relocation(struct btrfs_fs_info *fs_info,
				struct btrfs_block_group *block_group)
{
	char buf[128] = {'\0'};

	btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));

	btrfs_info(fs_info,
		   "relocating block group %llu flags %s",
		   block_group->start, buf);
}
3977
3978static const char *stage_to_string(int stage)
3979{
3980 if (stage == MOVE_DATA_EXTENTS)
3981 return "move data extents";
3982 if (stage == UPDATE_DATA_PTRS)
3983 return "update data pointers";
3984 return "unknown";
3985}
3986
3987
3988
3989
/*
 * Relocate all extents in the block group starting at @group_start.
 * Runs relocate_block_group() repeatedly, switching from the
 * MOVE_DATA_EXTENTS stage to UPDATE_DATA_PTRS once data was found, until
 * a pass finds no more extents.
 */
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
{
	struct btrfs_block_group *bg;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct reloc_control *rc;
	struct inode *inode;
	struct btrfs_path *path;
	int ret;
	int rw = 0;	/* set once the block group was flipped read-only */
	int err = 0;

	bg = btrfs_lookup_block_group(fs_info, group_start);
	if (!bg)
		return -ENOENT;

	/* Block groups backing an active swapfile cannot be relocated. */
	if (btrfs_pinned_by_swapfile(fs_info, bg)) {
		btrfs_put_block_group(bg);
		return -ETXTBSY;
	}

	rc = alloc_reloc_control(fs_info);
	if (!rc) {
		btrfs_put_block_group(bg);
		return -ENOMEM;
	}

	ret = reloc_chunk_start(fs_info);
	if (ret < 0) {
		err = ret;
		goto out_put_bg;
	}

	rc->extent_root = extent_root;
	rc->block_group = bg;

	ret = btrfs_inc_block_group_ro(rc->block_group, true);
	if (ret) {
		err = ret;
		goto out;
	}
	rw = 1;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	/* Drop the group's v1 free space cache before moving its extents. */
	inode = lookup_free_space_inode(rc->block_group, path);
	btrfs_free_path(path);

	if (!IS_ERR(inode))
		ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
	else
		ret = PTR_ERR(inode);

	if (ret && ret != -ENOENT) {
		err = ret;
		goto out;
	}

	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
	if (IS_ERR(rc->data_inode)) {
		err = PTR_ERR(rc->data_inode);
		rc->data_inode = NULL;
		goto out;
	}

	describe_relocation(fs_info, rc->block_group);

	/* Let in-flight writes into the group finish before relocating. */
	btrfs_wait_block_group_reservations(rc->block_group);
	btrfs_wait_nocow_writers(rc->block_group);
	btrfs_wait_ordered_roots(fs_info, U64_MAX,
				 rc->block_group->start,
				 rc->block_group->length);

	while (1) {
		int finishes_stage;

		mutex_lock(&fs_info->cleaner_mutex);
		ret = relocate_block_group(rc);
		mutex_unlock(&fs_info->cleaner_mutex);
		if (ret < 0)
			err = ret;

		finishes_stage = rc->stage;
		/*
		 * After the MOVE_DATA_EXTENTS stage copied file data, flush
		 * it to disk and drop the page cache, then switch to the
		 * UPDATE_DATA_PTRS stage which rewrites the references.
		 * NOTE(review): err is checked only after this block, so the
		 * data is flushed even when the pass returned an error.
		 */
		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
			ret = btrfs_wait_ordered_range(rc->data_inode, 0,
						       (u64)-1);
			if (ret)
				err = ret;
			invalidate_mapping_pages(rc->data_inode->i_mapping,
						 0, -1);
			rc->stage = UPDATE_DATA_PTRS;
		}

		if (err < 0)
			goto out;

		/* Nothing left in the block group: relocation is complete. */
		if (rc->extents_found == 0)
			break;

		btrfs_info(fs_info, "found %llu extents, stage: %s",
			   rc->extents_found, stage_to_string(finishes_stage));
	}

	/* A fully relocated block group must be completely empty. */
	WARN_ON(rc->block_group->pinned > 0);
	WARN_ON(rc->block_group->reserved > 0);
	WARN_ON(rc->block_group->used > 0);
out:
	if (err && rw)
		btrfs_dec_block_group_ro(rc->block_group);
	iput(rc->data_inode);
out_put_bg:
	btrfs_put_block_group(bg);
	reloc_chunk_end(fs_info);
	free_reloc_control(rc);
	return err;
}
4118
4119static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4120{
4121 struct btrfs_fs_info *fs_info = root->fs_info;
4122 struct btrfs_trans_handle *trans;
4123 int ret, err;
4124
4125 trans = btrfs_start_transaction(fs_info->tree_root, 0);
4126 if (IS_ERR(trans))
4127 return PTR_ERR(trans);
4128
4129 memset(&root->root_item.drop_progress, 0,
4130 sizeof(root->root_item.drop_progress));
4131 btrfs_set_root_drop_level(&root->root_item, 0);
4132 btrfs_set_root_refs(&root->root_item, 0);
4133 ret = btrfs_update_root(trans, fs_info->tree_root,
4134 &root->root_key, &root->root_item);
4135
4136 err = btrfs_end_transaction(trans);
4137 if (err)
4138 return err;
4139 return ret;
4140}
4141
4142
4143
4144
4145
4146
4147
/*
 * Recover a relocation that was interrupted by a crash or unclean
 * shutdown.
 *
 * Scan the tree of tree roots for leftover reloc tree root items
 * (BTRFS_TREE_RELOC_OBJECTID), re-attach each reloc root to the
 * subvolume it belongs to, finish merging them back, and finally run
 * orphan cleanup on the data reloc root.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_recover_relocation(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	LIST_HEAD(reloc_roots);
	struct btrfs_key key;
	struct btrfs_root *fs_root;
	struct btrfs_root *reloc_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct reloc_control *rc = NULL;
	struct btrfs_trans_handle *trans;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	/* Walk the reloc root items from the highest offset downwards. */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
					path, 0, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0) {
			/* No exact match: step back to the previous item. */
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		btrfs_release_path(path);

		/* Past the range of reloc tree root items: done scanning. */
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
		    key.type != BTRFS_ROOT_ITEM_KEY)
			break;

		reloc_root = btrfs_read_tree_root(root, &key);
		if (IS_ERR(reloc_root)) {
			err = PTR_ERR(reloc_root);
			goto out;
		}

		set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
		list_add(&reloc_root->root_list, &reloc_roots);

		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			/*
			 * The reloc root is still referenced.  If its
			 * subvolume no longer exists (-ENOENT), mark the
			 * reloc root as garbage so it gets deleted later;
			 * any other lookup error is fatal.
			 */
			fs_root = btrfs_get_fs_root(fs_info,
					reloc_root->root_key.offset, false);
			if (IS_ERR(fs_root)) {
				ret = PTR_ERR(fs_root);
				if (ret != -ENOENT) {
					err = ret;
					goto out;
				}
				ret = mark_garbage_root(reloc_root);
				if (ret < 0) {
					err = ret;
					goto out;
				}
			} else {
				btrfs_put_root(fs_root);
			}
		}

		if (key.offset == 0)
			break;

		key.offset--;
	}
	btrfs_release_path(path);

	/* Nothing to recover. */
	if (list_empty(&reloc_roots))
		goto out;

	rc = alloc_reloc_control(fs_info);
	if (!rc) {
		err = -ENOMEM;
		goto out;
	}

	ret = reloc_chunk_start(fs_info);
	if (ret < 0) {
		err = ret;
		goto out_end;
	}

	rc->extent_root = fs_info->extent_root;

	set_reloc_control(rc);

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_unset;
	}

	rc->merge_reloc_tree = 1;

	/* Hook each surviving reloc root back up to its subvolume. */
	while (!list_empty(&reloc_roots)) {
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);
		list_del(&reloc_root->root_list);

		if (btrfs_root_refs(&reloc_root->root_item) == 0) {
			/* Orphan reloc root: just queue it for merging. */
			list_add_tail(&reloc_root->root_list,
				      &rc->reloc_roots);
			continue;
		}

		fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
					    false);
		if (IS_ERR(fs_root)) {
			err = PTR_ERR(fs_root);
			/* Put the root back so the out paths free it. */
			list_add_tail(&reloc_root->root_list, &reloc_roots);
			btrfs_end_transaction(trans);
			goto out_unset;
		}

		err = __add_reloc_root(reloc_root);
		ASSERT(err != -EEXIST);
		if (err) {
			list_add_tail(&reloc_root->root_list, &reloc_roots);
			btrfs_put_root(fs_root);
			btrfs_end_transaction(trans);
			goto out_unset;
		}
		fs_root->reloc_root = btrfs_grab_root(reloc_root);
		btrfs_put_root(fs_root);
	}

	err = btrfs_commit_transaction(trans);
	if (err)
		goto out_unset;

	merge_reloc_roots(rc);

	unset_reloc_control(rc);

	/* Commit once more to make the merge result persistent. */
	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_clean;
	}
	err = btrfs_commit_transaction(trans);
out_clean:
	ret = clean_dirty_subvols(rc);
	if (ret < 0 && !err)
		err = ret;
out_unset:
	unset_reloc_control(rc);
out_end:
	reloc_chunk_end(fs_info);
	free_reloc_control(rc);
out:
	free_reloc_roots(&reloc_roots);

	btrfs_free_path(path);

	if (err == 0) {
		/* Clean up orphan inodes left in the data reloc tree. */
		fs_root = btrfs_grab_root(fs_info->data_reloc_root);
		ASSERT(fs_root);
		err = btrfs_orphan_cleanup(fs_root);
		btrfs_put_root(fs_root);
	}
	return err;
}
4322
4323
4324
4325
4326
4327
4328
/*
 * Clone data checksums for an ordered extent created while relocating
 * data extents: look the csums up at the extent's old disk location and
 * re-base them onto the ordered extent's new disk location.
 *
 * @inode:    the data reloc inode the ordered extent belongs to
 * @file_pos: file offset of the ordered extent (must match exactly)
 * @len:      length of the ordered extent (must match exactly)
 *
 * Returns 0 on success or a negative errno from the csum lookup.
 */
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_ordered_sum *sums;
	struct btrfs_ordered_extent *ordered;
	int ret;
	u64 disk_bytenr;
	u64 new_bytenr;
	LIST_HEAD(list);

	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	/* The caller must have created a matching ordered extent. */
	BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);

	/*
	 * NOTE(review): this assumes index_cnt holds a base such that
	 * file offset + index_cnt gives the OLD disk bytenr for the data
	 * reloc inode -- confirm against where index_cnt is set for the
	 * relocation inode.
	 */
	disk_bytenr = file_pos + inode->index_cnt;
	ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
				       disk_bytenr + len - 1, &list, 0);
	if (ret)
		goto out;

	/* Move each csum entry off the lookup list onto the ordered extent. */
	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		/*
		 * Re-base the csum onto the new location: keep the same
		 * offset within the extent (sums->bytenr - disk_bytenr)
		 * but relative to the ordered extent's new disk_bytenr.
		 */
		new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
		sums->bytenr = new_bytenr;

		btrfs_add_ordered_sum(ordered, sums);
	}
out:
	btrfs_put_ordered_extent(ordered);
	return ret;
}
4373
/*
 * Hook called after a tree block @buf has been COWed into @cow, so that
 * a running relocation can keep its backref cache pointing at the new
 * copy and, for leaves COWed during the UPDATE_DATA_PTRS stage, rewrite
 * file extent pointers.
 *
 * Returns 0 on success or a negative errno from replace_file_extents().
 */
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *buf,
			  struct extent_buffer *cow)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct reloc_control *rc;
	struct btrfs_backref_node *node;
	int first_cow = 0;
	int level;
	int ret = 0;

	/* Nothing to do unless a relocation is in progress. */
	rc = fs_info->reloc_ctl;
	if (!rc)
		return 0;

	/* The data reloc tree must not be COWed while updating data ptrs. */
	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);

	/* First COW of this block since the last snapshot of the root? */
	level = btrfs_header_level(buf);
	if (btrfs_header_generation(buf) <=
	    btrfs_root_last_snapshot(&root->root_item))
		first_cow = 1;

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
	    rc->create_reloc_tree) {
		WARN_ON(!first_cow && level == 0);

		/*
		 * Re-point the cached backref node at the new copy of
		 * the block and record its new bytenr.
		 */
		node = rc->backref_cache.path[level];
		BUG_ON(node->bytenr != buf->start &&
		       node->new_bytenr != buf->start);

		btrfs_backref_drop_node_buffer(node);
		atomic_inc(&cow->refs);
		node->eb = cow;
		node->new_bytenr = cow->start;

		/* Queue the node on the per-level pending list once. */
		if (!node->pending) {
			list_move_tail(&node->list,
				       &rc->backref_cache.pending[level]);
			node->pending = 1;
		}

		if (first_cow)
			mark_block_processed(rc, node);

		/* Account relocated metadata for later reservations. */
		if (first_cow && level > 0)
			rc->nodes_relocated += buf->len;
	}

	if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
		ret = replace_file_extents(trans, rc, root, cow);
	return ret;
}
4427
4428
4429
4430
4431
4432void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
4433 u64 *bytes_to_reserve)
4434{
4435 struct btrfs_root *root = pending->root;
4436 struct reloc_control *rc = root->fs_info->reloc_ctl;
4437
4438 if (!rc || !have_reloc_root(root))
4439 return;
4440
4441 if (!rc->merge_reloc_tree)
4442 return;
4443
4444 root = root->reloc_root;
4445 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456 *bytes_to_reserve += rc->nodes_relocated;
4457}
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4468 struct btrfs_pending_snapshot *pending)
4469{
4470 struct btrfs_root *root = pending->root;
4471 struct btrfs_root *reloc_root;
4472 struct btrfs_root *new_root;
4473 struct reloc_control *rc = root->fs_info->reloc_ctl;
4474 int ret;
4475
4476 if (!rc || !have_reloc_root(root))
4477 return 0;
4478
4479 rc = root->fs_info->reloc_ctl;
4480 rc->merging_rsv_size += rc->nodes_relocated;
4481
4482 if (rc->merge_reloc_tree) {
4483 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4484 rc->block_rsv,
4485 rc->nodes_relocated, true);
4486 if (ret)
4487 return ret;
4488 }
4489
4490 new_root = pending->snap;
4491 reloc_root = create_reloc_root(trans, root->reloc_root,
4492 new_root->root_key.objectid);
4493 if (IS_ERR(reloc_root))
4494 return PTR_ERR(reloc_root);
4495
4496 ret = __add_reloc_root(reloc_root);
4497 ASSERT(ret != -EEXIST);
4498 if (ret) {
4499
4500 btrfs_put_root(reloc_root);
4501 return ret;
4502 }
4503 new_root->reloc_root = btrfs_grab_root(reloc_root);
4504
4505 if (rc->create_reloc_tree)
4506 ret = clone_backref_node(trans, rc, root, reloc_root);
4507 return ret;
4508}
4509