1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/pagemap.h>
8#include <linux/writeback.h>
9#include <linux/blkdev.h>
10#include <linux/rbtree.h>
11#include <linux/slab.h>
12#include <linux/error-injection.h>
13#include "ctree.h"
14#include "disk-io.h"
15#include "transaction.h"
16#include "volumes.h"
17#include "locking.h"
18#include "btrfs_inode.h"
19#include "async-thread.h"
20#include "free-space-cache.h"
21#include "qgroup.h"
22#include "print-tree.h"
23#include "delalloc-space.h"
24#include "block-group.h"
25#include "backref.h"
26#include "misc.h"
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75#define RELOCATION_RESERVED_NODES 256
76
77
78
79struct mapping_node {
80 struct {
81 struct rb_node rb_node;
82 u64 bytenr;
83 };
84 void *data;
85};
86
87struct mapping_tree {
88 struct rb_root rb_root;
89 spinlock_t lock;
90};
91
92
93
94
95struct tree_block {
96 struct {
97 struct rb_node rb_node;
98 u64 bytenr;
99 };
100 u64 owner;
101 struct btrfs_key key;
102 unsigned int level:8;
103 unsigned int key_ready:1;
104};
105
106#define MAX_EXTENTS 128
107
108struct file_extent_cluster {
109 u64 start;
110 u64 end;
111 u64 boundary[MAX_EXTENTS];
112 unsigned int nr;
113};
114
115struct reloc_control {
116
117 struct btrfs_block_group *block_group;
118
119 struct btrfs_root *extent_root;
120
121 struct inode *data_inode;
122
123 struct btrfs_block_rsv *block_rsv;
124
125 struct btrfs_backref_cache backref_cache;
126
127 struct file_extent_cluster cluster;
128
129 struct extent_io_tree processed_blocks;
130
131 struct mapping_tree reloc_root_tree;
132
133 struct list_head reloc_roots;
134
135 struct list_head dirty_subvol_roots;
136
137 u64 merging_rsv_size;
138
139 u64 nodes_relocated;
140
141 u64 reserved_bytes;
142
143 u64 search_start;
144 u64 extents_found;
145
146 unsigned int stage:8;
147 unsigned int create_reloc_tree:1;
148 unsigned int merge_reloc_tree:1;
149 unsigned int found_file_extent:1;
150};
151
152
153#define MOVE_DATA_EXTENTS 0
154#define UPDATE_DATA_PTRS 1
155
156static void mark_block_processed(struct reloc_control *rc,
157 struct btrfs_backref_node *node)
158{
159 u32 blocksize;
160
161 if (node->level == 0 ||
162 in_range(node->bytenr, rc->block_group->start,
163 rc->block_group->length)) {
164 blocksize = rc->extent_root->fs_info->nodesize;
165 set_extent_bits(&rc->processed_blocks, node->bytenr,
166 node->bytenr + blocksize - 1, EXTENT_DIRTY);
167 }
168 node->processed = 1;
169}
170
171
172static void mapping_tree_init(struct mapping_tree *tree)
173{
174 tree->rb_root = RB_ROOT;
175 spin_lock_init(&tree->lock);
176}
177
178
179
180
181static struct btrfs_backref_node *walk_up_backref(
182 struct btrfs_backref_node *node,
183 struct btrfs_backref_edge *edges[], int *index)
184{
185 struct btrfs_backref_edge *edge;
186 int idx = *index;
187
188 while (!list_empty(&node->upper)) {
189 edge = list_entry(node->upper.next,
190 struct btrfs_backref_edge, list[LOWER]);
191 edges[idx++] = edge;
192 node = edge->node[UPPER];
193 }
194 BUG_ON(node->detached);
195 *index = idx;
196 return node;
197}
198
199
200
201
202static struct btrfs_backref_node *walk_down_backref(
203 struct btrfs_backref_edge *edges[], int *index)
204{
205 struct btrfs_backref_edge *edge;
206 struct btrfs_backref_node *lower;
207 int idx = *index;
208
209 while (idx > 0) {
210 edge = edges[idx - 1];
211 lower = edge->node[LOWER];
212 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
213 idx--;
214 continue;
215 }
216 edge = list_entry(edge->list[LOWER].next,
217 struct btrfs_backref_edge, list[LOWER]);
218 edges[idx - 1] = edge;
219 *index = idx;
220 return edge->node[UPPER];
221 }
222 *index = 0;
223 return NULL;
224}
225
226static void update_backref_node(struct btrfs_backref_cache *cache,
227 struct btrfs_backref_node *node, u64 bytenr)
228{
229 struct rb_node *rb_node;
230 rb_erase(&node->rb_node, &cache->rb_root);
231 node->bytenr = bytenr;
232 rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
233 if (rb_node)
234 btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
235}
236
237
238
239
240static int update_backref_cache(struct btrfs_trans_handle *trans,
241 struct btrfs_backref_cache *cache)
242{
243 struct btrfs_backref_node *node;
244 int level = 0;
245
246 if (cache->last_trans == 0) {
247 cache->last_trans = trans->transid;
248 return 0;
249 }
250
251 if (cache->last_trans == trans->transid)
252 return 0;
253
254
255
256
257
258
259 while (!list_empty(&cache->detached)) {
260 node = list_entry(cache->detached.next,
261 struct btrfs_backref_node, list);
262 btrfs_backref_cleanup_node(cache, node);
263 }
264
265 while (!list_empty(&cache->changed)) {
266 node = list_entry(cache->changed.next,
267 struct btrfs_backref_node, list);
268 list_del_init(&node->list);
269 BUG_ON(node->pending);
270 update_backref_node(cache, node, node->new_bytenr);
271 }
272
273
274
275
276
277 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
278 list_for_each_entry(node, &cache->pending[level], list) {
279 BUG_ON(!node->pending);
280 if (node->bytenr == node->new_bytenr)
281 continue;
282 update_backref_node(cache, node, node->new_bytenr);
283 }
284 }
285
286 cache->last_trans = 0;
287 return 1;
288}
289
290static bool reloc_root_is_dead(struct btrfs_root *root)
291{
292
293
294
295
296
297 smp_rmb();
298 if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
299 return true;
300 return false;
301}
302
303
304
305
306
307
308
309
310
311static bool have_reloc_root(struct btrfs_root *root)
312{
313 if (reloc_root_is_dead(root))
314 return false;
315 if (!root->reloc_root)
316 return false;
317 return true;
318}
319
320int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
321{
322 struct btrfs_root *reloc_root;
323
324 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
325 return 0;
326
327
328 if (reloc_root_is_dead(root))
329 return 1;
330
331 reloc_root = root->reloc_root;
332 if (!reloc_root)
333 return 0;
334
335 if (btrfs_header_generation(reloc_root->commit_root) ==
336 root->fs_info->running_transaction->transid)
337 return 0;
338
339
340
341
342
343
344 return 1;
345}
346
347
348
349
350struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
351{
352 struct reloc_control *rc = fs_info->reloc_ctl;
353 struct rb_node *rb_node;
354 struct mapping_node *node;
355 struct btrfs_root *root = NULL;
356
357 ASSERT(rc);
358 spin_lock(&rc->reloc_root_tree.lock);
359 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
360 if (rb_node) {
361 node = rb_entry(rb_node, struct mapping_node, rb_node);
362 root = (struct btrfs_root *)node->data;
363 }
364 spin_unlock(&rc->reloc_root_tree.lock);
365 return btrfs_grab_root(root);
366}
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381static bool handle_useless_nodes(struct reloc_control *rc,
382 struct btrfs_backref_node *node)
383{
384 struct btrfs_backref_cache *cache = &rc->backref_cache;
385 struct list_head *useless_node = &cache->useless_node;
386 bool ret = false;
387
388 while (!list_empty(useless_node)) {
389 struct btrfs_backref_node *cur;
390
391 cur = list_first_entry(useless_node, struct btrfs_backref_node,
392 list);
393 list_del_init(&cur->list);
394
395
396 ASSERT(list_empty(&cur->upper));
397
398 if (cur == node)
399 ret = true;
400
401
402 if (cur->lowest) {
403 list_del_init(&cur->lower);
404 cur->lowest = 0;
405 }
406
407
408 while (!list_empty(&cur->lower)) {
409 struct btrfs_backref_edge *edge;
410 struct btrfs_backref_node *lower;
411
412 edge = list_entry(cur->lower.next,
413 struct btrfs_backref_edge, list[UPPER]);
414 list_del(&edge->list[UPPER]);
415 list_del(&edge->list[LOWER]);
416 lower = edge->node[LOWER];
417 btrfs_backref_free_edge(cache, edge);
418
419
420 if (list_empty(&lower->upper))
421 list_add(&lower->list, useless_node);
422 }
423
424 mark_block_processed(rc, cur);
425
426
427
428
429
430
431 if (cur->level > 0) {
432 list_add(&cur->list, &cache->detached);
433 cur->detached = 1;
434 } else {
435 rb_erase(&cur->rb_node, &cache->rb_root);
436 btrfs_backref_free_node(cache, cur);
437 }
438 }
439 return ret;
440}
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
457 struct reloc_control *rc, struct btrfs_key *node_key,
458 int level, u64 bytenr)
459{
460 struct btrfs_backref_iter *iter;
461 struct btrfs_backref_cache *cache = &rc->backref_cache;
462
463 struct btrfs_path *path;
464 struct btrfs_backref_node *cur;
465 struct btrfs_backref_node *node = NULL;
466 struct btrfs_backref_edge *edge;
467 int ret;
468 int err = 0;
469
470 iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
471 if (!iter)
472 return ERR_PTR(-ENOMEM);
473 path = btrfs_alloc_path();
474 if (!path) {
475 err = -ENOMEM;
476 goto out;
477 }
478
479 node = btrfs_backref_alloc_node(cache, bytenr, level);
480 if (!node) {
481 err = -ENOMEM;
482 goto out;
483 }
484
485 node->lowest = 1;
486 cur = node;
487
488
489 do {
490 ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
491 cur);
492 if (ret < 0) {
493 err = ret;
494 goto out;
495 }
496 edge = list_first_entry_or_null(&cache->pending_edge,
497 struct btrfs_backref_edge, list[UPPER]);
498
499
500
501
502 if (edge) {
503 list_del_init(&edge->list[UPPER]);
504 cur = edge->node[UPPER];
505 }
506 } while (edge);
507
508
509 ret = btrfs_backref_finish_upper_links(cache, node);
510 if (ret < 0) {
511 err = ret;
512 goto out;
513 }
514
515 if (handle_useless_nodes(rc, node))
516 node = NULL;
517out:
518 btrfs_backref_iter_free(iter);
519 btrfs_free_path(path);
520 if (err) {
521 btrfs_backref_error_cleanup(cache, node);
522 return ERR_PTR(err);
523 }
524 ASSERT(!node || !node->detached);
525 ASSERT(list_empty(&cache->useless_node) &&
526 list_empty(&cache->pending_edge));
527 return node;
528}
529
530
531
532
533
534
535static int clone_backref_node(struct btrfs_trans_handle *trans,
536 struct reloc_control *rc,
537 struct btrfs_root *src,
538 struct btrfs_root *dest)
539{
540 struct btrfs_root *reloc_root = src->reloc_root;
541 struct btrfs_backref_cache *cache = &rc->backref_cache;
542 struct btrfs_backref_node *node = NULL;
543 struct btrfs_backref_node *new_node;
544 struct btrfs_backref_edge *edge;
545 struct btrfs_backref_edge *new_edge;
546 struct rb_node *rb_node;
547
548 if (cache->last_trans > 0)
549 update_backref_cache(trans, cache);
550
551 rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
552 if (rb_node) {
553 node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
554 if (node->detached)
555 node = NULL;
556 else
557 BUG_ON(node->new_bytenr != reloc_root->node->start);
558 }
559
560 if (!node) {
561 rb_node = rb_simple_search(&cache->rb_root,
562 reloc_root->commit_root->start);
563 if (rb_node) {
564 node = rb_entry(rb_node, struct btrfs_backref_node,
565 rb_node);
566 BUG_ON(node->detached);
567 }
568 }
569
570 if (!node)
571 return 0;
572
573 new_node = btrfs_backref_alloc_node(cache, dest->node->start,
574 node->level);
575 if (!new_node)
576 return -ENOMEM;
577
578 new_node->lowest = node->lowest;
579 new_node->checked = 1;
580 new_node->root = btrfs_grab_root(dest);
581 ASSERT(new_node->root);
582
583 if (!node->lowest) {
584 list_for_each_entry(edge, &node->lower, list[UPPER]) {
585 new_edge = btrfs_backref_alloc_edge(cache);
586 if (!new_edge)
587 goto fail;
588
589 btrfs_backref_link_edge(new_edge, edge->node[LOWER],
590 new_node, LINK_UPPER);
591 }
592 } else {
593 list_add_tail(&new_node->lower, &cache->leaves);
594 }
595
596 rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
597 &new_node->rb_node);
598 if (rb_node)
599 btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
600
601 if (!new_node->lowest) {
602 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
603 list_add_tail(&new_edge->list[LOWER],
604 &new_edge->node[LOWER]->upper);
605 }
606 }
607 return 0;
608fail:
609 while (!list_empty(&new_node->lower)) {
610 new_edge = list_entry(new_node->lower.next,
611 struct btrfs_backref_edge, list[UPPER]);
612 list_del(&new_edge->list[UPPER]);
613 btrfs_backref_free_edge(cache, new_edge);
614 }
615 btrfs_backref_free_node(cache, new_node);
616 return -ENOMEM;
617}
618
619
620
621
622static int __must_check __add_reloc_root(struct btrfs_root *root)
623{
624 struct btrfs_fs_info *fs_info = root->fs_info;
625 struct rb_node *rb_node;
626 struct mapping_node *node;
627 struct reloc_control *rc = fs_info->reloc_ctl;
628
629 node = kmalloc(sizeof(*node), GFP_NOFS);
630 if (!node)
631 return -ENOMEM;
632
633 node->bytenr = root->commit_root->start;
634 node->data = root;
635
636 spin_lock(&rc->reloc_root_tree.lock);
637 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
638 node->bytenr, &node->rb_node);
639 spin_unlock(&rc->reloc_root_tree.lock);
640 if (rb_node) {
641 btrfs_err(fs_info,
642 "Duplicate root found for start=%llu while inserting into relocation tree",
643 node->bytenr);
644 return -EEXIST;
645 }
646
647 list_add_tail(&root->root_list, &rc->reloc_roots);
648 return 0;
649}
650
651
652
653
654
655static void __del_reloc_root(struct btrfs_root *root)
656{
657 struct btrfs_fs_info *fs_info = root->fs_info;
658 struct rb_node *rb_node;
659 struct mapping_node *node = NULL;
660 struct reloc_control *rc = fs_info->reloc_ctl;
661 bool put_ref = false;
662
663 if (rc && root->node) {
664 spin_lock(&rc->reloc_root_tree.lock);
665 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
666 root->commit_root->start);
667 if (rb_node) {
668 node = rb_entry(rb_node, struct mapping_node, rb_node);
669 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
670 RB_CLEAR_NODE(&node->rb_node);
671 }
672 spin_unlock(&rc->reloc_root_tree.lock);
673 ASSERT(!node || (struct btrfs_root *)node->data == root);
674 }
675
676
677
678
679
680
681
682
683
684 spin_lock(&fs_info->trans_lock);
685 if (!list_empty(&root->root_list)) {
686 put_ref = true;
687 list_del_init(&root->root_list);
688 }
689 spin_unlock(&fs_info->trans_lock);
690 if (put_ref)
691 btrfs_put_root(root);
692 kfree(node);
693}
694
695
696
697
698
699static int __update_reloc_root(struct btrfs_root *root)
700{
701 struct btrfs_fs_info *fs_info = root->fs_info;
702 struct rb_node *rb_node;
703 struct mapping_node *node = NULL;
704 struct reloc_control *rc = fs_info->reloc_ctl;
705
706 spin_lock(&rc->reloc_root_tree.lock);
707 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
708 root->commit_root->start);
709 if (rb_node) {
710 node = rb_entry(rb_node, struct mapping_node, rb_node);
711 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
712 }
713 spin_unlock(&rc->reloc_root_tree.lock);
714
715 if (!node)
716 return 0;
717 BUG_ON((struct btrfs_root *)node->data != root);
718
719 spin_lock(&rc->reloc_root_tree.lock);
720 node->bytenr = root->node->start;
721 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
722 node->bytenr, &node->rb_node);
723 spin_unlock(&rc->reloc_root_tree.lock);
724 if (rb_node)
725 btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
726 return 0;
727}
728
729static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
730 struct btrfs_root *root, u64 objectid)
731{
732 struct btrfs_fs_info *fs_info = root->fs_info;
733 struct btrfs_root *reloc_root;
734 struct extent_buffer *eb;
735 struct btrfs_root_item *root_item;
736 struct btrfs_key root_key;
737 int ret = 0;
738 bool must_abort = false;
739
740 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
741 if (!root_item)
742 return ERR_PTR(-ENOMEM);
743
744 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
745 root_key.type = BTRFS_ROOT_ITEM_KEY;
746 root_key.offset = objectid;
747
748 if (root->root_key.objectid == objectid) {
749 u64 commit_root_gen;
750
751
752 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
753 BTRFS_TREE_RELOC_OBJECTID);
754 if (ret)
755 goto fail;
756
757
758
759
760
761
762
763
764
765 commit_root_gen = btrfs_header_generation(root->commit_root);
766 btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
767 } else {
768
769
770
771
772
773
774
775 ret = btrfs_copy_root(trans, root, root->node, &eb,
776 BTRFS_TREE_RELOC_OBJECTID);
777 if (ret)
778 goto fail;
779 }
780
781
782
783
784
785 must_abort = true;
786
787 memcpy(root_item, &root->root_item, sizeof(*root_item));
788 btrfs_set_root_bytenr(root_item, eb->start);
789 btrfs_set_root_level(root_item, btrfs_header_level(eb));
790 btrfs_set_root_generation(root_item, trans->transid);
791
792 if (root->root_key.objectid == objectid) {
793 btrfs_set_root_refs(root_item, 0);
794 memset(&root_item->drop_progress, 0,
795 sizeof(struct btrfs_disk_key));
796 btrfs_set_root_drop_level(root_item, 0);
797 }
798
799 btrfs_tree_unlock(eb);
800 free_extent_buffer(eb);
801
802 ret = btrfs_insert_root(trans, fs_info->tree_root,
803 &root_key, root_item);
804 if (ret)
805 goto fail;
806
807 kfree(root_item);
808
809 reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
810 if (IS_ERR(reloc_root)) {
811 ret = PTR_ERR(reloc_root);
812 goto abort;
813 }
814 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
815 reloc_root->last_trans = trans->transid;
816 return reloc_root;
817fail:
818 kfree(root_item);
819abort:
820 if (must_abort)
821 btrfs_abort_transaction(trans, ret);
822 return ERR_PTR(ret);
823}
824
825
826
827
828
829
830
831
832int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
833 struct btrfs_root *root)
834{
835 struct btrfs_fs_info *fs_info = root->fs_info;
836 struct btrfs_root *reloc_root;
837 struct reloc_control *rc = fs_info->reloc_ctl;
838 struct btrfs_block_rsv *rsv;
839 int clear_rsv = 0;
840 int ret;
841
842 if (!rc)
843 return 0;
844
845
846
847
848
849 if (reloc_root_is_dead(root))
850 return 0;
851
852
853
854
855
856
857
858
859
860 if (root->reloc_root) {
861 reloc_root = root->reloc_root;
862 reloc_root->last_trans = trans->transid;
863 return 0;
864 }
865
866
867
868
869
870 if (!rc->create_reloc_tree ||
871 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
872 return 0;
873
874 if (!trans->reloc_reserved) {
875 rsv = trans->block_rsv;
876 trans->block_rsv = rc->block_rsv;
877 clear_rsv = 1;
878 }
879 reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
880 if (clear_rsv)
881 trans->block_rsv = rsv;
882 if (IS_ERR(reloc_root))
883 return PTR_ERR(reloc_root);
884
885 ret = __add_reloc_root(reloc_root);
886 ASSERT(ret != -EEXIST);
887 if (ret) {
888
889 btrfs_put_root(reloc_root);
890 return ret;
891 }
892 root->reloc_root = btrfs_grab_root(reloc_root);
893 return 0;
894}
895
896
897
898
899int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
900 struct btrfs_root *root)
901{
902 struct btrfs_fs_info *fs_info = root->fs_info;
903 struct btrfs_root *reloc_root;
904 struct btrfs_root_item *root_item;
905 int ret;
906
907 if (!have_reloc_root(root))
908 return 0;
909
910 reloc_root = root->reloc_root;
911 root_item = &reloc_root->root_item;
912
913
914
915
916
917
918 btrfs_grab_root(reloc_root);
919
920
921 if (fs_info->reloc_ctl->merge_reloc_tree &&
922 btrfs_root_refs(root_item) == 0) {
923 set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
924
925
926
927
928 smp_wmb();
929 __del_reloc_root(reloc_root);
930 }
931
932 if (reloc_root->commit_root != reloc_root->node) {
933 __update_reloc_root(reloc_root);
934 btrfs_set_root_node(root_item, reloc_root->node);
935 free_extent_buffer(reloc_root->commit_root);
936 reloc_root->commit_root = btrfs_root_node(reloc_root);
937 }
938
939 ret = btrfs_update_root(trans, fs_info->tree_root,
940 &reloc_root->root_key, root_item);
941 btrfs_put_root(reloc_root);
942 return ret;
943}
944
945
946
947
948
949static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
950{
951 struct rb_node *node;
952 struct rb_node *prev;
953 struct btrfs_inode *entry;
954 struct inode *inode;
955
956 spin_lock(&root->inode_lock);
957again:
958 node = root->inode_tree.rb_node;
959 prev = NULL;
960 while (node) {
961 prev = node;
962 entry = rb_entry(node, struct btrfs_inode, rb_node);
963
964 if (objectid < btrfs_ino(entry))
965 node = node->rb_left;
966 else if (objectid > btrfs_ino(entry))
967 node = node->rb_right;
968 else
969 break;
970 }
971 if (!node) {
972 while (prev) {
973 entry = rb_entry(prev, struct btrfs_inode, rb_node);
974 if (objectid <= btrfs_ino(entry)) {
975 node = prev;
976 break;
977 }
978 prev = rb_next(prev);
979 }
980 }
981 while (node) {
982 entry = rb_entry(node, struct btrfs_inode, rb_node);
983 inode = igrab(&entry->vfs_inode);
984 if (inode) {
985 spin_unlock(&root->inode_lock);
986 return inode;
987 }
988
989 objectid = btrfs_ino(entry) + 1;
990 if (cond_resched_lock(&root->inode_lock))
991 goto again;
992
993 node = rb_next(node);
994 }
995 spin_unlock(&root->inode_lock);
996 return NULL;
997}
998
999
1000
1001
1002static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1003 u64 bytenr, u64 num_bytes)
1004{
1005 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1006 struct btrfs_path *path;
1007 struct btrfs_file_extent_item *fi;
1008 struct extent_buffer *leaf;
1009 int ret;
1010
1011 path = btrfs_alloc_path();
1012 if (!path)
1013 return -ENOMEM;
1014
1015 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1016 ret = btrfs_lookup_file_extent(NULL, root, path,
1017 btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
1018 if (ret < 0)
1019 goto out;
1020 if (ret > 0) {
1021 ret = -ENOENT;
1022 goto out;
1023 }
1024
1025 leaf = path->nodes[0];
1026 fi = btrfs_item_ptr(leaf, path->slots[0],
1027 struct btrfs_file_extent_item);
1028
1029 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1030 btrfs_file_extent_compression(leaf, fi) ||
1031 btrfs_file_extent_encryption(leaf, fi) ||
1032 btrfs_file_extent_other_encoding(leaf, fi));
1033
1034 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1035 ret = -EINVAL;
1036 goto out;
1037 }
1038
1039 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1040 ret = 0;
1041out:
1042 btrfs_free_path(path);
1043 return ret;
1044}
1045
1046
1047
1048
1049
1050static noinline_for_stack
1051int replace_file_extents(struct btrfs_trans_handle *trans,
1052 struct reloc_control *rc,
1053 struct btrfs_root *root,
1054 struct extent_buffer *leaf)
1055{
1056 struct btrfs_fs_info *fs_info = root->fs_info;
1057 struct btrfs_key key;
1058 struct btrfs_file_extent_item *fi;
1059 struct inode *inode = NULL;
1060 u64 parent;
1061 u64 bytenr;
1062 u64 new_bytenr = 0;
1063 u64 num_bytes;
1064 u64 end;
1065 u32 nritems;
1066 u32 i;
1067 int ret = 0;
1068 int first = 1;
1069 int dirty = 0;
1070
1071 if (rc->stage != UPDATE_DATA_PTRS)
1072 return 0;
1073
1074
1075 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1076 parent = leaf->start;
1077 else
1078 parent = 0;
1079
1080 nritems = btrfs_header_nritems(leaf);
1081 for (i = 0; i < nritems; i++) {
1082 struct btrfs_ref ref = { 0 };
1083
1084 cond_resched();
1085 btrfs_item_key_to_cpu(leaf, &key, i);
1086 if (key.type != BTRFS_EXTENT_DATA_KEY)
1087 continue;
1088 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1089 if (btrfs_file_extent_type(leaf, fi) ==
1090 BTRFS_FILE_EXTENT_INLINE)
1091 continue;
1092 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1093 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1094 if (bytenr == 0)
1095 continue;
1096 if (!in_range(bytenr, rc->block_group->start,
1097 rc->block_group->length))
1098 continue;
1099
1100
1101
1102
1103
1104 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1105 if (first) {
1106 inode = find_next_inode(root, key.objectid);
1107 first = 0;
1108 } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
1109 btrfs_add_delayed_iput(inode);
1110 inode = find_next_inode(root, key.objectid);
1111 }
1112 if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
1113 end = key.offset +
1114 btrfs_file_extent_num_bytes(leaf, fi);
1115 WARN_ON(!IS_ALIGNED(key.offset,
1116 fs_info->sectorsize));
1117 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1118 end--;
1119 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1120 key.offset, end);
1121 if (!ret)
1122 continue;
1123
1124 btrfs_drop_extent_cache(BTRFS_I(inode),
1125 key.offset, end, 1);
1126 unlock_extent(&BTRFS_I(inode)->io_tree,
1127 key.offset, end);
1128 }
1129 }
1130
1131 ret = get_new_location(rc->data_inode, &new_bytenr,
1132 bytenr, num_bytes);
1133 if (ret) {
1134
1135
1136
1137
1138 break;
1139 }
1140
1141 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1142 dirty = 1;
1143
1144 key.offset -= btrfs_file_extent_offset(leaf, fi);
1145 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1146 num_bytes, parent);
1147 ref.real_root = root->root_key.objectid;
1148 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1149 key.objectid, key.offset);
1150 ret = btrfs_inc_extent_ref(trans, &ref);
1151 if (ret) {
1152 btrfs_abort_transaction(trans, ret);
1153 break;
1154 }
1155
1156 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1157 num_bytes, parent);
1158 ref.real_root = root->root_key.objectid;
1159 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1160 key.objectid, key.offset);
1161 ret = btrfs_free_extent(trans, &ref);
1162 if (ret) {
1163 btrfs_abort_transaction(trans, ret);
1164 break;
1165 }
1166 }
1167 if (dirty)
1168 btrfs_mark_buffer_dirty(leaf);
1169 if (inode)
1170 btrfs_add_delayed_iput(inode);
1171 return ret;
1172}
1173
1174static noinline_for_stack
1175int memcmp_node_keys(struct extent_buffer *eb, int slot,
1176 struct btrfs_path *path, int level)
1177{
1178 struct btrfs_disk_key key1;
1179 struct btrfs_disk_key key2;
1180 btrfs_node_key(eb, &key1, slot);
1181 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1182 return memcmp(&key1, &key2, sizeof(key1));
1183}
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194static noinline_for_stack
1195int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
1196 struct btrfs_root *dest, struct btrfs_root *src,
1197 struct btrfs_path *path, struct btrfs_key *next_key,
1198 int lowest_level, int max_level)
1199{
1200 struct btrfs_fs_info *fs_info = dest->fs_info;
1201 struct extent_buffer *eb;
1202 struct extent_buffer *parent;
1203 struct btrfs_ref ref = { 0 };
1204 struct btrfs_key key;
1205 u64 old_bytenr;
1206 u64 new_bytenr;
1207 u64 old_ptr_gen;
1208 u64 new_ptr_gen;
1209 u64 last_snapshot;
1210 u32 blocksize;
1211 int cow = 0;
1212 int level;
1213 int ret;
1214 int slot;
1215
1216 ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1217 ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1218
1219 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1220again:
1221 slot = path->slots[lowest_level];
1222 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1223
1224 eb = btrfs_lock_root_node(dest);
1225 level = btrfs_header_level(eb);
1226
1227 if (level < lowest_level) {
1228 btrfs_tree_unlock(eb);
1229 free_extent_buffer(eb);
1230 return 0;
1231 }
1232
1233 if (cow) {
1234 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
1235 BTRFS_NESTING_COW);
1236 if (ret) {
1237 btrfs_tree_unlock(eb);
1238 free_extent_buffer(eb);
1239 return ret;
1240 }
1241 }
1242
1243 if (next_key) {
1244 next_key->objectid = (u64)-1;
1245 next_key->type = (u8)-1;
1246 next_key->offset = (u64)-1;
1247 }
1248
1249 parent = eb;
1250 while (1) {
1251 level = btrfs_header_level(parent);
1252 ASSERT(level >= lowest_level);
1253
1254 ret = btrfs_bin_search(parent, &key, &slot);
1255 if (ret < 0)
1256 break;
1257 if (ret && slot > 0)
1258 slot--;
1259
1260 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1261 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1262
1263 old_bytenr = btrfs_node_blockptr(parent, slot);
1264 blocksize = fs_info->nodesize;
1265 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1266
1267 if (level <= max_level) {
1268 eb = path->nodes[level];
1269 new_bytenr = btrfs_node_blockptr(eb,
1270 path->slots[level]);
1271 new_ptr_gen = btrfs_node_ptr_generation(eb,
1272 path->slots[level]);
1273 } else {
1274 new_bytenr = 0;
1275 new_ptr_gen = 0;
1276 }
1277
1278 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
1279 ret = level;
1280 break;
1281 }
1282
1283 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1284 memcmp_node_keys(parent, slot, path, level)) {
1285 if (level <= lowest_level) {
1286 ret = 0;
1287 break;
1288 }
1289
1290 eb = btrfs_read_node_slot(parent, slot);
1291 if (IS_ERR(eb)) {
1292 ret = PTR_ERR(eb);
1293 break;
1294 }
1295 btrfs_tree_lock(eb);
1296 if (cow) {
1297 ret = btrfs_cow_block(trans, dest, eb, parent,
1298 slot, &eb,
1299 BTRFS_NESTING_COW);
1300 if (ret) {
1301 btrfs_tree_unlock(eb);
1302 free_extent_buffer(eb);
1303 break;
1304 }
1305 }
1306
1307 btrfs_tree_unlock(parent);
1308 free_extent_buffer(parent);
1309
1310 parent = eb;
1311 continue;
1312 }
1313
1314 if (!cow) {
1315 btrfs_tree_unlock(parent);
1316 free_extent_buffer(parent);
1317 cow = 1;
1318 goto again;
1319 }
1320
1321 btrfs_node_key_to_cpu(path->nodes[level], &key,
1322 path->slots[level]);
1323 btrfs_release_path(path);
1324
1325 path->lowest_level = level;
1326 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1327 path->lowest_level = 0;
1328 if (ret) {
1329 if (ret > 0)
1330 ret = -ENOENT;
1331 break;
1332 }
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348 ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
1349 rc->block_group, parent, slot,
1350 path->nodes[level], path->slots[level],
1351 last_snapshot);
1352 if (ret < 0)
1353 break;
1354
1355
1356
1357 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1358 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1359 btrfs_mark_buffer_dirty(parent);
1360
1361 btrfs_set_node_blockptr(path->nodes[level],
1362 path->slots[level], old_bytenr);
1363 btrfs_set_node_ptr_generation(path->nodes[level],
1364 path->slots[level], old_ptr_gen);
1365 btrfs_mark_buffer_dirty(path->nodes[level]);
1366
1367 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
1368 blocksize, path->nodes[level]->start);
1369 ref.skip_qgroup = true;
1370 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1371 ret = btrfs_inc_extent_ref(trans, &ref);
1372 if (ret) {
1373 btrfs_abort_transaction(trans, ret);
1374 break;
1375 }
1376 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1377 blocksize, 0);
1378 ref.skip_qgroup = true;
1379 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1380 ret = btrfs_inc_extent_ref(trans, &ref);
1381 if (ret) {
1382 btrfs_abort_transaction(trans, ret);
1383 break;
1384 }
1385
1386 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1387 blocksize, path->nodes[level]->start);
1388 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1389 ref.skip_qgroup = true;
1390 ret = btrfs_free_extent(trans, &ref);
1391 if (ret) {
1392 btrfs_abort_transaction(trans, ret);
1393 break;
1394 }
1395
1396 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1397 blocksize, 0);
1398 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1399 ref.skip_qgroup = true;
1400 ret = btrfs_free_extent(trans, &ref);
1401 if (ret) {
1402 btrfs_abort_transaction(trans, ret);
1403 break;
1404 }
1405
1406 btrfs_unlock_up_safe(path, 0);
1407
1408 ret = level;
1409 break;
1410 }
1411 btrfs_tree_unlock(parent);
1412 free_extent_buffer(parent);
1413 return ret;
1414}
1415
1416
1417
1418
1419static noinline_for_stack
1420int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1421 int *level)
1422{
1423 struct extent_buffer *eb;
1424 int i;
1425 u64 last_snapshot;
1426 u32 nritems;
1427
1428 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1429
1430 for (i = 0; i < *level; i++) {
1431 free_extent_buffer(path->nodes[i]);
1432 path->nodes[i] = NULL;
1433 }
1434
1435 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1436 eb = path->nodes[i];
1437 nritems = btrfs_header_nritems(eb);
1438 while (path->slots[i] + 1 < nritems) {
1439 path->slots[i]++;
1440 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1441 last_snapshot)
1442 continue;
1443
1444 *level = i;
1445 return 0;
1446 }
1447 free_extent_buffer(path->nodes[i]);
1448 path->nodes[i] = NULL;
1449 }
1450 return 1;
1451}
1452
1453
1454
1455
1456static noinline_for_stack
1457int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1458 int *level)
1459{
1460 struct extent_buffer *eb = NULL;
1461 int i;
1462 u64 ptr_gen = 0;
1463 u64 last_snapshot;
1464 u32 nritems;
1465
1466 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1467
1468 for (i = *level; i > 0; i--) {
1469 eb = path->nodes[i];
1470 nritems = btrfs_header_nritems(eb);
1471 while (path->slots[i] < nritems) {
1472 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1473 if (ptr_gen > last_snapshot)
1474 break;
1475 path->slots[i]++;
1476 }
1477 if (path->slots[i] >= nritems) {
1478 if (i == *level)
1479 break;
1480 *level = i + 1;
1481 return 0;
1482 }
1483 if (i == 1) {
1484 *level = i;
1485 return 0;
1486 }
1487
1488 eb = btrfs_read_node_slot(eb, path->slots[i]);
1489 if (IS_ERR(eb))
1490 return PTR_ERR(eb);
1491 BUG_ON(btrfs_header_level(eb) != i - 1);
1492 path->nodes[i - 1] = eb;
1493 path->slots[i - 1] = 0;
1494 }
1495 return 1;
1496}
1497
1498
1499
1500
1501
1502static int invalidate_extent_cache(struct btrfs_root *root,
1503 struct btrfs_key *min_key,
1504 struct btrfs_key *max_key)
1505{
1506 struct btrfs_fs_info *fs_info = root->fs_info;
1507 struct inode *inode = NULL;
1508 u64 objectid;
1509 u64 start, end;
1510 u64 ino;
1511
1512 objectid = min_key->objectid;
1513 while (1) {
1514 cond_resched();
1515 iput(inode);
1516
1517 if (objectid > max_key->objectid)
1518 break;
1519
1520 inode = find_next_inode(root, objectid);
1521 if (!inode)
1522 break;
1523 ino = btrfs_ino(BTRFS_I(inode));
1524
1525 if (ino > max_key->objectid) {
1526 iput(inode);
1527 break;
1528 }
1529
1530 objectid = ino + 1;
1531 if (!S_ISREG(inode->i_mode))
1532 continue;
1533
1534 if (unlikely(min_key->objectid == ino)) {
1535 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1536 continue;
1537 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1538 start = 0;
1539 else {
1540 start = min_key->offset;
1541 WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
1542 }
1543 } else {
1544 start = 0;
1545 }
1546
1547 if (unlikely(max_key->objectid == ino)) {
1548 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1549 continue;
1550 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1551 end = (u64)-1;
1552 } else {
1553 if (max_key->offset == 0)
1554 continue;
1555 end = max_key->offset;
1556 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1557 end--;
1558 }
1559 } else {
1560 end = (u64)-1;
1561 }
1562
1563
1564 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1565 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
1566 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1567 }
1568 return 0;
1569}
1570
1571static int find_next_key(struct btrfs_path *path, int level,
1572 struct btrfs_key *key)
1573
1574{
1575 while (level < BTRFS_MAX_LEVEL) {
1576 if (!path->nodes[level])
1577 break;
1578 if (path->slots[level] + 1 <
1579 btrfs_header_nritems(path->nodes[level])) {
1580 btrfs_node_key_to_cpu(path->nodes[level], key,
1581 path->slots[level] + 1);
1582 return 0;
1583 }
1584 level++;
1585 }
1586 return 1;
1587}
1588
1589
1590
1591
1592static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
1593 struct reloc_control *rc,
1594 struct btrfs_root *root)
1595{
1596 struct btrfs_root *reloc_root = root->reloc_root;
1597 struct btrfs_root_item *reloc_root_item;
1598 int ret;
1599
1600
1601 ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1602 ASSERT(reloc_root);
1603
1604 reloc_root_item = &reloc_root->root_item;
1605 memset(&reloc_root_item->drop_progress, 0,
1606 sizeof(reloc_root_item->drop_progress));
1607 btrfs_set_root_drop_level(reloc_root_item, 0);
1608 btrfs_set_root_refs(reloc_root_item, 0);
1609 ret = btrfs_update_reloc_root(trans, root);
1610 if (ret)
1611 return ret;
1612
1613 if (list_empty(&root->reloc_dirty_list)) {
1614 btrfs_grab_root(root);
1615 list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
1616 }
1617
1618 return 0;
1619}
1620
1621static int clean_dirty_subvols(struct reloc_control *rc)
1622{
1623 struct btrfs_root *root;
1624 struct btrfs_root *next;
1625 int ret = 0;
1626 int ret2;
1627
1628 list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
1629 reloc_dirty_list) {
1630 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1631
1632 struct btrfs_root *reloc_root = root->reloc_root;
1633
1634 list_del_init(&root->reloc_dirty_list);
1635 root->reloc_root = NULL;
1636
1637
1638
1639
1640 smp_wmb();
1641 clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
1642 if (reloc_root) {
1643
1644
1645
1646
1647
1648 ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
1649 if (ret2 < 0) {
1650 btrfs_put_root(reloc_root);
1651 if (!ret)
1652 ret = ret2;
1653 }
1654 }
1655 btrfs_put_root(root);
1656 } else {
1657
1658 ret2 = btrfs_drop_snapshot(root, 0, 1);
1659 if (ret2 < 0) {
1660 btrfs_put_root(root);
1661 if (!ret)
1662 ret = ret2;
1663 }
1664 }
1665 }
1666 return ret;
1667}
1668
1669
1670
1671
1672
1673static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1674 struct btrfs_root *root)
1675{
1676 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
1677 struct btrfs_key key;
1678 struct btrfs_key next_key;
1679 struct btrfs_trans_handle *trans = NULL;
1680 struct btrfs_root *reloc_root;
1681 struct btrfs_root_item *root_item;
1682 struct btrfs_path *path;
1683 struct extent_buffer *leaf;
1684 int reserve_level;
1685 int level;
1686 int max_level;
1687 int replaced = 0;
1688 int ret = 0;
1689 u32 min_reserved;
1690
1691 path = btrfs_alloc_path();
1692 if (!path)
1693 return -ENOMEM;
1694 path->reada = READA_FORWARD;
1695
1696 reloc_root = root->reloc_root;
1697 root_item = &reloc_root->root_item;
1698
1699 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1700 level = btrfs_root_level(root_item);
1701 atomic_inc(&reloc_root->node->refs);
1702 path->nodes[level] = reloc_root->node;
1703 path->slots[level] = 0;
1704 } else {
1705 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1706
1707 level = btrfs_root_drop_level(root_item);
1708 BUG_ON(level == 0);
1709 path->lowest_level = level;
1710 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1711 path->lowest_level = 0;
1712 if (ret < 0) {
1713 btrfs_free_path(path);
1714 return ret;
1715 }
1716
1717 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1718 path->slots[level]);
1719 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1720
1721 btrfs_unlock_up_safe(path, 0);
1722 }
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732 reserve_level = max_t(int, 1, btrfs_root_level(root_item));
1733 min_reserved = fs_info->nodesize * reserve_level * 2;
1734 memset(&next_key, 0, sizeof(next_key));
1735
1736 while (1) {
1737 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
1738 BTRFS_RESERVE_FLUSH_LIMIT);
1739 if (ret)
1740 goto out;
1741 trans = btrfs_start_transaction(root, 0);
1742 if (IS_ERR(trans)) {
1743 ret = PTR_ERR(trans);
1744 trans = NULL;
1745 goto out;
1746 }
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758 reloc_root->last_trans = trans->transid;
1759 trans->block_rsv = rc->block_rsv;
1760
1761 replaced = 0;
1762 max_level = level;
1763
1764 ret = walk_down_reloc_tree(reloc_root, path, &level);
1765 if (ret < 0)
1766 goto out;
1767 if (ret > 0)
1768 break;
1769
1770 if (!find_next_key(path, level, &key) &&
1771 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1772 ret = 0;
1773 } else {
1774 ret = replace_path(trans, rc, root, reloc_root, path,
1775 &next_key, level, max_level);
1776 }
1777 if (ret < 0)
1778 goto out;
1779 if (ret > 0) {
1780 level = ret;
1781 btrfs_node_key_to_cpu(path->nodes[level], &key,
1782 path->slots[level]);
1783 replaced = 1;
1784 }
1785
1786 ret = walk_up_reloc_tree(reloc_root, path, &level);
1787 if (ret > 0)
1788 break;
1789
1790 BUG_ON(level == 0);
1791
1792
1793
1794
1795 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1796 path->slots[level]);
1797 btrfs_set_root_drop_level(root_item, level);
1798
1799 btrfs_end_transaction_throttle(trans);
1800 trans = NULL;
1801
1802 btrfs_btree_balance_dirty(fs_info);
1803
1804 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1805 invalidate_extent_cache(root, &key, &next_key);
1806 }
1807
1808
1809
1810
1811
1812 leaf = btrfs_lock_root_node(root);
1813 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
1814 BTRFS_NESTING_COW);
1815 btrfs_tree_unlock(leaf);
1816 free_extent_buffer(leaf);
1817out:
1818 btrfs_free_path(path);
1819
1820 if (ret == 0) {
1821 ret = insert_dirty_subvol(trans, rc, root);
1822 if (ret)
1823 btrfs_abort_transaction(trans, ret);
1824 }
1825
1826 if (trans)
1827 btrfs_end_transaction_throttle(trans);
1828
1829 btrfs_btree_balance_dirty(fs_info);
1830
1831 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1832 invalidate_extent_cache(root, &key, &next_key);
1833
1834 return ret;
1835}
1836
1837static noinline_for_stack
1838int prepare_to_merge(struct reloc_control *rc, int err)
1839{
1840 struct btrfs_root *root = rc->extent_root;
1841 struct btrfs_fs_info *fs_info = root->fs_info;
1842 struct btrfs_root *reloc_root;
1843 struct btrfs_trans_handle *trans;
1844 LIST_HEAD(reloc_roots);
1845 u64 num_bytes = 0;
1846 int ret;
1847
1848 mutex_lock(&fs_info->reloc_mutex);
1849 rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
1850 rc->merging_rsv_size += rc->nodes_relocated * 2;
1851 mutex_unlock(&fs_info->reloc_mutex);
1852
1853again:
1854 if (!err) {
1855 num_bytes = rc->merging_rsv_size;
1856 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
1857 BTRFS_RESERVE_FLUSH_ALL);
1858 if (ret)
1859 err = ret;
1860 }
1861
1862 trans = btrfs_join_transaction(rc->extent_root);
1863 if (IS_ERR(trans)) {
1864 if (!err)
1865 btrfs_block_rsv_release(fs_info, rc->block_rsv,
1866 num_bytes, NULL);
1867 return PTR_ERR(trans);
1868 }
1869
1870 if (!err) {
1871 if (num_bytes != rc->merging_rsv_size) {
1872 btrfs_end_transaction(trans);
1873 btrfs_block_rsv_release(fs_info, rc->block_rsv,
1874 num_bytes, NULL);
1875 goto again;
1876 }
1877 }
1878
1879 rc->merge_reloc_tree = 1;
1880
1881 while (!list_empty(&rc->reloc_roots)) {
1882 reloc_root = list_entry(rc->reloc_roots.next,
1883 struct btrfs_root, root_list);
1884 list_del_init(&reloc_root->root_list);
1885
1886 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1887 false);
1888 if (IS_ERR(root)) {
1889
1890
1891
1892
1893 list_add(&reloc_root->root_list, &reloc_roots);
1894 btrfs_abort_transaction(trans, (int)PTR_ERR(root));
1895 if (!err)
1896 err = PTR_ERR(root);
1897 break;
1898 }
1899 ASSERT(root->reloc_root == reloc_root);
1900
1901
1902
1903
1904
1905 if (!err)
1906 btrfs_set_root_refs(&reloc_root->root_item, 1);
1907 ret = btrfs_update_reloc_root(trans, root);
1908
1909
1910
1911
1912
1913 list_add(&reloc_root->root_list, &reloc_roots);
1914 btrfs_put_root(root);
1915
1916 if (ret) {
1917 btrfs_abort_transaction(trans, ret);
1918 if (!err)
1919 err = ret;
1920 break;
1921 }
1922 }
1923
1924 list_splice(&reloc_roots, &rc->reloc_roots);
1925
1926 if (!err)
1927 err = btrfs_commit_transaction(trans);
1928 else
1929 btrfs_end_transaction(trans);
1930 return err;
1931}
1932
1933static noinline_for_stack
1934void free_reloc_roots(struct list_head *list)
1935{
1936 struct btrfs_root *reloc_root, *tmp;
1937
1938 list_for_each_entry_safe(reloc_root, tmp, list, root_list)
1939 __del_reloc_root(reloc_root);
1940}
1941
1942static noinline_for_stack
1943void merge_reloc_roots(struct reloc_control *rc)
1944{
1945 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
1946 struct btrfs_root *root;
1947 struct btrfs_root *reloc_root;
1948 LIST_HEAD(reloc_roots);
1949 int found = 0;
1950 int ret = 0;
1951again:
1952 root = rc->extent_root;
1953
1954
1955
1956
1957
1958
1959
1960 mutex_lock(&fs_info->reloc_mutex);
1961 list_splice_init(&rc->reloc_roots, &reloc_roots);
1962 mutex_unlock(&fs_info->reloc_mutex);
1963
1964 while (!list_empty(&reloc_roots)) {
1965 found = 1;
1966 reloc_root = list_entry(reloc_roots.next,
1967 struct btrfs_root, root_list);
1968
1969 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1970 false);
1971 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1972 if (IS_ERR(root)) {
1973
1974
1975
1976
1977
1978
1979
1980
1981 ASSERT(0);
1982 ret = PTR_ERR(root);
1983 goto out;
1984 }
1985 if (root->reloc_root != reloc_root) {
1986
1987
1988
1989
1990
1991 ASSERT(0);
1992 ret = -EINVAL;
1993 goto out;
1994 }
1995 ret = merge_reloc_root(rc, root);
1996 btrfs_put_root(root);
1997 if (ret) {
1998 if (list_empty(&reloc_root->root_list))
1999 list_add_tail(&reloc_root->root_list,
2000 &reloc_roots);
2001 goto out;
2002 }
2003 } else {
2004 if (!IS_ERR(root)) {
2005 if (root->reloc_root == reloc_root) {
2006 root->reloc_root = NULL;
2007 btrfs_put_root(reloc_root);
2008 }
2009 clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
2010 &root->state);
2011 btrfs_put_root(root);
2012 }
2013
2014 list_del_init(&reloc_root->root_list);
2015
2016 list_add_tail(&reloc_root->reloc_dirty_list,
2017 &rc->dirty_subvol_roots);
2018 }
2019 }
2020
2021 if (found) {
2022 found = 0;
2023 goto again;
2024 }
2025out:
2026 if (ret) {
2027 btrfs_handle_fs_error(fs_info, ret, NULL);
2028 free_reloc_roots(&reloc_roots);
2029
2030
2031 mutex_lock(&fs_info->reloc_mutex);
2032 list_splice_init(&rc->reloc_roots, &reloc_roots);
2033 mutex_unlock(&fs_info->reloc_mutex);
2034 free_reloc_roots(&reloc_roots);
2035 }
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052}
2053
2054static void free_block_list(struct rb_root *blocks)
2055{
2056 struct tree_block *block;
2057 struct rb_node *rb_node;
2058 while ((rb_node = rb_first(blocks))) {
2059 block = rb_entry(rb_node, struct tree_block, rb_node);
2060 rb_erase(rb_node, blocks);
2061 kfree(block);
2062 }
2063}
2064
2065static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2066 struct btrfs_root *reloc_root)
2067{
2068 struct btrfs_fs_info *fs_info = reloc_root->fs_info;
2069 struct btrfs_root *root;
2070 int ret;
2071
2072 if (reloc_root->last_trans == trans->transid)
2073 return 0;
2074
2075 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085 if (IS_ERR(root)) {
2086 ASSERT(0);
2087 return PTR_ERR(root);
2088 }
2089 if (root->reloc_root != reloc_root) {
2090 ASSERT(0);
2091 btrfs_err(fs_info,
2092 "root %llu has two reloc roots associated with it",
2093 reloc_root->root_key.offset);
2094 btrfs_put_root(root);
2095 return -EUCLEAN;
2096 }
2097 ret = btrfs_record_root_in_trans(trans, root);
2098 btrfs_put_root(root);
2099
2100 return ret;
2101}
2102
2103static noinline_for_stack
2104struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2105 struct reloc_control *rc,
2106 struct btrfs_backref_node *node,
2107 struct btrfs_backref_edge *edges[])
2108{
2109 struct btrfs_backref_node *next;
2110 struct btrfs_root *root;
2111 int index = 0;
2112 int ret;
2113
2114 next = node;
2115 while (1) {
2116 cond_resched();
2117 next = walk_up_backref(next, edges, &index);
2118 root = next->root;
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132 if (!root) {
2133 ASSERT(0);
2134 btrfs_err(trans->fs_info,
2135 "bytenr %llu doesn't have a backref path ending in a root",
2136 node->bytenr);
2137 return ERR_PTR(-EUCLEAN);
2138 }
2139 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
2140 ASSERT(0);
2141 btrfs_err(trans->fs_info,
2142 "bytenr %llu has multiple refs with one ending in a non-shareable root",
2143 node->bytenr);
2144 return ERR_PTR(-EUCLEAN);
2145 }
2146
2147 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2148 ret = record_reloc_root_in_trans(trans, root);
2149 if (ret)
2150 return ERR_PTR(ret);
2151 break;
2152 }
2153
2154 ret = btrfs_record_root_in_trans(trans, root);
2155 if (ret)
2156 return ERR_PTR(ret);
2157 root = root->reloc_root;
2158
2159
2160
2161
2162
2163 if (!root)
2164 return ERR_PTR(-ENOENT);
2165
2166 if (next->new_bytenr != root->node->start) {
2167
2168
2169
2170
2171
2172
2173
2174 ASSERT(next->new_bytenr == 0);
2175 ASSERT(list_empty(&next->list));
2176 if (next->new_bytenr || !list_empty(&next->list)) {
2177 btrfs_err(trans->fs_info,
2178 "bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
2179 node->bytenr, next->bytenr);
2180 return ERR_PTR(-EUCLEAN);
2181 }
2182
2183 next->new_bytenr = root->node->start;
2184 btrfs_put_root(next->root);
2185 next->root = btrfs_grab_root(root);
2186 ASSERT(next->root);
2187 list_add_tail(&next->list,
2188 &rc->backref_cache.changed);
2189 mark_block_processed(rc, next);
2190 break;
2191 }
2192
2193 WARN_ON(1);
2194 root = NULL;
2195 next = walk_down_backref(edges, &index);
2196 if (!next || next->level <= node->level)
2197 break;
2198 }
2199 if (!root) {
2200
2201
2202
2203
2204 ASSERT(0);
2205 return ERR_PTR(-ENOENT);
2206 }
2207
2208 next = node;
2209
2210 while (1) {
2211 rc->backref_cache.path[next->level] = next;
2212 if (--index < 0)
2213 break;
2214 next = edges[index]->node[UPPER];
2215 }
2216 return root;
2217}
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228static noinline_for_stack
2229struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
2230{
2231 struct btrfs_backref_node *next;
2232 struct btrfs_root *root;
2233 struct btrfs_root *fs_root = NULL;
2234 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2235 int index = 0;
2236
2237 next = node;
2238 while (1) {
2239 cond_resched();
2240 next = walk_up_backref(next, edges, &index);
2241 root = next->root;
2242
2243
2244
2245
2246
2247 if (!root)
2248 return ERR_PTR(-EUCLEAN);
2249
2250
2251 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2252 return root;
2253
2254 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2255 fs_root = root;
2256
2257 if (next != node)
2258 return NULL;
2259
2260 next = walk_down_backref(edges, &index);
2261 if (!next || next->level <= node->level)
2262 break;
2263 }
2264
2265 if (!fs_root)
2266 return ERR_PTR(-ENOENT);
2267 return fs_root;
2268}
2269
2270static noinline_for_stack
2271u64 calcu_metadata_size(struct reloc_control *rc,
2272 struct btrfs_backref_node *node, int reserve)
2273{
2274 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2275 struct btrfs_backref_node *next = node;
2276 struct btrfs_backref_edge *edge;
2277 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2278 u64 num_bytes = 0;
2279 int index = 0;
2280
2281 BUG_ON(reserve && node->processed);
2282
2283 while (next) {
2284 cond_resched();
2285 while (1) {
2286 if (next->processed && (reserve || next != node))
2287 break;
2288
2289 num_bytes += fs_info->nodesize;
2290
2291 if (list_empty(&next->upper))
2292 break;
2293
2294 edge = list_entry(next->upper.next,
2295 struct btrfs_backref_edge, list[LOWER]);
2296 edges[index++] = edge;
2297 next = edge->node[UPPER];
2298 }
2299 next = walk_down_backref(edges, &index);
2300 }
2301 return num_bytes;
2302}
2303
2304static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2305 struct reloc_control *rc,
2306 struct btrfs_backref_node *node)
2307{
2308 struct btrfs_root *root = rc->extent_root;
2309 struct btrfs_fs_info *fs_info = root->fs_info;
2310 u64 num_bytes;
2311 int ret;
2312 u64 tmp;
2313
2314 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2315
2316 trans->block_rsv = rc->block_rsv;
2317 rc->reserved_bytes += num_bytes;
2318
2319
2320
2321
2322
2323
2324 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
2325 BTRFS_RESERVE_FLUSH_LIMIT);
2326 if (ret) {
2327 tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
2328 while (tmp <= rc->reserved_bytes)
2329 tmp <<= 1;
2330
2331
2332
2333
2334
2335
2336
2337 rc->block_rsv->size = tmp + fs_info->nodesize *
2338 RELOCATION_RESERVED_NODES;
2339 return -EAGAIN;
2340 }
2341
2342 return 0;
2343}
2344
2345
2346
2347
2348
2349
2350
2351
2352static int do_relocation(struct btrfs_trans_handle *trans,
2353 struct reloc_control *rc,
2354 struct btrfs_backref_node *node,
2355 struct btrfs_key *key,
2356 struct btrfs_path *path, int lowest)
2357{
2358 struct btrfs_backref_node *upper;
2359 struct btrfs_backref_edge *edge;
2360 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2361 struct btrfs_root *root;
2362 struct extent_buffer *eb;
2363 u32 blocksize;
2364 u64 bytenr;
2365 int slot;
2366 int ret = 0;
2367
2368
2369
2370
2371
2372 ASSERT(!lowest || !node->eb);
2373
2374 path->lowest_level = node->level + 1;
2375 rc->backref_cache.path[node->level] = node;
2376 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2377 struct btrfs_ref ref = { 0 };
2378
2379 cond_resched();
2380
2381 upper = edge->node[UPPER];
2382 root = select_reloc_root(trans, rc, upper, edges);
2383 if (IS_ERR(root)) {
2384 ret = PTR_ERR(root);
2385 goto next;
2386 }
2387
2388 if (upper->eb && !upper->locked) {
2389 if (!lowest) {
2390 ret = btrfs_bin_search(upper->eb, key, &slot);
2391 if (ret < 0)
2392 goto next;
2393 BUG_ON(ret);
2394 bytenr = btrfs_node_blockptr(upper->eb, slot);
2395 if (node->eb->start == bytenr)
2396 goto next;
2397 }
2398 btrfs_backref_drop_node_buffer(upper);
2399 }
2400
2401 if (!upper->eb) {
2402 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2403 if (ret) {
2404 if (ret > 0)
2405 ret = -ENOENT;
2406
2407 btrfs_release_path(path);
2408 break;
2409 }
2410
2411 if (!upper->eb) {
2412 upper->eb = path->nodes[upper->level];
2413 path->nodes[upper->level] = NULL;
2414 } else {
2415 BUG_ON(upper->eb != path->nodes[upper->level]);
2416 }
2417
2418 upper->locked = 1;
2419 path->locks[upper->level] = 0;
2420
2421 slot = path->slots[upper->level];
2422 btrfs_release_path(path);
2423 } else {
2424 ret = btrfs_bin_search(upper->eb, key, &slot);
2425 if (ret < 0)
2426 goto next;
2427 BUG_ON(ret);
2428 }
2429
2430 bytenr = btrfs_node_blockptr(upper->eb, slot);
2431 if (lowest) {
2432 if (bytenr != node->bytenr) {
2433 btrfs_err(root->fs_info,
2434 "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
2435 bytenr, node->bytenr, slot,
2436 upper->eb->start);
2437 ret = -EIO;
2438 goto next;
2439 }
2440 } else {
2441 if (node->eb->start == bytenr)
2442 goto next;
2443 }
2444
2445 blocksize = root->fs_info->nodesize;
2446 eb = btrfs_read_node_slot(upper->eb, slot);
2447 if (IS_ERR(eb)) {
2448 ret = PTR_ERR(eb);
2449 goto next;
2450 }
2451 btrfs_tree_lock(eb);
2452
2453 if (!node->eb) {
2454 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2455 slot, &eb, BTRFS_NESTING_COW);
2456 btrfs_tree_unlock(eb);
2457 free_extent_buffer(eb);
2458 if (ret < 0)
2459 goto next;
2460
2461
2462
2463
2464 ASSERT(node->eb == eb);
2465 } else {
2466 btrfs_set_node_blockptr(upper->eb, slot,
2467 node->eb->start);
2468 btrfs_set_node_ptr_generation(upper->eb, slot,
2469 trans->transid);
2470 btrfs_mark_buffer_dirty(upper->eb);
2471
2472 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
2473 node->eb->start, blocksize,
2474 upper->eb->start);
2475 ref.real_root = root->root_key.objectid;
2476 btrfs_init_tree_ref(&ref, node->level,
2477 btrfs_header_owner(upper->eb));
2478 ret = btrfs_inc_extent_ref(trans, &ref);
2479 if (!ret)
2480 ret = btrfs_drop_subtree(trans, root, eb,
2481 upper->eb);
2482 if (ret)
2483 btrfs_abort_transaction(trans, ret);
2484 }
2485next:
2486 if (!upper->pending)
2487 btrfs_backref_drop_node_buffer(upper);
2488 else
2489 btrfs_backref_unlock_node_buffer(upper);
2490 if (ret)
2491 break;
2492 }
2493
2494 if (!ret && node->pending) {
2495 btrfs_backref_drop_node_buffer(node);
2496 list_move_tail(&node->list, &rc->backref_cache.changed);
2497 node->pending = 0;
2498 }
2499
2500 path->lowest_level = 0;
2501
2502
2503
2504
2505
2506 ASSERT(ret != -ENOSPC);
2507 return ret;
2508}
2509
2510static int link_to_upper(struct btrfs_trans_handle *trans,
2511 struct reloc_control *rc,
2512 struct btrfs_backref_node *node,
2513 struct btrfs_path *path)
2514{
2515 struct btrfs_key key;
2516
2517 btrfs_node_key_to_cpu(node->eb, &key, 0);
2518 return do_relocation(trans, rc, node, &key, path, 0);
2519}
2520
2521static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2522 struct reloc_control *rc,
2523 struct btrfs_path *path, int err)
2524{
2525 LIST_HEAD(list);
2526 struct btrfs_backref_cache *cache = &rc->backref_cache;
2527 struct btrfs_backref_node *node;
2528 int level;
2529 int ret;
2530
2531 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2532 while (!list_empty(&cache->pending[level])) {
2533 node = list_entry(cache->pending[level].next,
2534 struct btrfs_backref_node, list);
2535 list_move_tail(&node->list, &list);
2536 BUG_ON(!node->pending);
2537
2538 if (!err) {
2539 ret = link_to_upper(trans, rc, node, path);
2540 if (ret < 0)
2541 err = ret;
2542 }
2543 }
2544 list_splice_init(&list, &cache->pending[level]);
2545 }
2546 return err;
2547}
2548
2549
2550
2551
2552
2553static void update_processed_blocks(struct reloc_control *rc,
2554 struct btrfs_backref_node *node)
2555{
2556 struct btrfs_backref_node *next = node;
2557 struct btrfs_backref_edge *edge;
2558 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2559 int index = 0;
2560
2561 while (next) {
2562 cond_resched();
2563 while (1) {
2564 if (next->processed)
2565 break;
2566
2567 mark_block_processed(rc, next);
2568
2569 if (list_empty(&next->upper))
2570 break;
2571
2572 edge = list_entry(next->upper.next,
2573 struct btrfs_backref_edge, list[LOWER]);
2574 edges[index++] = edge;
2575 next = edge->node[UPPER];
2576 }
2577 next = walk_down_backref(edges, &index);
2578 }
2579}
2580
2581static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2582{
2583 u32 blocksize = rc->extent_root->fs_info->nodesize;
2584
2585 if (test_range_bit(&rc->processed_blocks, bytenr,
2586 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2587 return 1;
2588 return 0;
2589}
2590
2591static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2592 struct tree_block *block)
2593{
2594 struct extent_buffer *eb;
2595
2596 eb = read_tree_block(fs_info, block->bytenr, block->owner,
2597 block->key.offset, block->level, NULL);
2598 if (IS_ERR(eb)) {
2599 return PTR_ERR(eb);
2600 } else if (!extent_buffer_uptodate(eb)) {
2601 free_extent_buffer(eb);
2602 return -EIO;
2603 }
2604 if (block->level == 0)
2605 btrfs_item_key_to_cpu(eb, &block->key, 0);
2606 else
2607 btrfs_node_key_to_cpu(eb, &block->key, 0);
2608 free_extent_buffer(eb);
2609 block->key_ready = 1;
2610 return 0;
2611}
2612
2613
2614
2615
2616static int relocate_tree_block(struct btrfs_trans_handle *trans,
2617 struct reloc_control *rc,
2618 struct btrfs_backref_node *node,
2619 struct btrfs_key *key,
2620 struct btrfs_path *path)
2621{
2622 struct btrfs_root *root;
2623 int ret = 0;
2624
2625 if (!node)
2626 return 0;
2627
2628
2629
2630
2631
2632 ret = reserve_metadata_space(trans, rc, node);
2633 if (ret)
2634 goto out;
2635
2636 BUG_ON(node->processed);
2637 root = select_one_root(node);
2638 if (IS_ERR(root)) {
2639 ret = PTR_ERR(root);
2640
2641
2642 ASSERT(ret == -ENOENT);
2643 if (ret == -ENOENT) {
2644 ret = 0;
2645 update_processed_blocks(rc, node);
2646 }
2647 goto out;
2648 }
2649
2650 if (root) {
2651 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665 ASSERT(node->new_bytenr == 0);
2666 ASSERT(list_empty(&node->list));
2667 if (node->new_bytenr || !list_empty(&node->list)) {
2668 btrfs_err(root->fs_info,
2669 "bytenr %llu has improper references to it",
2670 node->bytenr);
2671 ret = -EUCLEAN;
2672 goto out;
2673 }
2674 ret = btrfs_record_root_in_trans(trans, root);
2675 if (ret)
2676 goto out;
2677
2678
2679
2680
2681 if (!root->reloc_root) {
2682 ret = -ENOENT;
2683 goto out;
2684 }
2685 root = root->reloc_root;
2686 node->new_bytenr = root->node->start;
2687 btrfs_put_root(node->root);
2688 node->root = btrfs_grab_root(root);
2689 ASSERT(node->root);
2690 list_add_tail(&node->list, &rc->backref_cache.changed);
2691 } else {
2692 path->lowest_level = node->level;
2693 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2694 btrfs_release_path(path);
2695 if (ret > 0)
2696 ret = 0;
2697 }
2698 if (!ret)
2699 update_processed_blocks(rc, node);
2700 } else {
2701 ret = do_relocation(trans, rc, node, key, path, 1);
2702 }
2703out:
2704 if (ret || node->level == 0 || node->cowonly)
2705 btrfs_backref_cleanup_node(&rc->backref_cache, node);
2706 return ret;
2707}
2708
2709
2710
2711
2712static noinline_for_stack
2713int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2714 struct reloc_control *rc, struct rb_root *blocks)
2715{
2716 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2717 struct btrfs_backref_node *node;
2718 struct btrfs_path *path;
2719 struct tree_block *block;
2720 struct tree_block *next;
2721 int ret;
2722 int err = 0;
2723
2724 path = btrfs_alloc_path();
2725 if (!path) {
2726 err = -ENOMEM;
2727 goto out_free_blocks;
2728 }
2729
2730
2731 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2732 if (!block->key_ready)
2733 btrfs_readahead_tree_block(fs_info, block->bytenr,
2734 block->owner, 0,
2735 block->level);
2736 }
2737
2738
2739 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2740 if (!block->key_ready) {
2741 err = get_tree_block_key(fs_info, block);
2742 if (err)
2743 goto out_free_path;
2744 }
2745 }
2746
2747
2748 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2749 node = build_backref_tree(rc, &block->key,
2750 block->level, block->bytenr);
2751 if (IS_ERR(node)) {
2752 err = PTR_ERR(node);
2753 goto out;
2754 }
2755
2756 ret = relocate_tree_block(trans, rc, node, &block->key,
2757 path);
2758 if (ret < 0) {
2759 err = ret;
2760 break;
2761 }
2762 }
2763out:
2764 err = finish_pending_nodes(trans, rc, path, err);
2765
2766out_free_path:
2767 btrfs_free_path(path);
2768out_free_blocks:
2769 free_block_list(blocks);
2770 return err;
2771}
2772
2773static noinline_for_stack int prealloc_file_extent_cluster(
2774 struct btrfs_inode *inode,
2775 struct file_extent_cluster *cluster)
2776{
2777 u64 alloc_hint = 0;
2778 u64 start;
2779 u64 end;
2780 u64 offset = inode->index_cnt;
2781 u64 num_bytes;
2782 int nr;
2783 int ret = 0;
2784 u64 prealloc_start = cluster->start - offset;
2785 u64 prealloc_end = cluster->end - offset;
2786 u64 cur_offset = prealloc_start;
2787
2788 BUG_ON(cluster->start != cluster->boundary[0]);
2789 ret = btrfs_alloc_data_chunk_ondemand(inode,
2790 prealloc_end + 1 - prealloc_start);
2791 if (ret)
2792 return ret;
2793
2794
2795
2796
2797
2798 if (btrfs_is_zoned(inode->root->fs_info)) {
2799 struct btrfs_root *root = inode->root;
2800 struct btrfs_trans_handle *trans;
2801
2802 end = cluster->end - offset + 1;
2803 trans = btrfs_start_transaction(root, 1);
2804 if (IS_ERR(trans))
2805 return PTR_ERR(trans);
2806
2807 inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
2808 i_size_write(&inode->vfs_inode, end);
2809 ret = btrfs_update_inode(trans, root, inode);
2810 if (ret) {
2811 btrfs_abort_transaction(trans, ret);
2812 btrfs_end_transaction(trans);
2813 return ret;
2814 }
2815
2816 return btrfs_end_transaction(trans);
2817 }
2818
2819 btrfs_inode_lock(&inode->vfs_inode, 0);
2820 for (nr = 0; nr < cluster->nr; nr++) {
2821 start = cluster->boundary[nr] - offset;
2822 if (nr + 1 < cluster->nr)
2823 end = cluster->boundary[nr + 1] - 1 - offset;
2824 else
2825 end = cluster->end - offset;
2826
2827 lock_extent(&inode->io_tree, start, end);
2828 num_bytes = end + 1 - start;
2829 ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
2830 num_bytes, num_bytes,
2831 end + 1, &alloc_hint);
2832 cur_offset = end + 1;
2833 unlock_extent(&inode->io_tree, start, end);
2834 if (ret)
2835 break;
2836 }
2837 btrfs_inode_unlock(&inode->vfs_inode, 0);
2838
2839 if (cur_offset < prealloc_end)
2840 btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
2841 prealloc_end + 1 - cur_offset);
2842 return ret;
2843}
2844
2845static noinline_for_stack
2846int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2847 u64 block_start)
2848{
2849 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2850 struct extent_map *em;
2851 int ret = 0;
2852
2853 em = alloc_extent_map();
2854 if (!em)
2855 return -ENOMEM;
2856
2857 em->start = start;
2858 em->len = end + 1 - start;
2859 em->block_len = em->len;
2860 em->block_start = block_start;
2861 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2862
2863 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2864 while (1) {
2865 write_lock(&em_tree->lock);
2866 ret = add_extent_mapping(em_tree, em, 0);
2867 write_unlock(&em_tree->lock);
2868 if (ret != -EEXIST) {
2869 free_extent_map(em);
2870 break;
2871 }
2872 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
2873 }
2874 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2875 return ret;
2876}
2877
2878
2879
2880
2881noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2882{
2883 return atomic_read(&fs_info->balance_cancel_req) ||
2884 atomic_read(&fs_info->reloc_cancel_req) ||
2885 fatal_signal_pending(current);
2886}
2887ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2888
2889static int relocate_file_extent_cluster(struct inode *inode,
2890 struct file_extent_cluster *cluster)
2891{
2892 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2893 u64 page_start;
2894 u64 page_end;
2895 u64 offset = BTRFS_I(inode)->index_cnt;
2896 unsigned long index;
2897 unsigned long last_index;
2898 struct page *page;
2899 struct file_ra_state *ra;
2900 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
2901 int nr = 0;
2902 int ret = 0;
2903
2904 if (!cluster->nr)
2905 return 0;
2906
2907 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2908 if (!ra)
2909 return -ENOMEM;
2910
2911 ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
2912 if (ret)
2913 goto out;
2914
2915 file_ra_state_init(ra, inode->i_mapping);
2916
2917 ret = setup_extent_mapping(inode, cluster->start - offset,
2918 cluster->end - offset, cluster->start);
2919 if (ret)
2920 goto out;
2921
2922 index = (cluster->start - offset) >> PAGE_SHIFT;
2923 last_index = (cluster->end - offset) >> PAGE_SHIFT;
2924 while (index <= last_index) {
2925 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
2926 PAGE_SIZE);
2927 if (ret)
2928 goto out;
2929
2930 page = find_lock_page(inode->i_mapping, index);
2931 if (!page) {
2932 page_cache_sync_readahead(inode->i_mapping,
2933 ra, NULL, index,
2934 last_index + 1 - index);
2935 page = find_or_create_page(inode->i_mapping, index,
2936 mask);
2937 if (!page) {
2938 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2939 PAGE_SIZE, true);
2940 btrfs_delalloc_release_extents(BTRFS_I(inode),
2941 PAGE_SIZE);
2942 ret = -ENOMEM;
2943 goto out;
2944 }
2945 }
2946 ret = set_page_extent_mapped(page);
2947 if (ret < 0) {
2948 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2949 PAGE_SIZE, true);
2950 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
2951 unlock_page(page);
2952 put_page(page);
2953 goto out;
2954 }
2955
2956 if (PageReadahead(page)) {
2957 page_cache_async_readahead(inode->i_mapping,
2958 ra, NULL, page, index,
2959 last_index + 1 - index);
2960 }
2961
2962 if (!PageUptodate(page)) {
2963 btrfs_readpage(NULL, page);
2964 lock_page(page);
2965 if (!PageUptodate(page)) {
2966 unlock_page(page);
2967 put_page(page);
2968 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2969 PAGE_SIZE, true);
2970 btrfs_delalloc_release_extents(BTRFS_I(inode),
2971 PAGE_SIZE);
2972 ret = -EIO;
2973 goto out;
2974 }
2975 }
2976
2977 page_start = page_offset(page);
2978 page_end = page_start + PAGE_SIZE - 1;
2979
2980 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
2981
2982 if (nr < cluster->nr &&
2983 page_start + offset == cluster->boundary[nr]) {
2984 set_extent_bits(&BTRFS_I(inode)->io_tree,
2985 page_start, page_end,
2986 EXTENT_BOUNDARY);
2987 nr++;
2988 }
2989
2990 ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
2991 page_end, 0, NULL);
2992 if (ret) {
2993 unlock_page(page);
2994 put_page(page);
2995 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2996 PAGE_SIZE, true);
2997 btrfs_delalloc_release_extents(BTRFS_I(inode),
2998 PAGE_SIZE);
2999
3000 clear_extent_bits(&BTRFS_I(inode)->io_tree,
3001 page_start, page_end,
3002 EXTENT_LOCKED | EXTENT_BOUNDARY);
3003 goto out;
3004
3005 }
3006 set_page_dirty(page);
3007
3008 unlock_extent(&BTRFS_I(inode)->io_tree,
3009 page_start, page_end);
3010 unlock_page(page);
3011 put_page(page);
3012
3013 index++;
3014 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
3015 balance_dirty_pages_ratelimited(inode->i_mapping);
3016 btrfs_throttle(fs_info);
3017 if (btrfs_should_cancel_balance(fs_info)) {
3018 ret = -ECANCELED;
3019 goto out;
3020 }
3021 }
3022 WARN_ON(nr != cluster->nr);
3023 if (btrfs_is_zoned(fs_info) && !ret)
3024 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
3025out:
3026 kfree(ra);
3027 return ret;
3028}
3029
3030static noinline_for_stack
3031int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3032 struct file_extent_cluster *cluster)
3033{
3034 int ret;
3035
3036 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3037 ret = relocate_file_extent_cluster(inode, cluster);
3038 if (ret)
3039 return ret;
3040 cluster->nr = 0;
3041 }
3042
3043 if (!cluster->nr)
3044 cluster->start = extent_key->objectid;
3045 else
3046 BUG_ON(cluster->nr >= MAX_EXTENTS);
3047 cluster->end = extent_key->objectid + extent_key->offset - 1;
3048 cluster->boundary[cluster->nr] = extent_key->objectid;
3049 cluster->nr++;
3050
3051 if (cluster->nr >= MAX_EXTENTS) {
3052 ret = relocate_file_extent_cluster(inode, cluster);
3053 if (ret)
3054 return ret;
3055 cluster->nr = 0;
3056 }
3057 return 0;
3058}
3059
3060
3061
3062
3063
3064static int add_tree_block(struct reloc_control *rc,
3065 struct btrfs_key *extent_key,
3066 struct btrfs_path *path,
3067 struct rb_root *blocks)
3068{
3069 struct extent_buffer *eb;
3070 struct btrfs_extent_item *ei;
3071 struct btrfs_tree_block_info *bi;
3072 struct tree_block *block;
3073 struct rb_node *rb_node;
3074 u32 item_size;
3075 int level = -1;
3076 u64 generation;
3077 u64 owner = 0;
3078
3079 eb = path->nodes[0];
3080 item_size = btrfs_item_size_nr(eb, path->slots[0]);
3081
3082 if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3083 item_size >= sizeof(*ei) + sizeof(*bi)) {
3084 unsigned long ptr = 0, end;
3085
3086 ei = btrfs_item_ptr(eb, path->slots[0],
3087 struct btrfs_extent_item);
3088 end = (unsigned long)ei + item_size;
3089 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3090 bi = (struct btrfs_tree_block_info *)(ei + 1);
3091 level = btrfs_tree_block_level(eb, bi);
3092 ptr = (unsigned long)(bi + 1);
3093 } else {
3094 level = (int)extent_key->offset;
3095 ptr = (unsigned long)(ei + 1);
3096 }
3097 generation = btrfs_extent_generation(eb, ei);
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114 if (btrfs_extent_refs(eb, ei) == 1 &&
3115 !(btrfs_extent_flags(eb, ei) &
3116 BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
3117 ptr < end) {
3118 struct btrfs_extent_inline_ref *iref;
3119 int type;
3120
3121 iref = (struct btrfs_extent_inline_ref *)ptr;
3122 type = btrfs_get_extent_inline_ref_type(eb, iref,
3123 BTRFS_REF_TYPE_BLOCK);
3124 if (type == BTRFS_REF_TYPE_INVALID)
3125 return -EINVAL;
3126 if (type == BTRFS_TREE_BLOCK_REF_KEY)
3127 owner = btrfs_extent_inline_ref_offset(eb, iref);
3128 }
3129 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3130 btrfs_print_v0_err(eb->fs_info);
3131 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3132 return -EINVAL;
3133 } else {
3134 BUG();
3135 }
3136
3137 btrfs_release_path(path);
3138
3139 BUG_ON(level == -1);
3140
3141 block = kmalloc(sizeof(*block), GFP_NOFS);
3142 if (!block)
3143 return -ENOMEM;
3144
3145 block->bytenr = extent_key->objectid;
3146 block->key.objectid = rc->extent_root->fs_info->nodesize;
3147 block->key.offset = generation;
3148 block->level = level;
3149 block->key_ready = 0;
3150 block->owner = owner;
3151
3152 rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
3153 if (rb_node)
3154 btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
3155 -EEXIST);
3156
3157 return 0;
3158}
3159
3160
3161
3162
3163static int __add_tree_block(struct reloc_control *rc,
3164 u64 bytenr, u32 blocksize,
3165 struct rb_root *blocks)
3166{
3167 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3168 struct btrfs_path *path;
3169 struct btrfs_key key;
3170 int ret;
3171 bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
3172
3173 if (tree_block_processed(bytenr, rc))
3174 return 0;
3175
3176 if (rb_simple_search(blocks, bytenr))
3177 return 0;
3178
3179 path = btrfs_alloc_path();
3180 if (!path)
3181 return -ENOMEM;
3182again:
3183 key.objectid = bytenr;
3184 if (skinny) {
3185 key.type = BTRFS_METADATA_ITEM_KEY;
3186 key.offset = (u64)-1;
3187 } else {
3188 key.type = BTRFS_EXTENT_ITEM_KEY;
3189 key.offset = blocksize;
3190 }
3191
3192 path->search_commit_root = 1;
3193 path->skip_locking = 1;
3194 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3195 if (ret < 0)
3196 goto out;
3197
3198 if (ret > 0 && skinny) {
3199 if (path->slots[0]) {
3200 path->slots[0]--;
3201 btrfs_item_key_to_cpu(path->nodes[0], &key,
3202 path->slots[0]);
3203 if (key.objectid == bytenr &&
3204 (key.type == BTRFS_METADATA_ITEM_KEY ||
3205 (key.type == BTRFS_EXTENT_ITEM_KEY &&
3206 key.offset == blocksize)))
3207 ret = 0;
3208 }
3209
3210 if (ret) {
3211 skinny = false;
3212 btrfs_release_path(path);
3213 goto again;
3214 }
3215 }
3216 if (ret) {
3217 ASSERT(ret == 1);
3218 btrfs_print_leaf(path->nodes[0]);
3219 btrfs_err(fs_info,
3220 "tree block extent item (%llu) is not found in extent tree",
3221 bytenr);
3222 WARN_ON(1);
3223 ret = -EINVAL;
3224 goto out;
3225 }
3226
3227 ret = add_tree_block(rc, &key, path, blocks);
3228out:
3229 btrfs_free_path(path);
3230 return ret;
3231}
3232
3233static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3234 struct btrfs_block_group *block_group,
3235 struct inode *inode,
3236 u64 ino)
3237{
3238 struct btrfs_root *root = fs_info->tree_root;
3239 struct btrfs_trans_handle *trans;
3240 int ret = 0;
3241
3242 if (inode)
3243 goto truncate;
3244
3245 inode = btrfs_iget(fs_info->sb, ino, root);
3246 if (IS_ERR(inode))
3247 return -ENOENT;
3248
3249truncate:
3250 ret = btrfs_check_trunc_cache_free_space(fs_info,
3251 &fs_info->global_block_rsv);
3252 if (ret)
3253 goto out;
3254
3255 trans = btrfs_join_transaction(root);
3256 if (IS_ERR(trans)) {
3257 ret = PTR_ERR(trans);
3258 goto out;
3259 }
3260
3261 ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
3262
3263 btrfs_end_transaction(trans);
3264 btrfs_btree_balance_dirty(fs_info);
3265out:
3266 iput(inode);
3267 return ret;
3268}
3269
3270
3271
3272
3273
3274static int delete_v1_space_cache(struct extent_buffer *leaf,
3275 struct btrfs_block_group *block_group,
3276 u64 data_bytenr)
3277{
3278 u64 space_cache_ino;
3279 struct btrfs_file_extent_item *ei;
3280 struct btrfs_key key;
3281 bool found = false;
3282 int i;
3283 int ret;
3284
3285 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3286 return 0;
3287
3288 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3289 u8 type;
3290
3291 btrfs_item_key_to_cpu(leaf, &key, i);
3292 if (key.type != BTRFS_EXTENT_DATA_KEY)
3293 continue;
3294 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3295 type = btrfs_file_extent_type(leaf, ei);
3296
3297 if ((type == BTRFS_FILE_EXTENT_REG ||
3298 type == BTRFS_FILE_EXTENT_PREALLOC) &&
3299 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3300 found = true;
3301 space_cache_ino = key.objectid;
3302 break;
3303 }
3304 }
3305 if (!found)
3306 return -ENOENT;
3307 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3308 space_cache_ino);
3309 return ret;
3310}
3311
3312
3313
3314
3315static noinline_for_stack
3316int add_data_references(struct reloc_control *rc,
3317 struct btrfs_key *extent_key,
3318 struct btrfs_path *path,
3319 struct rb_root *blocks)
3320{
3321 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3322 struct ulist *leaves = NULL;
3323 struct ulist_iterator leaf_uiter;
3324 struct ulist_node *ref_node = NULL;
3325 const u32 blocksize = fs_info->nodesize;
3326 int ret = 0;
3327
3328 btrfs_release_path(path);
3329 ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
3330 0, &leaves, NULL, true);
3331 if (ret < 0)
3332 return ret;
3333
3334 ULIST_ITER_INIT(&leaf_uiter);
3335 while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
3336 struct extent_buffer *eb;
3337
3338 eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
3339 if (IS_ERR(eb)) {
3340 ret = PTR_ERR(eb);
3341 break;
3342 }
3343 ret = delete_v1_space_cache(eb, rc->block_group,
3344 extent_key->objectid);
3345 free_extent_buffer(eb);
3346 if (ret < 0)
3347 break;
3348 ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
3349 if (ret < 0)
3350 break;
3351 }
3352 if (ret < 0)
3353 free_block_list(blocks);
3354 ulist_free(leaves);
3355 return ret;
3356}
3357
3358
3359
3360
3361static noinline_for_stack
3362int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
3363 struct btrfs_key *extent_key)
3364{
3365 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3366 struct btrfs_key key;
3367 struct extent_buffer *leaf;
3368 u64 start, end, last;
3369 int ret;
3370
3371 last = rc->block_group->start + rc->block_group->length;
3372 while (1) {
3373 cond_resched();
3374 if (rc->search_start >= last) {
3375 ret = 1;
3376 break;
3377 }
3378
3379 key.objectid = rc->search_start;
3380 key.type = BTRFS_EXTENT_ITEM_KEY;
3381 key.offset = 0;
3382
3383 path->search_commit_root = 1;
3384 path->skip_locking = 1;
3385 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3386 0, 0);
3387 if (ret < 0)
3388 break;
3389next:
3390 leaf = path->nodes[0];
3391 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3392 ret = btrfs_next_leaf(rc->extent_root, path);
3393 if (ret != 0)
3394 break;
3395 leaf = path->nodes[0];
3396 }
3397
3398 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3399 if (key.objectid >= last) {
3400 ret = 1;
3401 break;
3402 }
3403
3404 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3405 key.type != BTRFS_METADATA_ITEM_KEY) {
3406 path->slots[0]++;
3407 goto next;
3408 }
3409
3410 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3411 key.objectid + key.offset <= rc->search_start) {
3412 path->slots[0]++;
3413 goto next;
3414 }
3415
3416 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3417 key.objectid + fs_info->nodesize <=
3418 rc->search_start) {
3419 path->slots[0]++;
3420 goto next;
3421 }
3422
3423 ret = find_first_extent_bit(&rc->processed_blocks,
3424 key.objectid, &start, &end,
3425 EXTENT_DIRTY, NULL);
3426
3427 if (ret == 0 && start <= key.objectid) {
3428 btrfs_release_path(path);
3429 rc->search_start = end + 1;
3430 } else {
3431 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3432 rc->search_start = key.objectid + key.offset;
3433 else
3434 rc->search_start = key.objectid +
3435 fs_info->nodesize;
3436 memcpy(extent_key, &key, sizeof(key));
3437 return 0;
3438 }
3439 }
3440 btrfs_release_path(path);
3441 return ret;
3442}
3443
3444static void set_reloc_control(struct reloc_control *rc)
3445{
3446 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3447
3448 mutex_lock(&fs_info->reloc_mutex);
3449 fs_info->reloc_ctl = rc;
3450 mutex_unlock(&fs_info->reloc_mutex);
3451}
3452
3453static void unset_reloc_control(struct reloc_control *rc)
3454{
3455 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3456
3457 mutex_lock(&fs_info->reloc_mutex);
3458 fs_info->reloc_ctl = NULL;
3459 mutex_unlock(&fs_info->reloc_mutex);
3460}
3461
3462static noinline_for_stack
3463int prepare_to_relocate(struct reloc_control *rc)
3464{
3465 struct btrfs_trans_handle *trans;
3466 int ret;
3467
3468 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
3469 BTRFS_BLOCK_RSV_TEMP);
3470 if (!rc->block_rsv)
3471 return -ENOMEM;
3472
3473 memset(&rc->cluster, 0, sizeof(rc->cluster));
3474 rc->search_start = rc->block_group->start;
3475 rc->extents_found = 0;
3476 rc->nodes_relocated = 0;
3477 rc->merging_rsv_size = 0;
3478 rc->reserved_bytes = 0;
3479 rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
3480 RELOCATION_RESERVED_NODES;
3481 ret = btrfs_block_rsv_refill(rc->extent_root,
3482 rc->block_rsv, rc->block_rsv->size,
3483 BTRFS_RESERVE_FLUSH_ALL);
3484 if (ret)
3485 return ret;
3486
3487 rc->create_reloc_tree = 1;
3488 set_reloc_control(rc);
3489
3490 trans = btrfs_join_transaction(rc->extent_root);
3491 if (IS_ERR(trans)) {
3492 unset_reloc_control(rc);
3493
3494
3495
3496
3497
3498 return PTR_ERR(trans);
3499 }
3500 return btrfs_commit_transaction(trans);
3501}
3502
3503static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3504{
3505 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3506 struct rb_root blocks = RB_ROOT;
3507 struct btrfs_key key;
3508 struct btrfs_trans_handle *trans = NULL;
3509 struct btrfs_path *path;
3510 struct btrfs_extent_item *ei;
3511 u64 flags;
3512 int ret;
3513 int err = 0;
3514 int progress = 0;
3515
3516 path = btrfs_alloc_path();
3517 if (!path)
3518 return -ENOMEM;
3519 path->reada = READA_FORWARD;
3520
3521 ret = prepare_to_relocate(rc);
3522 if (ret) {
3523 err = ret;
3524 goto out_free;
3525 }
3526
3527 while (1) {
3528 rc->reserved_bytes = 0;
3529 ret = btrfs_block_rsv_refill(rc->extent_root,
3530 rc->block_rsv, rc->block_rsv->size,
3531 BTRFS_RESERVE_FLUSH_ALL);
3532 if (ret) {
3533 err = ret;
3534 break;
3535 }
3536 progress++;
3537 trans = btrfs_start_transaction(rc->extent_root, 0);
3538 if (IS_ERR(trans)) {
3539 err = PTR_ERR(trans);
3540 trans = NULL;
3541 break;
3542 }
3543restart:
3544 if (update_backref_cache(trans, &rc->backref_cache)) {
3545 btrfs_end_transaction(trans);
3546 trans = NULL;
3547 continue;
3548 }
3549
3550 ret = find_next_extent(rc, path, &key);
3551 if (ret < 0)
3552 err = ret;
3553 if (ret != 0)
3554 break;
3555
3556 rc->extents_found++;
3557
3558 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3559 struct btrfs_extent_item);
3560 flags = btrfs_extent_flags(path->nodes[0], ei);
3561
3562 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3563 ret = add_tree_block(rc, &key, path, &blocks);
3564 } else if (rc->stage == UPDATE_DATA_PTRS &&
3565 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3566 ret = add_data_references(rc, &key, path, &blocks);
3567 } else {
3568 btrfs_release_path(path);
3569 ret = 0;
3570 }
3571 if (ret < 0) {
3572 err = ret;
3573 break;
3574 }
3575
3576 if (!RB_EMPTY_ROOT(&blocks)) {
3577 ret = relocate_tree_blocks(trans, rc, &blocks);
3578 if (ret < 0) {
3579 if (ret != -EAGAIN) {
3580 err = ret;
3581 break;
3582 }
3583 rc->extents_found--;
3584 rc->search_start = key.objectid;
3585 }
3586 }
3587
3588 btrfs_end_transaction_throttle(trans);
3589 btrfs_btree_balance_dirty(fs_info);
3590 trans = NULL;
3591
3592 if (rc->stage == MOVE_DATA_EXTENTS &&
3593 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3594 rc->found_file_extent = 1;
3595 ret = relocate_data_extent(rc->data_inode,
3596 &key, &rc->cluster);
3597 if (ret < 0) {
3598 err = ret;
3599 break;
3600 }
3601 }
3602 if (btrfs_should_cancel_balance(fs_info)) {
3603 err = -ECANCELED;
3604 break;
3605 }
3606 }
3607 if (trans && progress && err == -ENOSPC) {
3608 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
3609 if (ret == 1) {
3610 err = 0;
3611 progress = 0;
3612 goto restart;
3613 }
3614 }
3615
3616 btrfs_release_path(path);
3617 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
3618
3619 if (trans) {
3620 btrfs_end_transaction_throttle(trans);
3621 btrfs_btree_balance_dirty(fs_info);
3622 }
3623
3624 if (!err) {
3625 ret = relocate_file_extent_cluster(rc->data_inode,
3626 &rc->cluster);
3627 if (ret < 0)
3628 err = ret;
3629 }
3630
3631 rc->create_reloc_tree = 0;
3632 set_reloc_control(rc);
3633
3634 btrfs_backref_release_cache(&rc->backref_cache);
3635 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645 err = prepare_to_merge(rc, err);
3646
3647 merge_reloc_roots(rc);
3648
3649 rc->merge_reloc_tree = 0;
3650 unset_reloc_control(rc);
3651 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3652
3653
3654 trans = btrfs_join_transaction(rc->extent_root);
3655 if (IS_ERR(trans)) {
3656 err = PTR_ERR(trans);
3657 goto out_free;
3658 }
3659 ret = btrfs_commit_transaction(trans);
3660 if (ret && !err)
3661 err = ret;
3662out_free:
3663 ret = clean_dirty_subvols(rc);
3664 if (ret < 0 && !err)
3665 err = ret;
3666 btrfs_free_block_rsv(fs_info, rc->block_rsv);
3667 btrfs_free_path(path);
3668 return err;
3669}
3670
3671static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3672 struct btrfs_root *root, u64 objectid)
3673{
3674 struct btrfs_path *path;
3675 struct btrfs_inode_item *item;
3676 struct extent_buffer *leaf;
3677 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
3678 int ret;
3679
3680 if (btrfs_is_zoned(trans->fs_info))
3681 flags &= ~BTRFS_INODE_PREALLOC;
3682
3683 path = btrfs_alloc_path();
3684 if (!path)
3685 return -ENOMEM;
3686
3687 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3688 if (ret)
3689 goto out;
3690
3691 leaf = path->nodes[0];
3692 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3693 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3694 btrfs_set_inode_generation(leaf, item, 1);
3695 btrfs_set_inode_size(leaf, item, 0);
3696 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3697 btrfs_set_inode_flags(leaf, item, flags);
3698 btrfs_mark_buffer_dirty(leaf);
3699out:
3700 btrfs_free_path(path);
3701 return ret;
3702}
3703
3704static void delete_orphan_inode(struct btrfs_trans_handle *trans,
3705 struct btrfs_root *root, u64 objectid)
3706{
3707 struct btrfs_path *path;
3708 struct btrfs_key key;
3709 int ret = 0;
3710
3711 path = btrfs_alloc_path();
3712 if (!path) {
3713 ret = -ENOMEM;
3714 goto out;
3715 }
3716
3717 key.objectid = objectid;
3718 key.type = BTRFS_INODE_ITEM_KEY;
3719 key.offset = 0;
3720 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3721 if (ret) {
3722 if (ret > 0)
3723 ret = -ENOENT;
3724 goto out;
3725 }
3726 ret = btrfs_del_item(trans, root, path);
3727out:
3728 if (ret)
3729 btrfs_abort_transaction(trans, ret);
3730 btrfs_free_path(path);
3731}
3732
3733
3734
3735
3736
3737static noinline_for_stack
3738struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3739 struct btrfs_block_group *group)
3740{
3741 struct inode *inode = NULL;
3742 struct btrfs_trans_handle *trans;
3743 struct btrfs_root *root;
3744 u64 objectid;
3745 int err = 0;
3746
3747 root = btrfs_grab_root(fs_info->data_reloc_root);
3748 trans = btrfs_start_transaction(root, 6);
3749 if (IS_ERR(trans)) {
3750 btrfs_put_root(root);
3751 return ERR_CAST(trans);
3752 }
3753
3754 err = btrfs_get_free_objectid(root, &objectid);
3755 if (err)
3756 goto out;
3757
3758 err = __insert_orphan_inode(trans, root, objectid);
3759 if (err)
3760 goto out;
3761
3762 inode = btrfs_iget(fs_info->sb, objectid, root);
3763 if (IS_ERR(inode)) {
3764 delete_orphan_inode(trans, root, objectid);
3765 err = PTR_ERR(inode);
3766 inode = NULL;
3767 goto out;
3768 }
3769 BTRFS_I(inode)->index_cnt = group->start;
3770
3771 err = btrfs_orphan_add(trans, BTRFS_I(inode));
3772out:
3773 btrfs_put_root(root);
3774 btrfs_end_transaction(trans);
3775 btrfs_btree_balance_dirty(fs_info);
3776 if (err) {
3777 if (inode)
3778 iput(inode);
3779 inode = ERR_PTR(err);
3780 }
3781 return inode;
3782}
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
3795{
3796 spin_lock(&fs_info->send_reloc_lock);
3797 if (fs_info->send_in_progress) {
3798 btrfs_warn_rl(fs_info,
3799"cannot run relocation while send operations are in progress (%d in progress)",
3800 fs_info->send_in_progress);
3801 spin_unlock(&fs_info->send_reloc_lock);
3802 return -EAGAIN;
3803 }
3804 if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
3805
3806 spin_unlock(&fs_info->send_reloc_lock);
3807 btrfs_err(fs_info, "reloc already running, cannot start");
3808 return -EINPROGRESS;
3809 }
3810 spin_unlock(&fs_info->send_reloc_lock);
3811
3812 if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
3813 btrfs_info(fs_info, "chunk relocation canceled on start");
3814
3815
3816
3817
3818 atomic_set(&fs_info->reloc_cancel_req, 0);
3819 return -ECANCELED;
3820 }
3821 return 0;
3822}
3823
3824
3825
3826
3827static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
3828{
3829
3830 if (atomic_read(&fs_info->reloc_cancel_req) > 0)
3831 btrfs_info(fs_info, "chunk relocation canceled during operation");
3832 spin_lock(&fs_info->send_reloc_lock);
3833 clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
3834 spin_unlock(&fs_info->send_reloc_lock);
3835 atomic_set(&fs_info->reloc_cancel_req, 0);
3836}
3837
3838static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
3839{
3840 struct reloc_control *rc;
3841
3842 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3843 if (!rc)
3844 return NULL;
3845
3846 INIT_LIST_HEAD(&rc->reloc_roots);
3847 INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3848 btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
3849 mapping_tree_init(&rc->reloc_root_tree);
3850 extent_io_tree_init(fs_info, &rc->processed_blocks,
3851 IO_TREE_RELOC_BLOCKS, NULL);
3852 return rc;
3853}
3854
3855static void free_reloc_control(struct reloc_control *rc)
3856{
3857 struct mapping_node *node, *tmp;
3858
3859 free_reloc_roots(&rc->reloc_roots);
3860 rbtree_postorder_for_each_entry_safe(node, tmp,
3861 &rc->reloc_root_tree.rb_root, rb_node)
3862 kfree(node);
3863
3864 kfree(rc);
3865}
3866
3867
3868
3869
3870static void describe_relocation(struct btrfs_fs_info *fs_info,
3871 struct btrfs_block_group *block_group)
3872{
3873 char buf[128] = {'\0'};
3874
3875 btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
3876
3877 btrfs_info(fs_info,
3878 "relocating block group %llu flags %s",
3879 block_group->start, buf);
3880}
3881
3882static const char *stage_to_string(int stage)
3883{
3884 if (stage == MOVE_DATA_EXTENTS)
3885 return "move data extents";
3886 if (stage == UPDATE_DATA_PTRS)
3887 return "update data pointers";
3888 return "unknown";
3889}
3890
3891
3892
3893
3894int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
3895{
3896 struct btrfs_block_group *bg;
3897 struct btrfs_root *extent_root = fs_info->extent_root;
3898 struct reloc_control *rc;
3899 struct inode *inode;
3900 struct btrfs_path *path;
3901 int ret;
3902 int rw = 0;
3903 int err = 0;
3904
3905 bg = btrfs_lookup_block_group(fs_info, group_start);
3906 if (!bg)
3907 return -ENOENT;
3908
3909 if (btrfs_pinned_by_swapfile(fs_info, bg)) {
3910 btrfs_put_block_group(bg);
3911 return -ETXTBSY;
3912 }
3913
3914 rc = alloc_reloc_control(fs_info);
3915 if (!rc) {
3916 btrfs_put_block_group(bg);
3917 return -ENOMEM;
3918 }
3919
3920 ret = reloc_chunk_start(fs_info);
3921 if (ret < 0) {
3922 err = ret;
3923 goto out_put_bg;
3924 }
3925
3926 rc->extent_root = extent_root;
3927 rc->block_group = bg;
3928
3929 ret = btrfs_inc_block_group_ro(rc->block_group, true);
3930 if (ret) {
3931 err = ret;
3932 goto out;
3933 }
3934 rw = 1;
3935
3936 path = btrfs_alloc_path();
3937 if (!path) {
3938 err = -ENOMEM;
3939 goto out;
3940 }
3941
3942 inode = lookup_free_space_inode(rc->block_group, path);
3943 btrfs_free_path(path);
3944
3945 if (!IS_ERR(inode))
3946 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
3947 else
3948 ret = PTR_ERR(inode);
3949
3950 if (ret && ret != -ENOENT) {
3951 err = ret;
3952 goto out;
3953 }
3954
3955 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3956 if (IS_ERR(rc->data_inode)) {
3957 err = PTR_ERR(rc->data_inode);
3958 rc->data_inode = NULL;
3959 goto out;
3960 }
3961
3962 describe_relocation(fs_info, rc->block_group);
3963
3964 btrfs_wait_block_group_reservations(rc->block_group);
3965 btrfs_wait_nocow_writers(rc->block_group);
3966 btrfs_wait_ordered_roots(fs_info, U64_MAX,
3967 rc->block_group->start,
3968 rc->block_group->length);
3969
3970 while (1) {
3971 int finishes_stage;
3972
3973 mutex_lock(&fs_info->cleaner_mutex);
3974 ret = relocate_block_group(rc);
3975 mutex_unlock(&fs_info->cleaner_mutex);
3976 if (ret < 0)
3977 err = ret;
3978
3979 finishes_stage = rc->stage;
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3990 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
3991 (u64)-1);
3992 if (ret)
3993 err = ret;
3994 invalidate_mapping_pages(rc->data_inode->i_mapping,
3995 0, -1);
3996 rc->stage = UPDATE_DATA_PTRS;
3997 }
3998
3999 if (err < 0)
4000 goto out;
4001
4002 if (rc->extents_found == 0)
4003 break;
4004
4005 btrfs_info(fs_info, "found %llu extents, stage: %s",
4006 rc->extents_found, stage_to_string(finishes_stage));
4007 }
4008
4009 WARN_ON(rc->block_group->pinned > 0);
4010 WARN_ON(rc->block_group->reserved > 0);
4011 WARN_ON(rc->block_group->used > 0);
4012out:
4013 if (err && rw)
4014 btrfs_dec_block_group_ro(rc->block_group);
4015 iput(rc->data_inode);
4016out_put_bg:
4017 btrfs_put_block_group(bg);
4018 reloc_chunk_end(fs_info);
4019 free_reloc_control(rc);
4020 return err;
4021}
4022
4023static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4024{
4025 struct btrfs_fs_info *fs_info = root->fs_info;
4026 struct btrfs_trans_handle *trans;
4027 int ret, err;
4028
4029 trans = btrfs_start_transaction(fs_info->tree_root, 0);
4030 if (IS_ERR(trans))
4031 return PTR_ERR(trans);
4032
4033 memset(&root->root_item.drop_progress, 0,
4034 sizeof(root->root_item.drop_progress));
4035 btrfs_set_root_drop_level(&root->root_item, 0);
4036 btrfs_set_root_refs(&root->root_item, 0);
4037 ret = btrfs_update_root(trans, fs_info->tree_root,
4038 &root->root_key, &root->root_item);
4039
4040 err = btrfs_end_transaction(trans);
4041 if (err)
4042 return err;
4043 return ret;
4044}
4045
4046
4047
4048
4049
4050
4051
4052int btrfs_recover_relocation(struct btrfs_root *root)
4053{
4054 struct btrfs_fs_info *fs_info = root->fs_info;
4055 LIST_HEAD(reloc_roots);
4056 struct btrfs_key key;
4057 struct btrfs_root *fs_root;
4058 struct btrfs_root *reloc_root;
4059 struct btrfs_path *path;
4060 struct extent_buffer *leaf;
4061 struct reloc_control *rc = NULL;
4062 struct btrfs_trans_handle *trans;
4063 int ret;
4064 int err = 0;
4065
4066 path = btrfs_alloc_path();
4067 if (!path)
4068 return -ENOMEM;
4069 path->reada = READA_BACK;
4070
4071 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4072 key.type = BTRFS_ROOT_ITEM_KEY;
4073 key.offset = (u64)-1;
4074
4075 while (1) {
4076 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
4077 path, 0, 0);
4078 if (ret < 0) {
4079 err = ret;
4080 goto out;
4081 }
4082 if (ret > 0) {
4083 if (path->slots[0] == 0)
4084 break;
4085 path->slots[0]--;
4086 }
4087 leaf = path->nodes[0];
4088 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4089 btrfs_release_path(path);
4090
4091 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4092 key.type != BTRFS_ROOT_ITEM_KEY)
4093 break;
4094
4095 reloc_root = btrfs_read_tree_root(root, &key);
4096 if (IS_ERR(reloc_root)) {
4097 err = PTR_ERR(reloc_root);
4098 goto out;
4099 }
4100
4101 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
4102 list_add(&reloc_root->root_list, &reloc_roots);
4103
4104 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4105 fs_root = btrfs_get_fs_root(fs_info,
4106 reloc_root->root_key.offset, false);
4107 if (IS_ERR(fs_root)) {
4108 ret = PTR_ERR(fs_root);
4109 if (ret != -ENOENT) {
4110 err = ret;
4111 goto out;
4112 }
4113 ret = mark_garbage_root(reloc_root);
4114 if (ret < 0) {
4115 err = ret;
4116 goto out;
4117 }
4118 } else {
4119 btrfs_put_root(fs_root);
4120 }
4121 }
4122
4123 if (key.offset == 0)
4124 break;
4125
4126 key.offset--;
4127 }
4128 btrfs_release_path(path);
4129
4130 if (list_empty(&reloc_roots))
4131 goto out;
4132
4133 rc = alloc_reloc_control(fs_info);
4134 if (!rc) {
4135 err = -ENOMEM;
4136 goto out;
4137 }
4138
4139 ret = reloc_chunk_start(fs_info);
4140 if (ret < 0) {
4141 err = ret;
4142 goto out_end;
4143 }
4144
4145 rc->extent_root = fs_info->extent_root;
4146
4147 set_reloc_control(rc);
4148
4149 trans = btrfs_join_transaction(rc->extent_root);
4150 if (IS_ERR(trans)) {
4151 err = PTR_ERR(trans);
4152 goto out_unset;
4153 }
4154
4155 rc->merge_reloc_tree = 1;
4156
4157 while (!list_empty(&reloc_roots)) {
4158 reloc_root = list_entry(reloc_roots.next,
4159 struct btrfs_root, root_list);
4160 list_del(&reloc_root->root_list);
4161
4162 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4163 list_add_tail(&reloc_root->root_list,
4164 &rc->reloc_roots);
4165 continue;
4166 }
4167
4168 fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
4169 false);
4170 if (IS_ERR(fs_root)) {
4171 err = PTR_ERR(fs_root);
4172 list_add_tail(&reloc_root->root_list, &reloc_roots);
4173 btrfs_end_transaction(trans);
4174 goto out_unset;
4175 }
4176
4177 err = __add_reloc_root(reloc_root);
4178 ASSERT(err != -EEXIST);
4179 if (err) {
4180 list_add_tail(&reloc_root->root_list, &reloc_roots);
4181 btrfs_put_root(fs_root);
4182 btrfs_end_transaction(trans);
4183 goto out_unset;
4184 }
4185 fs_root->reloc_root = btrfs_grab_root(reloc_root);
4186 btrfs_put_root(fs_root);
4187 }
4188
4189 err = btrfs_commit_transaction(trans);
4190 if (err)
4191 goto out_unset;
4192
4193 merge_reloc_roots(rc);
4194
4195 unset_reloc_control(rc);
4196
4197 trans = btrfs_join_transaction(rc->extent_root);
4198 if (IS_ERR(trans)) {
4199 err = PTR_ERR(trans);
4200 goto out_clean;
4201 }
4202 err = btrfs_commit_transaction(trans);
4203out_clean:
4204 ret = clean_dirty_subvols(rc);
4205 if (ret < 0 && !err)
4206 err = ret;
4207out_unset:
4208 unset_reloc_control(rc);
4209out_end:
4210 reloc_chunk_end(fs_info);
4211 free_reloc_control(rc);
4212out:
4213 free_reloc_roots(&reloc_roots);
4214
4215 btrfs_free_path(path);
4216
4217 if (err == 0) {
4218
4219 fs_root = btrfs_grab_root(fs_info->data_reloc_root);
4220 ASSERT(fs_root);
4221 err = btrfs_orphan_cleanup(fs_root);
4222 btrfs_put_root(fs_root);
4223 }
4224 return err;
4225}
4226
4227
4228
4229
4230
4231
4232
4233int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
4234{
4235 struct btrfs_fs_info *fs_info = inode->root->fs_info;
4236 struct btrfs_ordered_sum *sums;
4237 struct btrfs_ordered_extent *ordered;
4238 int ret;
4239 u64 disk_bytenr;
4240 u64 new_bytenr;
4241 LIST_HEAD(list);
4242
4243 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4244 BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
4245
4246 disk_bytenr = file_pos + inode->index_cnt;
4247 ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
4248 disk_bytenr + len - 1, &list, 0);
4249 if (ret)
4250 goto out;
4251
4252 while (!list_empty(&list)) {
4253 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4254 list_del_init(&sums->list);
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268 new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
4269 sums->bytenr = new_bytenr;
4270
4271 btrfs_add_ordered_sum(ordered, sums);
4272 }
4273out:
4274 btrfs_put_ordered_extent(ordered);
4275 return ret;
4276}
4277
4278int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4279 struct btrfs_root *root, struct extent_buffer *buf,
4280 struct extent_buffer *cow)
4281{
4282 struct btrfs_fs_info *fs_info = root->fs_info;
4283 struct reloc_control *rc;
4284 struct btrfs_backref_node *node;
4285 int first_cow = 0;
4286 int level;
4287 int ret = 0;
4288
4289 rc = fs_info->reloc_ctl;
4290 if (!rc)
4291 return 0;
4292
4293 BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4294 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
4295
4296 level = btrfs_header_level(buf);
4297 if (btrfs_header_generation(buf) <=
4298 btrfs_root_last_snapshot(&root->root_item))
4299 first_cow = 1;
4300
4301 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4302 rc->create_reloc_tree) {
4303 WARN_ON(!first_cow && level == 0);
4304
4305 node = rc->backref_cache.path[level];
4306 BUG_ON(node->bytenr != buf->start &&
4307 node->new_bytenr != buf->start);
4308
4309 btrfs_backref_drop_node_buffer(node);
4310 atomic_inc(&cow->refs);
4311 node->eb = cow;
4312 node->new_bytenr = cow->start;
4313
4314 if (!node->pending) {
4315 list_move_tail(&node->list,
4316 &rc->backref_cache.pending[level]);
4317 node->pending = 1;
4318 }
4319
4320 if (first_cow)
4321 mark_block_processed(rc, node);
4322
4323 if (first_cow && level > 0)
4324 rc->nodes_relocated += buf->len;
4325 }
4326
4327 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4328 ret = replace_file_extents(trans, rc, root, cow);
4329 return ret;
4330}
4331
4332
4333
4334
4335
4336void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
4337 u64 *bytes_to_reserve)
4338{
4339 struct btrfs_root *root = pending->root;
4340 struct reloc_control *rc = root->fs_info->reloc_ctl;
4341
4342 if (!rc || !have_reloc_root(root))
4343 return;
4344
4345 if (!rc->merge_reloc_tree)
4346 return;
4347
4348 root = root->reloc_root;
4349 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360 *bytes_to_reserve += rc->nodes_relocated;
4361}
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4372 struct btrfs_pending_snapshot *pending)
4373{
4374 struct btrfs_root *root = pending->root;
4375 struct btrfs_root *reloc_root;
4376 struct btrfs_root *new_root;
4377 struct reloc_control *rc = root->fs_info->reloc_ctl;
4378 int ret;
4379
4380 if (!rc || !have_reloc_root(root))
4381 return 0;
4382
4383 rc = root->fs_info->reloc_ctl;
4384 rc->merging_rsv_size += rc->nodes_relocated;
4385
4386 if (rc->merge_reloc_tree) {
4387 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4388 rc->block_rsv,
4389 rc->nodes_relocated, true);
4390 if (ret)
4391 return ret;
4392 }
4393
4394 new_root = pending->snap;
4395 reloc_root = create_reloc_root(trans, root->reloc_root,
4396 new_root->root_key.objectid);
4397 if (IS_ERR(reloc_root))
4398 return PTR_ERR(reloc_root);
4399
4400 ret = __add_reloc_root(reloc_root);
4401 ASSERT(ret != -EEXIST);
4402 if (ret) {
4403
4404 btrfs_put_root(reloc_root);
4405 return ret;
4406 }
4407 new_root->reloc_root = btrfs_grab_root(reloc_root);
4408
4409 if (rc->create_reloc_tree)
4410 ret = clone_backref_node(trans, rc, root, reloc_root);
4411 return ret;
4412}
4413