1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/pagemap.h>
8#include <linux/writeback.h>
9#include <linux/blkdev.h>
10#include <linux/rbtree.h>
11#include <linux/slab.h>
12#include <linux/error-injection.h>
13#include "ctree.h"
14#include "disk-io.h"
15#include "transaction.h"
16#include "volumes.h"
17#include "locking.h"
18#include "btrfs_inode.h"
19#include "async-thread.h"
20#include "free-space-cache.h"
21#include "qgroup.h"
22#include "print-tree.h"
23#include "delalloc-space.h"
24#include "block-group.h"
25#include "backref.h"
26#include "misc.h"
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75#define RELOCATION_RESERVED_NODES 256
76
77
78
79struct mapping_node {
80 struct {
81 struct rb_node rb_node;
82 u64 bytenr;
83 };
84 void *data;
85};
86
87struct mapping_tree {
88 struct rb_root rb_root;
89 spinlock_t lock;
90};
91
92
93
94
95struct tree_block {
96 struct {
97 struct rb_node rb_node;
98 u64 bytenr;
99 };
100 u64 owner;
101 struct btrfs_key key;
102 unsigned int level:8;
103 unsigned int key_ready:1;
104};
105
106#define MAX_EXTENTS 128
107
108struct file_extent_cluster {
109 u64 start;
110 u64 end;
111 u64 boundary[MAX_EXTENTS];
112 unsigned int nr;
113};
114
115struct reloc_control {
116
117 struct btrfs_block_group *block_group;
118
119 struct btrfs_root *extent_root;
120
121 struct inode *data_inode;
122
123 struct btrfs_block_rsv *block_rsv;
124
125 struct btrfs_backref_cache backref_cache;
126
127 struct file_extent_cluster cluster;
128
129 struct extent_io_tree processed_blocks;
130
131 struct mapping_tree reloc_root_tree;
132
133 struct list_head reloc_roots;
134
135 struct list_head dirty_subvol_roots;
136
137 u64 merging_rsv_size;
138
139 u64 nodes_relocated;
140
141 u64 reserved_bytes;
142
143 u64 search_start;
144 u64 extents_found;
145
146 unsigned int stage:8;
147 unsigned int create_reloc_tree:1;
148 unsigned int merge_reloc_tree:1;
149 unsigned int found_file_extent:1;
150};
151
152
153#define MOVE_DATA_EXTENTS 0
154#define UPDATE_DATA_PTRS 1
155
156static void mark_block_processed(struct reloc_control *rc,
157 struct btrfs_backref_node *node)
158{
159 u32 blocksize;
160
161 if (node->level == 0 ||
162 in_range(node->bytenr, rc->block_group->start,
163 rc->block_group->length)) {
164 blocksize = rc->extent_root->fs_info->nodesize;
165 set_extent_bits(&rc->processed_blocks, node->bytenr,
166 node->bytenr + blocksize - 1, EXTENT_DIRTY);
167 }
168 node->processed = 1;
169}
170
171
172static void mapping_tree_init(struct mapping_tree *tree)
173{
174 tree->rb_root = RB_ROOT;
175 spin_lock_init(&tree->lock);
176}
177
178
179
180
181static struct btrfs_backref_node *walk_up_backref(
182 struct btrfs_backref_node *node,
183 struct btrfs_backref_edge *edges[], int *index)
184{
185 struct btrfs_backref_edge *edge;
186 int idx = *index;
187
188 while (!list_empty(&node->upper)) {
189 edge = list_entry(node->upper.next,
190 struct btrfs_backref_edge, list[LOWER]);
191 edges[idx++] = edge;
192 node = edge->node[UPPER];
193 }
194 BUG_ON(node->detached);
195 *index = idx;
196 return node;
197}
198
199
200
201
202static struct btrfs_backref_node *walk_down_backref(
203 struct btrfs_backref_edge *edges[], int *index)
204{
205 struct btrfs_backref_edge *edge;
206 struct btrfs_backref_node *lower;
207 int idx = *index;
208
209 while (idx > 0) {
210 edge = edges[idx - 1];
211 lower = edge->node[LOWER];
212 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
213 idx--;
214 continue;
215 }
216 edge = list_entry(edge->list[LOWER].next,
217 struct btrfs_backref_edge, list[LOWER]);
218 edges[idx - 1] = edge;
219 *index = idx;
220 return edge->node[UPPER];
221 }
222 *index = 0;
223 return NULL;
224}
225
226static void update_backref_node(struct btrfs_backref_cache *cache,
227 struct btrfs_backref_node *node, u64 bytenr)
228{
229 struct rb_node *rb_node;
230 rb_erase(&node->rb_node, &cache->rb_root);
231 node->bytenr = bytenr;
232 rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
233 if (rb_node)
234 btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
235}
236
237
238
239
240static int update_backref_cache(struct btrfs_trans_handle *trans,
241 struct btrfs_backref_cache *cache)
242{
243 struct btrfs_backref_node *node;
244 int level = 0;
245
246 if (cache->last_trans == 0) {
247 cache->last_trans = trans->transid;
248 return 0;
249 }
250
251 if (cache->last_trans == trans->transid)
252 return 0;
253
254
255
256
257
258
259 while (!list_empty(&cache->detached)) {
260 node = list_entry(cache->detached.next,
261 struct btrfs_backref_node, list);
262 btrfs_backref_cleanup_node(cache, node);
263 }
264
265 while (!list_empty(&cache->changed)) {
266 node = list_entry(cache->changed.next,
267 struct btrfs_backref_node, list);
268 list_del_init(&node->list);
269 BUG_ON(node->pending);
270 update_backref_node(cache, node, node->new_bytenr);
271 }
272
273
274
275
276
277 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
278 list_for_each_entry(node, &cache->pending[level], list) {
279 BUG_ON(!node->pending);
280 if (node->bytenr == node->new_bytenr)
281 continue;
282 update_backref_node(cache, node, node->new_bytenr);
283 }
284 }
285
286 cache->last_trans = 0;
287 return 1;
288}
289
290static bool reloc_root_is_dead(struct btrfs_root *root)
291{
292
293
294
295
296
297 smp_rmb();
298 if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
299 return true;
300 return false;
301}
302
303
304
305
306
307
308
309
310
311static bool have_reloc_root(struct btrfs_root *root)
312{
313 if (reloc_root_is_dead(root))
314 return false;
315 if (!root->reloc_root)
316 return false;
317 return true;
318}
319
320int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
321{
322 struct btrfs_root *reloc_root;
323
324 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
325 return 0;
326
327
328 if (reloc_root_is_dead(root))
329 return 1;
330
331 reloc_root = root->reloc_root;
332 if (!reloc_root)
333 return 0;
334
335 if (btrfs_header_generation(reloc_root->commit_root) ==
336 root->fs_info->running_transaction->transid)
337 return 0;
338
339
340
341
342
343
344 return 1;
345}
346
347
348
349
350struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
351{
352 struct reloc_control *rc = fs_info->reloc_ctl;
353 struct rb_node *rb_node;
354 struct mapping_node *node;
355 struct btrfs_root *root = NULL;
356
357 ASSERT(rc);
358 spin_lock(&rc->reloc_root_tree.lock);
359 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
360 if (rb_node) {
361 node = rb_entry(rb_node, struct mapping_node, rb_node);
362 root = (struct btrfs_root *)node->data;
363 }
364 spin_unlock(&rc->reloc_root_tree.lock);
365 return btrfs_grab_root(root);
366}
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381static bool handle_useless_nodes(struct reloc_control *rc,
382 struct btrfs_backref_node *node)
383{
384 struct btrfs_backref_cache *cache = &rc->backref_cache;
385 struct list_head *useless_node = &cache->useless_node;
386 bool ret = false;
387
388 while (!list_empty(useless_node)) {
389 struct btrfs_backref_node *cur;
390
391 cur = list_first_entry(useless_node, struct btrfs_backref_node,
392 list);
393 list_del_init(&cur->list);
394
395
396 ASSERT(list_empty(&cur->upper));
397
398 if (cur == node)
399 ret = true;
400
401
402 if (cur->lowest) {
403 list_del_init(&cur->lower);
404 cur->lowest = 0;
405 }
406
407
408 while (!list_empty(&cur->lower)) {
409 struct btrfs_backref_edge *edge;
410 struct btrfs_backref_node *lower;
411
412 edge = list_entry(cur->lower.next,
413 struct btrfs_backref_edge, list[UPPER]);
414 list_del(&edge->list[UPPER]);
415 list_del(&edge->list[LOWER]);
416 lower = edge->node[LOWER];
417 btrfs_backref_free_edge(cache, edge);
418
419
420 if (list_empty(&lower->upper))
421 list_add(&lower->list, useless_node);
422 }
423
424 mark_block_processed(rc, cur);
425
426
427
428
429
430
431 if (cur->level > 0) {
432 list_add(&cur->list, &cache->detached);
433 cur->detached = 1;
434 } else {
435 rb_erase(&cur->rb_node, &cache->rb_root);
436 btrfs_backref_free_node(cache, cur);
437 }
438 }
439 return ret;
440}
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
457 struct reloc_control *rc, struct btrfs_key *node_key,
458 int level, u64 bytenr)
459{
460 struct btrfs_backref_iter *iter;
461 struct btrfs_backref_cache *cache = &rc->backref_cache;
462
463 struct btrfs_path *path;
464 struct btrfs_backref_node *cur;
465 struct btrfs_backref_node *node = NULL;
466 struct btrfs_backref_edge *edge;
467 int ret;
468 int err = 0;
469
470 iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
471 if (!iter)
472 return ERR_PTR(-ENOMEM);
473 path = btrfs_alloc_path();
474 if (!path) {
475 err = -ENOMEM;
476 goto out;
477 }
478
479 node = btrfs_backref_alloc_node(cache, bytenr, level);
480 if (!node) {
481 err = -ENOMEM;
482 goto out;
483 }
484
485 node->lowest = 1;
486 cur = node;
487
488
489 do {
490 ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
491 cur);
492 if (ret < 0) {
493 err = ret;
494 goto out;
495 }
496 edge = list_first_entry_or_null(&cache->pending_edge,
497 struct btrfs_backref_edge, list[UPPER]);
498
499
500
501
502 if (edge) {
503 list_del_init(&edge->list[UPPER]);
504 cur = edge->node[UPPER];
505 }
506 } while (edge);
507
508
509 ret = btrfs_backref_finish_upper_links(cache, node);
510 if (ret < 0) {
511 err = ret;
512 goto out;
513 }
514
515 if (handle_useless_nodes(rc, node))
516 node = NULL;
517out:
518 btrfs_backref_iter_free(iter);
519 btrfs_free_path(path);
520 if (err) {
521 btrfs_backref_error_cleanup(cache, node);
522 return ERR_PTR(err);
523 }
524 ASSERT(!node || !node->detached);
525 ASSERT(list_empty(&cache->useless_node) &&
526 list_empty(&cache->pending_edge));
527 return node;
528}
529
530
531
532
533
534
535static int clone_backref_node(struct btrfs_trans_handle *trans,
536 struct reloc_control *rc,
537 struct btrfs_root *src,
538 struct btrfs_root *dest)
539{
540 struct btrfs_root *reloc_root = src->reloc_root;
541 struct btrfs_backref_cache *cache = &rc->backref_cache;
542 struct btrfs_backref_node *node = NULL;
543 struct btrfs_backref_node *new_node;
544 struct btrfs_backref_edge *edge;
545 struct btrfs_backref_edge *new_edge;
546 struct rb_node *rb_node;
547
548 if (cache->last_trans > 0)
549 update_backref_cache(trans, cache);
550
551 rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
552 if (rb_node) {
553 node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
554 if (node->detached)
555 node = NULL;
556 else
557 BUG_ON(node->new_bytenr != reloc_root->node->start);
558 }
559
560 if (!node) {
561 rb_node = rb_simple_search(&cache->rb_root,
562 reloc_root->commit_root->start);
563 if (rb_node) {
564 node = rb_entry(rb_node, struct btrfs_backref_node,
565 rb_node);
566 BUG_ON(node->detached);
567 }
568 }
569
570 if (!node)
571 return 0;
572
573 new_node = btrfs_backref_alloc_node(cache, dest->node->start,
574 node->level);
575 if (!new_node)
576 return -ENOMEM;
577
578 new_node->lowest = node->lowest;
579 new_node->checked = 1;
580 new_node->root = btrfs_grab_root(dest);
581 ASSERT(new_node->root);
582
583 if (!node->lowest) {
584 list_for_each_entry(edge, &node->lower, list[UPPER]) {
585 new_edge = btrfs_backref_alloc_edge(cache);
586 if (!new_edge)
587 goto fail;
588
589 btrfs_backref_link_edge(new_edge, edge->node[LOWER],
590 new_node, LINK_UPPER);
591 }
592 } else {
593 list_add_tail(&new_node->lower, &cache->leaves);
594 }
595
596 rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
597 &new_node->rb_node);
598 if (rb_node)
599 btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
600
601 if (!new_node->lowest) {
602 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
603 list_add_tail(&new_edge->list[LOWER],
604 &new_edge->node[LOWER]->upper);
605 }
606 }
607 return 0;
608fail:
609 while (!list_empty(&new_node->lower)) {
610 new_edge = list_entry(new_node->lower.next,
611 struct btrfs_backref_edge, list[UPPER]);
612 list_del(&new_edge->list[UPPER]);
613 btrfs_backref_free_edge(cache, new_edge);
614 }
615 btrfs_backref_free_node(cache, new_node);
616 return -ENOMEM;
617}
618
619
620
621
622static int __must_check __add_reloc_root(struct btrfs_root *root)
623{
624 struct btrfs_fs_info *fs_info = root->fs_info;
625 struct rb_node *rb_node;
626 struct mapping_node *node;
627 struct reloc_control *rc = fs_info->reloc_ctl;
628
629 node = kmalloc(sizeof(*node), GFP_NOFS);
630 if (!node)
631 return -ENOMEM;
632
633 node->bytenr = root->commit_root->start;
634 node->data = root;
635
636 spin_lock(&rc->reloc_root_tree.lock);
637 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
638 node->bytenr, &node->rb_node);
639 spin_unlock(&rc->reloc_root_tree.lock);
640 if (rb_node) {
641 btrfs_err(fs_info,
642 "Duplicate root found for start=%llu while inserting into relocation tree",
643 node->bytenr);
644 return -EEXIST;
645 }
646
647 list_add_tail(&root->root_list, &rc->reloc_roots);
648 return 0;
649}
650
651
652
653
654
655static void __del_reloc_root(struct btrfs_root *root)
656{
657 struct btrfs_fs_info *fs_info = root->fs_info;
658 struct rb_node *rb_node;
659 struct mapping_node *node = NULL;
660 struct reloc_control *rc = fs_info->reloc_ctl;
661 bool put_ref = false;
662
663 if (rc && root->node) {
664 spin_lock(&rc->reloc_root_tree.lock);
665 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
666 root->commit_root->start);
667 if (rb_node) {
668 node = rb_entry(rb_node, struct mapping_node, rb_node);
669 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
670 RB_CLEAR_NODE(&node->rb_node);
671 }
672 spin_unlock(&rc->reloc_root_tree.lock);
673 ASSERT(!node || (struct btrfs_root *)node->data == root);
674 }
675
676
677
678
679
680
681
682
683
684 spin_lock(&fs_info->trans_lock);
685 if (!list_empty(&root->root_list)) {
686 put_ref = true;
687 list_del_init(&root->root_list);
688 }
689 spin_unlock(&fs_info->trans_lock);
690 if (put_ref)
691 btrfs_put_root(root);
692 kfree(node);
693}
694
695
696
697
698
699static int __update_reloc_root(struct btrfs_root *root)
700{
701 struct btrfs_fs_info *fs_info = root->fs_info;
702 struct rb_node *rb_node;
703 struct mapping_node *node = NULL;
704 struct reloc_control *rc = fs_info->reloc_ctl;
705
706 spin_lock(&rc->reloc_root_tree.lock);
707 rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
708 root->commit_root->start);
709 if (rb_node) {
710 node = rb_entry(rb_node, struct mapping_node, rb_node);
711 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
712 }
713 spin_unlock(&rc->reloc_root_tree.lock);
714
715 if (!node)
716 return 0;
717 BUG_ON((struct btrfs_root *)node->data != root);
718
719 spin_lock(&rc->reloc_root_tree.lock);
720 node->bytenr = root->node->start;
721 rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
722 node->bytenr, &node->rb_node);
723 spin_unlock(&rc->reloc_root_tree.lock);
724 if (rb_node)
725 btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
726 return 0;
727}
728
729static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
730 struct btrfs_root *root, u64 objectid)
731{
732 struct btrfs_fs_info *fs_info = root->fs_info;
733 struct btrfs_root *reloc_root;
734 struct extent_buffer *eb;
735 struct btrfs_root_item *root_item;
736 struct btrfs_key root_key;
737 int ret = 0;
738 bool must_abort = false;
739
740 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
741 if (!root_item)
742 return ERR_PTR(-ENOMEM);
743
744 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
745 root_key.type = BTRFS_ROOT_ITEM_KEY;
746 root_key.offset = objectid;
747
748 if (root->root_key.objectid == objectid) {
749 u64 commit_root_gen;
750
751
752 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
753 BTRFS_TREE_RELOC_OBJECTID);
754 if (ret)
755 goto fail;
756
757
758
759
760
761
762
763
764
765 commit_root_gen = btrfs_header_generation(root->commit_root);
766 btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
767 } else {
768
769
770
771
772
773
774
775 ret = btrfs_copy_root(trans, root, root->node, &eb,
776 BTRFS_TREE_RELOC_OBJECTID);
777 if (ret)
778 goto fail;
779 }
780
781
782
783
784
785 must_abort = true;
786
787 memcpy(root_item, &root->root_item, sizeof(*root_item));
788 btrfs_set_root_bytenr(root_item, eb->start);
789 btrfs_set_root_level(root_item, btrfs_header_level(eb));
790 btrfs_set_root_generation(root_item, trans->transid);
791
792 if (root->root_key.objectid == objectid) {
793 btrfs_set_root_refs(root_item, 0);
794 memset(&root_item->drop_progress, 0,
795 sizeof(struct btrfs_disk_key));
796 btrfs_set_root_drop_level(root_item, 0);
797 }
798
799 btrfs_tree_unlock(eb);
800 free_extent_buffer(eb);
801
802 ret = btrfs_insert_root(trans, fs_info->tree_root,
803 &root_key, root_item);
804 if (ret)
805 goto fail;
806
807 kfree(root_item);
808
809 reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
810 if (IS_ERR(reloc_root)) {
811 ret = PTR_ERR(reloc_root);
812 goto abort;
813 }
814 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
815 reloc_root->last_trans = trans->transid;
816 return reloc_root;
817fail:
818 kfree(root_item);
819abort:
820 if (must_abort)
821 btrfs_abort_transaction(trans, ret);
822 return ERR_PTR(ret);
823}
824
825
826
827
828
829
830
831
832int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
833 struct btrfs_root *root)
834{
835 struct btrfs_fs_info *fs_info = root->fs_info;
836 struct btrfs_root *reloc_root;
837 struct reloc_control *rc = fs_info->reloc_ctl;
838 struct btrfs_block_rsv *rsv;
839 int clear_rsv = 0;
840 int ret;
841
842 if (!rc)
843 return 0;
844
845
846
847
848
849 if (reloc_root_is_dead(root))
850 return 0;
851
852
853
854
855
856
857
858
859
860 if (root->reloc_root) {
861 reloc_root = root->reloc_root;
862 reloc_root->last_trans = trans->transid;
863 return 0;
864 }
865
866
867
868
869
870 if (!rc->create_reloc_tree ||
871 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
872 return 0;
873
874 if (!trans->reloc_reserved) {
875 rsv = trans->block_rsv;
876 trans->block_rsv = rc->block_rsv;
877 clear_rsv = 1;
878 }
879 reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
880 if (clear_rsv)
881 trans->block_rsv = rsv;
882 if (IS_ERR(reloc_root))
883 return PTR_ERR(reloc_root);
884
885 ret = __add_reloc_root(reloc_root);
886 ASSERT(ret != -EEXIST);
887 if (ret) {
888
889 btrfs_put_root(reloc_root);
890 return ret;
891 }
892 root->reloc_root = btrfs_grab_root(reloc_root);
893 return 0;
894}
895
896
897
898
899int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
900 struct btrfs_root *root)
901{
902 struct btrfs_fs_info *fs_info = root->fs_info;
903 struct btrfs_root *reloc_root;
904 struct btrfs_root_item *root_item;
905 int ret;
906
907 if (!have_reloc_root(root))
908 return 0;
909
910 reloc_root = root->reloc_root;
911 root_item = &reloc_root->root_item;
912
913
914
915
916
917
918 btrfs_grab_root(reloc_root);
919
920
921 if (fs_info->reloc_ctl->merge_reloc_tree &&
922 btrfs_root_refs(root_item) == 0) {
923 set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
924
925
926
927
928 smp_wmb();
929 __del_reloc_root(reloc_root);
930 }
931
932 if (reloc_root->commit_root != reloc_root->node) {
933 __update_reloc_root(reloc_root);
934 btrfs_set_root_node(root_item, reloc_root->node);
935 free_extent_buffer(reloc_root->commit_root);
936 reloc_root->commit_root = btrfs_root_node(reloc_root);
937 }
938
939 ret = btrfs_update_root(trans, fs_info->tree_root,
940 &reloc_root->root_key, root_item);
941 btrfs_put_root(reloc_root);
942 return ret;
943}
944
945
946
947
948
949static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
950{
951 struct rb_node *node;
952 struct rb_node *prev;
953 struct btrfs_inode *entry;
954 struct inode *inode;
955
956 spin_lock(&root->inode_lock);
957again:
958 node = root->inode_tree.rb_node;
959 prev = NULL;
960 while (node) {
961 prev = node;
962 entry = rb_entry(node, struct btrfs_inode, rb_node);
963
964 if (objectid < btrfs_ino(entry))
965 node = node->rb_left;
966 else if (objectid > btrfs_ino(entry))
967 node = node->rb_right;
968 else
969 break;
970 }
971 if (!node) {
972 while (prev) {
973 entry = rb_entry(prev, struct btrfs_inode, rb_node);
974 if (objectid <= btrfs_ino(entry)) {
975 node = prev;
976 break;
977 }
978 prev = rb_next(prev);
979 }
980 }
981 while (node) {
982 entry = rb_entry(node, struct btrfs_inode, rb_node);
983 inode = igrab(&entry->vfs_inode);
984 if (inode) {
985 spin_unlock(&root->inode_lock);
986 return inode;
987 }
988
989 objectid = btrfs_ino(entry) + 1;
990 if (cond_resched_lock(&root->inode_lock))
991 goto again;
992
993 node = rb_next(node);
994 }
995 spin_unlock(&root->inode_lock);
996 return NULL;
997}
998
999
1000
1001
1002static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1003 u64 bytenr, u64 num_bytes)
1004{
1005 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1006 struct btrfs_path *path;
1007 struct btrfs_file_extent_item *fi;
1008 struct extent_buffer *leaf;
1009 int ret;
1010
1011 path = btrfs_alloc_path();
1012 if (!path)
1013 return -ENOMEM;
1014
1015 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1016 ret = btrfs_lookup_file_extent(NULL, root, path,
1017 btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
1018 if (ret < 0)
1019 goto out;
1020 if (ret > 0) {
1021 ret = -ENOENT;
1022 goto out;
1023 }
1024
1025 leaf = path->nodes[0];
1026 fi = btrfs_item_ptr(leaf, path->slots[0],
1027 struct btrfs_file_extent_item);
1028
1029 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1030 btrfs_file_extent_compression(leaf, fi) ||
1031 btrfs_file_extent_encryption(leaf, fi) ||
1032 btrfs_file_extent_other_encoding(leaf, fi));
1033
1034 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1035 ret = -EINVAL;
1036 goto out;
1037 }
1038
1039 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1040 ret = 0;
1041out:
1042 btrfs_free_path(path);
1043 return ret;
1044}
1045
1046
1047
1048
1049
1050static noinline_for_stack
1051int replace_file_extents(struct btrfs_trans_handle *trans,
1052 struct reloc_control *rc,
1053 struct btrfs_root *root,
1054 struct extent_buffer *leaf)
1055{
1056 struct btrfs_fs_info *fs_info = root->fs_info;
1057 struct btrfs_key key;
1058 struct btrfs_file_extent_item *fi;
1059 struct inode *inode = NULL;
1060 u64 parent;
1061 u64 bytenr;
1062 u64 new_bytenr = 0;
1063 u64 num_bytes;
1064 u64 end;
1065 u32 nritems;
1066 u32 i;
1067 int ret = 0;
1068 int first = 1;
1069 int dirty = 0;
1070
1071 if (rc->stage != UPDATE_DATA_PTRS)
1072 return 0;
1073
1074
1075 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1076 parent = leaf->start;
1077 else
1078 parent = 0;
1079
1080 nritems = btrfs_header_nritems(leaf);
1081 for (i = 0; i < nritems; i++) {
1082 struct btrfs_ref ref = { 0 };
1083
1084 cond_resched();
1085 btrfs_item_key_to_cpu(leaf, &key, i);
1086 if (key.type != BTRFS_EXTENT_DATA_KEY)
1087 continue;
1088 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1089 if (btrfs_file_extent_type(leaf, fi) ==
1090 BTRFS_FILE_EXTENT_INLINE)
1091 continue;
1092 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1093 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1094 if (bytenr == 0)
1095 continue;
1096 if (!in_range(bytenr, rc->block_group->start,
1097 rc->block_group->length))
1098 continue;
1099
1100
1101
1102
1103
1104 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1105 if (first) {
1106 inode = find_next_inode(root, key.objectid);
1107 first = 0;
1108 } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
1109 btrfs_add_delayed_iput(inode);
1110 inode = find_next_inode(root, key.objectid);
1111 }
1112 if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
1113 end = key.offset +
1114 btrfs_file_extent_num_bytes(leaf, fi);
1115 WARN_ON(!IS_ALIGNED(key.offset,
1116 fs_info->sectorsize));
1117 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1118 end--;
1119 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1120 key.offset, end);
1121 if (!ret)
1122 continue;
1123
1124 btrfs_drop_extent_cache(BTRFS_I(inode),
1125 key.offset, end, 1);
1126 unlock_extent(&BTRFS_I(inode)->io_tree,
1127 key.offset, end);
1128 }
1129 }
1130
1131 ret = get_new_location(rc->data_inode, &new_bytenr,
1132 bytenr, num_bytes);
1133 if (ret) {
1134
1135
1136
1137
1138 break;
1139 }
1140
1141 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1142 dirty = 1;
1143
1144 key.offset -= btrfs_file_extent_offset(leaf, fi);
1145 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1146 num_bytes, parent);
1147 ref.real_root = root->root_key.objectid;
1148 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1149 key.objectid, key.offset);
1150 ret = btrfs_inc_extent_ref(trans, &ref);
1151 if (ret) {
1152 btrfs_abort_transaction(trans, ret);
1153 break;
1154 }
1155
1156 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1157 num_bytes, parent);
1158 ref.real_root = root->root_key.objectid;
1159 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1160 key.objectid, key.offset);
1161 ret = btrfs_free_extent(trans, &ref);
1162 if (ret) {
1163 btrfs_abort_transaction(trans, ret);
1164 break;
1165 }
1166 }
1167 if (dirty)
1168 btrfs_mark_buffer_dirty(leaf);
1169 if (inode)
1170 btrfs_add_delayed_iput(inode);
1171 return ret;
1172}
1173
1174static noinline_for_stack
1175int memcmp_node_keys(struct extent_buffer *eb, int slot,
1176 struct btrfs_path *path, int level)
1177{
1178 struct btrfs_disk_key key1;
1179 struct btrfs_disk_key key2;
1180 btrfs_node_key(eb, &key1, slot);
1181 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1182 return memcmp(&key1, &key2, sizeof(key1));
1183}
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194static noinline_for_stack
1195int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
1196 struct btrfs_root *dest, struct btrfs_root *src,
1197 struct btrfs_path *path, struct btrfs_key *next_key,
1198 int lowest_level, int max_level)
1199{
1200 struct btrfs_fs_info *fs_info = dest->fs_info;
1201 struct extent_buffer *eb;
1202 struct extent_buffer *parent;
1203 struct btrfs_ref ref = { 0 };
1204 struct btrfs_key key;
1205 u64 old_bytenr;
1206 u64 new_bytenr;
1207 u64 old_ptr_gen;
1208 u64 new_ptr_gen;
1209 u64 last_snapshot;
1210 u32 blocksize;
1211 int cow = 0;
1212 int level;
1213 int ret;
1214 int slot;
1215
1216 ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1217 ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1218
1219 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1220again:
1221 slot = path->slots[lowest_level];
1222 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1223
1224 eb = btrfs_lock_root_node(dest);
1225 level = btrfs_header_level(eb);
1226
1227 if (level < lowest_level) {
1228 btrfs_tree_unlock(eb);
1229 free_extent_buffer(eb);
1230 return 0;
1231 }
1232
1233 if (cow) {
1234 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
1235 BTRFS_NESTING_COW);
1236 if (ret) {
1237 btrfs_tree_unlock(eb);
1238 free_extent_buffer(eb);
1239 return ret;
1240 }
1241 }
1242
1243 if (next_key) {
1244 next_key->objectid = (u64)-1;
1245 next_key->type = (u8)-1;
1246 next_key->offset = (u64)-1;
1247 }
1248
1249 parent = eb;
1250 while (1) {
1251 level = btrfs_header_level(parent);
1252 ASSERT(level >= lowest_level);
1253
1254 ret = btrfs_bin_search(parent, &key, &slot);
1255 if (ret < 0)
1256 break;
1257 if (ret && slot > 0)
1258 slot--;
1259
1260 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1261 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1262
1263 old_bytenr = btrfs_node_blockptr(parent, slot);
1264 blocksize = fs_info->nodesize;
1265 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1266
1267 if (level <= max_level) {
1268 eb = path->nodes[level];
1269 new_bytenr = btrfs_node_blockptr(eb,
1270 path->slots[level]);
1271 new_ptr_gen = btrfs_node_ptr_generation(eb,
1272 path->slots[level]);
1273 } else {
1274 new_bytenr = 0;
1275 new_ptr_gen = 0;
1276 }
1277
1278 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
1279 ret = level;
1280 break;
1281 }
1282
1283 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1284 memcmp_node_keys(parent, slot, path, level)) {
1285 if (level <= lowest_level) {
1286 ret = 0;
1287 break;
1288 }
1289
1290 eb = btrfs_read_node_slot(parent, slot);
1291 if (IS_ERR(eb)) {
1292 ret = PTR_ERR(eb);
1293 break;
1294 }
1295 btrfs_tree_lock(eb);
1296 if (cow) {
1297 ret = btrfs_cow_block(trans, dest, eb, parent,
1298 slot, &eb,
1299 BTRFS_NESTING_COW);
1300 if (ret) {
1301 btrfs_tree_unlock(eb);
1302 free_extent_buffer(eb);
1303 break;
1304 }
1305 }
1306
1307 btrfs_tree_unlock(parent);
1308 free_extent_buffer(parent);
1309
1310 parent = eb;
1311 continue;
1312 }
1313
1314 if (!cow) {
1315 btrfs_tree_unlock(parent);
1316 free_extent_buffer(parent);
1317 cow = 1;
1318 goto again;
1319 }
1320
1321 btrfs_node_key_to_cpu(path->nodes[level], &key,
1322 path->slots[level]);
1323 btrfs_release_path(path);
1324
1325 path->lowest_level = level;
1326 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1327 path->lowest_level = 0;
1328 if (ret) {
1329 if (ret > 0)
1330 ret = -ENOENT;
1331 break;
1332 }
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348 ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
1349 rc->block_group, parent, slot,
1350 path->nodes[level], path->slots[level],
1351 last_snapshot);
1352 if (ret < 0)
1353 break;
1354
1355
1356
1357 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1358 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1359 btrfs_mark_buffer_dirty(parent);
1360
1361 btrfs_set_node_blockptr(path->nodes[level],
1362 path->slots[level], old_bytenr);
1363 btrfs_set_node_ptr_generation(path->nodes[level],
1364 path->slots[level], old_ptr_gen);
1365 btrfs_mark_buffer_dirty(path->nodes[level]);
1366
1367 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
1368 blocksize, path->nodes[level]->start);
1369 ref.skip_qgroup = true;
1370 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1371 ret = btrfs_inc_extent_ref(trans, &ref);
1372 if (ret) {
1373 btrfs_abort_transaction(trans, ret);
1374 break;
1375 }
1376 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
1377 blocksize, 0);
1378 ref.skip_qgroup = true;
1379 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1380 ret = btrfs_inc_extent_ref(trans, &ref);
1381 if (ret) {
1382 btrfs_abort_transaction(trans, ret);
1383 break;
1384 }
1385
1386 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1387 blocksize, path->nodes[level]->start);
1388 btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1389 ref.skip_qgroup = true;
1390 ret = btrfs_free_extent(trans, &ref);
1391 if (ret) {
1392 btrfs_abort_transaction(trans, ret);
1393 break;
1394 }
1395
1396 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1397 blocksize, 0);
1398 btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1399 ref.skip_qgroup = true;
1400 ret = btrfs_free_extent(trans, &ref);
1401 if (ret) {
1402 btrfs_abort_transaction(trans, ret);
1403 break;
1404 }
1405
1406 btrfs_unlock_up_safe(path, 0);
1407
1408 ret = level;
1409 break;
1410 }
1411 btrfs_tree_unlock(parent);
1412 free_extent_buffer(parent);
1413 return ret;
1414}
1415
1416
1417
1418
1419static noinline_for_stack
1420int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1421 int *level)
1422{
1423 struct extent_buffer *eb;
1424 int i;
1425 u64 last_snapshot;
1426 u32 nritems;
1427
1428 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1429
1430 for (i = 0; i < *level; i++) {
1431 free_extent_buffer(path->nodes[i]);
1432 path->nodes[i] = NULL;
1433 }
1434
1435 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1436 eb = path->nodes[i];
1437 nritems = btrfs_header_nritems(eb);
1438 while (path->slots[i] + 1 < nritems) {
1439 path->slots[i]++;
1440 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1441 last_snapshot)
1442 continue;
1443
1444 *level = i;
1445 return 0;
1446 }
1447 free_extent_buffer(path->nodes[i]);
1448 path->nodes[i] = NULL;
1449 }
1450 return 1;
1451}
1452
1453
1454
1455
1456static noinline_for_stack
1457int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1458 int *level)
1459{
1460 struct extent_buffer *eb = NULL;
1461 int i;
1462 u64 ptr_gen = 0;
1463 u64 last_snapshot;
1464 u32 nritems;
1465
1466 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1467
1468 for (i = *level; i > 0; i--) {
1469 eb = path->nodes[i];
1470 nritems = btrfs_header_nritems(eb);
1471 while (path->slots[i] < nritems) {
1472 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1473 if (ptr_gen > last_snapshot)
1474 break;
1475 path->slots[i]++;
1476 }
1477 if (path->slots[i] >= nritems) {
1478 if (i == *level)
1479 break;
1480 *level = i + 1;
1481 return 0;
1482 }
1483 if (i == 1) {
1484 *level = i;
1485 return 0;
1486 }
1487
1488 eb = btrfs_read_node_slot(eb, path->slots[i]);
1489 if (IS_ERR(eb))
1490 return PTR_ERR(eb);
1491 BUG_ON(btrfs_header_level(eb) != i - 1);
1492 path->nodes[i - 1] = eb;
1493 path->slots[i - 1] = 0;
1494 }
1495 return 1;
1496}
1497
1498
1499
1500
1501
1502static int invalidate_extent_cache(struct btrfs_root *root,
1503 struct btrfs_key *min_key,
1504 struct btrfs_key *max_key)
1505{
1506 struct btrfs_fs_info *fs_info = root->fs_info;
1507 struct inode *inode = NULL;
1508 u64 objectid;
1509 u64 start, end;
1510 u64 ino;
1511
1512 objectid = min_key->objectid;
1513 while (1) {
1514 cond_resched();
1515 iput(inode);
1516
1517 if (objectid > max_key->objectid)
1518 break;
1519
1520 inode = find_next_inode(root, objectid);
1521 if (!inode)
1522 break;
1523 ino = btrfs_ino(BTRFS_I(inode));
1524
1525 if (ino > max_key->objectid) {
1526 iput(inode);
1527 break;
1528 }
1529
1530 objectid = ino + 1;
1531 if (!S_ISREG(inode->i_mode))
1532 continue;
1533
1534 if (unlikely(min_key->objectid == ino)) {
1535 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1536 continue;
1537 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1538 start = 0;
1539 else {
1540 start = min_key->offset;
1541 WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
1542 }
1543 } else {
1544 start = 0;
1545 }
1546
1547 if (unlikely(max_key->objectid == ino)) {
1548 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1549 continue;
1550 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1551 end = (u64)-1;
1552 } else {
1553 if (max_key->offset == 0)
1554 continue;
1555 end = max_key->offset;
1556 WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
1557 end--;
1558 }
1559 } else {
1560 end = (u64)-1;
1561 }
1562
1563
1564 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1565 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
1566 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1567 }
1568 return 0;
1569}
1570
1571static int find_next_key(struct btrfs_path *path, int level,
1572 struct btrfs_key *key)
1573
1574{
1575 while (level < BTRFS_MAX_LEVEL) {
1576 if (!path->nodes[level])
1577 break;
1578 if (path->slots[level] + 1 <
1579 btrfs_header_nritems(path->nodes[level])) {
1580 btrfs_node_key_to_cpu(path->nodes[level], key,
1581 path->slots[level] + 1);
1582 return 0;
1583 }
1584 level++;
1585 }
1586 return 1;
1587}
1588
1589
1590
1591
1592static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
1593 struct reloc_control *rc,
1594 struct btrfs_root *root)
1595{
1596 struct btrfs_root *reloc_root = root->reloc_root;
1597 struct btrfs_root_item *reloc_root_item;
1598 int ret;
1599
1600
1601 ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1602 ASSERT(reloc_root);
1603
1604 reloc_root_item = &reloc_root->root_item;
1605 memset(&reloc_root_item->drop_progress, 0,
1606 sizeof(reloc_root_item->drop_progress));
1607 btrfs_set_root_drop_level(reloc_root_item, 0);
1608 btrfs_set_root_refs(reloc_root_item, 0);
1609 ret = btrfs_update_reloc_root(trans, root);
1610 if (ret)
1611 return ret;
1612
1613 if (list_empty(&root->reloc_dirty_list)) {
1614 btrfs_grab_root(root);
1615 list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
1616 }
1617
1618 return 0;
1619}
1620
1621static int clean_dirty_subvols(struct reloc_control *rc)
1622{
1623 struct btrfs_root *root;
1624 struct btrfs_root *next;
1625 int ret = 0;
1626 int ret2;
1627
1628 list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
1629 reloc_dirty_list) {
1630 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1631
1632 struct btrfs_root *reloc_root = root->reloc_root;
1633
1634 list_del_init(&root->reloc_dirty_list);
1635 root->reloc_root = NULL;
1636
1637
1638
1639
1640 smp_wmb();
1641 clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
1642 if (reloc_root) {
1643
1644
1645
1646
1647
1648 ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
1649 if (ret2 < 0) {
1650 btrfs_put_root(reloc_root);
1651 if (!ret)
1652 ret = ret2;
1653 }
1654 }
1655 btrfs_put_root(root);
1656 } else {
1657
1658 ret2 = btrfs_drop_snapshot(root, 0, 1);
1659 if (ret2 < 0) {
1660 btrfs_put_root(root);
1661 if (!ret)
1662 ret = ret2;
1663 }
1664 }
1665 }
1666 return ret;
1667}
1668
1669
1670
1671
1672
1673static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1674 struct btrfs_root *root)
1675{
1676 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
1677 struct btrfs_key key;
1678 struct btrfs_key next_key;
1679 struct btrfs_trans_handle *trans = NULL;
1680 struct btrfs_root *reloc_root;
1681 struct btrfs_root_item *root_item;
1682 struct btrfs_path *path;
1683 struct extent_buffer *leaf;
1684 int reserve_level;
1685 int level;
1686 int max_level;
1687 int replaced = 0;
1688 int ret = 0;
1689 u32 min_reserved;
1690
1691 path = btrfs_alloc_path();
1692 if (!path)
1693 return -ENOMEM;
1694 path->reada = READA_FORWARD;
1695
1696 reloc_root = root->reloc_root;
1697 root_item = &reloc_root->root_item;
1698
1699 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1700 level = btrfs_root_level(root_item);
1701 atomic_inc(&reloc_root->node->refs);
1702 path->nodes[level] = reloc_root->node;
1703 path->slots[level] = 0;
1704 } else {
1705 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1706
1707 level = btrfs_root_drop_level(root_item);
1708 BUG_ON(level == 0);
1709 path->lowest_level = level;
1710 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1711 path->lowest_level = 0;
1712 if (ret < 0) {
1713 btrfs_free_path(path);
1714 return ret;
1715 }
1716
1717 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1718 path->slots[level]);
1719 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1720
1721 btrfs_unlock_up_safe(path, 0);
1722 }
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732 reserve_level = max_t(int, 1, btrfs_root_level(root_item));
1733 min_reserved = fs_info->nodesize * reserve_level * 2;
1734 memset(&next_key, 0, sizeof(next_key));
1735
1736 while (1) {
1737 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
1738 BTRFS_RESERVE_FLUSH_LIMIT);
1739 if (ret)
1740 goto out;
1741 trans = btrfs_start_transaction(root, 0);
1742 if (IS_ERR(trans)) {
1743 ret = PTR_ERR(trans);
1744 trans = NULL;
1745 goto out;
1746 }
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758 reloc_root->last_trans = trans->transid;
1759 trans->block_rsv = rc->block_rsv;
1760
1761 replaced = 0;
1762 max_level = level;
1763
1764 ret = walk_down_reloc_tree(reloc_root, path, &level);
1765 if (ret < 0)
1766 goto out;
1767 if (ret > 0)
1768 break;
1769
1770 if (!find_next_key(path, level, &key) &&
1771 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1772 ret = 0;
1773 } else {
1774 ret = replace_path(trans, rc, root, reloc_root, path,
1775 &next_key, level, max_level);
1776 }
1777 if (ret < 0)
1778 goto out;
1779 if (ret > 0) {
1780 level = ret;
1781 btrfs_node_key_to_cpu(path->nodes[level], &key,
1782 path->slots[level]);
1783 replaced = 1;
1784 }
1785
1786 ret = walk_up_reloc_tree(reloc_root, path, &level);
1787 if (ret > 0)
1788 break;
1789
1790 BUG_ON(level == 0);
1791
1792
1793
1794
1795 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1796 path->slots[level]);
1797 btrfs_set_root_drop_level(root_item, level);
1798
1799 btrfs_end_transaction_throttle(trans);
1800 trans = NULL;
1801
1802 btrfs_btree_balance_dirty(fs_info);
1803
1804 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1805 invalidate_extent_cache(root, &key, &next_key);
1806 }
1807
1808
1809
1810
1811
1812 leaf = btrfs_lock_root_node(root);
1813 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
1814 BTRFS_NESTING_COW);
1815 btrfs_tree_unlock(leaf);
1816 free_extent_buffer(leaf);
1817out:
1818 btrfs_free_path(path);
1819
1820 if (ret == 0) {
1821 ret = insert_dirty_subvol(trans, rc, root);
1822 if (ret)
1823 btrfs_abort_transaction(trans, ret);
1824 }
1825
1826 if (trans)
1827 btrfs_end_transaction_throttle(trans);
1828
1829 btrfs_btree_balance_dirty(fs_info);
1830
1831 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1832 invalidate_extent_cache(root, &key, &next_key);
1833
1834 return ret;
1835}
1836
1837static noinline_for_stack
1838int prepare_to_merge(struct reloc_control *rc, int err)
1839{
1840 struct btrfs_root *root = rc->extent_root;
1841 struct btrfs_fs_info *fs_info = root->fs_info;
1842 struct btrfs_root *reloc_root;
1843 struct btrfs_trans_handle *trans;
1844 LIST_HEAD(reloc_roots);
1845 u64 num_bytes = 0;
1846 int ret;
1847
1848 mutex_lock(&fs_info->reloc_mutex);
1849 rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
1850 rc->merging_rsv_size += rc->nodes_relocated * 2;
1851 mutex_unlock(&fs_info->reloc_mutex);
1852
1853again:
1854 if (!err) {
1855 num_bytes = rc->merging_rsv_size;
1856 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
1857 BTRFS_RESERVE_FLUSH_ALL);
1858 if (ret)
1859 err = ret;
1860 }
1861
1862 trans = btrfs_join_transaction(rc->extent_root);
1863 if (IS_ERR(trans)) {
1864 if (!err)
1865 btrfs_block_rsv_release(fs_info, rc->block_rsv,
1866 num_bytes, NULL);
1867 return PTR_ERR(trans);
1868 }
1869
1870 if (!err) {
1871 if (num_bytes != rc->merging_rsv_size) {
1872 btrfs_end_transaction(trans);
1873 btrfs_block_rsv_release(fs_info, rc->block_rsv,
1874 num_bytes, NULL);
1875 goto again;
1876 }
1877 }
1878
1879 rc->merge_reloc_tree = 1;
1880
1881 while (!list_empty(&rc->reloc_roots)) {
1882 reloc_root = list_entry(rc->reloc_roots.next,
1883 struct btrfs_root, root_list);
1884 list_del_init(&reloc_root->root_list);
1885
1886 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1887 false);
1888 if (IS_ERR(root)) {
1889
1890
1891
1892
1893 list_add(&reloc_root->root_list, &reloc_roots);
1894 btrfs_abort_transaction(trans, (int)PTR_ERR(root));
1895 if (!err)
1896 err = PTR_ERR(root);
1897 break;
1898 }
1899 ASSERT(root->reloc_root == reloc_root);
1900
1901
1902
1903
1904
1905 if (!err)
1906 btrfs_set_root_refs(&reloc_root->root_item, 1);
1907 ret = btrfs_update_reloc_root(trans, root);
1908
1909
1910
1911
1912
1913 list_add(&reloc_root->root_list, &reloc_roots);
1914 btrfs_put_root(root);
1915
1916 if (ret) {
1917 btrfs_abort_transaction(trans, ret);
1918 if (!err)
1919 err = ret;
1920 break;
1921 }
1922 }
1923
1924 list_splice(&reloc_roots, &rc->reloc_roots);
1925
1926 if (!err)
1927 err = btrfs_commit_transaction(trans);
1928 else
1929 btrfs_end_transaction(trans);
1930 return err;
1931}
1932
1933static noinline_for_stack
1934void free_reloc_roots(struct list_head *list)
1935{
1936 struct btrfs_root *reloc_root, *tmp;
1937
1938 list_for_each_entry_safe(reloc_root, tmp, list, root_list)
1939 __del_reloc_root(reloc_root);
1940}
1941
1942static noinline_for_stack
1943void merge_reloc_roots(struct reloc_control *rc)
1944{
1945 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
1946 struct btrfs_root *root;
1947 struct btrfs_root *reloc_root;
1948 LIST_HEAD(reloc_roots);
1949 int found = 0;
1950 int ret = 0;
1951again:
1952 root = rc->extent_root;
1953
1954
1955
1956
1957
1958
1959
1960 mutex_lock(&fs_info->reloc_mutex);
1961 list_splice_init(&rc->reloc_roots, &reloc_roots);
1962 mutex_unlock(&fs_info->reloc_mutex);
1963
1964 while (!list_empty(&reloc_roots)) {
1965 found = 1;
1966 reloc_root = list_entry(reloc_roots.next,
1967 struct btrfs_root, root_list);
1968
1969 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1970 false);
1971 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1972 if (IS_ERR(root)) {
1973
1974
1975
1976
1977
1978
1979
1980
1981 ASSERT(0);
1982 ret = PTR_ERR(root);
1983 goto out;
1984 }
1985 if (root->reloc_root != reloc_root) {
1986
1987
1988
1989
1990
1991 ASSERT(0);
1992 ret = -EINVAL;
1993 goto out;
1994 }
1995 ret = merge_reloc_root(rc, root);
1996 btrfs_put_root(root);
1997 if (ret) {
1998 if (list_empty(&reloc_root->root_list))
1999 list_add_tail(&reloc_root->root_list,
2000 &reloc_roots);
2001 goto out;
2002 }
2003 } else {
2004 if (!IS_ERR(root)) {
2005 if (root->reloc_root == reloc_root) {
2006 root->reloc_root = NULL;
2007 btrfs_put_root(reloc_root);
2008 }
2009 clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
2010 &root->state);
2011 btrfs_put_root(root);
2012 }
2013
2014 list_del_init(&reloc_root->root_list);
2015
2016 list_add_tail(&reloc_root->reloc_dirty_list,
2017 &rc->dirty_subvol_roots);
2018 }
2019 }
2020
2021 if (found) {
2022 found = 0;
2023 goto again;
2024 }
2025out:
2026 if (ret) {
2027 btrfs_handle_fs_error(fs_info, ret, NULL);
2028 free_reloc_roots(&reloc_roots);
2029
2030
2031 mutex_lock(&fs_info->reloc_mutex);
2032 list_splice_init(&rc->reloc_roots, &reloc_roots);
2033 mutex_unlock(&fs_info->reloc_mutex);
2034 free_reloc_roots(&reloc_roots);
2035 }
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052}
2053
2054static void free_block_list(struct rb_root *blocks)
2055{
2056 struct tree_block *block;
2057 struct rb_node *rb_node;
2058 while ((rb_node = rb_first(blocks))) {
2059 block = rb_entry(rb_node, struct tree_block, rb_node);
2060 rb_erase(rb_node, blocks);
2061 kfree(block);
2062 }
2063}
2064
2065static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2066 struct btrfs_root *reloc_root)
2067{
2068 struct btrfs_fs_info *fs_info = reloc_root->fs_info;
2069 struct btrfs_root *root;
2070 int ret;
2071
2072 if (reloc_root->last_trans == trans->transid)
2073 return 0;
2074
2075 root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085 if (IS_ERR(root)) {
2086 ASSERT(0);
2087 return PTR_ERR(root);
2088 }
2089 if (root->reloc_root != reloc_root) {
2090 ASSERT(0);
2091 btrfs_err(fs_info,
2092 "root %llu has two reloc roots associated with it",
2093 reloc_root->root_key.offset);
2094 btrfs_put_root(root);
2095 return -EUCLEAN;
2096 }
2097 ret = btrfs_record_root_in_trans(trans, root);
2098 btrfs_put_root(root);
2099
2100 return ret;
2101}
2102
2103static noinline_for_stack
2104struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2105 struct reloc_control *rc,
2106 struct btrfs_backref_node *node,
2107 struct btrfs_backref_edge *edges[])
2108{
2109 struct btrfs_backref_node *next;
2110 struct btrfs_root *root;
2111 int index = 0;
2112 int ret;
2113
2114 next = node;
2115 while (1) {
2116 cond_resched();
2117 next = walk_up_backref(next, edges, &index);
2118 root = next->root;
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132 if (!root) {
2133 ASSERT(0);
2134 btrfs_err(trans->fs_info,
2135 "bytenr %llu doesn't have a backref path ending in a root",
2136 node->bytenr);
2137 return ERR_PTR(-EUCLEAN);
2138 }
2139 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
2140 ASSERT(0);
2141 btrfs_err(trans->fs_info,
2142 "bytenr %llu has multiple refs with one ending in a non-shareable root",
2143 node->bytenr);
2144 return ERR_PTR(-EUCLEAN);
2145 }
2146
2147 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2148 ret = record_reloc_root_in_trans(trans, root);
2149 if (ret)
2150 return ERR_PTR(ret);
2151 break;
2152 }
2153
2154 ret = btrfs_record_root_in_trans(trans, root);
2155 if (ret)
2156 return ERR_PTR(ret);
2157 root = root->reloc_root;
2158
2159
2160
2161
2162
2163 if (!root)
2164 return ERR_PTR(-ENOENT);
2165
2166 if (next->new_bytenr != root->node->start) {
2167
2168
2169
2170
2171
2172
2173
2174 ASSERT(next->new_bytenr == 0);
2175 ASSERT(list_empty(&next->list));
2176 if (next->new_bytenr || !list_empty(&next->list)) {
2177 btrfs_err(trans->fs_info,
2178 "bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
2179 node->bytenr, next->bytenr);
2180 return ERR_PTR(-EUCLEAN);
2181 }
2182
2183 next->new_bytenr = root->node->start;
2184 btrfs_put_root(next->root);
2185 next->root = btrfs_grab_root(root);
2186 ASSERT(next->root);
2187 list_add_tail(&next->list,
2188 &rc->backref_cache.changed);
2189 mark_block_processed(rc, next);
2190 break;
2191 }
2192
2193 WARN_ON(1);
2194 root = NULL;
2195 next = walk_down_backref(edges, &index);
2196 if (!next || next->level <= node->level)
2197 break;
2198 }
2199 if (!root) {
2200
2201
2202
2203
2204 ASSERT(0);
2205 return ERR_PTR(-ENOENT);
2206 }
2207
2208 next = node;
2209
2210 while (1) {
2211 rc->backref_cache.path[next->level] = next;
2212 if (--index < 0)
2213 break;
2214 next = edges[index]->node[UPPER];
2215 }
2216 return root;
2217}
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228static noinline_for_stack
2229struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
2230{
2231 struct btrfs_backref_node *next;
2232 struct btrfs_root *root;
2233 struct btrfs_root *fs_root = NULL;
2234 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2235 int index = 0;
2236
2237 next = node;
2238 while (1) {
2239 cond_resched();
2240 next = walk_up_backref(next, edges, &index);
2241 root = next->root;
2242
2243
2244
2245
2246
2247 if (!root)
2248 return ERR_PTR(-EUCLEAN);
2249
2250
2251 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2252 return root;
2253
2254 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2255 fs_root = root;
2256
2257 if (next != node)
2258 return NULL;
2259
2260 next = walk_down_backref(edges, &index);
2261 if (!next || next->level <= node->level)
2262 break;
2263 }
2264
2265 if (!fs_root)
2266 return ERR_PTR(-ENOENT);
2267 return fs_root;
2268}
2269
2270static noinline_for_stack
2271u64 calcu_metadata_size(struct reloc_control *rc,
2272 struct btrfs_backref_node *node, int reserve)
2273{
2274 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2275 struct btrfs_backref_node *next = node;
2276 struct btrfs_backref_edge *edge;
2277 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2278 u64 num_bytes = 0;
2279 int index = 0;
2280
2281 BUG_ON(reserve && node->processed);
2282
2283 while (next) {
2284 cond_resched();
2285 while (1) {
2286 if (next->processed && (reserve || next != node))
2287 break;
2288
2289 num_bytes += fs_info->nodesize;
2290
2291 if (list_empty(&next->upper))
2292 break;
2293
2294 edge = list_entry(next->upper.next,
2295 struct btrfs_backref_edge, list[LOWER]);
2296 edges[index++] = edge;
2297 next = edge->node[UPPER];
2298 }
2299 next = walk_down_backref(edges, &index);
2300 }
2301 return num_bytes;
2302}
2303
2304static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2305 struct reloc_control *rc,
2306 struct btrfs_backref_node *node)
2307{
2308 struct btrfs_root *root = rc->extent_root;
2309 struct btrfs_fs_info *fs_info = root->fs_info;
2310 u64 num_bytes;
2311 int ret;
2312 u64 tmp;
2313
2314 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2315
2316 trans->block_rsv = rc->block_rsv;
2317 rc->reserved_bytes += num_bytes;
2318
2319
2320
2321
2322
2323
2324 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
2325 BTRFS_RESERVE_FLUSH_LIMIT);
2326 if (ret) {
2327 tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
2328 while (tmp <= rc->reserved_bytes)
2329 tmp <<= 1;
2330
2331
2332
2333
2334
2335
2336
2337 rc->block_rsv->size = tmp + fs_info->nodesize *
2338 RELOCATION_RESERVED_NODES;
2339 return -EAGAIN;
2340 }
2341
2342 return 0;
2343}
2344
2345
2346
2347
2348
2349
2350
2351
2352static int do_relocation(struct btrfs_trans_handle *trans,
2353 struct reloc_control *rc,
2354 struct btrfs_backref_node *node,
2355 struct btrfs_key *key,
2356 struct btrfs_path *path, int lowest)
2357{
2358 struct btrfs_backref_node *upper;
2359 struct btrfs_backref_edge *edge;
2360 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2361 struct btrfs_root *root;
2362 struct extent_buffer *eb;
2363 u32 blocksize;
2364 u64 bytenr;
2365 int slot;
2366 int ret = 0;
2367
2368
2369
2370
2371
2372 ASSERT(!lowest || !node->eb);
2373
2374 path->lowest_level = node->level + 1;
2375 rc->backref_cache.path[node->level] = node;
2376 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2377 struct btrfs_ref ref = { 0 };
2378
2379 cond_resched();
2380
2381 upper = edge->node[UPPER];
2382 root = select_reloc_root(trans, rc, upper, edges);
2383 if (IS_ERR(root)) {
2384 ret = PTR_ERR(root);
2385 goto next;
2386 }
2387
2388 if (upper->eb && !upper->locked) {
2389 if (!lowest) {
2390 ret = btrfs_bin_search(upper->eb, key, &slot);
2391 if (ret < 0)
2392 goto next;
2393 BUG_ON(ret);
2394 bytenr = btrfs_node_blockptr(upper->eb, slot);
2395 if (node->eb->start == bytenr)
2396 goto next;
2397 }
2398 btrfs_backref_drop_node_buffer(upper);
2399 }
2400
2401 if (!upper->eb) {
2402 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2403 if (ret) {
2404 if (ret > 0)
2405 ret = -ENOENT;
2406
2407 btrfs_release_path(path);
2408 break;
2409 }
2410
2411 if (!upper->eb) {
2412 upper->eb = path->nodes[upper->level];
2413 path->nodes[upper->level] = NULL;
2414 } else {
2415 BUG_ON(upper->eb != path->nodes[upper->level]);
2416 }
2417
2418 upper->locked = 1;
2419 path->locks[upper->level] = 0;
2420
2421 slot = path->slots[upper->level];
2422 btrfs_release_path(path);
2423 } else {
2424 ret = btrfs_bin_search(upper->eb, key, &slot);
2425 if (ret < 0)
2426 goto next;
2427 BUG_ON(ret);
2428 }
2429
2430 bytenr = btrfs_node_blockptr(upper->eb, slot);
2431 if (lowest) {
2432 if (bytenr != node->bytenr) {
2433 btrfs_err(root->fs_info,
2434 "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
2435 bytenr, node->bytenr, slot,
2436 upper->eb->start);
2437 ret = -EIO;
2438 goto next;
2439 }
2440 } else {
2441 if (node->eb->start == bytenr)
2442 goto next;
2443 }
2444
2445 blocksize = root->fs_info->nodesize;
2446 eb = btrfs_read_node_slot(upper->eb, slot);
2447 if (IS_ERR(eb)) {
2448 ret = PTR_ERR(eb);
2449 goto next;
2450 }
2451 btrfs_tree_lock(eb);
2452
2453 if (!node->eb) {
2454 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2455 slot, &eb, BTRFS_NESTING_COW);
2456 btrfs_tree_unlock(eb);
2457 free_extent_buffer(eb);
2458 if (ret < 0)
2459 goto next;
2460
2461
2462
2463
2464 ASSERT(node->eb == eb);
2465 } else {
2466 btrfs_set_node_blockptr(upper->eb, slot,
2467 node->eb->start);
2468 btrfs_set_node_ptr_generation(upper->eb, slot,
2469 trans->transid);
2470 btrfs_mark_buffer_dirty(upper->eb);
2471
2472 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
2473 node->eb->start, blocksize,
2474 upper->eb->start);
2475 ref.real_root = root->root_key.objectid;
2476 btrfs_init_tree_ref(&ref, node->level,
2477 btrfs_header_owner(upper->eb));
2478 ret = btrfs_inc_extent_ref(trans, &ref);
2479 if (!ret)
2480 ret = btrfs_drop_subtree(trans, root, eb,
2481 upper->eb);
2482 if (ret)
2483 btrfs_abort_transaction(trans, ret);
2484 }
2485next:
2486 if (!upper->pending)
2487 btrfs_backref_drop_node_buffer(upper);
2488 else
2489 btrfs_backref_unlock_node_buffer(upper);
2490 if (ret)
2491 break;
2492 }
2493
2494 if (!ret && node->pending) {
2495 btrfs_backref_drop_node_buffer(node);
2496 list_move_tail(&node->list, &rc->backref_cache.changed);
2497 node->pending = 0;
2498 }
2499
2500 path->lowest_level = 0;
2501
2502
2503
2504
2505
2506 ASSERT(ret != -ENOSPC);
2507 return ret;
2508}
2509
2510static int link_to_upper(struct btrfs_trans_handle *trans,
2511 struct reloc_control *rc,
2512 struct btrfs_backref_node *node,
2513 struct btrfs_path *path)
2514{
2515 struct btrfs_key key;
2516
2517 btrfs_node_key_to_cpu(node->eb, &key, 0);
2518 return do_relocation(trans, rc, node, &key, path, 0);
2519}
2520
2521static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2522 struct reloc_control *rc,
2523 struct btrfs_path *path, int err)
2524{
2525 LIST_HEAD(list);
2526 struct btrfs_backref_cache *cache = &rc->backref_cache;
2527 struct btrfs_backref_node *node;
2528 int level;
2529 int ret;
2530
2531 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2532 while (!list_empty(&cache->pending[level])) {
2533 node = list_entry(cache->pending[level].next,
2534 struct btrfs_backref_node, list);
2535 list_move_tail(&node->list, &list);
2536 BUG_ON(!node->pending);
2537
2538 if (!err) {
2539 ret = link_to_upper(trans, rc, node, path);
2540 if (ret < 0)
2541 err = ret;
2542 }
2543 }
2544 list_splice_init(&list, &cache->pending[level]);
2545 }
2546 return err;
2547}
2548
2549
2550
2551
2552
2553static void update_processed_blocks(struct reloc_control *rc,
2554 struct btrfs_backref_node *node)
2555{
2556 struct btrfs_backref_node *next = node;
2557 struct btrfs_backref_edge *edge;
2558 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2559 int index = 0;
2560
2561 while (next) {
2562 cond_resched();
2563 while (1) {
2564 if (next->processed)
2565 break;
2566
2567 mark_block_processed(rc, next);
2568
2569 if (list_empty(&next->upper))
2570 break;
2571
2572 edge = list_entry(next->upper.next,
2573 struct btrfs_backref_edge, list[LOWER]);
2574 edges[index++] = edge;
2575 next = edge->node[UPPER];
2576 }
2577 next = walk_down_backref(edges, &index);
2578 }
2579}
2580
2581static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2582{
2583 u32 blocksize = rc->extent_root->fs_info->nodesize;
2584
2585 if (test_range_bit(&rc->processed_blocks, bytenr,
2586 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2587 return 1;
2588 return 0;
2589}
2590
2591static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2592 struct tree_block *block)
2593{
2594 struct extent_buffer *eb;
2595
2596 eb = read_tree_block(fs_info, block->bytenr, block->owner,
2597 block->key.offset, block->level, NULL);
2598 if (IS_ERR(eb)) {
2599 return PTR_ERR(eb);
2600 } else if (!extent_buffer_uptodate(eb)) {
2601 free_extent_buffer(eb);
2602 return -EIO;
2603 }
2604 if (block->level == 0)
2605 btrfs_item_key_to_cpu(eb, &block->key, 0);
2606 else
2607 btrfs_node_key_to_cpu(eb, &block->key, 0);
2608 free_extent_buffer(eb);
2609 block->key_ready = 1;
2610 return 0;
2611}
2612
2613
2614
2615
2616static int relocate_tree_block(struct btrfs_trans_handle *trans,
2617 struct reloc_control *rc,
2618 struct btrfs_backref_node *node,
2619 struct btrfs_key *key,
2620 struct btrfs_path *path)
2621{
2622 struct btrfs_root *root;
2623 int ret = 0;
2624
2625 if (!node)
2626 return 0;
2627
2628
2629
2630
2631
2632 ret = reserve_metadata_space(trans, rc, node);
2633 if (ret)
2634 goto out;
2635
2636 BUG_ON(node->processed);
2637 root = select_one_root(node);
2638 if (IS_ERR(root)) {
2639 ret = PTR_ERR(root);
2640
2641
2642 ASSERT(ret == -ENOENT);
2643 if (ret == -ENOENT) {
2644 ret = 0;
2645 update_processed_blocks(rc, node);
2646 }
2647 goto out;
2648 }
2649
2650 if (root) {
2651 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665 ASSERT(node->new_bytenr == 0);
2666 ASSERT(list_empty(&node->list));
2667 if (node->new_bytenr || !list_empty(&node->list)) {
2668 btrfs_err(root->fs_info,
2669 "bytenr %llu has improper references to it",
2670 node->bytenr);
2671 ret = -EUCLEAN;
2672 goto out;
2673 }
2674 ret = btrfs_record_root_in_trans(trans, root);
2675 if (ret)
2676 goto out;
2677
2678
2679
2680
2681 if (!root->reloc_root) {
2682 ret = -ENOENT;
2683 goto out;
2684 }
2685 root = root->reloc_root;
2686 node->new_bytenr = root->node->start;
2687 btrfs_put_root(node->root);
2688 node->root = btrfs_grab_root(root);
2689 ASSERT(node->root);
2690 list_add_tail(&node->list, &rc->backref_cache.changed);
2691 } else {
2692 path->lowest_level = node->level;
2693 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2694 btrfs_release_path(path);
2695 if (ret > 0)
2696 ret = 0;
2697 }
2698 if (!ret)
2699 update_processed_blocks(rc, node);
2700 } else {
2701 ret = do_relocation(trans, rc, node, key, path, 1);
2702 }
2703out:
2704 if (ret || node->level == 0 || node->cowonly)
2705 btrfs_backref_cleanup_node(&rc->backref_cache, node);
2706 return ret;
2707}
2708
2709
2710
2711
2712static noinline_for_stack
2713int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2714 struct reloc_control *rc, struct rb_root *blocks)
2715{
2716 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2717 struct btrfs_backref_node *node;
2718 struct btrfs_path *path;
2719 struct tree_block *block;
2720 struct tree_block *next;
2721 int ret;
2722 int err = 0;
2723
2724 path = btrfs_alloc_path();
2725 if (!path) {
2726 err = -ENOMEM;
2727 goto out_free_blocks;
2728 }
2729
2730
2731 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2732 if (!block->key_ready)
2733 btrfs_readahead_tree_block(fs_info, block->bytenr,
2734 block->owner, 0,
2735 block->level);
2736 }
2737
2738
2739 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2740 if (!block->key_ready) {
2741 err = get_tree_block_key(fs_info, block);
2742 if (err)
2743 goto out_free_path;
2744 }
2745 }
2746
2747
2748 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2749 node = build_backref_tree(rc, &block->key,
2750 block->level, block->bytenr);
2751 if (IS_ERR(node)) {
2752 err = PTR_ERR(node);
2753 goto out;
2754 }
2755
2756 ret = relocate_tree_block(trans, rc, node, &block->key,
2757 path);
2758 if (ret < 0) {
2759 err = ret;
2760 break;
2761 }
2762 }
2763out:
2764 err = finish_pending_nodes(trans, rc, path, err);
2765
2766out_free_path:
2767 btrfs_free_path(path);
2768out_free_blocks:
2769 free_block_list(blocks);
2770 return err;
2771}
2772
2773static noinline_for_stack int prealloc_file_extent_cluster(
2774 struct btrfs_inode *inode,
2775 struct file_extent_cluster *cluster)
2776{
2777 u64 alloc_hint = 0;
2778 u64 start;
2779 u64 end;
2780 u64 offset = inode->index_cnt;
2781 u64 num_bytes;
2782 int nr;
2783 int ret = 0;
2784 u64 prealloc_start = cluster->start - offset;
2785 u64 prealloc_end = cluster->end - offset;
2786 u64 cur_offset = prealloc_start;
2787
2788 BUG_ON(cluster->start != cluster->boundary[0]);
2789 ret = btrfs_alloc_data_chunk_ondemand(inode,
2790 prealloc_end + 1 - prealloc_start);
2791 if (ret)
2792 return ret;
2793
2794
2795
2796
2797
2798 if (btrfs_is_zoned(inode->root->fs_info)) {
2799 struct btrfs_root *root = inode->root;
2800 struct btrfs_trans_handle *trans;
2801
2802 end = cluster->end - offset + 1;
2803 trans = btrfs_start_transaction(root, 1);
2804 if (IS_ERR(trans))
2805 return PTR_ERR(trans);
2806
2807 inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
2808 i_size_write(&inode->vfs_inode, end);
2809 ret = btrfs_update_inode(trans, root, inode);
2810 if (ret) {
2811 btrfs_abort_transaction(trans, ret);
2812 btrfs_end_transaction(trans);
2813 return ret;
2814 }
2815
2816 return btrfs_end_transaction(trans);
2817 }
2818
2819 btrfs_inode_lock(&inode->vfs_inode, 0);
2820 for (nr = 0; nr < cluster->nr; nr++) {
2821 start = cluster->boundary[nr] - offset;
2822 if (nr + 1 < cluster->nr)
2823 end = cluster->boundary[nr + 1] - 1 - offset;
2824 else
2825 end = cluster->end - offset;
2826
2827 lock_extent(&inode->io_tree, start, end);
2828 num_bytes = end + 1 - start;
2829 ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
2830 num_bytes, num_bytes,
2831 end + 1, &alloc_hint);
2832 cur_offset = end + 1;
2833 unlock_extent(&inode->io_tree, start, end);
2834 if (ret)
2835 break;
2836 }
2837 btrfs_inode_unlock(&inode->vfs_inode, 0);
2838
2839 if (cur_offset < prealloc_end)
2840 btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
2841 prealloc_end + 1 - cur_offset);
2842 return ret;
2843}
2844
2845static noinline_for_stack
2846int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2847 u64 block_start)
2848{
2849 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2850 struct extent_map *em;
2851 int ret = 0;
2852
2853 em = alloc_extent_map();
2854 if (!em)
2855 return -ENOMEM;
2856
2857 em->start = start;
2858 em->len = end + 1 - start;
2859 em->block_len = em->len;
2860 em->block_start = block_start;
2861 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2862
2863 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2864 while (1) {
2865 write_lock(&em_tree->lock);
2866 ret = add_extent_mapping(em_tree, em, 0);
2867 write_unlock(&em_tree->lock);
2868 if (ret != -EEXIST) {
2869 free_extent_map(em);
2870 break;
2871 }
2872 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
2873 }
2874 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2875 return ret;
2876}
2877
2878
2879
2880
2881noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2882{
2883 return atomic_read(&fs_info->balance_cancel_req) ||
2884 fatal_signal_pending(current);
2885}
2886ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2887
2888static int relocate_file_extent_cluster(struct inode *inode,
2889 struct file_extent_cluster *cluster)
2890{
2891 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2892 u64 page_start;
2893 u64 page_end;
2894 u64 offset = BTRFS_I(inode)->index_cnt;
2895 unsigned long index;
2896 unsigned long last_index;
2897 struct page *page;
2898 struct file_ra_state *ra;
2899 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
2900 int nr = 0;
2901 int ret = 0;
2902
2903 if (!cluster->nr)
2904 return 0;
2905
2906 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2907 if (!ra)
2908 return -ENOMEM;
2909
2910 ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
2911 if (ret)
2912 goto out;
2913
2914 file_ra_state_init(ra, inode->i_mapping);
2915
2916 ret = setup_extent_mapping(inode, cluster->start - offset,
2917 cluster->end - offset, cluster->start);
2918 if (ret)
2919 goto out;
2920
2921 index = (cluster->start - offset) >> PAGE_SHIFT;
2922 last_index = (cluster->end - offset) >> PAGE_SHIFT;
2923 while (index <= last_index) {
2924 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
2925 PAGE_SIZE);
2926 if (ret)
2927 goto out;
2928
2929 page = find_lock_page(inode->i_mapping, index);
2930 if (!page) {
2931 page_cache_sync_readahead(inode->i_mapping,
2932 ra, NULL, index,
2933 last_index + 1 - index);
2934 page = find_or_create_page(inode->i_mapping, index,
2935 mask);
2936 if (!page) {
2937 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2938 PAGE_SIZE, true);
2939 btrfs_delalloc_release_extents(BTRFS_I(inode),
2940 PAGE_SIZE);
2941 ret = -ENOMEM;
2942 goto out;
2943 }
2944 }
2945 ret = set_page_extent_mapped(page);
2946 if (ret < 0) {
2947 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2948 PAGE_SIZE, true);
2949 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
2950 unlock_page(page);
2951 put_page(page);
2952 goto out;
2953 }
2954
2955 if (PageReadahead(page)) {
2956 page_cache_async_readahead(inode->i_mapping,
2957 ra, NULL, page, index,
2958 last_index + 1 - index);
2959 }
2960
2961 if (!PageUptodate(page)) {
2962 btrfs_readpage(NULL, page);
2963 lock_page(page);
2964 if (!PageUptodate(page)) {
2965 unlock_page(page);
2966 put_page(page);
2967 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2968 PAGE_SIZE, true);
2969 btrfs_delalloc_release_extents(BTRFS_I(inode),
2970 PAGE_SIZE);
2971 ret = -EIO;
2972 goto out;
2973 }
2974 }
2975
2976 page_start = page_offset(page);
2977 page_end = page_start + PAGE_SIZE - 1;
2978
2979 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
2980
2981 if (nr < cluster->nr &&
2982 page_start + offset == cluster->boundary[nr]) {
2983 set_extent_bits(&BTRFS_I(inode)->io_tree,
2984 page_start, page_end,
2985 EXTENT_BOUNDARY);
2986 nr++;
2987 }
2988
2989 ret = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start,
2990 page_end, 0, NULL);
2991 if (ret) {
2992 unlock_page(page);
2993 put_page(page);
2994 btrfs_delalloc_release_metadata(BTRFS_I(inode),
2995 PAGE_SIZE, true);
2996 btrfs_delalloc_release_extents(BTRFS_I(inode),
2997 PAGE_SIZE);
2998
2999 clear_extent_bits(&BTRFS_I(inode)->io_tree,
3000 page_start, page_end,
3001 EXTENT_LOCKED | EXTENT_BOUNDARY);
3002 goto out;
3003
3004 }
3005 set_page_dirty(page);
3006
3007 unlock_extent(&BTRFS_I(inode)->io_tree,
3008 page_start, page_end);
3009 unlock_page(page);
3010 put_page(page);
3011
3012 index++;
3013 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
3014 balance_dirty_pages_ratelimited(inode->i_mapping);
3015 btrfs_throttle(fs_info);
3016 if (btrfs_should_cancel_balance(fs_info)) {
3017 ret = -ECANCELED;
3018 goto out;
3019 }
3020 }
3021 WARN_ON(nr != cluster->nr);
3022 if (btrfs_is_zoned(fs_info) && !ret)
3023 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
3024out:
3025 kfree(ra);
3026 return ret;
3027}
3028
3029static noinline_for_stack
3030int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3031 struct file_extent_cluster *cluster)
3032{
3033 int ret;
3034
3035 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3036 ret = relocate_file_extent_cluster(inode, cluster);
3037 if (ret)
3038 return ret;
3039 cluster->nr = 0;
3040 }
3041
3042 if (!cluster->nr)
3043 cluster->start = extent_key->objectid;
3044 else
3045 BUG_ON(cluster->nr >= MAX_EXTENTS);
3046 cluster->end = extent_key->objectid + extent_key->offset - 1;
3047 cluster->boundary[cluster->nr] = extent_key->objectid;
3048 cluster->nr++;
3049
3050 if (cluster->nr >= MAX_EXTENTS) {
3051 ret = relocate_file_extent_cluster(inode, cluster);
3052 if (ret)
3053 return ret;
3054 cluster->nr = 0;
3055 }
3056 return 0;
3057}
3058
3059
3060
3061
3062
3063static int add_tree_block(struct reloc_control *rc,
3064 struct btrfs_key *extent_key,
3065 struct btrfs_path *path,
3066 struct rb_root *blocks)
3067{
3068 struct extent_buffer *eb;
3069 struct btrfs_extent_item *ei;
3070 struct btrfs_tree_block_info *bi;
3071 struct tree_block *block;
3072 struct rb_node *rb_node;
3073 u32 item_size;
3074 int level = -1;
3075 u64 generation;
3076 u64 owner = 0;
3077
3078 eb = path->nodes[0];
3079 item_size = btrfs_item_size_nr(eb, path->slots[0]);
3080
3081 if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3082 item_size >= sizeof(*ei) + sizeof(*bi)) {
3083 unsigned long ptr = 0, end;
3084
3085 ei = btrfs_item_ptr(eb, path->slots[0],
3086 struct btrfs_extent_item);
3087 end = (unsigned long)ei + item_size;
3088 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3089 bi = (struct btrfs_tree_block_info *)(ei + 1);
3090 level = btrfs_tree_block_level(eb, bi);
3091 ptr = (unsigned long)(bi + 1);
3092 } else {
3093 level = (int)extent_key->offset;
3094 ptr = (unsigned long)(ei + 1);
3095 }
3096 generation = btrfs_extent_generation(eb, ei);
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113 if (btrfs_extent_refs(eb, ei) == 1 &&
3114 !(btrfs_extent_flags(eb, ei) &
3115 BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
3116 ptr < end) {
3117 struct btrfs_extent_inline_ref *iref;
3118 int type;
3119
3120 iref = (struct btrfs_extent_inline_ref *)ptr;
3121 type = btrfs_get_extent_inline_ref_type(eb, iref,
3122 BTRFS_REF_TYPE_BLOCK);
3123 if (type == BTRFS_REF_TYPE_INVALID)
3124 return -EINVAL;
3125 if (type == BTRFS_TREE_BLOCK_REF_KEY)
3126 owner = btrfs_extent_inline_ref_offset(eb, iref);
3127 }
3128 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3129 btrfs_print_v0_err(eb->fs_info);
3130 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3131 return -EINVAL;
3132 } else {
3133 BUG();
3134 }
3135
3136 btrfs_release_path(path);
3137
3138 BUG_ON(level == -1);
3139
3140 block = kmalloc(sizeof(*block), GFP_NOFS);
3141 if (!block)
3142 return -ENOMEM;
3143
3144 block->bytenr = extent_key->objectid;
3145 block->key.objectid = rc->extent_root->fs_info->nodesize;
3146 block->key.offset = generation;
3147 block->level = level;
3148 block->key_ready = 0;
3149 block->owner = owner;
3150
3151 rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
3152 if (rb_node)
3153 btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
3154 -EEXIST);
3155
3156 return 0;
3157}
3158
3159
3160
3161
3162static int __add_tree_block(struct reloc_control *rc,
3163 u64 bytenr, u32 blocksize,
3164 struct rb_root *blocks)
3165{
3166 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3167 struct btrfs_path *path;
3168 struct btrfs_key key;
3169 int ret;
3170 bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
3171
3172 if (tree_block_processed(bytenr, rc))
3173 return 0;
3174
3175 if (rb_simple_search(blocks, bytenr))
3176 return 0;
3177
3178 path = btrfs_alloc_path();
3179 if (!path)
3180 return -ENOMEM;
3181again:
3182 key.objectid = bytenr;
3183 if (skinny) {
3184 key.type = BTRFS_METADATA_ITEM_KEY;
3185 key.offset = (u64)-1;
3186 } else {
3187 key.type = BTRFS_EXTENT_ITEM_KEY;
3188 key.offset = blocksize;
3189 }
3190
3191 path->search_commit_root = 1;
3192 path->skip_locking = 1;
3193 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3194 if (ret < 0)
3195 goto out;
3196
3197 if (ret > 0 && skinny) {
3198 if (path->slots[0]) {
3199 path->slots[0]--;
3200 btrfs_item_key_to_cpu(path->nodes[0], &key,
3201 path->slots[0]);
3202 if (key.objectid == bytenr &&
3203 (key.type == BTRFS_METADATA_ITEM_KEY ||
3204 (key.type == BTRFS_EXTENT_ITEM_KEY &&
3205 key.offset == blocksize)))
3206 ret = 0;
3207 }
3208
3209 if (ret) {
3210 skinny = false;
3211 btrfs_release_path(path);
3212 goto again;
3213 }
3214 }
3215 if (ret) {
3216 ASSERT(ret == 1);
3217 btrfs_print_leaf(path->nodes[0]);
3218 btrfs_err(fs_info,
3219 "tree block extent item (%llu) is not found in extent tree",
3220 bytenr);
3221 WARN_ON(1);
3222 ret = -EINVAL;
3223 goto out;
3224 }
3225
3226 ret = add_tree_block(rc, &key, path, blocks);
3227out:
3228 btrfs_free_path(path);
3229 return ret;
3230}
3231
3232static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3233 struct btrfs_block_group *block_group,
3234 struct inode *inode,
3235 u64 ino)
3236{
3237 struct btrfs_root *root = fs_info->tree_root;
3238 struct btrfs_trans_handle *trans;
3239 int ret = 0;
3240
3241 if (inode)
3242 goto truncate;
3243
3244 inode = btrfs_iget(fs_info->sb, ino, root);
3245 if (IS_ERR(inode))
3246 return -ENOENT;
3247
3248truncate:
3249 ret = btrfs_check_trunc_cache_free_space(fs_info,
3250 &fs_info->global_block_rsv);
3251 if (ret)
3252 goto out;
3253
3254 trans = btrfs_join_transaction(root);
3255 if (IS_ERR(trans)) {
3256 ret = PTR_ERR(trans);
3257 goto out;
3258 }
3259
3260 ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
3261
3262 btrfs_end_transaction(trans);
3263 btrfs_btree_balance_dirty(fs_info);
3264out:
3265 iput(inode);
3266 return ret;
3267}
3268
3269
3270
3271
3272
3273static int delete_v1_space_cache(struct extent_buffer *leaf,
3274 struct btrfs_block_group *block_group,
3275 u64 data_bytenr)
3276{
3277 u64 space_cache_ino;
3278 struct btrfs_file_extent_item *ei;
3279 struct btrfs_key key;
3280 bool found = false;
3281 int i;
3282 int ret;
3283
3284 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3285 return 0;
3286
3287 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3288 u8 type;
3289
3290 btrfs_item_key_to_cpu(leaf, &key, i);
3291 if (key.type != BTRFS_EXTENT_DATA_KEY)
3292 continue;
3293 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3294 type = btrfs_file_extent_type(leaf, ei);
3295
3296 if ((type == BTRFS_FILE_EXTENT_REG ||
3297 type == BTRFS_FILE_EXTENT_PREALLOC) &&
3298 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3299 found = true;
3300 space_cache_ino = key.objectid;
3301 break;
3302 }
3303 }
3304 if (!found)
3305 return -ENOENT;
3306 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3307 space_cache_ino);
3308 return ret;
3309}
3310
3311
3312
3313
3314static noinline_for_stack
3315int add_data_references(struct reloc_control *rc,
3316 struct btrfs_key *extent_key,
3317 struct btrfs_path *path,
3318 struct rb_root *blocks)
3319{
3320 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3321 struct ulist *leaves = NULL;
3322 struct ulist_iterator leaf_uiter;
3323 struct ulist_node *ref_node = NULL;
3324 const u32 blocksize = fs_info->nodesize;
3325 int ret = 0;
3326
3327 btrfs_release_path(path);
3328 ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
3329 0, &leaves, NULL, true);
3330 if (ret < 0)
3331 return ret;
3332
3333 ULIST_ITER_INIT(&leaf_uiter);
3334 while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
3335 struct extent_buffer *eb;
3336
3337 eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
3338 if (IS_ERR(eb)) {
3339 ret = PTR_ERR(eb);
3340 break;
3341 }
3342 ret = delete_v1_space_cache(eb, rc->block_group,
3343 extent_key->objectid);
3344 free_extent_buffer(eb);
3345 if (ret < 0)
3346 break;
3347 ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
3348 if (ret < 0)
3349 break;
3350 }
3351 if (ret < 0)
3352 free_block_list(blocks);
3353 ulist_free(leaves);
3354 return ret;
3355}
3356
3357
3358
3359
3360static noinline_for_stack
3361int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
3362 struct btrfs_key *extent_key)
3363{
3364 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3365 struct btrfs_key key;
3366 struct extent_buffer *leaf;
3367 u64 start, end, last;
3368 int ret;
3369
3370 last = rc->block_group->start + rc->block_group->length;
3371 while (1) {
3372 cond_resched();
3373 if (rc->search_start >= last) {
3374 ret = 1;
3375 break;
3376 }
3377
3378 key.objectid = rc->search_start;
3379 key.type = BTRFS_EXTENT_ITEM_KEY;
3380 key.offset = 0;
3381
3382 path->search_commit_root = 1;
3383 path->skip_locking = 1;
3384 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3385 0, 0);
3386 if (ret < 0)
3387 break;
3388next:
3389 leaf = path->nodes[0];
3390 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3391 ret = btrfs_next_leaf(rc->extent_root, path);
3392 if (ret != 0)
3393 break;
3394 leaf = path->nodes[0];
3395 }
3396
3397 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3398 if (key.objectid >= last) {
3399 ret = 1;
3400 break;
3401 }
3402
3403 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3404 key.type != BTRFS_METADATA_ITEM_KEY) {
3405 path->slots[0]++;
3406 goto next;
3407 }
3408
3409 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3410 key.objectid + key.offset <= rc->search_start) {
3411 path->slots[0]++;
3412 goto next;
3413 }
3414
3415 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3416 key.objectid + fs_info->nodesize <=
3417 rc->search_start) {
3418 path->slots[0]++;
3419 goto next;
3420 }
3421
3422 ret = find_first_extent_bit(&rc->processed_blocks,
3423 key.objectid, &start, &end,
3424 EXTENT_DIRTY, NULL);
3425
3426 if (ret == 0 && start <= key.objectid) {
3427 btrfs_release_path(path);
3428 rc->search_start = end + 1;
3429 } else {
3430 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3431 rc->search_start = key.objectid + key.offset;
3432 else
3433 rc->search_start = key.objectid +
3434 fs_info->nodesize;
3435 memcpy(extent_key, &key, sizeof(key));
3436 return 0;
3437 }
3438 }
3439 btrfs_release_path(path);
3440 return ret;
3441}
3442
3443static void set_reloc_control(struct reloc_control *rc)
3444{
3445 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3446
3447 mutex_lock(&fs_info->reloc_mutex);
3448 fs_info->reloc_ctl = rc;
3449 mutex_unlock(&fs_info->reloc_mutex);
3450}
3451
3452static void unset_reloc_control(struct reloc_control *rc)
3453{
3454 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3455
3456 mutex_lock(&fs_info->reloc_mutex);
3457 fs_info->reloc_ctl = NULL;
3458 mutex_unlock(&fs_info->reloc_mutex);
3459}
3460
3461static noinline_for_stack
3462int prepare_to_relocate(struct reloc_control *rc)
3463{
3464 struct btrfs_trans_handle *trans;
3465 int ret;
3466
3467 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
3468 BTRFS_BLOCK_RSV_TEMP);
3469 if (!rc->block_rsv)
3470 return -ENOMEM;
3471
3472 memset(&rc->cluster, 0, sizeof(rc->cluster));
3473 rc->search_start = rc->block_group->start;
3474 rc->extents_found = 0;
3475 rc->nodes_relocated = 0;
3476 rc->merging_rsv_size = 0;
3477 rc->reserved_bytes = 0;
3478 rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
3479 RELOCATION_RESERVED_NODES;
3480 ret = btrfs_block_rsv_refill(rc->extent_root,
3481 rc->block_rsv, rc->block_rsv->size,
3482 BTRFS_RESERVE_FLUSH_ALL);
3483 if (ret)
3484 return ret;
3485
3486 rc->create_reloc_tree = 1;
3487 set_reloc_control(rc);
3488
3489 trans = btrfs_join_transaction(rc->extent_root);
3490 if (IS_ERR(trans)) {
3491 unset_reloc_control(rc);
3492
3493
3494
3495
3496
3497 return PTR_ERR(trans);
3498 }
3499 return btrfs_commit_transaction(trans);
3500}
3501
3502static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3503{
3504 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3505 struct rb_root blocks = RB_ROOT;
3506 struct btrfs_key key;
3507 struct btrfs_trans_handle *trans = NULL;
3508 struct btrfs_path *path;
3509 struct btrfs_extent_item *ei;
3510 u64 flags;
3511 int ret;
3512 int err = 0;
3513 int progress = 0;
3514
3515 path = btrfs_alloc_path();
3516 if (!path)
3517 return -ENOMEM;
3518 path->reada = READA_FORWARD;
3519
3520 ret = prepare_to_relocate(rc);
3521 if (ret) {
3522 err = ret;
3523 goto out_free;
3524 }
3525
3526 while (1) {
3527 rc->reserved_bytes = 0;
3528 ret = btrfs_block_rsv_refill(rc->extent_root,
3529 rc->block_rsv, rc->block_rsv->size,
3530 BTRFS_RESERVE_FLUSH_ALL);
3531 if (ret) {
3532 err = ret;
3533 break;
3534 }
3535 progress++;
3536 trans = btrfs_start_transaction(rc->extent_root, 0);
3537 if (IS_ERR(trans)) {
3538 err = PTR_ERR(trans);
3539 trans = NULL;
3540 break;
3541 }
3542restart:
3543 if (update_backref_cache(trans, &rc->backref_cache)) {
3544 btrfs_end_transaction(trans);
3545 trans = NULL;
3546 continue;
3547 }
3548
3549 ret = find_next_extent(rc, path, &key);
3550 if (ret < 0)
3551 err = ret;
3552 if (ret != 0)
3553 break;
3554
3555 rc->extents_found++;
3556
3557 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3558 struct btrfs_extent_item);
3559 flags = btrfs_extent_flags(path->nodes[0], ei);
3560
3561 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3562 ret = add_tree_block(rc, &key, path, &blocks);
3563 } else if (rc->stage == UPDATE_DATA_PTRS &&
3564 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3565 ret = add_data_references(rc, &key, path, &blocks);
3566 } else {
3567 btrfs_release_path(path);
3568 ret = 0;
3569 }
3570 if (ret < 0) {
3571 err = ret;
3572 break;
3573 }
3574
3575 if (!RB_EMPTY_ROOT(&blocks)) {
3576 ret = relocate_tree_blocks(trans, rc, &blocks);
3577 if (ret < 0) {
3578 if (ret != -EAGAIN) {
3579 err = ret;
3580 break;
3581 }
3582 rc->extents_found--;
3583 rc->search_start = key.objectid;
3584 }
3585 }
3586
3587 btrfs_end_transaction_throttle(trans);
3588 btrfs_btree_balance_dirty(fs_info);
3589 trans = NULL;
3590
3591 if (rc->stage == MOVE_DATA_EXTENTS &&
3592 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3593 rc->found_file_extent = 1;
3594 ret = relocate_data_extent(rc->data_inode,
3595 &key, &rc->cluster);
3596 if (ret < 0) {
3597 err = ret;
3598 break;
3599 }
3600 }
3601 if (btrfs_should_cancel_balance(fs_info)) {
3602 err = -ECANCELED;
3603 break;
3604 }
3605 }
3606 if (trans && progress && err == -ENOSPC) {
3607 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
3608 if (ret == 1) {
3609 err = 0;
3610 progress = 0;
3611 goto restart;
3612 }
3613 }
3614
3615 btrfs_release_path(path);
3616 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
3617
3618 if (trans) {
3619 btrfs_end_transaction_throttle(trans);
3620 btrfs_btree_balance_dirty(fs_info);
3621 }
3622
3623 if (!err) {
3624 ret = relocate_file_extent_cluster(rc->data_inode,
3625 &rc->cluster);
3626 if (ret < 0)
3627 err = ret;
3628 }
3629
3630 rc->create_reloc_tree = 0;
3631 set_reloc_control(rc);
3632
3633 btrfs_backref_release_cache(&rc->backref_cache);
3634 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644 err = prepare_to_merge(rc, err);
3645
3646 merge_reloc_roots(rc);
3647
3648 rc->merge_reloc_tree = 0;
3649 unset_reloc_control(rc);
3650 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3651
3652
3653 trans = btrfs_join_transaction(rc->extent_root);
3654 if (IS_ERR(trans)) {
3655 err = PTR_ERR(trans);
3656 goto out_free;
3657 }
3658 ret = btrfs_commit_transaction(trans);
3659 if (ret && !err)
3660 err = ret;
3661out_free:
3662 ret = clean_dirty_subvols(rc);
3663 if (ret < 0 && !err)
3664 err = ret;
3665 btrfs_free_block_rsv(fs_info, rc->block_rsv);
3666 btrfs_free_path(path);
3667 return err;
3668}
3669
3670static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3671 struct btrfs_root *root, u64 objectid)
3672{
3673 struct btrfs_path *path;
3674 struct btrfs_inode_item *item;
3675 struct extent_buffer *leaf;
3676 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
3677 int ret;
3678
3679 if (btrfs_is_zoned(trans->fs_info))
3680 flags &= ~BTRFS_INODE_PREALLOC;
3681
3682 path = btrfs_alloc_path();
3683 if (!path)
3684 return -ENOMEM;
3685
3686 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3687 if (ret)
3688 goto out;
3689
3690 leaf = path->nodes[0];
3691 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3692 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3693 btrfs_set_inode_generation(leaf, item, 1);
3694 btrfs_set_inode_size(leaf, item, 0);
3695 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3696 btrfs_set_inode_flags(leaf, item, flags);
3697 btrfs_mark_buffer_dirty(leaf);
3698out:
3699 btrfs_free_path(path);
3700 return ret;
3701}
3702
3703static void delete_orphan_inode(struct btrfs_trans_handle *trans,
3704 struct btrfs_root *root, u64 objectid)
3705{
3706 struct btrfs_path *path;
3707 struct btrfs_key key;
3708 int ret = 0;
3709
3710 path = btrfs_alloc_path();
3711 if (!path) {
3712 ret = -ENOMEM;
3713 goto out;
3714 }
3715
3716 key.objectid = objectid;
3717 key.type = BTRFS_INODE_ITEM_KEY;
3718 key.offset = 0;
3719 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3720 if (ret) {
3721 if (ret > 0)
3722 ret = -ENOENT;
3723 goto out;
3724 }
3725 ret = btrfs_del_item(trans, root, path);
3726out:
3727 if (ret)
3728 btrfs_abort_transaction(trans, ret);
3729 btrfs_free_path(path);
3730}
3731
3732
3733
3734
3735
3736static noinline_for_stack
3737struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3738 struct btrfs_block_group *group)
3739{
3740 struct inode *inode = NULL;
3741 struct btrfs_trans_handle *trans;
3742 struct btrfs_root *root;
3743 u64 objectid;
3744 int err = 0;
3745
3746 root = btrfs_grab_root(fs_info->data_reloc_root);
3747 trans = btrfs_start_transaction(root, 6);
3748 if (IS_ERR(trans)) {
3749 btrfs_put_root(root);
3750 return ERR_CAST(trans);
3751 }
3752
3753 err = btrfs_get_free_objectid(root, &objectid);
3754 if (err)
3755 goto out;
3756
3757 err = __insert_orphan_inode(trans, root, objectid);
3758 if (err)
3759 goto out;
3760
3761 inode = btrfs_iget(fs_info->sb, objectid, root);
3762 if (IS_ERR(inode)) {
3763 delete_orphan_inode(trans, root, objectid);
3764 err = PTR_ERR(inode);
3765 inode = NULL;
3766 goto out;
3767 }
3768 BTRFS_I(inode)->index_cnt = group->start;
3769
3770 err = btrfs_orphan_add(trans, BTRFS_I(inode));
3771out:
3772 btrfs_put_root(root);
3773 btrfs_end_transaction(trans);
3774 btrfs_btree_balance_dirty(fs_info);
3775 if (err) {
3776 if (inode)
3777 iput(inode);
3778 inode = ERR_PTR(err);
3779 }
3780 return inode;
3781}
3782
3783static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
3784{
3785 struct reloc_control *rc;
3786
3787 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3788 if (!rc)
3789 return NULL;
3790
3791 INIT_LIST_HEAD(&rc->reloc_roots);
3792 INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3793 btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
3794 mapping_tree_init(&rc->reloc_root_tree);
3795 extent_io_tree_init(fs_info, &rc->processed_blocks,
3796 IO_TREE_RELOC_BLOCKS, NULL);
3797 return rc;
3798}
3799
3800static void free_reloc_control(struct reloc_control *rc)
3801{
3802 struct mapping_node *node, *tmp;
3803
3804 free_reloc_roots(&rc->reloc_roots);
3805 rbtree_postorder_for_each_entry_safe(node, tmp,
3806 &rc->reloc_root_tree.rb_root, rb_node)
3807 kfree(node);
3808
3809 kfree(rc);
3810}
3811
3812
3813
3814
3815static void describe_relocation(struct btrfs_fs_info *fs_info,
3816 struct btrfs_block_group *block_group)
3817{
3818 char buf[128] = {'\0'};
3819
3820 btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
3821
3822 btrfs_info(fs_info,
3823 "relocating block group %llu flags %s",
3824 block_group->start, buf);
3825}
3826
3827static const char *stage_to_string(int stage)
3828{
3829 if (stage == MOVE_DATA_EXTENTS)
3830 return "move data extents";
3831 if (stage == UPDATE_DATA_PTRS)
3832 return "update data pointers";
3833 return "unknown";
3834}
3835
3836
3837
3838
3839int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
3840{
3841 struct btrfs_block_group *bg;
3842 struct btrfs_root *extent_root = fs_info->extent_root;
3843 struct reloc_control *rc;
3844 struct inode *inode;
3845 struct btrfs_path *path;
3846 int ret;
3847 int rw = 0;
3848 int err = 0;
3849
3850 bg = btrfs_lookup_block_group(fs_info, group_start);
3851 if (!bg)
3852 return -ENOENT;
3853
3854 if (btrfs_pinned_by_swapfile(fs_info, bg)) {
3855 btrfs_put_block_group(bg);
3856 return -ETXTBSY;
3857 }
3858
3859 rc = alloc_reloc_control(fs_info);
3860 if (!rc) {
3861 btrfs_put_block_group(bg);
3862 return -ENOMEM;
3863 }
3864
3865 rc->extent_root = extent_root;
3866 rc->block_group = bg;
3867
3868 ret = btrfs_inc_block_group_ro(rc->block_group, true);
3869 if (ret) {
3870 err = ret;
3871 goto out;
3872 }
3873 rw = 1;
3874
3875 path = btrfs_alloc_path();
3876 if (!path) {
3877 err = -ENOMEM;
3878 goto out;
3879 }
3880
3881 inode = lookup_free_space_inode(rc->block_group, path);
3882 btrfs_free_path(path);
3883
3884 if (!IS_ERR(inode))
3885 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
3886 else
3887 ret = PTR_ERR(inode);
3888
3889 if (ret && ret != -ENOENT) {
3890 err = ret;
3891 goto out;
3892 }
3893
3894 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3895 if (IS_ERR(rc->data_inode)) {
3896 err = PTR_ERR(rc->data_inode);
3897 rc->data_inode = NULL;
3898 goto out;
3899 }
3900
3901 describe_relocation(fs_info, rc->block_group);
3902
3903 btrfs_wait_block_group_reservations(rc->block_group);
3904 btrfs_wait_nocow_writers(rc->block_group);
3905 btrfs_wait_ordered_roots(fs_info, U64_MAX,
3906 rc->block_group->start,
3907 rc->block_group->length);
3908
3909 while (1) {
3910 int finishes_stage;
3911
3912 mutex_lock(&fs_info->cleaner_mutex);
3913 ret = relocate_block_group(rc);
3914 mutex_unlock(&fs_info->cleaner_mutex);
3915 if (ret < 0)
3916 err = ret;
3917
3918 finishes_stage = rc->stage;
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3929 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
3930 (u64)-1);
3931 if (ret)
3932 err = ret;
3933 invalidate_mapping_pages(rc->data_inode->i_mapping,
3934 0, -1);
3935 rc->stage = UPDATE_DATA_PTRS;
3936 }
3937
3938 if (err < 0)
3939 goto out;
3940
3941 if (rc->extents_found == 0)
3942 break;
3943
3944 btrfs_info(fs_info, "found %llu extents, stage: %s",
3945 rc->extents_found, stage_to_string(finishes_stage));
3946 }
3947
3948 WARN_ON(rc->block_group->pinned > 0);
3949 WARN_ON(rc->block_group->reserved > 0);
3950 WARN_ON(rc->block_group->used > 0);
3951out:
3952 if (err && rw)
3953 btrfs_dec_block_group_ro(rc->block_group);
3954 iput(rc->data_inode);
3955 btrfs_put_block_group(rc->block_group);
3956 free_reloc_control(rc);
3957 return err;
3958}
3959
3960static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
3961{
3962 struct btrfs_fs_info *fs_info = root->fs_info;
3963 struct btrfs_trans_handle *trans;
3964 int ret, err;
3965
3966 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3967 if (IS_ERR(trans))
3968 return PTR_ERR(trans);
3969
3970 memset(&root->root_item.drop_progress, 0,
3971 sizeof(root->root_item.drop_progress));
3972 btrfs_set_root_drop_level(&root->root_item, 0);
3973 btrfs_set_root_refs(&root->root_item, 0);
3974 ret = btrfs_update_root(trans, fs_info->tree_root,
3975 &root->root_key, &root->root_item);
3976
3977 err = btrfs_end_transaction(trans);
3978 if (err)
3979 return err;
3980 return ret;
3981}
3982
3983
3984
3985
3986
3987
3988
3989int btrfs_recover_relocation(struct btrfs_root *root)
3990{
3991 struct btrfs_fs_info *fs_info = root->fs_info;
3992 LIST_HEAD(reloc_roots);
3993 struct btrfs_key key;
3994 struct btrfs_root *fs_root;
3995 struct btrfs_root *reloc_root;
3996 struct btrfs_path *path;
3997 struct extent_buffer *leaf;
3998 struct reloc_control *rc = NULL;
3999 struct btrfs_trans_handle *trans;
4000 int ret;
4001 int err = 0;
4002
4003 path = btrfs_alloc_path();
4004 if (!path)
4005 return -ENOMEM;
4006 path->reada = READA_BACK;
4007
4008 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4009 key.type = BTRFS_ROOT_ITEM_KEY;
4010 key.offset = (u64)-1;
4011
4012 while (1) {
4013 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
4014 path, 0, 0);
4015 if (ret < 0) {
4016 err = ret;
4017 goto out;
4018 }
4019 if (ret > 0) {
4020 if (path->slots[0] == 0)
4021 break;
4022 path->slots[0]--;
4023 }
4024 leaf = path->nodes[0];
4025 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4026 btrfs_release_path(path);
4027
4028 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4029 key.type != BTRFS_ROOT_ITEM_KEY)
4030 break;
4031
4032 reloc_root = btrfs_read_tree_root(root, &key);
4033 if (IS_ERR(reloc_root)) {
4034 err = PTR_ERR(reloc_root);
4035 goto out;
4036 }
4037
4038 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
4039 list_add(&reloc_root->root_list, &reloc_roots);
4040
4041 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4042 fs_root = btrfs_get_fs_root(fs_info,
4043 reloc_root->root_key.offset, false);
4044 if (IS_ERR(fs_root)) {
4045 ret = PTR_ERR(fs_root);
4046 if (ret != -ENOENT) {
4047 err = ret;
4048 goto out;
4049 }
4050 ret = mark_garbage_root(reloc_root);
4051 if (ret < 0) {
4052 err = ret;
4053 goto out;
4054 }
4055 } else {
4056 btrfs_put_root(fs_root);
4057 }
4058 }
4059
4060 if (key.offset == 0)
4061 break;
4062
4063 key.offset--;
4064 }
4065 btrfs_release_path(path);
4066
4067 if (list_empty(&reloc_roots))
4068 goto out;
4069
4070 rc = alloc_reloc_control(fs_info);
4071 if (!rc) {
4072 err = -ENOMEM;
4073 goto out;
4074 }
4075
4076 rc->extent_root = fs_info->extent_root;
4077
4078 set_reloc_control(rc);
4079
4080 trans = btrfs_join_transaction(rc->extent_root);
4081 if (IS_ERR(trans)) {
4082 err = PTR_ERR(trans);
4083 goto out_unset;
4084 }
4085
4086 rc->merge_reloc_tree = 1;
4087
4088 while (!list_empty(&reloc_roots)) {
4089 reloc_root = list_entry(reloc_roots.next,
4090 struct btrfs_root, root_list);
4091 list_del(&reloc_root->root_list);
4092
4093 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4094 list_add_tail(&reloc_root->root_list,
4095 &rc->reloc_roots);
4096 continue;
4097 }
4098
4099 fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
4100 false);
4101 if (IS_ERR(fs_root)) {
4102 err = PTR_ERR(fs_root);
4103 list_add_tail(&reloc_root->root_list, &reloc_roots);
4104 btrfs_end_transaction(trans);
4105 goto out_unset;
4106 }
4107
4108 err = __add_reloc_root(reloc_root);
4109 ASSERT(err != -EEXIST);
4110 if (err) {
4111 list_add_tail(&reloc_root->root_list, &reloc_roots);
4112 btrfs_put_root(fs_root);
4113 btrfs_end_transaction(trans);
4114 goto out_unset;
4115 }
4116 fs_root->reloc_root = btrfs_grab_root(reloc_root);
4117 btrfs_put_root(fs_root);
4118 }
4119
4120 err = btrfs_commit_transaction(trans);
4121 if (err)
4122 goto out_unset;
4123
4124 merge_reloc_roots(rc);
4125
4126 unset_reloc_control(rc);
4127
4128 trans = btrfs_join_transaction(rc->extent_root);
4129 if (IS_ERR(trans)) {
4130 err = PTR_ERR(trans);
4131 goto out_clean;
4132 }
4133 err = btrfs_commit_transaction(trans);
4134out_clean:
4135 ret = clean_dirty_subvols(rc);
4136 if (ret < 0 && !err)
4137 err = ret;
4138out_unset:
4139 unset_reloc_control(rc);
4140 free_reloc_control(rc);
4141out:
4142 free_reloc_roots(&reloc_roots);
4143
4144 btrfs_free_path(path);
4145
4146 if (err == 0) {
4147
4148 fs_root = btrfs_grab_root(fs_info->data_reloc_root);
4149 ASSERT(fs_root);
4150 err = btrfs_orphan_cleanup(fs_root);
4151 btrfs_put_root(fs_root);
4152 }
4153 return err;
4154}
4155
4156
4157
4158
4159
4160
4161
4162int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
4163{
4164 struct btrfs_fs_info *fs_info = inode->root->fs_info;
4165 struct btrfs_ordered_sum *sums;
4166 struct btrfs_ordered_extent *ordered;
4167 int ret;
4168 u64 disk_bytenr;
4169 u64 new_bytenr;
4170 LIST_HEAD(list);
4171
4172 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4173 BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
4174
4175 disk_bytenr = file_pos + inode->index_cnt;
4176 ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
4177 disk_bytenr + len - 1, &list, 0);
4178 if (ret)
4179 goto out;
4180
4181 while (!list_empty(&list)) {
4182 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4183 list_del_init(&sums->list);
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197 new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
4198 sums->bytenr = new_bytenr;
4199
4200 btrfs_add_ordered_sum(ordered, sums);
4201 }
4202out:
4203 btrfs_put_ordered_extent(ordered);
4204 return ret;
4205}
4206
4207int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4208 struct btrfs_root *root, struct extent_buffer *buf,
4209 struct extent_buffer *cow)
4210{
4211 struct btrfs_fs_info *fs_info = root->fs_info;
4212 struct reloc_control *rc;
4213 struct btrfs_backref_node *node;
4214 int first_cow = 0;
4215 int level;
4216 int ret = 0;
4217
4218 rc = fs_info->reloc_ctl;
4219 if (!rc)
4220 return 0;
4221
4222 BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4223 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
4224
4225 level = btrfs_header_level(buf);
4226 if (btrfs_header_generation(buf) <=
4227 btrfs_root_last_snapshot(&root->root_item))
4228 first_cow = 1;
4229
4230 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4231 rc->create_reloc_tree) {
4232 WARN_ON(!first_cow && level == 0);
4233
4234 node = rc->backref_cache.path[level];
4235 BUG_ON(node->bytenr != buf->start &&
4236 node->new_bytenr != buf->start);
4237
4238 btrfs_backref_drop_node_buffer(node);
4239 atomic_inc(&cow->refs);
4240 node->eb = cow;
4241 node->new_bytenr = cow->start;
4242
4243 if (!node->pending) {
4244 list_move_tail(&node->list,
4245 &rc->backref_cache.pending[level]);
4246 node->pending = 1;
4247 }
4248
4249 if (first_cow)
4250 mark_block_processed(rc, node);
4251
4252 if (first_cow && level > 0)
4253 rc->nodes_relocated += buf->len;
4254 }
4255
4256 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4257 ret = replace_file_extents(trans, rc, root, cow);
4258 return ret;
4259}
4260
4261
4262
4263
4264
4265void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
4266 u64 *bytes_to_reserve)
4267{
4268 struct btrfs_root *root = pending->root;
4269 struct reloc_control *rc = root->fs_info->reloc_ctl;
4270
4271 if (!rc || !have_reloc_root(root))
4272 return;
4273
4274 if (!rc->merge_reloc_tree)
4275 return;
4276
4277 root = root->reloc_root;
4278 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289 *bytes_to_reserve += rc->nodes_relocated;
4290}
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4301 struct btrfs_pending_snapshot *pending)
4302{
4303 struct btrfs_root *root = pending->root;
4304 struct btrfs_root *reloc_root;
4305 struct btrfs_root *new_root;
4306 struct reloc_control *rc = root->fs_info->reloc_ctl;
4307 int ret;
4308
4309 if (!rc || !have_reloc_root(root))
4310 return 0;
4311
4312 rc = root->fs_info->reloc_ctl;
4313 rc->merging_rsv_size += rc->nodes_relocated;
4314
4315 if (rc->merge_reloc_tree) {
4316 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4317 rc->block_rsv,
4318 rc->nodes_relocated, true);
4319 if (ret)
4320 return ret;
4321 }
4322
4323 new_root = pending->snap;
4324 reloc_root = create_reloc_root(trans, root->reloc_root,
4325 new_root->root_key.objectid);
4326 if (IS_ERR(reloc_root))
4327 return PTR_ERR(reloc_root);
4328
4329 ret = __add_reloc_root(reloc_root);
4330 ASSERT(ret != -EEXIST);
4331 if (ret) {
4332
4333 btrfs_put_root(reloc_root);
4334 return ret;
4335 }
4336 new_root->reloc_root = btrfs_grab_root(reloc_root);
4337
4338 if (rc->create_reloc_tree)
4339 ret = clone_backref_node(trans, rc, root, reloc_root);
4340 return ret;
4341}
4342