// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/error-injection.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
#include "free-space-cache.h"
#include "qgroup.h"
#include "print-tree.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "backref.h"
#include "misc.h"
#include "subpage.h"
#include "zoned.h"
#include "inode-item.h"

/*
 * Relocation overview
 *
 * [What does relocation do]
 *
 * The objective of relocation is to relocate all extents of the target block
 * group to other block groups.  This is utilized by resize (shrink only),
 * profile converting and balance.
 *
 * [How does relocation work]
 *
 * Before we go into the real work, some terms need to be explained: a reloc
 * tree is a special snapshot of a subvolume tree that shares its tree blocks;
 * data is relocated by copying it through a special data inode and then
 * updating the file extent items that pointed at the old location; tree
 * blocks are relocated by COWing them into reloc trees, which are merged
 * back into the subvolume trees (and dropped) once every backref of the
 * block group has been processed.
 */

#define RELOCATION_RESERVED_NODES	256

/*
 * map address of tree root to tree
 */
struct mapping_node {
	struct {
		struct rb_node rb_node;
		u64 bytenr;
	}; /* Use rb_simple_node for search/insert */
	void *data;
};

struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};

/*
 * present a tree block to process
 */
struct tree_block {
	struct {
		struct rb_node rb_node;
		u64 bytenr;
	}; /* Use rb_simple_node for search/insert */
	u64 owner;
	struct btrfs_key key;
	unsigned int level:8;
	unsigned int key_ready:1;
};

#define MAX_EXTENTS 128

struct file_extent_cluster {
	u64 start;
	u64 end;
	u64 boundary[MAX_EXTENTS];
	unsigned int nr;
};

struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode for moving data */
	struct inode *data_inode;

	struct btrfs_block_rsv *block_rsv;

	struct btrfs_backref_cache backref_cache;

	struct file_extent_cluster cluster;
	/* tree blocks have been processed */
	struct extent_io_tree processed_blocks;
	/* map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	/* list of subvolume trees that get relocated */
	struct list_head dirty_subvol_roots;
	/* size of metadata reservation for merging reloc trees */
	u64 merging_rsv_size;
	/* size of relocated tree nodes */
	u64 nodes_relocated;
	/* reserved size for block group relocation*/
	u64 reserved_bytes;

	u64 search_start;
	u64 extents_found;

	unsigned int stage:8;
	unsigned int create_reloc_tree:1;
	unsigned int merge_reloc_tree:1;
	unsigned int found_file_extent:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1

static void mark_block_processed(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{
	u32 blocksize;

	if (node->level == 0 ||
	    in_range(node->bytenr, rc->block_group->start,
		     rc->block_group->length)) {
		blocksize = rc->extent_root->fs_info->nodesize;
		set_extent_bits(&rc->processed_blocks, node->bytenr,
				node->bytenr + blocksize - 1, EXTENT_DIRTY);
	}
	node->processed = 1;
}

static void mapping_tree_init(struct mapping_tree *tree)
{
	tree->rb_root = RB_ROOT;
	spin_lock_init(&tree->lock);
}

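/*
 * Walk up the backref nodes until we reach the node in the next level of
 * the tree, recording the edges taken in @edges so the walk can later be
 * resumed with walk_down_backref().
 */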
static struct btrfs_backref_node *walk_up_backref(
		struct btrfs_backref_node *node,
		struct btrfs_backref_edge *edges[], int *index)
{
	struct btrfs_backref_edge *edge;
	int idx = *index;

	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next,
				  struct btrfs_backref_edge, list[LOWER]);
		edges[idx++] = edge;
		node = edge->node[UPPER];
	}
	BUG_ON(node->detached);
	*index = idx;
	return node;
}

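/*
 * Walk down the path recorded in @edges and switch to the next backref
 * edge of the lowest node that still has unvisited edges, so that every
 * path from the original node up to a tree root is eventually visited.
 */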
static struct btrfs_backref_node *walk_down_backref(
		struct btrfs_backref_edge *edges[], int *index)
{
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_node *lower;
	int idx = *index;

	while (idx > 0) {
		edge = edges[idx - 1];
		lower = edge->node[LOWER];
		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
			idx--;
			continue;
		}
		edge = list_entry(edge->list[LOWER].next,
				  struct btrfs_backref_edge, list[LOWER]);
		edges[idx - 1] = edge;
		*index = idx;
		return edge->node[UPPER];
	}
	*index = 0;
	return NULL;
}

static void update_backref_node(struct btrfs_backref_cache *cache,
				struct btrfs_backref_node *node, u64 bytenr)
{
	struct rb_node *rb_node;

	rb_erase(&node->rb_node, &cache->rb_root);
	node->bytenr = bytenr;
	rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
	if (rb_node)
		btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
}

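/*
 * Update the backref cache after a transaction commit: drop the now stale
 * detached nodes and move relocated nodes to their new bytenr.  Returns 1
 * if the cache was updated, 0 if it was already current for this
 * transaction.
 */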
static int update_backref_cache(struct btrfs_trans_handle *trans,
				struct btrfs_backref_cache *cache)
{
	struct btrfs_backref_node *node;
	int level = 0;

	if (cache->last_trans == 0) {
		cache->last_trans = trans->transid;
		return 0;
	}

	if (cache->last_trans == trans->transid)
		return 0;

	/*
	 * Detached nodes are used to avoid unnecessary backref lookup.
	 * Transaction commit changes the extent tree, so the detached nodes
	 * are no longer useful.
	 */
	while (!list_empty(&cache->detached)) {
		node = list_entry(cache->detached.next,
				  struct btrfs_backref_node, list);
		btrfs_backref_cleanup_node(cache, node);
	}

	while (!list_empty(&cache->changed)) {
		node = list_entry(cache->changed.next,
				  struct btrfs_backref_node, list);
		list_del_init(&node->list);
		BUG_ON(node->pending);
		update_backref_node(cache, node, node->new_bytenr);
	}

	/*
	 * Some nodes can be left in the pending list if there were errors
	 * during processing the pending nodes.
	 */
	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		list_for_each_entry(node, &cache->pending[level], list) {
			BUG_ON(!node->pending);
			if (node->bytenr == node->new_bytenr)
				continue;
			update_backref_node(cache, node, node->new_bytenr);
		}
	}

	cache->last_trans = 0;
	return 1;
}

static bool reloc_root_is_dead(struct btrfs_root *root)
{
	/*
	 * Pair with set_bit/clear_bit in clean_dirty_subvols() and
	 * btrfs_update_reloc_root().  We need to see the updated bit before
	 * trying to access reloc_root.
	 */
	smp_rmb();
	if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
		return true;
	return false;
}

/*
 * Check if this subvolume tree has a valid reloc tree.
 *
 * A reloc tree after swap is considered dead, thus not considered as valid.
 * This is enough for most callers, as they don't distinguish a dead reloc
 * root from no reloc root.  But btrfs_should_ignore_reloc_root() below is a
 * special case.
 */
static bool have_reloc_root(struct btrfs_root *root)
{
	if (reloc_root_is_dead(root))
		return false;
	if (!root->reloc_root)
		return false;
	return true;
}

int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;

	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		return 0;

	/* This root has been merged with its reloc tree, we can ignore it */
	if (reloc_root_is_dead(root))
		return 1;

	reloc_root = root->reloc_root;
	if (!reloc_root)
		return 0;

	if (btrfs_header_generation(reloc_root->commit_root) ==
	    root->fs_info->running_transaction->transid)
		return 0;
	/*
	 * If there is a reloc tree and it was created in a previous
	 * transaction, backref lookup can find the reloc tree, so the backref
	 * node for the fs tree root is useless for relocation.
	 */
	return 1;
}

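/*
 * Find the reloc tree whose tree root (commit root) starts at @bytenr.
 * A reference is taken on the returned root; NULL is returned if there is
 * no reloc tree rooted at that block.
 */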
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct reloc_control *rc = fs_info->reloc_ctl;
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct btrfs_root *root = NULL;

	ASSERT(rc);
	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		root = (struct btrfs_root *)node->data;
	}
	spin_unlock(&rc->reloc_root_tree.lock);
	return btrfs_grab_root(root);
}

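/*
 * Handle the useless backref nodes queued on backref_cache::useless_node:
 * drop their lower edges (queueing children that become orphans as useless
 * too), free leaf nodes, and keep non-leaf nodes cached as detached.
 *
 * Return true if @node itself was on the useless list, false otherwise.
 */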
static bool handle_useless_nodes(struct reloc_control *rc,
				 struct btrfs_backref_node *node)
{
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct list_head *useless_node = &cache->useless_node;
	bool ret = false;

	while (!list_empty(useless_node)) {
		struct btrfs_backref_node *cur;

		cur = list_first_entry(useless_node, struct btrfs_backref_node,
				       list);
		list_del_init(&cur->list);

		/* Only tree root nodes can be added to @useless_nodes */
		ASSERT(list_empty(&cur->upper));

		if (cur == node)
			ret = true;

		/* The node is the lowest node */
		if (cur->lowest) {
			list_del_init(&cur->lower);
			cur->lowest = 0;
		}

		/* Cleanup the lower edges */
		while (!list_empty(&cur->lower)) {
			struct btrfs_backref_edge *edge;
			struct btrfs_backref_node *lower;

			edge = list_entry(cur->lower.next,
					  struct btrfs_backref_edge, list[UPPER]);
			list_del(&edge->list[UPPER]);
			list_del(&edge->list[LOWER]);
			lower = edge->node[LOWER];
			btrfs_backref_free_edge(cache, edge);

			/* Child node is also orphan, queue for cleanup */
			if (list_empty(&lower->upper))
				list_add(&lower->list, useless_node);
		}

		mark_block_processed(rc, cur);

		/*
		 * Backref nodes for tree leaves are deleted from the cache.
		 * Backref nodes for upper level tree blocks are left in the
		 * cache to avoid unnecessary backref lookup.
		 */
		if (cur->level > 0) {
			list_add(&cur->list, &cache->detached);
			cur->detached = 1;
		} else {
			rb_erase(&cur->rb_node, &cache->rb_root);
			btrfs_backref_free_node(cache, cur);
		}
	}
	return ret;
}

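/*
 * Build a backref tree for a given tree block.  The root of the backref
 * tree corresponds to the tree block, and leaves of the backref tree
 * correspond to roots of b-trees that reference the tree block.
 *
 * The basic idea of this function is to check backrefs of a given block to
 * find upper level blocks that reference the block, and then check backrefs
 * of these upper level blocks recursively.  The recursion stops when a tree
 * root is reached or backrefs for the block are cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs
 * for all upper level blocks that directly/indirectly reference the block
 * are also cached.
 */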
static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
			struct reloc_control *rc, struct btrfs_key *node_key,
			int level, u64 bytenr)
{
	struct btrfs_backref_iter *iter;
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	/* For searching parent of TREE_BLOCK_REF */
	struct btrfs_path *path;
	struct btrfs_backref_node *cur;
	struct btrfs_backref_node *node = NULL;
	struct btrfs_backref_edge *edge;
	int ret;
	int err = 0;

	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
	if (!iter)
		return ERR_PTR(-ENOMEM);
	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	node = btrfs_backref_alloc_node(cache, bytenr, level);
	if (!node) {
		err = -ENOMEM;
		goto out;
	}

	node->lowest = 1;
	cur = node;

	/* Breadth-first search to build backref cache */
	do {
		ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
						  cur);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		edge = list_first_entry_or_null(&cache->pending_edge,
				struct btrfs_backref_edge, list[UPPER]);
		/*
		 * The pending list isn't empty, take the first block to
		 * process
		 */
		if (edge) {
			list_del_init(&edge->list[UPPER]);
			cur = edge->node[UPPER];
		}
	} while (edge);

	/* Finish the upper linkage of newly added edges/nodes */
	ret = btrfs_backref_finish_upper_links(cache, node);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (handle_useless_nodes(rc, node))
		node = NULL;
out:
	btrfs_backref_iter_free(iter);
	btrfs_free_path(path);
	if (err) {
		btrfs_backref_error_cleanup(cache, node);
		return ERR_PTR(err);
	}
	ASSERT(!node || !node->detached);
	ASSERT(list_empty(&cache->useless_node) &&
	       list_empty(&cache->pending_edge));
	return node;
}

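/*
 * helper to add backref node for the newly created snapshot.
 * the backref node is created by cloning backref node that
 * corresponds to root of source tree
 */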
static int clone_backref_node(struct btrfs_trans_handle *trans,
			      struct reloc_control *rc,
			      struct btrfs_root *src,
			      struct btrfs_root *dest)
{
	struct btrfs_root *reloc_root = src->reloc_root;
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct btrfs_backref_node *node = NULL;
	struct btrfs_backref_node *new_node;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *new_edge;
	struct rb_node *rb_node;

	if (cache->last_trans > 0)
		update_backref_cache(trans, cache);

	rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
		if (node->detached)
			node = NULL;
		else
			BUG_ON(node->new_bytenr != reloc_root->node->start);
	}

	if (!node) {
		rb_node = rb_simple_search(&cache->rb_root,
					   reloc_root->commit_root->start);
		if (rb_node) {
			node = rb_entry(rb_node, struct btrfs_backref_node,
					rb_node);
			BUG_ON(node->detached);
		}
	}

	if (!node)
		return 0;

	new_node = btrfs_backref_alloc_node(cache, dest->node->start,
					    node->level);
	if (!new_node)
		return -ENOMEM;

	new_node->lowest = node->lowest;
	new_node->checked = 1;
	new_node->root = btrfs_grab_root(dest);
	ASSERT(new_node->root);

	if (!node->lowest) {
		list_for_each_entry(edge, &node->lower, list[UPPER]) {
			new_edge = btrfs_backref_alloc_edge(cache);
			if (!new_edge)
				goto fail;

			btrfs_backref_link_edge(new_edge, edge->node[LOWER],
						new_node, LINK_UPPER);
		}
	} else {
		list_add_tail(&new_node->lower, &cache->leaves);
	}

	rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
				   &new_node->rb_node);
	if (rb_node)
		btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);

	if (!new_node->lowest) {
		list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
			list_add_tail(&new_edge->list[LOWER],
				      &new_edge->node[LOWER]->upper);
		}
	}
	return 0;
fail:
	while (!list_empty(&new_node->lower)) {
		new_edge = list_entry(new_node->lower.next,
				      struct btrfs_backref_edge, list[UPPER]);
		list_del(&new_edge->list[UPPER]);
		btrfs_backref_free_edge(cache, new_edge);
	}
	btrfs_backref_free_node(cache, new_node);
	return -ENOMEM;
}

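/*
 * helper to add 'address of tree root -> reloc tree' mapping
 */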
static int __must_check __add_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct reloc_control *rc = fs_info->reloc_ctl;

	node = kmalloc(sizeof(*node), GFP_NOFS);
	if (!node)
		return -ENOMEM;

	node->bytenr = root->commit_root->start;
	node->data = root;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
				   node->bytenr, &node->rb_node);
	spin_unlock(&rc->reloc_root_tree.lock);
	if (rb_node) {
		btrfs_err(fs_info,
			    "Duplicate root found for start=%llu while inserting into relocation tree",
			    node->bytenr);
		return -EEXIST;
	}

	list_add_tail(&root->root_list, &rc->reloc_roots);
	return 0;
}

/*
 * helper to delete the 'address of tree root -> reloc tree'
 * mapping
 */
static void __del_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = fs_info->reloc_ctl;
	bool put_ref = false;

	if (rc && root->node) {
		spin_lock(&rc->reloc_root_tree.lock);
		rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
					   root->commit_root->start);
		if (rb_node) {
			node = rb_entry(rb_node, struct mapping_node, rb_node);
			rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
			RB_CLEAR_NODE(&node->rb_node);
		}
		spin_unlock(&rc->reloc_root_tree.lock);
		ASSERT(!node || (struct btrfs_root *)node->data == root);
	}

	/*
	 * The mapping node is always freed here, but we only drop the list
	 * reference on the root if it is still on a reloc roots list, since
	 * callers that splice rc->reloc_roots hold their own reference while
	 * processing the roots.
	 */
	spin_lock(&fs_info->trans_lock);
	if (!list_empty(&root->root_list)) {
		put_ref = true;
		list_del_init(&root->root_list);
	}
	spin_unlock(&fs_info->trans_lock);
	if (put_ref)
		btrfs_put_root(root);
	kfree(node);
}

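/*
 * helper to update the 'address of tree root -> reloc tree'
 * mapping
 */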
static int __update_reloc_root(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = fs_info->reloc_ctl;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
				   root->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
	}
	spin_unlock(&rc->reloc_root_tree.lock);

	if (!node)
		return 0;
	BUG_ON((struct btrfs_root *)node->data != root);

	spin_lock(&rc->reloc_root_tree.lock);
	node->bytenr = root->node->start;
	rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
				   node->bytenr, &node->rb_node);
	spin_unlock(&rc->reloc_root_tree.lock);
	if (rb_node)
		btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
	return 0;
}

static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
					struct btrfs_root *root, u64 objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret = 0;
	bool must_abort = false;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	if (!root_item)
		return ERR_PTR(-ENOMEM);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = objectid;

	if (root->root_key.objectid == objectid) {
		u64 commit_root_gen;

		/* called by btrfs_init_reloc_root */
		ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		if (ret)
			goto fail;

		/*
		 * Set the last_snapshot field to the generation of the commit
		 * root - now that we know there are tree blocks shared between
		 * the fs tree and the reloc tree, the COW paths must treat
		 * those blocks as shared.
		 */
		commit_root_gen = btrfs_header_generation(root->commit_root);
		btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
	} else {
		/*
		 * called by btrfs_reloc_post_snapshot_hook.
		 * the source of new snapshot is the current root node of the
		 * fs tree.
		 */
		ret = btrfs_copy_root(trans, root, root->node, &eb,
				      BTRFS_TREE_RELOC_OBJECTID);
		if (ret)
			goto fail;
	}

	/*
	 * We have changed references at this point, we must abort the
	 * transaction if anything fails.
	 */
	must_abort = true;

	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);

	if (root->root_key.objectid == objectid) {
		btrfs_set_root_refs(root_item, 0);
		memset(&root_item->drop_progress, 0,
		       sizeof(struct btrfs_disk_key));
		btrfs_set_root_drop_level(root_item, 0);
	}

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, fs_info->tree_root,
				&root_key, root_item);
	if (ret)
		goto fail;

	kfree(root_item);

	reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
	if (IS_ERR(reloc_root)) {
		ret = PTR_ERR(reloc_root);
		goto abort;
	}
	set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
	reloc_root->last_trans = trans->transid;
	return reloc_root;
fail:
	kfree(root_item);
abort:
	if (must_abort)
		btrfs_abort_transaction(trans, ret);
	return ERR_PTR(ret);
}

/*
 * create reloc tree for a given fs tree. reloc tree is just a
 * snapshot of the fs tree with special root objectid.
 *
 * The reloc_root comes out of here with two references, one for
 * root->reloc_root, and another for being on the rc->reloc_roots list.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct reloc_control *rc = fs_info->reloc_ctl;
	struct btrfs_block_rsv *rsv;
	int clear_rsv = 0;
	int ret;

	if (!rc)
		return 0;

	/*
	 * The subvolume has reloc tree but the swap is finished, no need to
	 * create/update the dead reloc tree
	 */
	if (reloc_root_is_dead(root))
		return 0;

	/*
	 * This is subtle but important.  We do not do
	 * record_root_in_transaction for reloc roots, instead we record their
	 * corresponding fs root, and then here we update the last trans for
	 * the reloc root.  This means the dirtying of the reloc root follows
	 * the fs root, regardless of which stage of the relocation we are in.
	 */
	if (root->reloc_root) {
		reloc_root = root->reloc_root;
		reloc_root->last_trans = trans->transid;
		return 0;
	}

	/*
	 * We are merging reloc roots, we do not need new reloc trees.  Also
	 * reloc trees never need their own reloc tree.
	 */
	if (!rc->create_reloc_tree ||
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		return 0;

	if (!trans->reloc_reserved) {
		rsv = trans->block_rsv;
		trans->block_rsv = rc->block_rsv;
		clear_rsv = 1;
	}
	reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
	if (clear_rsv)
		trans->block_rsv = rsv;
	if (IS_ERR(reloc_root))
		return PTR_ERR(reloc_root);

	ret = __add_reloc_root(reloc_root);
	ASSERT(ret != -EEXIST);
	if (ret) {
		/* Pairs with create_reloc_root */
		btrfs_put_root(reloc_root);
		return ret;
	}
	root->reloc_root = btrfs_grab_root(reloc_root);
	return 0;
}

/*
 * update root item of reloc tree
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	int ret;

	if (!have_reloc_root(root))
		return 0;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	/*
	 * __del_reloc_root() may drop its reference on the reloc root, so grab
	 * a reference here to keep it alive while we use root_item.
	 */
	btrfs_grab_root(reloc_root);

	/* root->reloc_root will stay until current relocation finished */
	if (fs_info->reloc_ctl->merge_reloc_tree &&
	    btrfs_root_refs(root_item) == 0) {
		set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
		/*
		 * Mark the tree dead before deleting the mapping.  Pairs with
		 * the smp_rmb() in reloc_root_is_dead().
		 */
		smp_wmb();
		__del_reloc_root(reloc_root);
	}

	if (reloc_root->commit_root != reloc_root->node) {
		__update_reloc_root(reloc_root);
		btrfs_set_root_node(root_item, reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		reloc_root->commit_root = btrfs_root_node(reloc_root);
	}

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&reloc_root->root_key, root_item);
	btrfs_put_root(reloc_root);
	return ret;
}

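/*
 * helper to find first cached inode with inode number >= objectid
 * in a subvolume
 */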
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;

	spin_lock(&root->inode_lock);
again:
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			return inode;
		}

		objectid = btrfs_ino(entry) + 1;
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
	return NULL;
}

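/*
 * get new location of data
 */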
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
			    u64 bytenr, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
	       btrfs_file_extent_compression(leaf, fi) ||
	       btrfs_file_extent_encryption(leaf, fi) ||
	       btrfs_file_extent_other_encoding(leaf, fi));

	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
		ret = -EINVAL;
		goto out;
	}

	*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 */
static noinline_for_stack
int replace_file_extents(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_root *root,
			 struct extent_buffer *leaf)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	u64 parent;
	u64 bytenr;
	u64 new_bytenr = 0;
	u64 num_bytes;
	u64 end;
	u32 nritems;
	u32 i;
	int ret = 0;
	int first = 1;
	int dirty = 0;

	if (rc->stage != UPDATE_DATA_PTRS)
		return 0;

	/* reloc trees always use full backref */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		parent = leaf->start;
	else
		parent = 0;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		struct btrfs_ref ref = { 0 };

		cond_resched();
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;
		if (!in_range(bytenr, rc->block_group->start,
			      rc->block_group->length))
			continue;

		/*
		 * if we are modifying block in fs tree, wait for any in-flight
		 * reads to complete and drop the extent cache
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			if (first) {
				inode = find_next_inode(root, key.objectid);
				first = 0;
			} else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
				btrfs_add_delayed_iput(inode);
				inode = find_next_inode(root, key.objectid);
			}
			if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
				end = key.offset +
				      btrfs_file_extent_num_bytes(leaf, fi);
				WARN_ON(!IS_ALIGNED(key.offset,
						    fs_info->sectorsize));
				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
				end--;
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						      key.offset, end);
				if (!ret)
					continue;

				btrfs_drop_extent_cache(BTRFS_I(inode),
						key.offset,	end, 1);
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      key.offset, end);
			}
		}

		ret = get_new_location(rc->data_inode, &new_bytenr,
				       bytenr, num_bytes);
		if (ret) {
			/*
			 * Don't have to abort since we've not changed anything
			 * in the file extent yet.
			 */
			break;
		}

		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
		dirty = 1;

		key.offset -= btrfs_file_extent_offset(leaf, fi);
		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
				       num_bytes, parent);
		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
				    key.objectid, key.offset,
				    root->root_key.objectid, false);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
				       num_bytes, parent);
		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
				    key.objectid, key.offset,
				    root->root_key.objectid, false);
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}
	}
	if (dirty)
		btrfs_mark_buffer_dirty(leaf);
	if (inode)
		btrfs_add_delayed_iput(inode);
	return ret;
}

static noinline_for_stack
int memcmp_node_keys(struct extent_buffer *eb, int slot,
		     struct btrfs_path *path, int level)
{
	struct btrfs_disk_key key1;
	struct btrfs_disk_key key2;
	btrfs_node_key(eb, &key1, slot);
	btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
	return memcmp(&key1, &key2, sizeof(key1));
}

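/*
 * replace tree blocks in the fs tree with tree blocks in
 * the reloc tree.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block was replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */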
static noinline_for_stack
int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
		 struct btrfs_root *dest, struct btrfs_root *src,
		 struct btrfs_path *path, struct btrfs_key *next_key,
		 int lowest_level, int max_level)
{
	struct btrfs_fs_info *fs_info = dest->fs_info;
	struct extent_buffer *eb;
	struct extent_buffer *parent;
	struct btrfs_ref ref = { 0 };
	struct btrfs_key key;
	u64 old_bytenr;
	u64 new_bytenr;
	u64 old_ptr_gen;
	u64 new_ptr_gen;
	u64 last_snapshot;
	u32 blocksize;
	int cow = 0;
	int level;
	int ret;
	int slot;

	ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
	ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
	slot = path->slots[lowest_level];
	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

	eb = btrfs_lock_root_node(dest);
	level = btrfs_header_level(eb);

	if (level < lowest_level) {
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
		return 0;
	}

	if (cow) {
		ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
				      BTRFS_NESTING_COW);
		if (ret) {
			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			return ret;
		}
	}

	if (next_key) {
		next_key->objectid = (u64)-1;
		next_key->type = (u8)-1;
		next_key->offset = (u64)-1;
	}

	parent = eb;
	while (1) {
		level = btrfs_header_level(parent);
		ASSERT(level >= lowest_level);

		ret = btrfs_bin_search(parent, &key, &slot);
		if (ret < 0)
			break;
		if (ret && slot > 0)
			slot--;

		if (next_key && slot + 1 < btrfs_header_nritems(parent))
			btrfs_node_key_to_cpu(parent, next_key, slot + 1);

		old_bytenr = btrfs_node_blockptr(parent, slot);
		blocksize = fs_info->nodesize;
		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);

		if (level <= max_level) {
			eb = path->nodes[level];
			new_bytenr = btrfs_node_blockptr(eb,
							path->slots[level]);
			new_ptr_gen = btrfs_node_ptr_generation(eb,
							path->slots[level]);
		} else {
			new_bytenr = 0;
			new_ptr_gen = 0;
		}

		if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
			ret = level;
			break;
		}

		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
		    memcmp_node_keys(parent, slot, path, level)) {
			if (level <= lowest_level) {
				ret = 0;
				break;
			}

			eb = btrfs_read_node_slot(parent, slot);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				break;
			}
			btrfs_tree_lock(eb);
			if (cow) {
				ret = btrfs_cow_block(trans, dest, eb, parent,
						      slot, &eb,
						      BTRFS_NESTING_COW);
				if (ret) {
					btrfs_tree_unlock(eb);
					free_extent_buffer(eb);
					break;
				}
			}

			btrfs_tree_unlock(parent);
			free_extent_buffer(parent);

			parent = eb;
			continue;
		}

		if (!cow) {
			btrfs_tree_unlock(parent);
			free_extent_buffer(parent);
			cow = 1;
			goto again;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &key,
				      path->slots[level]);
		btrfs_release_path(path);

		path->lowest_level = level;
		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
		path->lowest_level = 0;
		if (ret) {
			if (ret > 0)
				ret = -ENOENT;
			break;
		}

		/*
		 * Info qgroup to trace both subtrees.
		 *
		 * We must trace both trees.
		 * 1) Tree reloc subtree
		 *    If not traced, we will leak data numbers
		 * 2) Fs subtree
		 *    If not traced, we will double count old data
		 *
		 * We don't scan the subtree right now, but only record the
		 * swapped tree blocks; the full subtree accounting is done
		 * later by qgroup.
		 */
		ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
				rc->block_group, parent, slot,
				path->nodes[level], path->slots[level],
				last_snapshot);
		if (ret < 0)
			break;

		/* swap blocks in fs tree and reloc tree */
		btrfs_set_node_blockptr(parent, slot, new_bytenr);
		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
		btrfs_mark_buffer_dirty(parent);

		btrfs_set_node_blockptr(path->nodes[level],
					path->slots[level], old_bytenr);
		btrfs_set_node_ptr_generation(path->nodes[level],
					      path->slots[level], old_ptr_gen);
		btrfs_mark_buffer_dirty(path->nodes[level]);

		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
				       blocksize, path->nodes[level]->start);
		btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
				    0, true);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}
		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
				       blocksize, 0);
		btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
				    true);
		ret = btrfs_inc_extent_ref(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
				       blocksize, path->nodes[level]->start);
		btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
				    0, true);
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
				       blocksize, 0);
		btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
				    0, true);
		ret = btrfs_free_extent(trans, &ref);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			break;
		}

		btrfs_unlock_up_safe(path, 0);

		ret = level;
		break;
	}
	btrfs_tree_unlock(parent);
	free_extent_buffer(parent);
	return ret;
}

/*
 * helper to find next relocated block in reloc tree
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
		       int *level)
{
	struct extent_buffer *eb;
	int i;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = 0; i < *level; i++) {
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}

	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] + 1 < nritems) {
			path->slots[i]++;
			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
			    last_snapshot)
				continue;

			*level = i;
			return 0;
		}
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}
	return 1;
}

/*
 * walk down reloc tree to find relocated block at lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
			 int *level)
{
	struct extent_buffer *eb = NULL;
	int i;
	u64 ptr_gen = 0;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = *level; i > 0; i--) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] < nritems) {
			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
			if (ptr_gen > last_snapshot)
				break;
			path->slots[i]++;
		}
		if (path->slots[i] >= nritems) {
			if (i == *level)
				break;
			*level = i + 1;
			return 0;
		}
		if (i == 1) {
			*level = i;
			return 0;
		}

		eb = btrfs_read_node_slot(eb, path->slots[i]);
		if (IS_ERR(eb))
			return PTR_ERR(eb);
		BUG_ON(btrfs_header_level(eb) != i - 1);
		path->nodes[i - 1] = eb;
		path->slots[i - 1] = 0;
	}
	return 1;
}

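/*
 * invalidate extent cache for file extents whose key in range of
 * [min_key, max_key)
 */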
static int invalidate_extent_cache(struct btrfs_root *root,
				   struct btrfs_key *min_key,
				   struct btrfs_key *max_key)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct inode *inode = NULL;
	u64 objectid;
	u64 start, end;
	u64 ino;

	objectid = min_key->objectid;
	while (1) {
		cond_resched();
		iput(inode);

		if (objectid > max_key->objectid)
			break;

		inode = find_next_inode(root, objectid);
		if (!inode)
			break;
		ino = btrfs_ino(BTRFS_I(inode));

		if (ino > max_key->objectid) {
			iput(inode);
			break;
		}

		objectid = ino + 1;
		if (!S_ISREG(inode->i_mode))
			continue;

		if (unlikely(min_key->objectid == ino)) {
			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
				continue;
			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
				start = 0;
			else {
				start = min_key->offset;
				WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
			}
		} else {
			start = 0;
		}

		if (unlikely(max_key->objectid == ino)) {
			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
				continue;
			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
				end = (u64)-1;
			} else {
				if (max_key->offset == 0)
					continue;
				end = max_key->offset;
				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
				end--;
			}
		} else {
			end = (u64)-1;
		}

		/* the lock_extent() waits for ongoing reads to complete */
		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
	}
	return 0;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
			return 0;
		}
		level++;
	}
	return 1;
}

/*
 * Insert current subvolume into reloc_control::dirty_subvol_roots
 */
static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct btrfs_root *root)
{
	struct btrfs_root *reloc_root = root->reloc_root;
	struct btrfs_root_item *reloc_root_item;
	int ret;

	/* @root must be a subvolume tree root with a valid reloc tree */
	ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
	ASSERT(reloc_root);

	reloc_root_item = &reloc_root->root_item;
	memset(&reloc_root_item->drop_progress, 0,
		sizeof(reloc_root_item->drop_progress));
	btrfs_set_root_drop_level(reloc_root_item, 0);
	btrfs_set_root_refs(reloc_root_item, 0);
	ret = btrfs_update_reloc_root(trans, root);
	if (ret)
		return ret;

	if (list_empty(&root->reloc_dirty_list)) {
		btrfs_grab_root(root);
		list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
	}

	return 0;
}

static int clean_dirty_subvols(struct reloc_control *rc)
{
	struct btrfs_root *root;
	struct btrfs_root *next;
	int ret = 0;
	int ret2;

	list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
				 reloc_dirty_list) {
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			/* Merged subvolume, cleanup its reloc root */
			struct btrfs_root *reloc_root = root->reloc_root;

			list_del_init(&root->reloc_dirty_list);
			root->reloc_root = NULL;
			/*
			 * Make sure reloc_root is cleared before the DEAD bit.
			 * Pairs with the smp_rmb() in reloc_root_is_dead().
			 */
			smp_wmb();
			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
			if (reloc_root) {
				/*
				 * btrfs_drop_snapshot() consumes our reference
				 * on success; on error we are responsible for
				 * putting the reloc root ourselves.
				 */
				ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
				if (ret2 < 0) {
					btrfs_put_root(reloc_root);
					if (!ret)
						ret = ret2;
				}
			}
			btrfs_put_root(root);
		} else {
			/* Orphan reloc tree, just clean it up */
			ret2 = btrfs_drop_snapshot(root, 0, 1);
			if (ret2 < 0) {
				btrfs_put_root(root);
				if (!ret)
					ret = ret2;
			}
		}
	}
	return ret;
}

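/*
 * merge the relocated tree blocks in reloc tree with corresponding
 * fs tree.
 */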
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
					       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_key key;
	struct btrfs_key next_key;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int reserve_level;
	int level;
	int max_level;
	int replaced = 0;
	int ret = 0;
	u32 min_reserved;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_FORWARD;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_root_level(root_item);
		atomic_inc(&reloc_root->node->refs);
		path->nodes[level] = reloc_root->node;
		path->slots[level] = 0;
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

		level = btrfs_root_drop_level(root_item);
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			btrfs_free_path(path);
			return ret;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
				      path->slots[level]);
		WARN_ON(memcmp(&key, &next_key, sizeof(key)));

		btrfs_unlock_up_safe(path, 0);
	}

	/*
	 * In merge_reloc_root(), we modify the upper level pointer to swap the
	 * tree blocks between reloc tree and subvolume tree.  Thus for tree
	 * block COW, we COW at most from level 1 to root level for each tree.
	 *
	 * Thus the needed metadata size is at most root_level * nodesize,
	 * and * 2 since we have two trees to COW.
	 */
	reserve_level = max_t(int, 1, btrfs_root_level(root_item));
	min_reserved = fs_info->nodesize * reserve_level * 2;
	memset(&next_key, 0, sizeof(next_key));

	while (1) {
		ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv,
					     min_reserved,
					     BTRFS_RESERVE_FLUSH_LIMIT);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}

		/*
		 * Reloc roots are not recorded by btrfs_record_root_in_trans(),
		 * so keep last_trans up to date by hand while we modify the
		 * reloc tree in this transaction.
		 */
		reloc_root->last_trans = trans->transid;
		trans->block_rsv = rc->block_rsv;

		replaced = 0;
		max_level = level;

		ret = walk_down_reloc_tree(reloc_root, path, &level);
		if (ret < 0)
			goto out;
		if (ret > 0)
			break;

		if (!find_next_key(path, level, &key) &&
		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
			ret = 0;
		} else {
			ret = replace_path(trans, rc, root, reloc_root, path,
					   &next_key, level, max_level);
		}
		if (ret < 0)
			goto out;
		if (ret > 0) {
			level = ret;
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			replaced = 1;
		}

		ret = walk_up_reloc_tree(reloc_root, path, &level);
		if (ret > 0)
			break;

		BUG_ON(level == 0);
		/*
		 * save the merging progress in the drop_progress.
		 * this is OK since root refs == 1 in this case.
		 */
		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
			       path->slots[level]);
		btrfs_set_root_drop_level(root_item, level);

		btrfs_end_transaction_throttle(trans);
		trans = NULL;

		btrfs_btree_balance_dirty(fs_info);

		if (replaced && rc->stage == UPDATE_DATA_PTRS)
			invalidate_extent_cache(root, &key, &next_key);
	}

	/*
	 * handle the case only one block in the fs tree need to be
	 * relocated and the block is tree root.
	 */
	leaf = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
			      BTRFS_NESTING_COW);
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
out:
	btrfs_free_path(path);

	if (ret == 0) {
		ret = insert_dirty_subvol(trans, rc, root);
		if (ret)
			btrfs_abort_transaction(trans, ret);
	}

	if (trans)
		btrfs_end_transaction_throttle(trans);

	btrfs_btree_balance_dirty(fs_info);

	if (replaced && rc->stage == UPDATE_DATA_PTRS)
		invalidate_extent_cache(root, &key, &next_key);

	return ret;
}

static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	LIST_HEAD(reloc_roots);
	u64 num_bytes = 0;
	int ret;

	mutex_lock(&fs_info->reloc_mutex);
	rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
	rc->merging_rsv_size += rc->nodes_relocated * 2;
	mutex_unlock(&fs_info->reloc_mutex);

again:
	if (!err) {
		num_bytes = rc->merging_rsv_size;
		ret = btrfs_block_rsv_add(fs_info, rc->block_rsv, num_bytes,
					  BTRFS_RESERVE_FLUSH_ALL);
		if (ret)
			err = ret;
	}

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		if (!err)
			btrfs_block_rsv_release(fs_info, rc->block_rsv,
						num_bytes, NULL);
		return PTR_ERR(trans);
	}

	if (!err) {
		if (num_bytes != rc->merging_rsv_size) {
			btrfs_end_transaction(trans);
			btrfs_block_rsv_release(fs_info, rc->block_rsv,
						num_bytes, NULL);
			goto again;
		}
	}

	rc->merge_reloc_tree = 1;

	while (!list_empty(&rc->reloc_roots)) {
		reloc_root = list_entry(rc->reloc_roots.next,
					struct btrfs_root, root_list);
		list_del_init(&reloc_root->root_list);

		root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
				false);
		if (IS_ERR(root)) {
			/*
			 * Even if we have an error we need this reloc root
			 * back on our list so we can clean it up properly.
			 */
			list_add(&reloc_root->root_list, &reloc_roots);
			btrfs_abort_transaction(trans, (int)PTR_ERR(root));
			if (!err)
				err = PTR_ERR(root);
			break;
		}
		ASSERT(root->reloc_root == reloc_root);

		/*
		 * set reference count to 1, so btrfs_recover_relocation
		 * knows it should resumes merging
		 */
		if (!err)
			btrfs_set_root_refs(&reloc_root->root_item, 1);
		ret = btrfs_update_reloc_root(trans, root);

		/*
		 * Even if we have an error we need this reloc root back on our
		 * list so we can clean it up properly.
		 */
		list_add(&reloc_root->root_list, &reloc_roots);
		btrfs_put_root(root);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (!err)
				err = ret;
			break;
		}
	}

	list_splice(&reloc_roots, &rc->reloc_roots);

	if (!err)
		err = btrfs_commit_transaction(trans);
	else
		btrfs_end_transaction(trans);
	return err;
}

static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{
	struct btrfs_root *reloc_root, *tmp;

	list_for_each_entry_safe(reloc_root, tmp, list, root_list)
		__del_reloc_root(reloc_root);
}

static noinline_for_stack
void merge_reloc_roots(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_root *root;
	struct btrfs_root *reloc_root;
	LIST_HEAD(reloc_roots);
	int found = 0;
	int ret = 0;
again:
	root = rc->extent_root;

	/*
	 * this serializes us with btrfs_record_root_in_trans,
	 * we have to make sure nobody is in the middle of
	 * adding their roots to the list while we are
	 * doing this splice
	 */
	mutex_lock(&fs_info->reloc_mutex);
	list_splice_init(&rc->reloc_roots, &reloc_roots);
	mutex_unlock(&fs_info->reloc_mutex);

	while (!list_empty(&reloc_roots)) {
		found = 1;
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);

		root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
					 false);
		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			if (IS_ERR(root)) {
				/*
				 * The corresponding fs root should always
				 * exist at this point; a lookup failure here
				 * indicates corruption or a bug.
				 */
				ASSERT(0);
				ret = PTR_ERR(root);
				goto out;
			}
			if (root->reloc_root != reloc_root) {
				/*
				 * The fs root must point back at this reloc
				 * root, anything else is corruption.
				 */
				ASSERT(0);
				ret = -EINVAL;
				goto out;
			}
			ret = merge_reloc_root(rc, root);
			btrfs_put_root(root);
			if (ret) {
				if (list_empty(&reloc_root->root_list))
					list_add_tail(&reloc_root->root_list,
						      &reloc_roots);
				goto out;
			}
		} else {
			if (!IS_ERR(root)) {
				if (root->reloc_root == reloc_root) {
					root->reloc_root = NULL;
					btrfs_put_root(reloc_root);
				}
				clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
					  &root->state);
				btrfs_put_root(root);
			}

			list_del_init(&reloc_root->root_list);
			/* Don't forget to queue this reloc root for cleanup */
			list_add_tail(&reloc_root->reloc_dirty_list,
				      &rc->dirty_subvol_roots);
		}
	}

	if (found) {
		found = 0;
		goto again;
	}
out:
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret, NULL);
		free_reloc_roots(&reloc_roots);

		/* new reloc root may be added */
		mutex_lock(&fs_info->reloc_mutex);
		list_splice_init(&rc->reloc_roots, &reloc_roots);
		mutex_unlock(&fs_info->reloc_mutex);
		free_reloc_roots(&reloc_roots);
	}

	/*
	 * If we failed in prepare_to_merge() we may only have 0-ref reloc
	 * roots left that were never removed from the reloc_root_tree rb
	 * tree.  That is fine here because we hold a reference on each root
	 * for the list that holds it, so these roots will be cleaned up when
	 * we process the reloc_dirty_list, and root->reloc_root will be
	 * cleaned up on unmount.  The remaining nodes will be cleaned up by
	 * free_reloc_control.
	 */
}

static void free_block_list(struct rb_root *blocks)
{
	struct tree_block *block;
	struct rb_node *rb_node;
	while ((rb_node = rb_first(blocks))) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		rb_erase(rb_node, blocks);
		kfree(block);
	}
}

static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{
	struct btrfs_fs_info *fs_info = reloc_root->fs_info;
	struct btrfs_root *root;
	int ret;

	if (reloc_root->last_trans == trans->transid)
		return 0;

	root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);

	/*
	 * This should succeed, since we can't have a reloc root without having
	 * already looked up the actual root and created the reloc root for
	 * this root.
	 *
	 * However if there's some sort of corruption where we have a ref to a
	 * reloc root without a corresponding root this could return ENOENT.
	 */
	if (IS_ERR(root)) {
		ASSERT(0);
		return PTR_ERR(root);
	}
	if (root->reloc_root != reloc_root) {
		ASSERT(0);
		btrfs_err(fs_info,
			  "root %llu has two reloc roots associated with it",
			  reloc_root->root_key.offset);
		btrfs_put_root(root);
		return -EUCLEAN;
	}
	ret = btrfs_record_root_in_trans(trans, root);
	btrfs_put_root(root);

	return ret;
}

static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct btrfs_backref_node *node,
				     struct btrfs_backref_edge *edges[])
{
	struct btrfs_backref_node *next;
	struct btrfs_root *root;
	int index = 0;
	int ret;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;

		/*
		 * If there is no root, then our references for this block are
		 * incomplete, as we should be able to walk all the way up to a
		 * block that is owned by a root.
		 *
		 * This path is only for SHAREABLE roots, so if we come upon a
		 * non-SHAREABLE root then we have backrefs that resolve
		 * improperly.
		 *
		 * Both of these cases indicate file system corruption, or a
		 * bug in the backref walking code.
		 */
		if (!root) {
			ASSERT(0);
			btrfs_err(trans->fs_info,
		"bytenr %llu doesn't have a backref path ending in a root",
				  node->bytenr);
			return ERR_PTR(-EUCLEAN);
		}
		if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
			ASSERT(0);
			btrfs_err(trans->fs_info,
	"bytenr %llu has multiple refs with one ending in a non-shareable root",
				  node->bytenr);
			return ERR_PTR(-EUCLEAN);
		}

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			ret = record_reloc_root_in_trans(trans, root);
			if (ret)
				return ERR_PTR(ret);
			break;
		}

		ret = btrfs_record_root_in_trans(trans, root);
		if (ret)
			return ERR_PTR(ret);
		root = root->reloc_root;

		/*
		 * We could have raced with another thread which failed, so
		 * root->reloc_root may not be set, return ENOENT in this case.
		 */
		if (!root)
			return ERR_PTR(-ENOENT);

		if (next->new_bytenr != root->node->start) {
			/*
			 * We just created the reloc root, so we shouldn't have
			 * ->new_bytenr set yet.  If it is then we have multiple
			 * roots pointing at the same bytenr which indicates
			 * corruption, or a bug in the backref walking code.
			 */
			ASSERT(next->new_bytenr == 0);
			ASSERT(list_empty(&next->list));
			if (next->new_bytenr || !list_empty(&next->list)) {
				btrfs_err(trans->fs_info,
	"bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
					  node->bytenr, next->bytenr);
				return ERR_PTR(-EUCLEAN);
			}

			next->new_bytenr = root->node->start;
			btrfs_put_root(next->root);
			next->root = btrfs_grab_root(root);
			ASSERT(next->root);
			list_add_tail(&next->list,
				      &rc->backref_cache.changed);
			mark_block_processed(rc, next);
			break;
		}

		WARN_ON(1);
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}
	if (!root) {
		/*
		 * This can happen if there's fs corruption or if there's a bug
		 * in the backref lookup code.
		 */
		ASSERT(0);
		return ERR_PTR(-ENOENT);
	}

	next = node;
	/* setup backref node path for btrfs_reloc_cow_block */
	while (1) {
		rc->backref_cache.path[next->level] = next;
		if (--index < 0)
			break;
		next = edges[index]->node[UPPER];
	}
	return root;
}

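/*
 * select a tree root for relocation. return NULL if the block
 * is reference counted. we should use do_relocation() in this
 * case. return a tree root pointer if the block isn't reference
 * counted. return -ENOENT if the block is root of reloc tree.
 */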
static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
{
	struct btrfs_backref_node *next;
	struct btrfs_root *root;
	struct btrfs_root *fs_root = NULL;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;

		/*
		 * This can occur if we have incomplete extent refs leading all
		 * the way up a particular path, in this case return -EUCLEAN.
		 */
		if (!root)
			return ERR_PTR(-EUCLEAN);

		/* No other choice for non-shareable tree */
		if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
			return root;

		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
			fs_root = root;

		if (next != node)
			return NULL;

		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}

	if (!fs_root)
		return ERR_PTR(-ENOENT);
	return fs_root;
}

static noinline_for_stack
u64 calcu_metadata_size(struct reloc_control *rc,
			struct btrfs_backref_node *node, int reserve)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
	struct btrfs_backref_node *next = node;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	u64 num_bytes = 0;
	int index = 0;

	BUG_ON(reserve && node->processed);

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed && (reserve || next != node))
				break;

			num_bytes += fs_info->nodesize;

			if (list_empty(&next->upper))
				break;

			edge = list_entry(next->upper.next,
					  struct btrfs_backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
	return num_bytes;
}

static int reserve_metadata_space(struct btrfs_trans_handle *trans,
				  struct reloc_control *rc,
				  struct btrfs_backref_node *node)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 num_bytes;
	int ret;
	u64 tmp;

	num_bytes = calcu_metadata_size(rc, node, 1) * 2;

	trans->block_rsv = rc->block_rsv;
	rc->reserved_bytes += num_bytes;

	/*
	 * We are under a transaction here so we can only do limited flushing.
	 * If we get an enospc just kick back -EAGAIN so we know to drop the
	 * transaction and try to refill when we can flush all the things.
	 */
	ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv, num_bytes,
				     BTRFS_RESERVE_FLUSH_LIMIT);
	if (ret) {
		tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
		while (tmp <= rc->reserved_bytes)
			tmp <<= 1;
		/*
		 * only one thread can access block_rsv at this point,
		 * so we don't need hold lock to protect block_rsv.
		 * we expand more reservation size here to allow enough
		 * space for relocation and we will return earlier in
		 * enospc case.
		 */
		rc->block_rsv->size = tmp + fs_info->nodesize *
				      RELOCATION_RESERVED_NODES;
		return -EAGAIN;
	}

	return 0;
}

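/*
 * relocate a block tree, and then update pointers in upper level
 * blocks that reference the block to point to the new location.
 *
 * if called by link_to_upper, the block has already been relocated.
 * in that case this function just updates pointers.
 */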
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{
	struct btrfs_backref_node *upper;
	struct btrfs_backref_edge *edge;
	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u32 blocksize;
	u64 bytenr;
	int slot;
	int ret = 0;

	/*
	 * If we are lowest then this is the first time we're processing this
	 * block, and thus shouldn't have an eb associated with it yet.
	 */
	ASSERT(!lowest || !node->eb);

	path->lowest_level = node->level + 1;
	rc->backref_cache.path[node->level] = node;
	list_for_each_entry(edge, &node->upper, list[LOWER]) {
		struct btrfs_ref ref = { 0 };

		cond_resched();

		upper = edge->node[UPPER];
		root = select_reloc_root(trans, rc, upper, edges);
		if (IS_ERR(root)) {
			ret = PTR_ERR(root);
			goto next;
		}

		if (upper->eb && !upper->locked) {
			if (!lowest) {
				ret = btrfs_bin_search(upper->eb, key, &slot);
				if (ret < 0)
					goto next;
				BUG_ON(ret);
				bytenr = btrfs_node_blockptr(upper->eb, slot);
				if (node->eb->start == bytenr)
					goto next;
			}
			btrfs_backref_drop_node_buffer(upper);
		}

		if (!upper->eb) {
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			if (ret) {
				if (ret > 0)
					ret = -ENOENT;

				btrfs_release_path(path);
				break;
			}

			if (!upper->eb) {
				upper->eb = path->nodes[upper->level];
				path->nodes[upper->level] = NULL;
			} else {
				BUG_ON(upper->eb != path->nodes[upper->level]);
			}

			upper->locked = 1;
			path->locks[upper->level] = 0;

			slot = path->slots[upper->level];
			btrfs_release_path(path);
		} else {
			ret = btrfs_bin_search(upper->eb, key, &slot);
			if (ret < 0)
				goto next;
			BUG_ON(ret);
		}

		bytenr = btrfs_node_blockptr(upper->eb, slot);
		if (lowest) {
			if (bytenr != node->bytenr) {
				btrfs_err(root->fs_info,
	"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
					  bytenr, node->bytenr, slot,
					  upper->eb->start);
				ret = -EIO;
				goto next;
			}
		} else {
			if (node->eb->start == bytenr)
				goto next;
		}

		blocksize = root->fs_info->nodesize;
		eb = btrfs_read_node_slot(upper->eb, slot);
		if (IS_ERR(eb)) {
			ret = PTR_ERR(eb);
			goto next;
		}
		btrfs_tree_lock(eb);

		if (!node->eb) {
			ret = btrfs_cow_block(trans, root, eb, upper->eb,
					      slot, &eb, BTRFS_NESTING_COW);
			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			if (ret < 0)
				goto next;
			/*
			 * We've just COWed this block, it should have updated
			 * the correct backref node entry.
			 */
			ASSERT(node->eb == eb);
		} else {
			btrfs_set_node_blockptr(upper->eb, slot,
						node->eb->start);
			btrfs_set_node_ptr_generation(upper->eb, slot,
						      trans->transid);
			btrfs_mark_buffer_dirty(upper->eb);

			btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
					       node->eb->start, blocksize,
					       upper->eb->start);
			btrfs_init_tree_ref(&ref, node->level,
					    btrfs_header_owner(upper->eb),
					    root->root_key.objectid, false);
			ret = btrfs_inc_extent_ref(trans, &ref);
			if (!ret)
				ret = btrfs_drop_subtree(trans, root, eb,
							 upper->eb);
			if (ret)
				btrfs_abort_transaction(trans, ret);
		}
next:
		if (!upper->pending)
			btrfs_backref_drop_node_buffer(upper);
		else
			btrfs_backref_unlock_node_buffer(upper);
		if (ret)
			break;
	}

	if (!ret && node->pending) {
		btrfs_backref_drop_node_buffer(node);
		list_move_tail(&node->list, &rc->backref_cache.changed);
		node->pending = 0;
	}

	path->lowest_level = 0;

	/*
	 * We should have allocated all of our space in the block rsv and thus
	 * shouldn't ENOSPC.
	 */
	ASSERT(ret != -ENOSPC);
	return ret;
}

static int link_to_upper(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc,
			 struct btrfs_backref_node *node,
			 struct btrfs_path *path)
{
	struct btrfs_key key;

	btrfs_node_key_to_cpu(node->eb, &key, 0);
	return do_relocation(trans, rc, node, &key, path, 0);
}

static int finish_pending_nodes(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_path *path, int err)
{
	LIST_HEAD(list);
	struct btrfs_backref_cache *cache = &rc->backref_cache;
	struct btrfs_backref_node *node;
	int level;
	int ret;

	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		while (!list_empty(&cache->pending[level])) {
			node = list_entry(cache->pending[level].next,
					  struct btrfs_backref_node, list);
			list_move_tail(&node->list, &list);
			BUG_ON(!node->pending);

			if (!err) {
				ret = link_to_upper(trans, rc, node, path);
				if (ret < 0)
					err = ret;
			}
		}
		list_splice_init(&list, &cache->pending[level]);
	}
	return err;
}

/*
 * Mark a block and all blocks that directly or indirectly reference the
 * block as processed.
 */
2557static void update_processed_blocks(struct reloc_control *rc,
2558 struct btrfs_backref_node *node)
2559{
2560 struct btrfs_backref_node *next = node;
2561 struct btrfs_backref_edge *edge;
2562 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2563 int index = 0;
2564
2565 while (next) {
2566 cond_resched();
2567 while (1) {
2568 if (next->processed)
2569 break;
2570
2571 mark_block_processed(rc, next);
2572
2573 if (list_empty(&next->upper))
2574 break;
2575
2576 edge = list_entry(next->upper.next,
2577 struct btrfs_backref_edge, list[LOWER]);
2578 edges[index++] = edge;
2579 next = edge->node[UPPER];
2580 }
2581 next = walk_down_backref(edges, &index);
2582 }
2583}
2584
2585static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
2586{
2587 u32 blocksize = rc->extent_root->fs_info->nodesize;
2588
2589 if (test_range_bit(&rc->processed_blocks, bytenr,
2590 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2591 return 1;
2592 return 0;
2593}
2594
2595static int get_tree_block_key(struct btrfs_fs_info *fs_info,
2596 struct tree_block *block)
2597{
2598 struct extent_buffer *eb;
2599
2600 eb = read_tree_block(fs_info, block->bytenr, block->owner,
2601 block->key.offset, block->level, NULL);
2602 if (IS_ERR(eb))
2603 return PTR_ERR(eb);
2604 if (!extent_buffer_uptodate(eb)) {
2605 free_extent_buffer(eb);
2606 return -EIO;
2607 }
2608 if (block->level == 0)
2609 btrfs_item_key_to_cpu(eb, &block->key, 0);
2610 else
2611 btrfs_node_key_to_cpu(eb, &block->key, 0);
2612 free_extent_buffer(eb);
2613 block->key_ready = 1;
2614 return 0;
2615}
2616
/*
 * Relocate a tree block.
 */
2620static int relocate_tree_block(struct btrfs_trans_handle *trans,
2621 struct reloc_control *rc,
2622 struct btrfs_backref_node *node,
2623 struct btrfs_key *key,
2624 struct btrfs_path *path)
2625{
2626 struct btrfs_root *root;
2627 int ret = 0;
2628
2629 if (!node)
2630 return 0;
2631
	/*
	 * If we fail here we want to drop our backref_node, because we are
	 * going to start over and regenerate the tree for it.
	 */
2636 ret = reserve_metadata_space(trans, rc, node);
2637 if (ret)
2638 goto out;
2639
2640 BUG_ON(node->processed);
2641 root = select_one_root(node);
2642 if (IS_ERR(root)) {
2643 ret = PTR_ERR(root);
2644
2645
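		/* Only -ENOENT is expected here; the block is then treated as processed. */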
2646 ASSERT(ret == -ENOENT);
2647 if (ret == -ENOENT) {
2648 ret = 0;
2649 update_processed_blocks(rc, node);
2650 }
2651 goto out;
2652 }
2653
2654 if (root) {
2655 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
			/*
			 * This block was the root block of a root, and this is
			 * the first time we're processing the block and thus it
			 * should not have had the ->new_bytenr modified and
			 * should have not been included on the changed list.
			 *
			 * However in the case of corruption we could have
			 * multiple refs pointing to the same block improperly,
			 * and thus we would trip over these checks.  ASSERT()
			 * for the developer case, because it could indicate a
			 * bug in the backref code, however error out for a
			 * corrupt filesystem, because we can't assert the
			 * integrity of the snapshot.
			 */
2669 ASSERT(node->new_bytenr == 0);
2670 ASSERT(list_empty(&node->list));
2671 if (node->new_bytenr || !list_empty(&node->list)) {
2672 btrfs_err(root->fs_info,
2673 "bytenr %llu has improper references to it",
2674 node->bytenr);
2675 ret = -EUCLEAN;
2676 goto out;
2677 }
2678 ret = btrfs_record_root_in_trans(trans, root);
2679 if (ret)
2680 goto out;

			/*
			 * Another thread could have failed, need to check if we
			 * have reloc_root actually set.
			 */
2685 if (!root->reloc_root) {
2686 ret = -ENOENT;
2687 goto out;
2688 }
2689 root = root->reloc_root;
2690 node->new_bytenr = root->node->start;
2691 btrfs_put_root(node->root);
2692 node->root = btrfs_grab_root(root);
2693 ASSERT(node->root);
2694 list_add_tail(&node->list, &rc->backref_cache.changed);
2695 } else {
2696 path->lowest_level = node->level;
2697 if (root == root->fs_info->chunk_root)
2698 btrfs_reserve_chunk_metadata(trans, false);
2699 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2700 btrfs_release_path(path);
2701 if (root == root->fs_info->chunk_root)
2702 btrfs_trans_release_chunk_metadata(trans);
2703 if (ret > 0)
2704 ret = 0;
2705 }
2706 if (!ret)
2707 update_processed_blocks(rc, node);
2708 } else {
2709 ret = do_relocation(trans, rc, node, key, path, 1);
2710 }
2711out:
2712 if (ret || node->level == 0 || node->cowonly)
2713 btrfs_backref_cleanup_node(&rc->backref_cache, node);
2714 return ret;
2715}
2716
/*
 * Relocate a list of blocks.
 */
2720static noinline_for_stack
2721int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2722 struct reloc_control *rc, struct rb_root *blocks)
2723{
2724 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2725 struct btrfs_backref_node *node;
2726 struct btrfs_path *path;
2727 struct tree_block *block;
2728 struct tree_block *next;
2729 int ret;
2730 int err = 0;
2731
2732 path = btrfs_alloc_path();
2733 if (!path) {
2734 err = -ENOMEM;
2735 goto out_free_blocks;
2736 }
2737
2738
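	/* Kick in readahead for tree blocks with missing keys */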
2739 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2740 if (!block->key_ready)
2741 btrfs_readahead_tree_block(fs_info, block->bytenr,
2742 block->owner, 0,
2743 block->level);
2744 }
2745
2746
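	/* Get first keys of the blocks that were not cached yet */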
2747 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2748 if (!block->key_ready) {
2749 err = get_tree_block_key(fs_info, block);
2750 if (err)
2751 goto out_free_path;
2752 }
2753 }
2754
2755
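	/* Do tree relocation */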
2756 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2757 node = build_backref_tree(rc, &block->key,
2758 block->level, block->bytenr);
2759 if (IS_ERR(node)) {
2760 err = PTR_ERR(node);
2761 goto out;
2762 }
2763
2764 ret = relocate_tree_block(trans, rc, node, &block->key,
2765 path);
2766 if (ret < 0) {
2767 err = ret;
2768 break;
2769 }
2770 }
2771out:
2772 err = finish_pending_nodes(trans, rc, path, err);
2773
2774out_free_path:
2775 btrfs_free_path(path);
2776out_free_blocks:
2777 free_block_list(blocks);
2778 return err;
2779}
2780
2781static noinline_for_stack int prealloc_file_extent_cluster(
2782 struct btrfs_inode *inode,
2783 struct file_extent_cluster *cluster)
2784{
2785 u64 alloc_hint = 0;
2786 u64 start;
2787 u64 end;
2788 u64 offset = inode->index_cnt;
2789 u64 num_bytes;
2790 int nr;
2791 int ret = 0;
2792 u64 i_size = i_size_read(&inode->vfs_inode);
2793 u64 prealloc_start = cluster->start - offset;
2794 u64 prealloc_end = cluster->end - offset;
2795 u64 cur_offset = prealloc_start;
2796
	/*
	 * For subpage case, previous i_size may not be aligned to PAGE_SIZE.
	 * This means the range [i_size, round_up(i_size, PAGE_SIZE)) is filled
	 * with zeros by the btrfs_do_readpage() call of a previously relocated
	 * file cluster.
	 *
	 * If the current cluster starts in the above range, btrfs_do_readpage()
	 * will skip the read, and relocate_one_page() will later write back
	 * the padding zeros as new data, causing data corruption.
	 *
	 * Here we have to manually invalidate that tail range first.
	 */
2808 if (!IS_ALIGNED(i_size, PAGE_SIZE)) {
2809 struct address_space *mapping = inode->vfs_inode.i_mapping;
2810 struct btrfs_fs_info *fs_info = inode->root->fs_info;
2811 const u32 sectorsize = fs_info->sectorsize;
2812 struct page *page;
2813
2814 ASSERT(sectorsize < PAGE_SIZE);
2815 ASSERT(IS_ALIGNED(i_size, sectorsize));
2816
		/*
		 * Subpage can't handle a page with DIRTY but without UPTODATE
		 * bit as it can lead to the following deadlock:
		 *
		 * btrfs_readpage()
		 * | Page already *locked*
		 * |- btrfs_lock_and_flush_ordered_range()
		 *    |- btrfs_start_ordered_extent()
		 *       |- extent_write_cache_pages()
		 *          |- lock_page()
		 *             We try to lock the page we already hold.
		 *
		 * Here we just write back the whole data reloc inode, so that
		 * we will be ensured to have no dirty range in the page, and
		 * are safe to clear the uptodate bits.
		 *
		 * This shouldn't cause too much overhead, as we need to write
		 * out the page dirty status anyway.
		 */
2836 ret = filemap_write_and_wait(mapping);
2837 if (ret < 0)
2838 return ret;
2839
2840 clear_extent_bits(&inode->io_tree, i_size,
2841 round_up(i_size, PAGE_SIZE) - 1,
2842 EXTENT_UPTODATE);
2843 page = find_lock_page(mapping, i_size >> PAGE_SHIFT);
		/*
		 * If the page is freed we don't need to do anything then, as we
		 * will re-read the whole page anyway.
		 */
2848 if (page) {
2849 btrfs_subpage_clear_uptodate(fs_info, page, i_size,
2850 round_up(i_size, PAGE_SIZE) - i_size);
2851 unlock_page(page);
2852 put_page(page);
2853 }
2854 }
2855
2856 BUG_ON(cluster->start != cluster->boundary[0]);
2857 ret = btrfs_alloc_data_chunk_ondemand(inode,
2858 prealloc_end + 1 - prealloc_start);
2859 if (ret)
2860 return ret;
2861
2862 btrfs_inode_lock(&inode->vfs_inode, 0);
2863 for (nr = 0; nr < cluster->nr; nr++) {
2864 start = cluster->boundary[nr] - offset;
2865 if (nr + 1 < cluster->nr)
2866 end = cluster->boundary[nr + 1] - 1 - offset;
2867 else
2868 end = cluster->end - offset;
2869
2870 lock_extent(&inode->io_tree, start, end);
2871 num_bytes = end + 1 - start;
2872 ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
2873 num_bytes, num_bytes,
2874 end + 1, &alloc_hint);
2875 cur_offset = end + 1;
2876 unlock_extent(&inode->io_tree, start, end);
2877 if (ret)
2878 break;
2879 }
2880 btrfs_inode_unlock(&inode->vfs_inode, 0);
2881
2882 if (cur_offset < prealloc_end)
2883 btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
2884 prealloc_end + 1 - cur_offset);
2885 return ret;
2886}
2887
2888static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
2889 u64 start, u64 end, u64 block_start)
2890{
2891 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2892 struct extent_map *em;
2893 int ret = 0;
2894
2895 em = alloc_extent_map();
2896 if (!em)
2897 return -ENOMEM;
2898
2899 em->start = start;
2900 em->len = end + 1 - start;
2901 em->block_len = em->len;
2902 em->block_start = block_start;
2903 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2904
2905 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2906 while (1) {
2907 write_lock(&em_tree->lock);
2908 ret = add_extent_mapping(em_tree, em, 0);
2909 write_unlock(&em_tree->lock);
2910 if (ret != -EEXIST) {
2911 free_extent_map(em);
2912 break;
2913 }
2914 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
2915 }
2916 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2917 return ret;
2918}
2919
/*
 * Allow error injection to test balance/relocation cancellation.
 */
2923noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2924{
2925 return atomic_read(&fs_info->balance_cancel_req) ||
2926 atomic_read(&fs_info->reloc_cancel_req) ||
2927 fatal_signal_pending(current);
2928}
2929ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2930
2931static u64 get_cluster_boundary_end(struct file_extent_cluster *cluster,
2932 int cluster_nr)
2933{
2934
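	/* Last extent, use cluster end directly */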
2935 if (cluster_nr >= cluster->nr - 1)
2936 return cluster->end;
2937
2938
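	/* Otherwise the extent ends right before the next boundary */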
2939 return cluster->boundary[cluster_nr + 1] - 1;
2940}
2941
2942static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
2943 struct file_extent_cluster *cluster,
2944 int *cluster_nr, unsigned long page_index)
2945{
2946 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2947 u64 offset = BTRFS_I(inode)->index_cnt;
2948 const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
2949 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
2950 struct page *page;
2951 u64 page_start;
2952 u64 page_end;
2953 u64 cur;
2954 int ret;
2955
2956 ASSERT(page_index <= last_index);
2957 page = find_lock_page(inode->i_mapping, page_index);
2958 if (!page) {
2959 page_cache_sync_readahead(inode->i_mapping, ra, NULL,
2960 page_index, last_index + 1 - page_index);
2961 page = find_or_create_page(inode->i_mapping, page_index, mask);
2962 if (!page)
2963 return -ENOMEM;
2964 }
2965 ret = set_page_extent_mapped(page);
2966 if (ret < 0)
2967 goto release_page;
2968
2969 if (PageReadahead(page))
2970 page_cache_async_readahead(inode->i_mapping, ra, NULL, page,
2971 page_index, last_index + 1 - page_index);
2972
2973 if (!PageUptodate(page)) {
2974 btrfs_readpage(NULL, page);
2975 lock_page(page);
2976 if (!PageUptodate(page)) {
2977 ret = -EIO;
2978 goto release_page;
2979 }
2980 }
2981
2982 page_start = page_offset(page);
2983 page_end = page_start + PAGE_SIZE - 1;
2984
	/*
	 * Start from the cluster boundary, as for the subpage case the cluster
	 * can start inside the page.
	 */
2989 cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
2990 while (cur <= page_end) {
2991 u64 extent_start = cluster->boundary[*cluster_nr] - offset;
2992 u64 extent_end = get_cluster_boundary_end(cluster,
2993 *cluster_nr) - offset;
2994 u64 clamped_start = max(page_start, extent_start);
2995 u64 clamped_end = min(page_end, extent_end);
2996 u32 clamped_len = clamped_end + 1 - clamped_start;
2997
2998
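		/* Reserve metadata for this range */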
2999 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
3000 clamped_len, clamped_len);
3001 if (ret)
3002 goto release_page;
3003
3004
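		/* Mark the range delalloc and dirty for later writeback */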
3005 lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
3006 ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
3007 clamped_end, 0, NULL);
3008 if (ret) {
3009 clear_extent_bits(&BTRFS_I(inode)->io_tree,
3010 clamped_start, clamped_end,
3011 EXTENT_LOCKED | EXTENT_BOUNDARY);
3012 btrfs_delalloc_release_metadata(BTRFS_I(inode),
3013 clamped_len, true);
3014 btrfs_delalloc_release_extents(BTRFS_I(inode),
3015 clamped_len);
3016 goto release_page;
3017 }
3018 btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);
3019
		/*
		 * Set the boundary if it's inside the page.
		 * Data relocation requires the destination extents to have the
		 * same size as the source.
		 * The EXTENT_BOUNDARY bit prevents the current extent from being
		 * merged with the previous extent.
		 */
3027 if (in_range(cluster->boundary[*cluster_nr] - offset,
3028 page_start, PAGE_SIZE)) {
3029 u64 boundary_start = cluster->boundary[*cluster_nr] -
3030 offset;
3031 u64 boundary_end = boundary_start +
3032 fs_info->sectorsize - 1;
3033
3034 set_extent_bits(&BTRFS_I(inode)->io_tree,
3035 boundary_start, boundary_end,
3036 EXTENT_BOUNDARY);
3037 }
3038 unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
3039 btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
3040 cur += clamped_len;
3041
3042
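		/* Crossed the extent end, go to the next extent */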
3043 if (cur >= extent_end) {
3044 (*cluster_nr)++;
3045
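			/* Just finished the last extent of the cluster, we are done */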
3046 if (*cluster_nr >= cluster->nr)
3047 break;
3048 }
3049 }
3050 unlock_page(page);
3051 put_page(page);
3052
3053 balance_dirty_pages_ratelimited(inode->i_mapping);
3054 btrfs_throttle(fs_info);
3055 if (btrfs_should_cancel_balance(fs_info))
3056 ret = -ECANCELED;
3057 return ret;
3058
3059release_page:
3060 unlock_page(page);
3061 put_page(page);
3062 return ret;
3063}
3064
3065static int relocate_file_extent_cluster(struct inode *inode,
3066 struct file_extent_cluster *cluster)
3067{
3068 u64 offset = BTRFS_I(inode)->index_cnt;
3069 unsigned long index;
3070 unsigned long last_index;
3071 struct file_ra_state *ra;
3072 int cluster_nr = 0;
3073 int ret = 0;
3074
3075 if (!cluster->nr)
3076 return 0;
3077
3078 ra = kzalloc(sizeof(*ra), GFP_NOFS);
3079 if (!ra)
3080 return -ENOMEM;
3081
3082 ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
3083 if (ret)
3084 goto out;
3085
3086 file_ra_state_init(ra, inode->i_mapping);
3087
3088 ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
3089 cluster->end - offset, cluster->start);
3090 if (ret)
3091 goto out;
3092
3093 last_index = (cluster->end - offset) >> PAGE_SHIFT;
3094 for (index = (cluster->start - offset) >> PAGE_SHIFT;
3095 index <= last_index && !ret; index++)
3096 ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
3097 if (ret == 0)
3098 WARN_ON(cluster_nr != cluster->nr);
3099out:
3100 kfree(ra);
3101 return ret;
3102}
3103
3104static noinline_for_stack
3105int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3106 struct file_extent_cluster *cluster)
3107{
3108 int ret;
3109
3110 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3111 ret = relocate_file_extent_cluster(inode, cluster);
3112 if (ret)
3113 return ret;
3114 cluster->nr = 0;
3115 }
3116
3117 if (!cluster->nr)
3118 cluster->start = extent_key->objectid;
3119 else
3120 BUG_ON(cluster->nr >= MAX_EXTENTS);
3121 cluster->end = extent_key->objectid + extent_key->offset - 1;
3122 cluster->boundary[cluster->nr] = extent_key->objectid;
3123 cluster->nr++;
3124
3125 if (cluster->nr >= MAX_EXTENTS) {
3126 ret = relocate_file_extent_cluster(inode, cluster);
3127 if (ret)
3128 return ret;
3129 cluster->nr = 0;
3130 }
3131 return 0;
3132}
3133
/*
 * Helper to add a tree block to the list.
 * The major work is getting the generation and level of the block.
 */
3138static int add_tree_block(struct reloc_control *rc,
3139 struct btrfs_key *extent_key,
3140 struct btrfs_path *path,
3141 struct rb_root *blocks)
3142{
3143 struct extent_buffer *eb;
3144 struct btrfs_extent_item *ei;
3145 struct btrfs_tree_block_info *bi;
3146 struct tree_block *block;
3147 struct rb_node *rb_node;
3148 u32 item_size;
3149 int level = -1;
3150 u64 generation;
3151 u64 owner = 0;
3152
3153 eb = path->nodes[0];
3154 item_size = btrfs_item_size(eb, path->slots[0]);
3155
3156 if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3157 item_size >= sizeof(*ei) + sizeof(*bi)) {
3158 unsigned long ptr = 0, end;
3159
3160 ei = btrfs_item_ptr(eb, path->slots[0],
3161 struct btrfs_extent_item);
3162 end = (unsigned long)ei + item_size;
3163 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3164 bi = (struct btrfs_tree_block_info *)(ei + 1);
3165 level = btrfs_tree_block_level(eb, bi);
3166 ptr = (unsigned long)(bi + 1);
3167 } else {
3168 level = (int)extent_key->offset;
3169 ptr = (unsigned long)(ei + 1);
3170 }
3171 generation = btrfs_extent_generation(eb, ei);

		/*
		 * The block has a single reference and does not use the
		 * FULL_BACKREF scheme, so its only inline backref should be a
		 * TREE_BLOCK_REF whose offset is the objectid of the owning
		 * root.  Record that owner so the tree block can later be read
		 * (and read ahead) with the correct owner root.
		 */
3188 if (btrfs_extent_refs(eb, ei) == 1 &&
3189 !(btrfs_extent_flags(eb, ei) &
3190 BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
3191 ptr < end) {
3192 struct btrfs_extent_inline_ref *iref;
3193 int type;
3194
3195 iref = (struct btrfs_extent_inline_ref *)ptr;
3196 type = btrfs_get_extent_inline_ref_type(eb, iref,
3197 BTRFS_REF_TYPE_BLOCK);
3198 if (type == BTRFS_REF_TYPE_INVALID)
3199 return -EINVAL;
3200 if (type == BTRFS_TREE_BLOCK_REF_KEY)
3201 owner = btrfs_extent_inline_ref_offset(eb, iref);
3202 }
3203 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3204 btrfs_print_v0_err(eb->fs_info);
3205 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3206 return -EINVAL;
3207 } else {
3208 BUG();
3209 }
3210
3211 btrfs_release_path(path);
3212
3213 BUG_ON(level == -1);
3214
3215 block = kmalloc(sizeof(*block), GFP_NOFS);
3216 if (!block)
3217 return -ENOMEM;
3218
3219 block->bytenr = extent_key->objectid;
3220 block->key.objectid = rc->extent_root->fs_info->nodesize;
3221 block->key.offset = generation;
3222 block->level = level;
3223 block->key_ready = 0;
3224 block->owner = owner;
3225
3226 rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
3227 if (rb_node)
3228 btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
3229 -EEXIST);
3230
3231 return 0;
3232}
3233
/*
 * Look up the extent item of the tree block at @bytenr in the extent tree
 * and queue the block for relocation.
 */
3237static int __add_tree_block(struct reloc_control *rc,
3238 u64 bytenr, u32 blocksize,
3239 struct rb_root *blocks)
3240{
3241 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3242 struct btrfs_path *path;
3243 struct btrfs_key key;
3244 int ret;
3245 bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
3246
3247 if (tree_block_processed(bytenr, rc))
3248 return 0;
3249
3250 if (rb_simple_search(blocks, bytenr))
3251 return 0;
3252
3253 path = btrfs_alloc_path();
3254 if (!path)
3255 return -ENOMEM;
3256again:
3257 key.objectid = bytenr;
3258 if (skinny) {
3259 key.type = BTRFS_METADATA_ITEM_KEY;
3260 key.offset = (u64)-1;
3261 } else {
3262 key.type = BTRFS_EXTENT_ITEM_KEY;
3263 key.offset = blocksize;
3264 }
3265
3266 path->search_commit_root = 1;
3267 path->skip_locking = 1;
3268 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3269 if (ret < 0)
3270 goto out;
3271
3272 if (ret > 0 && skinny) {
3273 if (path->slots[0]) {
3274 path->slots[0]--;
3275 btrfs_item_key_to_cpu(path->nodes[0], &key,
3276 path->slots[0]);
3277 if (key.objectid == bytenr &&
3278 (key.type == BTRFS_METADATA_ITEM_KEY ||
3279 (key.type == BTRFS_EXTENT_ITEM_KEY &&
3280 key.offset == blocksize)))
3281 ret = 0;
3282 }
3283
3284 if (ret) {
3285 skinny = false;
3286 btrfs_release_path(path);
3287 goto again;
3288 }
3289 }
3290 if (ret) {
3291 ASSERT(ret == 1);
3292 btrfs_print_leaf(path->nodes[0]);
3293 btrfs_err(fs_info,
3294 "tree block extent item (%llu) is not found in extent tree",
3295 bytenr);
3296 WARN_ON(1);
3297 ret = -EINVAL;
3298 goto out;
3299 }
3300
3301 ret = add_tree_block(rc, &key, path, blocks);
3302out:
3303 btrfs_free_path(path);
3304 return ret;
3305}
3306
3307static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3308 struct btrfs_block_group *block_group,
3309 struct inode *inode,
3310 u64 ino)
3311{
3312 struct btrfs_root *root = fs_info->tree_root;
3313 struct btrfs_trans_handle *trans;
3314 int ret = 0;
3315
3316 if (inode)
3317 goto truncate;
3318
3319 inode = btrfs_iget(fs_info->sb, ino, root);
3320 if (IS_ERR(inode))
3321 return -ENOENT;
3322
3323truncate:
3324 ret = btrfs_check_trunc_cache_free_space(fs_info,
3325 &fs_info->global_block_rsv);
3326 if (ret)
3327 goto out;
3328
3329 trans = btrfs_join_transaction(root);
3330 if (IS_ERR(trans)) {
3331 ret = PTR_ERR(trans);
3332 goto out;
3333 }
3334
3335 ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
3336
3337 btrfs_end_transaction(trans);
3338 btrfs_btree_balance_dirty(fs_info);
3339out:
3340 iput(inode);
3341 return ret;
3342}
3343
/*
 * Locate the free space cache EXTENT_DATA in a root tree leaf and delete the
 * cache inode, to avoid free space cache data extents blocking data relocation.
 */
3348static int delete_v1_space_cache(struct extent_buffer *leaf,
3349 struct btrfs_block_group *block_group,
3350 u64 data_bytenr)
3351{
3352 u64 space_cache_ino;
3353 struct btrfs_file_extent_item *ei;
3354 struct btrfs_key key;
3355 bool found = false;
3356 int i;
3357 int ret;
3358
3359 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
3360 return 0;
3361
3362 for (i = 0; i < btrfs_header_nritems(leaf); i++) {
3363 u8 type;
3364
3365 btrfs_item_key_to_cpu(leaf, &key, i);
3366 if (key.type != BTRFS_EXTENT_DATA_KEY)
3367 continue;
3368 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3369 type = btrfs_file_extent_type(leaf, ei);
3370
3371 if ((type == BTRFS_FILE_EXTENT_REG ||
3372 type == BTRFS_FILE_EXTENT_PREALLOC) &&
3373 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
3374 found = true;
3375 space_cache_ino = key.objectid;
3376 break;
3377 }
3378 }
3379 if (!found)
3380 return -ENOENT;
3381 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
3382 space_cache_ino);
3383 return ret;
3384}
3385
/*
 * Helper to find all tree blocks that reference a given data extent.
 */
3389static noinline_for_stack
3390int add_data_references(struct reloc_control *rc,
3391 struct btrfs_key *extent_key,
3392 struct btrfs_path *path,
3393 struct rb_root *blocks)
3394{
3395 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3396 struct ulist *leaves = NULL;
3397 struct ulist_iterator leaf_uiter;
3398 struct ulist_node *ref_node = NULL;
3399 const u32 blocksize = fs_info->nodesize;
3400 int ret = 0;
3401
3402 btrfs_release_path(path);
3403 ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
3404 0, &leaves, NULL, true);
3405 if (ret < 0)
3406 return ret;
3407
3408 ULIST_ITER_INIT(&leaf_uiter);
3409 while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
3410 struct extent_buffer *eb;
3411
3412 eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
3413 if (IS_ERR(eb)) {
3414 ret = PTR_ERR(eb);
3415 break;
3416 }
3417 ret = delete_v1_space_cache(eb, rc->block_group,
3418 extent_key->objectid);
3419 free_extent_buffer(eb);
3420 if (ret < 0)
3421 break;
3422 ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
3423 if (ret < 0)
3424 break;
3425 }
3426 if (ret < 0)
3427 free_block_list(blocks);
3428 ulist_free(leaves);
3429 return ret;
3430}
3431
/*
 * Helper to find the next unprocessed extent in the block group.
 */
3435static noinline_for_stack
3436int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
3437 struct btrfs_key *extent_key)
3438{
3439 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3440 struct btrfs_key key;
3441 struct extent_buffer *leaf;
3442 u64 start, end, last;
3443 int ret;
3444
3445 last = rc->block_group->start + rc->block_group->length;
3446 while (1) {
3447 cond_resched();
3448 if (rc->search_start >= last) {
3449 ret = 1;
3450 break;
3451 }
3452
3453 key.objectid = rc->search_start;
3454 key.type = BTRFS_EXTENT_ITEM_KEY;
3455 key.offset = 0;
3456
3457 path->search_commit_root = 1;
3458 path->skip_locking = 1;
3459 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3460 0, 0);
3461 if (ret < 0)
3462 break;
3463next:
3464 leaf = path->nodes[0];
3465 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3466 ret = btrfs_next_leaf(rc->extent_root, path);
3467 if (ret != 0)
3468 break;
3469 leaf = path->nodes[0];
3470 }
3471
3472 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3473 if (key.objectid >= last) {
3474 ret = 1;
3475 break;
3476 }
3477
3478 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3479 key.type != BTRFS_METADATA_ITEM_KEY) {
3480 path->slots[0]++;
3481 goto next;
3482 }
3483
3484 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3485 key.objectid + key.offset <= rc->search_start) {
3486 path->slots[0]++;
3487 goto next;
3488 }
3489
3490 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3491 key.objectid + fs_info->nodesize <=
3492 rc->search_start) {
3493 path->slots[0]++;
3494 goto next;
3495 }
3496
3497 ret = find_first_extent_bit(&rc->processed_blocks,
3498 key.objectid, &start, &end,
3499 EXTENT_DIRTY, NULL);
3500
3501 if (ret == 0 && start <= key.objectid) {
3502 btrfs_release_path(path);
3503 rc->search_start = end + 1;
3504 } else {
3505 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3506 rc->search_start = key.objectid + key.offset;
3507 else
3508 rc->search_start = key.objectid +
3509 fs_info->nodesize;
3510 memcpy(extent_key, &key, sizeof(key));
3511 return 0;
3512 }
3513 }
3514 btrfs_release_path(path);
3515 return ret;
3516}
3517
3518static void set_reloc_control(struct reloc_control *rc)
3519{
3520 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3521
3522 mutex_lock(&fs_info->reloc_mutex);
3523 fs_info->reloc_ctl = rc;
3524 mutex_unlock(&fs_info->reloc_mutex);
3525}
3526
3527static void unset_reloc_control(struct reloc_control *rc)
3528{
3529 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3530
3531 mutex_lock(&fs_info->reloc_mutex);
3532 fs_info->reloc_ctl = NULL;
3533 mutex_unlock(&fs_info->reloc_mutex);
3534}
3535
3536static noinline_for_stack
3537int prepare_to_relocate(struct reloc_control *rc)
3538{
3539 struct btrfs_trans_handle *trans;
3540 int ret;
3541
3542 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
3543 BTRFS_BLOCK_RSV_TEMP);
3544 if (!rc->block_rsv)
3545 return -ENOMEM;
3546
3547 memset(&rc->cluster, 0, sizeof(rc->cluster));
3548 rc->search_start = rc->block_group->start;
3549 rc->extents_found = 0;
3550 rc->nodes_relocated = 0;
3551 rc->merging_rsv_size = 0;
3552 rc->reserved_bytes = 0;
3553 rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
3554 RELOCATION_RESERVED_NODES;
3555 ret = btrfs_block_rsv_refill(rc->extent_root->fs_info,
3556 rc->block_rsv, rc->block_rsv->size,
3557 BTRFS_RESERVE_FLUSH_ALL);
3558 if (ret)
3559 return ret;
3560
3561 rc->create_reloc_tree = 1;
3562 set_reloc_control(rc);
3563
3564 trans = btrfs_join_transaction(rc->extent_root);
3565 if (IS_ERR(trans)) {
3566 unset_reloc_control(rc);

		/*
		 * The extent tree is not a ref_cow tree and has no reloc_root
		 * to clean up.  And callers are responsible for freeing the
		 * above block rsv.
		 */
3572 return PTR_ERR(trans);
3573 }
3574 return btrfs_commit_transaction(trans);
3575}
3576
3577static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3578{
3579 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3580 struct rb_root blocks = RB_ROOT;
3581 struct btrfs_key key;
3582 struct btrfs_trans_handle *trans = NULL;
3583 struct btrfs_path *path;
3584 struct btrfs_extent_item *ei;
3585 u64 flags;
3586 int ret;
3587 int err = 0;
3588 int progress = 0;
3589
3590 path = btrfs_alloc_path();
3591 if (!path)
3592 return -ENOMEM;
3593 path->reada = READA_FORWARD;
3594
3595 ret = prepare_to_relocate(rc);
3596 if (ret) {
3597 err = ret;
3598 goto out_free;
3599 }
3600
3601 while (1) {
3602 rc->reserved_bytes = 0;
3603 ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv,
3604 rc->block_rsv->size,
3605 BTRFS_RESERVE_FLUSH_ALL);
3606 if (ret) {
3607 err = ret;
3608 break;
3609 }
3610 progress++;
3611 trans = btrfs_start_transaction(rc->extent_root, 0);
3612 if (IS_ERR(trans)) {
3613 err = PTR_ERR(trans);
3614 trans = NULL;
3615 break;
3616 }
3617restart:
3618 if (update_backref_cache(trans, &rc->backref_cache)) {
3619 btrfs_end_transaction(trans);
3620 trans = NULL;
3621 continue;
3622 }
3623
3624 ret = find_next_extent(rc, path, &key);
3625 if (ret < 0)
3626 err = ret;
3627 if (ret != 0)
3628 break;
3629
3630 rc->extents_found++;
3631
3632 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3633 struct btrfs_extent_item);
3634 flags = btrfs_extent_flags(path->nodes[0], ei);
3635
3636 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3637 ret = add_tree_block(rc, &key, path, &blocks);
3638 } else if (rc->stage == UPDATE_DATA_PTRS &&
3639 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3640 ret = add_data_references(rc, &key, path, &blocks);
3641 } else {
3642 btrfs_release_path(path);
3643 ret = 0;
3644 }
3645 if (ret < 0) {
3646 err = ret;
3647 break;
3648 }
3649
3650 if (!RB_EMPTY_ROOT(&blocks)) {
3651 ret = relocate_tree_blocks(trans, rc, &blocks);
3652 if (ret < 0) {
3653 if (ret != -EAGAIN) {
3654 err = ret;
3655 break;
3656 }
3657 rc->extents_found--;
3658 rc->search_start = key.objectid;
3659 }
3660 }
3661
3662 btrfs_end_transaction_throttle(trans);
3663 btrfs_btree_balance_dirty(fs_info);
3664 trans = NULL;
3665
3666 if (rc->stage == MOVE_DATA_EXTENTS &&
3667 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3668 rc->found_file_extent = 1;
3669 ret = relocate_data_extent(rc->data_inode,
3670 &key, &rc->cluster);
3671 if (ret < 0) {
3672 err = ret;
3673 break;
3674 }
3675 }
3676 if (btrfs_should_cancel_balance(fs_info)) {
3677 err = -ECANCELED;
3678 break;
3679 }
3680 }
3681 if (trans && progress && err == -ENOSPC) {
3682 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
3683 if (ret == 1) {
3684 err = 0;
3685 progress = 0;
3686 goto restart;
3687 }
3688 }
3689
3690 btrfs_release_path(path);
3691 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
3692
3693 if (trans) {
3694 btrfs_end_transaction_throttle(trans);
3695 btrfs_btree_balance_dirty(fs_info);
3696 }
3697
3698 if (!err) {
3699 ret = relocate_file_extent_cluster(rc->data_inode,
3700 &rc->cluster);
3701 if (ret < 0)
3702 err = ret;
3703 }
3704
3705 rc->create_reloc_tree = 0;
3706 set_reloc_control(rc);
3707
3708 btrfs_backref_release_cache(&rc->backref_cache);
3709 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3710
	/*
	 * Even in the case when the relocation is cancelled, we should all go
	 * through prepare_to_merge() and merge_reloc_roots().
	 *
	 * For error (including cancelled balance), prepare_to_merge() will
	 * mark all reloc trees orphan, then queue them for cleanup in
	 * merge_reloc_roots().
	 */
3719 err = prepare_to_merge(rc, err);
3720
3721 merge_reloc_roots(rc);
3722
3723 rc->merge_reloc_tree = 0;
3724 unset_reloc_control(rc);
3725 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
3726
3727
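	/* Get rid of pinned extents */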
3728 trans = btrfs_join_transaction(rc->extent_root);
3729 if (IS_ERR(trans)) {
3730 err = PTR_ERR(trans);
3731 goto out_free;
3732 }
3733 ret = btrfs_commit_transaction(trans);
3734 if (ret && !err)
3735 err = ret;
3736out_free:
3737 ret = clean_dirty_subvols(rc);
3738 if (ret < 0 && !err)
3739 err = ret;
3740 btrfs_free_block_rsv(fs_info, rc->block_rsv);
3741 btrfs_free_path(path);
3742 return err;
3743}
3744
3745static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3746 struct btrfs_root *root, u64 objectid)
3747{
3748 struct btrfs_path *path;
3749 struct btrfs_inode_item *item;
3750 struct extent_buffer *leaf;
3751 int ret;
3752
3753 path = btrfs_alloc_path();
3754 if (!path)
3755 return -ENOMEM;
3756
3757 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3758 if (ret)
3759 goto out;
3760
3761 leaf = path->nodes[0];
3762 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3763 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3764 btrfs_set_inode_generation(leaf, item, 1);
3765 btrfs_set_inode_size(leaf, item, 0);
3766 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3767 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3768 BTRFS_INODE_PREALLOC);
3769 btrfs_mark_buffer_dirty(leaf);
3770out:
3771 btrfs_free_path(path);
3772 return ret;
3773}
3774
3775static void delete_orphan_inode(struct btrfs_trans_handle *trans,
3776 struct btrfs_root *root, u64 objectid)
3777{
3778 struct btrfs_path *path;
3779 struct btrfs_key key;
3780 int ret = 0;
3781
3782 path = btrfs_alloc_path();
3783 if (!path) {
3784 ret = -ENOMEM;
3785 goto out;
3786 }
3787
3788 key.objectid = objectid;
3789 key.type = BTRFS_INODE_ITEM_KEY;
3790 key.offset = 0;
3791 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3792 if (ret) {
3793 if (ret > 0)
3794 ret = -ENOENT;
3795 goto out;
3796 }
3797 ret = btrfs_del_item(trans, root, path);
3798out:
3799 if (ret)
3800 btrfs_abort_transaction(trans, ret);
3801 btrfs_free_path(path);
3802}
3803
/*
 * Helper to create an inode for data relocation.
 * The inode lives in the data relocation tree and its link count is 0.
 */
3808static noinline_for_stack
3809struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3810 struct btrfs_block_group *group)
3811{
3812 struct inode *inode = NULL;
3813 struct btrfs_trans_handle *trans;
3814 struct btrfs_root *root;
3815 u64 objectid;
3816 int err = 0;
3817
3818 root = btrfs_grab_root(fs_info->data_reloc_root);
3819 trans = btrfs_start_transaction(root, 6);
3820 if (IS_ERR(trans)) {
3821 btrfs_put_root(root);
3822 return ERR_CAST(trans);
3823 }
3824
3825 err = btrfs_get_free_objectid(root, &objectid);
3826 if (err)
3827 goto out;
3828
3829 err = __insert_orphan_inode(trans, root, objectid);
3830 if (err)
3831 goto out;
3832
3833 inode = btrfs_iget(fs_info->sb, objectid, root);
3834 if (IS_ERR(inode)) {
3835 delete_orphan_inode(trans, root, objectid);
3836 err = PTR_ERR(inode);
3837 inode = NULL;
3838 goto out;
3839 }
3840 BTRFS_I(inode)->index_cnt = group->start;
3841
3842 err = btrfs_orphan_add(trans, BTRFS_I(inode));
3843out:
3844 btrfs_put_root(root);
3845 btrfs_end_transaction(trans);
3846 btrfs_btree_balance_dirty(fs_info);
3847 if (err) {
3848 if (inode)
3849 iput(inode);
3850 inode = ERR_PTR(err);
3851 }
3852 return inode;
3853}
3854
/*
 * Mark the start of a chunk relocation that is cancellable.  Check if the
 * cancellation request is queued and let the caller know.
 */
3864static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
3865{
3866 if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
3867
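		/* This should not happen */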
3868 btrfs_err(fs_info, "reloc already running, cannot start");
3869 return -EINPROGRESS;
3870 }
3871
3872 if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
3873 btrfs_info(fs_info, "chunk relocation canceled on start");
		/*
		 * On cancel, clear all requests but let the caller mark
		 * the end after cleanup operations.
		 */
3878 atomic_set(&fs_info->reloc_cancel_req, 0);
3879 return -ECANCELED;
3880 }
3881 return 0;
3882}
3883
/*
 * Mark the end of a chunk relocation that is cancellable and wake up any
 * waiters.
 */
3887static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
3888{
3889
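	/* Requested after start, clear the bit first so any waiters can continue */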
3890 if (atomic_read(&fs_info->reloc_cancel_req) > 0)
3891 btrfs_info(fs_info, "chunk relocation canceled during operation");
3892 clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
3893 atomic_set(&fs_info->reloc_cancel_req, 0);
3894}
3895
3896static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
3897{
3898 struct reloc_control *rc;
3899
3900 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3901 if (!rc)
3902 return NULL;
3903
3904 INIT_LIST_HEAD(&rc->reloc_roots);
3905 INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3906 btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
3907 mapping_tree_init(&rc->reloc_root_tree);
3908 extent_io_tree_init(fs_info, &rc->processed_blocks,
3909 IO_TREE_RELOC_BLOCKS, NULL);
3910 return rc;
3911}
3912
3913static void free_reloc_control(struct reloc_control *rc)
3914{
3915 struct mapping_node *node, *tmp;
3916
3917 free_reloc_roots(&rc->reloc_roots);
3918 rbtree_postorder_for_each_entry_safe(node, tmp,
3919 &rc->reloc_root_tree.rb_root, rb_node)
3920 kfree(node);
3921
3922 kfree(rc);
3923}
3924
/*
 * Print the block group being relocated.
 */
3928static void describe_relocation(struct btrfs_fs_info *fs_info,
3929 struct btrfs_block_group *block_group)
3930{
3931 char buf[128] = {'\0'};
3932
3933 btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
3934
3935 btrfs_info(fs_info,
3936 "relocating block group %llu flags %s",
3937 block_group->start, buf);
3938}
3939
3940static const char *stage_to_string(int stage)
3941{
3942 if (stage == MOVE_DATA_EXTENTS)
3943 return "move data extents";
3944 if (stage == UPDATE_DATA_PTRS)
3945 return "update data pointers";
3946 return "unknown";
3947}
3948
/*
 * Relocate all extents of a block group.
 */
3952int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
3953{
3954 struct btrfs_block_group *bg;
3955 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, group_start);
3956 struct reloc_control *rc;
3957 struct inode *inode;
3958 struct btrfs_path *path;
3959 int ret;
3960 int rw = 0;
3961 int err = 0;
3962
	/*
	 * This only gets set if we had a half-deleted snapshot on mount.  We
	 * cannot allow relocation to start while we're still trying to clean
	 * up these pending deletions.
	 */
3968 ret = wait_on_bit(&fs_info->flags, BTRFS_FS_UNFINISHED_DROPS, TASK_INTERRUPTIBLE);
3969 if (ret)
3970 return ret;
3971
3972
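	/* We may have been woken up by close_ctree, so bail if we're closing. */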
3973 if (btrfs_fs_closing(fs_info))
3974 return -EINTR;
3975
3976 bg = btrfs_lookup_block_group(fs_info, group_start);
3977 if (!bg)
3978 return -ENOENT;
3979
3980 if (btrfs_pinned_by_swapfile(fs_info, bg)) {
3981 btrfs_put_block_group(bg);
3982 return -ETXTBSY;
3983 }
3984
3985 rc = alloc_reloc_control(fs_info);
3986 if (!rc) {
3987 btrfs_put_block_group(bg);
3988 return -ENOMEM;
3989 }
3990
3991 ret = reloc_chunk_start(fs_info);
3992 if (ret < 0) {
3993 err = ret;
3994 goto out_put_bg;
3995 }
3996
3997 rc->extent_root = extent_root;
3998 rc->block_group = bg;
3999
4000 ret = btrfs_inc_block_group_ro(rc->block_group, true);
4001 if (ret) {
4002 err = ret;
4003 goto out;
4004 }
4005 rw = 1;
4006
4007 path = btrfs_alloc_path();
4008 if (!path) {
4009 err = -ENOMEM;
4010 goto out;
4011 }
4012
4013 inode = lookup_free_space_inode(rc->block_group, path);
4014 btrfs_free_path(path);
4015
4016 if (!IS_ERR(inode))
4017 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
4018 else
4019 ret = PTR_ERR(inode);
4020
4021 if (ret && ret != -ENOENT) {
4022 err = ret;
4023 goto out;
4024 }
4025
4026 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
4027 if (IS_ERR(rc->data_inode)) {
4028 err = PTR_ERR(rc->data_inode);
4029 rc->data_inode = NULL;
4030 goto out;
4031 }
4032
4033 describe_relocation(fs_info, rc->block_group);
4034
4035 btrfs_wait_block_group_reservations(rc->block_group);
4036 btrfs_wait_nocow_writers(rc->block_group);
4037 btrfs_wait_ordered_roots(fs_info, U64_MAX,
4038 rc->block_group->start,
4039 rc->block_group->length);
4040
4041 ret = btrfs_zone_finish(rc->block_group);
4042 WARN_ON(ret && ret != -EAGAIN);
4043
4044 while (1) {
4045 int finishes_stage;
4046
4047 mutex_lock(&fs_info->cleaner_mutex);
4048 ret = relocate_block_group(rc);
4049 mutex_unlock(&fs_info->cleaner_mutex);
4050 if (ret < 0)
4051 err = ret;
4052
4053 finishes_stage = rc->stage;
		/*
		 * We may have gotten ENOSPC after we already dirtied some
		 * extents.  If writeout happens while we're relocating a
		 * different block group we could end up hitting the
		 * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
		 * btrfs_reloc_cow_block.  Make sure we write everything out
		 * properly so we don't trip over this problem, and then break
		 * out of the loop if we hit an error.
		 */
4063 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4064 ret = btrfs_wait_ordered_range(rc->data_inode, 0,
4065 (u64)-1);
4066 if (ret)
4067 err = ret;
4068 invalidate_mapping_pages(rc->data_inode->i_mapping,
4069 0, -1);
4070 rc->stage = UPDATE_DATA_PTRS;
4071 }
4072
4073 if (err < 0)
4074 goto out;
4075
4076 if (rc->extents_found == 0)
4077 break;
4078
4079 btrfs_info(fs_info, "found %llu extents, stage: %s",
4080 rc->extents_found, stage_to_string(finishes_stage));
4081 }
4082
4083 WARN_ON(rc->block_group->pinned > 0);
4084 WARN_ON(rc->block_group->reserved > 0);
4085 WARN_ON(rc->block_group->used > 0);
4086out:
4087 if (err && rw)
4088 btrfs_dec_block_group_ro(rc->block_group);
4089 iput(rc->data_inode);
4090out_put_bg:
4091 btrfs_put_block_group(bg);
4092 reloc_chunk_end(fs_info);
4093 free_reloc_control(rc);
4094 return err;
4095}
4096
4097static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4098{
4099 struct btrfs_fs_info *fs_info = root->fs_info;
4100 struct btrfs_trans_handle *trans;
4101 int ret, err;
4102
4103 trans = btrfs_start_transaction(fs_info->tree_root, 0);
4104 if (IS_ERR(trans))
4105 return PTR_ERR(trans);
4106
4107 memset(&root->root_item.drop_progress, 0,
4108 sizeof(root->root_item.drop_progress));
4109 btrfs_set_root_drop_level(&root->root_item, 0);
4110 btrfs_set_root_refs(&root->root_item, 0);
4111 ret = btrfs_update_root(trans, fs_info->tree_root,
4112 &root->root_key, &root->root_item);
4113
4114 err = btrfs_end_transaction(trans);
4115 if (err)
4116 return err;
4117 return ret;
4118}
4119
/*
 * Recover relocation interrupted by a system crash.
 *
 * This function resumes merging reloc trees with the corresponding fs trees,
 * which is important for keeping the sharing of tree blocks.
 */
4126int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
4127{
4128 LIST_HEAD(reloc_roots);
4129 struct btrfs_key key;
4130 struct btrfs_root *fs_root;
4131 struct btrfs_root *reloc_root;
4132 struct btrfs_path *path;
4133 struct extent_buffer *leaf;
4134 struct reloc_control *rc = NULL;
4135 struct btrfs_trans_handle *trans;
4136 int ret;
4137 int err = 0;
4138
4139 path = btrfs_alloc_path();
4140 if (!path)
4141 return -ENOMEM;
4142 path->reada = READA_BACK;
4143
4144 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4145 key.type = BTRFS_ROOT_ITEM_KEY;
4146 key.offset = (u64)-1;
4147
4148 while (1) {
4149 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
4150 path, 0, 0);
4151 if (ret < 0) {
4152 err = ret;
4153 goto out;
4154 }
4155 if (ret > 0) {
4156 if (path->slots[0] == 0)
4157 break;
4158 path->slots[0]--;
4159 }
4160 leaf = path->nodes[0];
4161 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4162 btrfs_release_path(path);
4163
4164 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4165 key.type != BTRFS_ROOT_ITEM_KEY)
4166 break;
4167
4168 reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
4169 if (IS_ERR(reloc_root)) {
4170 err = PTR_ERR(reloc_root);
4171 goto out;
4172 }
4173
4174 set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
4175 list_add(&reloc_root->root_list, &reloc_roots);
4176
4177 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4178 fs_root = btrfs_get_fs_root(fs_info,
4179 reloc_root->root_key.offset, false);
4180 if (IS_ERR(fs_root)) {
4181 ret = PTR_ERR(fs_root);
4182 if (ret != -ENOENT) {
4183 err = ret;
4184 goto out;
4185 }
4186 ret = mark_garbage_root(reloc_root);
4187 if (ret < 0) {
4188 err = ret;
4189 goto out;
4190 }
4191 } else {
4192 btrfs_put_root(fs_root);
4193 }
4194 }
4195
4196 if (key.offset == 0)
4197 break;
4198
4199 key.offset--;
4200 }
4201 btrfs_release_path(path);
4202
4203 if (list_empty(&reloc_roots))
4204 goto out;
4205
4206 rc = alloc_reloc_control(fs_info);
4207 if (!rc) {
4208 err = -ENOMEM;
4209 goto out;
4210 }
4211
4212 ret = reloc_chunk_start(fs_info);
4213 if (ret < 0) {
4214 err = ret;
4215 goto out_end;
4216 }
4217
4218 rc->extent_root = btrfs_extent_root(fs_info, 0);
4219
4220 set_reloc_control(rc);
4221
4222 trans = btrfs_join_transaction(rc->extent_root);
4223 if (IS_ERR(trans)) {
4224 err = PTR_ERR(trans);
4225 goto out_unset;
4226 }
4227
4228 rc->merge_reloc_tree = 1;
4229
4230 while (!list_empty(&reloc_roots)) {
4231 reloc_root = list_entry(reloc_roots.next,
4232 struct btrfs_root, root_list);
4233 list_del(&reloc_root->root_list);
4234
4235 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4236 list_add_tail(&reloc_root->root_list,
4237 &rc->reloc_roots);
4238 continue;
4239 }
4240
4241 fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
4242 false);
4243 if (IS_ERR(fs_root)) {
4244 err = PTR_ERR(fs_root);
4245 list_add_tail(&reloc_root->root_list, &reloc_roots);
4246 btrfs_end_transaction(trans);
4247 goto out_unset;
4248 }
4249
4250 err = __add_reloc_root(reloc_root);
4251 ASSERT(err != -EEXIST);
4252 if (err) {
4253 list_add_tail(&reloc_root->root_list, &reloc_roots);
4254 btrfs_put_root(fs_root);
4255 btrfs_end_transaction(trans);
4256 goto out_unset;
4257 }
4258 fs_root->reloc_root = btrfs_grab_root(reloc_root);
4259 btrfs_put_root(fs_root);
4260 }
4261
4262 err = btrfs_commit_transaction(trans);
4263 if (err)
4264 goto out_unset;
4265
4266 merge_reloc_roots(rc);
4267
4268 unset_reloc_control(rc);
4269
4270 trans = btrfs_join_transaction(rc->extent_root);
4271 if (IS_ERR(trans)) {
4272 err = PTR_ERR(trans);
4273 goto out_clean;
4274 }
4275 err = btrfs_commit_transaction(trans);
4276out_clean:
4277 ret = clean_dirty_subvols(rc);
4278 if (ret < 0 && !err)
4279 err = ret;
4280out_unset:
4281 unset_reloc_control(rc);
4282out_end:
4283 reloc_chunk_end(fs_info);
4284 free_reloc_control(rc);
4285out:
4286 free_reloc_roots(&reloc_roots);
4287
4288 btrfs_free_path(path);
4289
4290 if (err == 0) {
4291
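		/* Clean up the orphan inode in the data relocation tree */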
4292 fs_root = btrfs_grab_root(fs_info->data_reloc_root);
4293 ASSERT(fs_root);
4294 err = btrfs_orphan_cleanup(fs_root);
4295 btrfs_put_root(fs_root);
4296 }
4297 return err;
4298}
4299
/*
 * Helper to add ordered checksums for data relocation.
 *
 * Cloning the existing checksums properly handles extents without checksums
 * (nodatasum) and saves the CPU time of recalculating them.
 */
4306int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
4307{
4308 struct btrfs_fs_info *fs_info = inode->root->fs_info;
4309 struct btrfs_root *csum_root;
4310 struct btrfs_ordered_sum *sums;
4311 struct btrfs_ordered_extent *ordered;
4312 int ret;
4313 u64 disk_bytenr;
4314 u64 new_bytenr;
4315 LIST_HEAD(list);
4316
4317 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4318 BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
4319
4320 disk_bytenr = file_pos + inode->index_cnt;
4321 csum_root = btrfs_csum_root(fs_info, disk_bytenr);
4322 ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
4323 disk_bytenr + len - 1, &list, 0);
4324 if (ret)
4325 goto out;
4326
4327 while (!list_empty(&list)) {
4328 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4329 list_del_init(&sums->list);
4330
		/*
		 * The csums were looked up at the data's old location,
		 * [disk_bytenr, disk_bytenr + len).  Rebase each csum item onto
		 * the ordered extent's new disk location, keeping its offset
		 * within the range, so the checksums are recorded for the
		 * relocated copy of the data.
		 */
4343 new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
4344 sums->bytenr = new_bytenr;
4345
4346 btrfs_add_ordered_sum(ordered, sums);
4347 }
4348out:
4349 btrfs_put_ordered_extent(ordered);
4350 return ret;
4351}
4352
4353int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4354 struct btrfs_root *root, struct extent_buffer *buf,
4355 struct extent_buffer *cow)
4356{
4357 struct btrfs_fs_info *fs_info = root->fs_info;
4358 struct reloc_control *rc;
4359 struct btrfs_backref_node *node;
4360 int first_cow = 0;
4361 int level;
4362 int ret = 0;
4363
4364 rc = fs_info->reloc_ctl;
4365 if (!rc)
4366 return 0;
4367
4368 BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));
4369
4370 level = btrfs_header_level(buf);
4371 if (btrfs_header_generation(buf) <=
4372 btrfs_root_last_snapshot(&root->root_item))
4373 first_cow = 1;
4374
4375 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4376 rc->create_reloc_tree) {
4377 WARN_ON(!first_cow && level == 0);
4378
4379 node = rc->backref_cache.path[level];
4380 BUG_ON(node->bytenr != buf->start &&
4381 node->new_bytenr != buf->start);
4382
4383 btrfs_backref_drop_node_buffer(node);
4384 atomic_inc(&cow->refs);
4385 node->eb = cow;
4386 node->new_bytenr = cow->start;
4387
4388 if (!node->pending) {
4389 list_move_tail(&node->list,
4390 &rc->backref_cache.pending[level]);
4391 node->pending = 1;
4392 }
4393
4394 if (first_cow)
4395 mark_block_processed(rc, node);
4396
4397 if (first_cow && level > 0)
4398 rc->nodes_relocated += buf->len;
4399 }
4400
4401 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4402 ret = replace_file_extents(trans, rc, root, cow);
4403 return ret;
4404}
4405
/*
 * Called before creating a snapshot.  It calculates the metadata reservation
 * required for relocating the tree blocks of the snapshot.
 */
4410void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
4411 u64 *bytes_to_reserve)
4412{
4413 struct btrfs_root *root = pending->root;
4414 struct reloc_control *rc = root->fs_info->reloc_ctl;
4415
4416 if (!rc || !have_reloc_root(root))
4417 return;
4418
4419 if (!rc->merge_reloc_tree)
4420 return;
4421
4422 root = root->reloc_root;
4423 BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4424
	/*
	 * Relocation is in the stage of merging trees.  The space used by
	 * merging a reloc tree is twice the size of the relocated tree nodes
	 * in the worst case: half for COWing the new tree and half for COWing
	 * the reloc tree.
	 */
4434 *bytes_to_reserve += rc->nodes_relocated;
4435}
4436
/*
 * Called after a snapshot is created.  It migrates the block reservation and
 * creates a reloc root for the newly created snapshot.
 */
4445int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4446 struct btrfs_pending_snapshot *pending)
4447{
4448 struct btrfs_root *root = pending->root;
4449 struct btrfs_root *reloc_root;
4450 struct btrfs_root *new_root;
4451 struct reloc_control *rc = root->fs_info->reloc_ctl;
4452 int ret;
4453
4454 if (!rc || !have_reloc_root(root))
4455 return 0;
4456
4457 rc = root->fs_info->reloc_ctl;
4458 rc->merging_rsv_size += rc->nodes_relocated;
4459
4460 if (rc->merge_reloc_tree) {
4461 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4462 rc->block_rsv,
4463 rc->nodes_relocated, true);
4464 if (ret)
4465 return ret;
4466 }
4467
4468 new_root = pending->snap;
4469 reloc_root = create_reloc_root(trans, root->reloc_root,
4470 new_root->root_key.objectid);
4471 if (IS_ERR(reloc_root))
4472 return PTR_ERR(reloc_root);
4473
4474 ret = __add_reloc_root(reloc_root);
4475 ASSERT(ret != -EEXIST);
4476 if (ret) {
4477
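		/* Drop the reference obtained from create_reloc_root() */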
4478 btrfs_put_root(reloc_root);
4479 return ret;
4480 }
4481 new_root->reloc_root = btrfs_grab_root(reloc_root);
4482
4483 if (rc->create_reloc_tree)
4484 ret = clone_backref_node(trans, rc, root, reloc_root);
4485 return ret;
4486}
4487