1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35#include "discard.h"
36#include "rcu-string.h"
37#include "zoned.h"
38#include "dev-replace.h"
39
40#undef SCRAMBLE_DELAYED_REFS
41
42
43static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
44 struct btrfs_delayed_ref_node *node, u64 parent,
45 u64 root_objectid, u64 owner_objectid,
46 u64 owner_offset, int refs_to_drop,
47 struct btrfs_delayed_extent_op *extra_op);
48static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
49 struct extent_buffer *leaf,
50 struct btrfs_extent_item *ei);
51static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
52 u64 parent, u64 root_objectid,
53 u64 flags, u64 owner, u64 offset,
54 struct btrfs_key *ins, int ref_mod);
55static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
56 struct btrfs_delayed_ref_node *node,
57 struct btrfs_delayed_extent_op *extent_op);
58static int find_next_key(struct btrfs_path *path, int level,
59 struct btrfs_key *key);
60
61static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
62{
63 return (cache->flags & bits) == bits;
64}
65
66int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
67 u64 start, u64 num_bytes)
68{
69 u64 end = start + num_bytes - 1;
70 set_extent_bits(&fs_info->excluded_extents, start, end,
71 EXTENT_UPTODATE);
72 return 0;
73}
74
75void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
76{
77 struct btrfs_fs_info *fs_info = cache->fs_info;
78 u64 start, end;
79
80 start = cache->start;
81 end = start + cache->length - 1;
82
83 clear_extent_bits(&fs_info->excluded_extents, start, end,
84 EXTENT_UPTODATE);
85}
86
87
88int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
89{
90 struct btrfs_root *root = btrfs_extent_root(fs_info, start);
91 int ret;
92 struct btrfs_key key;
93 struct btrfs_path *path;
94
95 path = btrfs_alloc_path();
96 if (!path)
97 return -ENOMEM;
98
99 key.objectid = start;
100 key.offset = len;
101 key.type = BTRFS_EXTENT_ITEM_KEY;
102 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
103 btrfs_free_path(path);
104 return ret;
105}
106
107
108
109
110
111
112
113
114
115
/*
 * Look up the current reference count and flags of an extent.
 *
 * @bytenr:   logical start of the extent
 * @offset:   extent length for data, or (with skinny metadata) the owner
 *            level for tree blocks
 * @metadata: non-zero when looking up a tree block
 * @refs:     out: number of references, including pending delayed refs
 * @flags:    out: extent flags, including pending flag updates
 *
 * Without a transaction handle this is a read-only search of the commit
 * root.  With one, pending modifications recorded in the extent's
 * delayed ref head are folded into the returned count and flags.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_root *extent_root;
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * Without the SKINNY_METADATA feature, tree blocks are stored as
	 * plain EXTENT_ITEMs keyed by (bytenr, nodesize).
	 */
	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* No transaction: lock-free lookup against the last committed root. */
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	extent_root = btrfs_extent_root(fs_info, bytenr);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	/*
	 * A skinny METADATA_ITEM may be missing while an old style
	 * EXTENT_ITEM for the same tree block exists; it would sort just
	 * before our search key, so check the previous slot.
	 */
	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
			/* Obsolete v0 extent item format, unsupported. */
			ret = -EINVAL;
			btrfs_print_v0_err(fs_info);
			if (trans)
				btrfs_abort_transaction(trans, ret);
			else
				btrfs_handle_fs_error(fs_info, ret, NULL);

			goto out_free;
		}

		BUG_ON(num_refs == 0);
	} else {
		/* No on-disk item (yet); delayed refs may still exist. */
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	/* Fold in pending modifications from the delayed ref head, if any. */
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			/*
			 * The head is locked, i.e. being processed right
			 * now.  Pin it, wait on its mutex for processing
			 * to finish, then redo the whole lookup.
			 */
			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref_head(head);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
/*
 * Validate and return the type of an inline extent backref, checking
 * that it is plausible for the class of reference the caller expects.
 *
 * @eb:      leaf holding the inline ref
 * @iref:    the inline ref to examine
 * @is_data: BTRFS_REF_TYPE_DATA, _BLOCK, or _ANY
 *
 * Returns the on-disk ref type when it is consistent with @is_data,
 * otherwise dumps the leaf and returns BTRFS_REF_TYPE_INVALID.
 */
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * A shared ref's offset holds the parent
				 * block's bytenr, so it must be non-zero
				 * and sector aligned to be sane.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
				/* Same sanity check as the shared block case. */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
					return type;
			}
		} else {
			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
			return type;
		}
	}

	/* Unknown or mismatched type: likely corruption, leave a loud trail. */
	btrfs_print_leaf((struct extent_buffer *)eb);
	btrfs_err(eb->fs_info,
		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
		  eb->start, (unsigned long)iref, type);
	WARN_ON(1);

	return BTRFS_REF_TYPE_INVALID;
}
407
/*
 * Hash (root_objectid, owner, offset) into the 64-bit value used as the
 * key offset of EXTENT_DATA_REF items.
 *
 * NOTE: the high crc is shifted by 31, not 32.  This looks like an
 * off-by-one, but the resulting values are stored on disk as key
 * offsets (see lookup_extent_data_ref()/insert_extent_data_ref()), so
 * the formula must never be changed.
 */
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	/* Hash the little-endian form so the result is endian independent. */
	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
423
424static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
425 struct btrfs_extent_data_ref *ref)
426{
427 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
428 btrfs_extent_data_ref_objectid(leaf, ref),
429 btrfs_extent_data_ref_offset(leaf, ref));
430}
431
432static int match_extent_data_ref(struct extent_buffer *leaf,
433 struct btrfs_extent_data_ref *ref,
434 u64 root_objectid, u64 owner, u64 offset)
435{
436 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
437 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
438 btrfs_extent_data_ref_offset(leaf, ref) != offset)
439 return 0;
440 return 1;
441}
442
/*
 * Find the keyed backref item for a data extent.
 *
 * With @parent set, look for a SHARED_DATA_REF keyed by the parent
 * block; otherwise look for an EXTENT_DATA_REF keyed by the hash of
 * (root_objectid, owner, offset), scanning forward over hash
 * collisions until the payload matches exactly.
 *
 * Returns 0 with the path pointing at the matching item, -ENOENT when
 * no match exists, or a negative errno.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;	/* set when the scan crossed into another leaf */
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	/* Shared refs have an exact key, no collision scan is needed. */
	if (parent) {
		if (!ret)
			return 0;
		goto fail;
	}

	/*
	 * Different (root, owner, offset) tuples can hash to the same key
	 * offset, so walk forward from the search position until the key
	 * stops matching or the payload matches exactly.
	 */
	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			/*
			 * The match was found after walking into another
			 * leaf; redo the search so the returned path comes
			 * from a direct descent to the item.
			 */
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}
518
/*
 * Insert a new keyed data backref item, or add @refs_to_add to an
 * existing one.
 *
 * With @parent set a SHARED_DATA_REF keyed by the parent block is used;
 * otherwise an EXTENT_DATA_REF keyed by the hash of (root, owner,
 * offset).  Colliding hashes are chained at successive key offsets.
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	/* -EEXIST leaves the path pointing at the already existing item. */
	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			/* Fresh item: initialize the count. */
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* Existing item: bump the count. */
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		/*
		 * Walk the hash collision chain: keep trying the next key
		 * offset until we either find our exact (root, owner,
		 * offset) tuple or manage to insert a fresh item.
		 */
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			/* Fresh item: fill in the payload and count. */
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* Matching item found in the chain: bump its count. */
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}
597
/*
 * Drop @refs_to_drop references from the keyed data backref item the
 * path currently points at, deleting the item once its count is zero.
 */
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
		/* Obsolete v0 ref format, unsupported: abort the transaction. */
		btrfs_print_v0_err(trans->fs_info);
		btrfs_abort_transaction(trans, -EINVAL);
		return -EINVAL;
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}
643
/*
 * Return the reference count stored in a data backref: either the
 * inline ref @iref (when non-NULL) or the keyed item the path points
 * at.
 */
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;
	int type;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
	if (iref) {
		/*
		 * NOTE(review): an invalid type is asserted away here on
		 * the expectation that the caller's earlier lookup already
		 * validated the inline ref — confirm against callers.
		 */
		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
		ASSERT(type != BTRFS_REF_TYPE_INVALID);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else {
		WARN_ON(1);
	}
	return num_refs;
}
685
686static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
687 struct btrfs_path *path,
688 u64 bytenr, u64 parent,
689 u64 root_objectid)
690{
691 struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
692 struct btrfs_key key;
693 int ret;
694
695 key.objectid = bytenr;
696 if (parent) {
697 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
698 key.offset = parent;
699 } else {
700 key.type = BTRFS_TREE_BLOCK_REF_KEY;
701 key.offset = root_objectid;
702 }
703
704 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
705 if (ret > 0)
706 ret = -ENOENT;
707 return ret;
708}
709
710static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
711 struct btrfs_path *path,
712 u64 bytenr, u64 parent,
713 u64 root_objectid)
714{
715 struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
716 struct btrfs_key key;
717 int ret;
718
719 key.objectid = bytenr;
720 if (parent) {
721 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
722 key.offset = parent;
723 } else {
724 key.type = BTRFS_TREE_BLOCK_REF_KEY;
725 key.offset = root_objectid;
726 }
727
728 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
729 btrfs_release_path(path);
730 return ret;
731}
732
733static inline int extent_ref_type(u64 parent, u64 owner)
734{
735 int type;
736 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
737 if (parent > 0)
738 type = BTRFS_SHARED_BLOCK_REF_KEY;
739 else
740 type = BTRFS_TREE_BLOCK_REF_KEY;
741 } else {
742 if (parent > 0)
743 type = BTRFS_SHARED_DATA_REF_KEY;
744 else
745 type = BTRFS_EXTENT_DATA_REF_KEY;
746 }
747 return type;
748}
749
750static int find_next_key(struct btrfs_path *path, int level,
751 struct btrfs_key *key)
752
753{
754 for (; level < BTRFS_MAX_LEVEL; level++) {
755 if (!path->nodes[level])
756 break;
757 if (path->slots[level] + 1 >=
758 btrfs_header_nritems(path->nodes[level]))
759 continue;
760 if (level == 0)
761 btrfs_item_key_to_cpu(path->nodes[level], key,
762 path->slots[level] + 1);
763 else
764 btrfs_node_key_to_cpu(path->nodes[level], key,
765 path->slots[level] + 1);
766 return 0;
767 }
768 return 1;
769}
770
771
772
773
774
775
776
777
778
779
780
781
782
783
/*
 * Look up an inline backref inside the extent item for @bytenr.
 *
 * @ref_ret: on return 0, points at the matching inline ref; on return
 *           -ENOENT with @insert set, points at the position where a
 *           new ref of the wanted type should be inserted.
 * @insert:  when set, room for one more inline ref is reserved during
 *           the tree search, and -EAGAIN is returned when the extent
 *           item cannot grow any further (the caller must then fall
 *           back to a separate keyed backref item).
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
	int needed;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		/* Reserve room in the leaf for one more inline ref. */
		extra_size = btrfs_extent_inline_ref_size(want);
		path->search_for_extension = 1;
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * With skinny metadata, tree blocks are keyed by (bytenr,
	 * METADATA_ITEM, owner) instead of (bytenr, EXTENT_ITEM,
	 * num_bytes); try that form first.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * No skinny item found; the block may still have an old style
	 * EXTENT_ITEM, which would sort just before our search key.
	 * Check the previous slot, else redo a full EXTENT_ITEM search.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		/* Inserting a ref for an extent that has no item at all. */
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, path->slots[0]);
	if (unlikely(item_size < sizeof(*ei))) {
		/* Obsolete v0 extent item format, unsupported. */
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	/* Non-skinny tree blocks carry a btrfs_tree_block_info header. */
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
		needed = BTRFS_REF_TYPE_DATA;
	else
		needed = BTRFS_REF_TYPE_BLOCK;

	/*
	 * Walk the inline refs.  They are kept sorted (by type, then by
	 * offset/hash within a type), so the scan can stop as soon as it
	 * passes the slot where the wanted ref would live.
	 */
	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			if (ptr > end) {
				/* Refs overran the item: corruption. */
				err = -EUCLEAN;
				btrfs_print_leaf(path->nodes[0]);
				btrfs_crit(fs_info,
"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
					path->slots[0], root_objectid, owner, offset, parent);
			}
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
		if (type == BTRFS_REF_TYPE_INVALID) {
			err = -EUCLEAN;
			goto out;
		}

		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		/*
		 * The extent item cannot hold another inline ref; the
		 * caller must store the ref as a separate keyed item.
		 */
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * Backref-type items for this same extent already follow
		 * the extent item; punt so the new ref is added as a
		 * keyed item as well.
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		path->search_for_extension = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
981
982
983
984
/*
 * Insert a new inline backref into an extent item at the position that
 * a prior lookup_inline_extent_backref() with insert=1 left in @iref.
 * The item is grown and any refs after the position are shifted up to
 * make room for the new one.
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* Remember the insert position relative to the item start ... */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(path, size);

	/* ... since extending the item may move it within the leaf. */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	/* Shift the refs after the insert position to open a gap. */
	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	/* Write the new inline ref into the gap. */
	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}
1045
1046static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1047 struct btrfs_path *path,
1048 struct btrfs_extent_inline_ref **ref_ret,
1049 u64 bytenr, u64 num_bytes, u64 parent,
1050 u64 root_objectid, u64 owner, u64 offset)
1051{
1052 int ret;
1053
1054 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1055 num_bytes, parent, root_objectid,
1056 owner, offset, 0);
1057 if (ret != -ENOENT)
1058 return ret;
1059
1060 btrfs_release_path(path);
1061 *ref_ret = NULL;
1062
1063 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1064 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1065 root_objectid);
1066 } else {
1067 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1068 root_objectid, owner, offset);
1069 }
1070 return ret;
1071}
1072
1073
1074
1075
/*
 * Apply @refs_to_mod to an inline backref and to the extent item's
 * total ref count.  When the inline ref's own count drops to zero, the
 * ref is spliced out and the item is shrunk.
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	/* Update the extent item's overall ref count first. */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	/*
	 * The inline ref was found by a prior lookup, so its type is
	 * expected to be valid here.
	 */
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	ASSERT(type != BTRFS_REF_TYPE_INVALID);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		/* Tree block refs carry an implicit count of exactly 1. */
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* Count hit zero: cut the inline ref out of the item. */
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}
1140
/*
 * Add @refs_to_add references to an extent as an inline backref: bump
 * the matching inline ref when one exists, otherwise insert a new one.
 * Propagates lookup errors, notably -EAGAIN when the ref has to be
 * stored as a separate keyed item instead.
 */
static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 1);
	if (ret == 0) {
		/*
		 * A tree block must never gain extra refs on an existing
		 * ref this way; treat a match as corruption.
		 */
		if (owner < BTRFS_FIRST_FREE_OBJECTID) {
			btrfs_crit(trans->fs_info,
"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
				   bytenr, num_bytes, root_objectid);
			if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
				WARN_ON(1);
				btrfs_crit(trans->fs_info,
			"path->slots[0]=%d path->nodes[0]:", path->slots[0]);
				btrfs_print_leaf(path->nodes[0]);
			}
			return -EUCLEAN;
		}
		update_inline_extent_backref(path, iref, refs_to_add, extent_op);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}
1181
1182static int remove_extent_backref(struct btrfs_trans_handle *trans,
1183 struct btrfs_root *root,
1184 struct btrfs_path *path,
1185 struct btrfs_extent_inline_ref *iref,
1186 int refs_to_drop, int is_data)
1187{
1188 int ret = 0;
1189
1190 BUG_ON(!is_data && refs_to_drop != 1);
1191 if (iref)
1192 update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
1193 else if (is_data)
1194 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1195 else
1196 ret = btrfs_del_item(trans, root, path);
1197 return ret;
1198}
1199
/*
 * Issue a discard for [start, start + len) on @bdev, stepping around
 * any superblock mirror inside the range so a discard can never wipe a
 * superblock.
 *
 * @discarded_bytes: out, how many bytes were actually discarded.
 *
 * The range is handled in 512-byte sector units; an unaligned start is
 * rounded up (and len shrunk to match) with a warning.
 */
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblock mirrors overlapping the range. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;	/* gap before the superblock */

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * The superblock covers the start of the range: nothing
		 * to discard before it, just advance past it.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		/* Discard the gap before the superblock ... */
		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		/* ... and continue after it. */
		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	/* Discard whatever remains after the last skipped superblock. */
	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}
1271
/*
 * Discard one mapped stripe.  On a zoned device the zone is reset
 * instead of discarded (and the reset is mirrored to the replace target
 * while a device replace from this device is running); otherwise a
 * regular block discard is issued when the device supports it.
 *
 * @bytes is set to the number of bytes actually discarded/reset.
 */
static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
{
	struct btrfs_device *dev = stripe->dev;
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	u64 phys = stripe->physical;
	u64 len = stripe->length;
	u64 discarded = 0;
	int ret = 0;

	/* Zoned devices use a zone reset rather than a discard. */
	if (btrfs_can_zone_reset(dev, phys, len)) {
		u64 src_disc;

		ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
		if (ret)
			goto out;

		if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
		    dev != dev_replace->srcdev)
			goto out;

		src_disc = discarded;

		/* Mirror the zone reset to the replace target device. */
		ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
					      &discarded);
		discarded += src_disc;
	} else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
		ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
	} else {
		/* Device does not support discard: not an error. */
		ret = 0;
		*bytes = 0;
	}

out:
	*bytes = discarded;
	return ret;
}
1311
/*
 * Discard the logical range [bytenr, bytenr + num_bytes) on every stripe
 * of every writeable device it maps to.  If @actual_bytes is non-NULL it
 * is set to the number of bytes the devices actually discarded.
 *
 * Lack of discard support (-EOPNOTSUPP) is not reported as an error.
 */
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret = 0;
	u64 discarded_bytes = 0;
	u64 end = bytenr + num_bytes;
	u64 cur = bytenr;
	struct btrfs_io_context *bioc = NULL;

	/*
	 * Hold the bio counter (blocked) so a device replace cannot swap
	 * out the devices backing the stripes while we discard them.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	while (cur < end) {
		struct btrfs_io_stripe *stripe;
		int i;

		num_bytes = end - cur;
		/* Map this part of the logical range to physical stripes. */
		ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
				      &num_bytes, &bioc, 0);
		/*
		 * On mapping failure stop right away; no io context was
		 * handed back so there is nothing to put here.
		 */
		if (ret < 0)
			goto out;

		stripe = bioc->stripes;
		for (i = 0; i < bioc->num_stripes; i++, stripe++) {
			u64 bytes;
			struct btrfs_device *device = stripe->dev;

			/* A missing device is only legal when degraded. */
			if (!device->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}

			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
				continue;

			ret = do_discard_extent(stripe, &bytes);
			if (!ret) {
				discarded_bytes += bytes;
			} else if (ret != -EOPNOTSUPP) {
				/*
				 * A hard error: drop the io context and
				 * give up on the whole range.  -EOPNOTSUPP
				 * is tolerated and cleared below so the
				 * remaining stripes are still attempted.
				 */
				btrfs_put_bioc(bioc);
				goto out;
			}

			/* Forget -EOPNOTSUPP; try the next stripe. */
			ret = 0;
		}
		btrfs_put_bioc(bioc);
		cur += num_bytes;
	}
out:
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	/* Missing discard support is not an error for the caller. */
	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}
1391
1392
1393int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1394 struct btrfs_ref *generic_ref)
1395{
1396 struct btrfs_fs_info *fs_info = trans->fs_info;
1397 int ret;
1398
1399 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1400 generic_ref->action);
1401 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1402 generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
1403
1404 if (generic_ref->type == BTRFS_REF_METADATA)
1405 ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
1406 else
1407 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
1408
1409 btrfs_ref_tree_mod(fs_info, generic_ref);
1410
1411 return ret;
1412}
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
/*
 * Insert backrefs for @refs_to_add references to the extent described by
 * @node.  First try to add an inline backref; if the extent item has no
 * room (insert_inline_extent_backref() returns -EAGAIN), bump the extent
 * item's ref count in place and insert a separate keyed backref item.
 *
 * @node:          delayed ref node carrying bytenr/num_bytes of the extent
 * @parent:        shared-backref parent block, or 0 for a normal backref
 * @root_objectid: root the reference belongs to
 * @owner:         inode objectid for data refs, tree level for tree blocks
 *                 (callers pass ref->level for metadata)
 * @offset:        file offset for data refs, 0 for tree blocks
 * @refs_to_add:   number of references to add
 * @extent_op:     optional flags/key update folded into the extent item
 *
 * Returns 0 on success or a negative errno.
 */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* This will setup the path even if it fails to insert the backref. */
	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
					   parent, root_objectid, owner,
					   offset, refs_to_add, extent_op);
	/* Done on success (ret == 0) or any error other than -EAGAIN. */
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * -EAGAIN: there was no room for an inline ref.  The path now
	 * points at the extent item, so update its reference count and
	 * then add a normal (keyed) backref below.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/* Now insert the actual backref item. */
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		/* Tree blocks carry exactly one ref per modification. */
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset,
					     refs_to_add);
	}
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
1509
1510static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1511 struct btrfs_delayed_ref_node *node,
1512 struct btrfs_delayed_extent_op *extent_op,
1513 int insert_reserved)
1514{
1515 int ret = 0;
1516 struct btrfs_delayed_data_ref *ref;
1517 struct btrfs_key ins;
1518 u64 parent = 0;
1519 u64 ref_root = 0;
1520 u64 flags = 0;
1521
1522 ins.objectid = node->bytenr;
1523 ins.offset = node->num_bytes;
1524 ins.type = BTRFS_EXTENT_ITEM_KEY;
1525
1526 ref = btrfs_delayed_node_to_data_ref(node);
1527 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1528
1529 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1530 parent = ref->parent;
1531 ref_root = ref->root;
1532
1533 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1534 if (extent_op)
1535 flags |= extent_op->flags_to_set;
1536 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1537 flags, ref->objectid,
1538 ref->offset, &ins,
1539 node->ref_mod);
1540 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1541 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1542 ref->objectid, ref->offset,
1543 node->ref_mod, extent_op);
1544 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1545 ret = __btrfs_free_extent(trans, node, parent,
1546 ref_root, ref->objectid,
1547 ref->offset, node->ref_mod,
1548 extent_op);
1549 } else {
1550 BUG();
1551 }
1552 return ret;
1553}
1554
1555static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1556 struct extent_buffer *leaf,
1557 struct btrfs_extent_item *ei)
1558{
1559 u64 flags = btrfs_extent_flags(leaf, ei);
1560 if (extent_op->update_flags) {
1561 flags |= extent_op->flags_to_set;
1562 btrfs_set_extent_flags(leaf, ei, flags);
1563 }
1564
1565 if (extent_op->update_key) {
1566 struct btrfs_tree_block_info *bi;
1567 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1568 bi = (struct btrfs_tree_block_info *)(ei + 1);
1569 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1570 }
1571}
1572
/*
 * Look up the extent item for @head in the extent tree and apply the
 * pending @extent_op (flags and/or key update) to it.
 *
 * With the SKINNY_METADATA feature a tree block may be keyed either by a
 * METADATA_ITEM (skinny) or by a legacy EXTENT_ITEM; try skinny first and
 * fall back to the full key.
 */
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_delayed_ref_head *head,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = 1;	/* try the skinny METADATA_ITEM key first */

	if (TRANS_ABORTED(trans))
		return 0;

	if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = head->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = head->num_bytes;
	}

	root = btrfs_extent_root(fs_info, key.objectid);
again:
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			/*
			 * The skinny key was not found.  The previous slot
			 * may hold a full EXTENT_ITEM for the same block
			 * (written before skinny metadata was enabled).
			 */
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == head->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == head->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				/* Retry with the full EXTENT_ITEM key. */
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = head->bytenr;
				key.offset = head->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			/* Extent item is missing entirely: corruption. */
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, path->slots[0]);

	/* Legacy "v0" extent items are too small to carry flags/key. */
	if (unlikely(item_size < sizeof(*ei))) {
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}
1659
1660static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1661 struct btrfs_delayed_ref_node *node,
1662 struct btrfs_delayed_extent_op *extent_op,
1663 int insert_reserved)
1664{
1665 int ret = 0;
1666 struct btrfs_delayed_tree_ref *ref;
1667 u64 parent = 0;
1668 u64 ref_root = 0;
1669
1670 ref = btrfs_delayed_node_to_tree_ref(node);
1671 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1672
1673 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1674 parent = ref->parent;
1675 ref_root = ref->root;
1676
1677 if (node->ref_mod != 1) {
1678 btrfs_err(trans->fs_info,
1679 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1680 node->bytenr, node->ref_mod, node->action, ref_root,
1681 parent);
1682 return -EIO;
1683 }
1684 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1685 BUG_ON(!extent_op || !extent_op->update_flags);
1686 ret = alloc_reserved_tree_block(trans, node, extent_op);
1687 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1688 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1689 ref->level, 0, 1, extent_op);
1690 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1691 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1692 ref->level, 0, 1, extent_op);
1693 } else {
1694 BUG();
1695 }
1696 return ret;
1697}
1698
1699
1700static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1701 struct btrfs_delayed_ref_node *node,
1702 struct btrfs_delayed_extent_op *extent_op,
1703 int insert_reserved)
1704{
1705 int ret = 0;
1706
1707 if (TRANS_ABORTED(trans)) {
1708 if (insert_reserved)
1709 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1710 return 0;
1711 }
1712
1713 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1714 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1715 ret = run_delayed_tree_ref(trans, node, extent_op,
1716 insert_reserved);
1717 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1718 node->type == BTRFS_SHARED_DATA_REF_KEY)
1719 ret = run_delayed_data_ref(trans, node, extent_op,
1720 insert_reserved);
1721 else
1722 BUG();
1723 if (ret && insert_reserved)
1724 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1725 return ret;
1726}
1727
1728static inline struct btrfs_delayed_ref_node *
1729select_delayed_ref(struct btrfs_delayed_ref_head *head)
1730{
1731 struct btrfs_delayed_ref_node *ref;
1732
1733 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1734 return NULL;
1735
1736
1737
1738
1739
1740
1741
1742 if (!list_empty(&head->ref_add_list))
1743 return list_first_entry(&head->ref_add_list,
1744 struct btrfs_delayed_ref_node, add_list);
1745
1746 ref = rb_entry(rb_first_cached(&head->ref_tree),
1747 struct btrfs_delayed_ref_node, ref_node);
1748 ASSERT(list_empty(&ref->add_list));
1749 return ref;
1750}
1751
/*
 * Return a ref head to the "ready" pool after a failed or deferred run:
 * clear its processing state under the delayed-refs lock, bump the ready
 * counter and release the head's lock.
 */
static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
				      struct btrfs_delayed_ref_head *head)
{
	spin_lock(&delayed_refs->lock);
	head->processing = 0;
	delayed_refs->num_heads_ready++;
	spin_unlock(&delayed_refs->lock);
	btrfs_delayed_ref_unlock(head);
}
1761
1762static struct btrfs_delayed_extent_op *cleanup_extent_op(
1763 struct btrfs_delayed_ref_head *head)
1764{
1765 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1766
1767 if (!extent_op)
1768 return NULL;
1769
1770 if (head->must_insert_reserved) {
1771 head->extent_op = NULL;
1772 btrfs_free_delayed_extent_op(extent_op);
1773 return NULL;
1774 }
1775 return extent_op;
1776}
1777
/*
 * Run the head's pending extent op, if any.  Called with head->lock held;
 * the lock is dropped before the op runs and is NOT re-taken.
 *
 * Returns 0 when there was nothing to do (head->lock still held), 1 when
 * the op ran successfully (lock dropped), or a negative errno.
 */
static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = cleanup_extent_op(head);
	if (!extent_op)
		return 0;
	head->extent_op = NULL;
	spin_unlock(&head->lock);
	ret = run_delayed_extent_op(trans, head, extent_op);
	btrfs_free_delayed_extent_op(extent_op);
	return ret ? ret : 1;
}
1793
/*
 * Release the delayed-refs reservation held for @head once it is finished
 * or abandoned.  A head that net-drops data refs (negative total_ref_mod)
 * also reserved room for deleting csum items; release those leaves too and
 * subtract the bytes from the pending csum accounting.
 */
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
				  struct btrfs_delayed_ref_root *delayed_refs,
				  struct btrfs_delayed_ref_head *head)
{
	int nr_items = 1;	/* the head itself */

	if (head->total_ref_mod < 0 && head->is_data) {
		spin_lock(&delayed_refs->lock);
		delayed_refs->pending_csums -= head->num_bytes;
		spin_unlock(&delayed_refs->lock);
		nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
	}

	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
}
1813
/*
 * Final step for a ref head whose individual refs have all been run.
 * Called with head->lock held and the head locked.
 *
 * Returns 0 when the head was fully retired, 1 when more work appeared
 * while we were unlocked (caller keeps the head and retries), or a
 * negative errno.
 */
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
			    struct btrfs_delayed_ref_head *head)
{

	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	delayed_refs = &trans->transaction->delayed_refs;

	ret = run_and_cleanup_extent_op(trans, head);
	if (ret < 0) {
		unselect_delayed_ref_head(delayed_refs, head);
		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
		return ret;
	} else if (ret) {
		/* The op ran and head->lock was dropped; retry the head. */
		return ret;
	}

	/*
	 * Take the delayed-refs lock before the head lock (lock ordering),
	 * then re-check that nobody queued new refs or a new extent op
	 * while the head was briefly unlocked.
	 */
	spin_unlock(&head->lock);
	spin_lock(&delayed_refs->lock);
	spin_lock(&head->lock);
	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
		spin_unlock(&head->lock);
		spin_unlock(&delayed_refs->lock);
		return 1;
	}
	btrfs_delete_ref_head(delayed_refs, head);
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	if (head->must_insert_reserved) {
		/*
		 * The reserved extent was never inserted: pin it so the
		 * space is returned at commit, and for data drop any csums
		 * that were written for the range.
		 */
		btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
		if (head->is_data) {
			struct btrfs_root *csum_root;

			csum_root = btrfs_csum_root(fs_info, head->bytenr);
			ret = btrfs_del_csums(trans, csum_root, head->bytenr,
					      head->num_bytes);
		}
	}

	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);

	trace_run_delayed_ref_head(fs_info, head, 0);
	btrfs_delayed_ref_unlock(head);
	btrfs_put_delayed_ref_head(head);
	return ret;
}
1867
/*
 * Pick the next delayed ref head to process and lock it.
 *
 * Returns a locked head, NULL when none is ready, or ERR_PTR(-EAGAIN)
 * when the chosen head was contended and the caller should try again.
 */
static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
					struct btrfs_trans_handle *trans)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_delayed_ref_head *head = NULL;
	int ret;

	spin_lock(&delayed_refs->lock);
	head = btrfs_select_ref_head(delayed_refs);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		return head;
	}

	/*
	 * Grab the lock that says we are going to process all the refs for
	 * this head.
	 */
	ret = btrfs_delayed_ref_lock(delayed_refs, head);
	spin_unlock(&delayed_refs->lock);

	/*
	 * -EAGAIN: the head is contended.  Presumably it was re-queued by
	 * btrfs_delayed_ref_lock() (confirm against its definition); just
	 * report the contention to the caller.
	 */
	if (ret == -EAGAIN)
		head = ERR_PTR(-EAGAIN);

	return head;
}
1900
/*
 * Run all currently runnable refs attached to @locked_ref, merging refs
 * again after each run.  Called with both the head's mutex and
 * head->lock held; head->lock is dropped and re-taken around each ref.
 * *run_refs counts the refs actually processed.
 *
 * Returns 0 when no runnable refs remain (head->lock held), -EAGAIN when
 * a ref is blocked on a pending tree-mod-log seq, or another negative
 * errno (the head is unselected in both error cases).
 */
static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
					   struct btrfs_delayed_ref_head *locked_ref,
					   unsigned long *run_refs)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_delayed_ref_node *ref;
	int must_insert_reserved = 0;
	int ret;

	delayed_refs = &trans->transaction->delayed_refs;

	lockdep_assert_held(&locked_ref->mutex);
	lockdep_assert_held(&locked_ref->lock);

	while ((ref = select_delayed_ref(locked_ref))) {
		if (ref->seq &&
		    btrfs_check_delayed_seq(fs_info, ref->seq)) {
			/* A tree-mod-log user still needs this ref delayed. */
			spin_unlock(&locked_ref->lock);
			unselect_delayed_ref_head(delayed_refs, locked_ref);
			return -EAGAIN;
		}

		(*run_refs)++;
		/* Detach the ref from the head before running it. */
		ref->in_tree = 0;
		rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
		RB_CLEAR_NODE(&ref->ref_node);
		if (!list_empty(&ref->add_list))
			list_del(&ref->add_list);
		/*
		 * When we play the delayed ref, also correct the ref_mod on
		 * the head.
		 */
		switch (ref->action) {
		case BTRFS_ADD_DELAYED_REF:
		case BTRFS_ADD_DELAYED_EXTENT:
			locked_ref->ref_mod -= ref->ref_mod;
			break;
		case BTRFS_DROP_DELAYED_REF:
			locked_ref->ref_mod += ref->ref_mod;
			break;
		default:
			WARN_ON(1);
		}
		atomic_dec(&delayed_refs->num_entries);

		/*
		 * Snapshot must_insert_reserved and the extent op before
		 * dropping head->lock; they apply to this ref only.
		 */
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;
		spin_unlock(&locked_ref->lock);

		ret = run_one_delayed_ref(trans, ref, extent_op,
					  must_insert_reserved);

		btrfs_free_delayed_extent_op(extent_op);
		if (ret) {
			unselect_delayed_ref_head(delayed_refs, locked_ref);
			btrfs_put_delayed_ref(ref);
			btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
				    ret);
			return ret;
		}

		btrfs_put_delayed_ref(ref);
		cond_resched();

		/* Re-merge: new refs may have arrived while unlocked. */
		spin_lock(&locked_ref->lock);
		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
	}

	return 0;
}
1980
1981
1982
1983
1984
/*
 * Run up to @nr delayed ref heads ((unsigned long)-1 means keep going
 * until the queue is empty).  Returns 0 on success or a negative errno;
 * the caller is expected to abort the transaction on error.
 */
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
					     unsigned long nr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	ktime_t start = ktime_get();
	int ret;
	unsigned long count = 0;
	unsigned long actual_count = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	do {
		if (!locked_ref) {
			locked_ref = btrfs_obtain_ref_head(trans);
			if (IS_ERR_OR_NULL(locked_ref)) {
				if (PTR_ERR(locked_ref) == -EAGAIN) {
					/* Contended head: pick another. */
					continue;
				} else {
					/* NULL: no heads left to process. */
					break;
				}
			}
			count++;
		}
		/*
		 * Merge adds/drops of the same ref before running them (see
		 * btrfs_merge_delayed_refs()), then run whatever remains on
		 * this head.  Merging must happen under head->lock.
		 */
		spin_lock(&locked_ref->lock);
		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);

		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
						      &actual_count);
		if (ret < 0 && ret != -EAGAIN) {
			/*
			 * Hard error: the head has already been unselected
			 * by btrfs_run_delayed_refs_for_head().
			 */
			return ret;
		} else if (!ret) {
			/*
			 * No runnable refs left: try to retire the head.
			 * cleanup_ref_head() returns 1 if it had to drop its
			 * locks and new work appeared; loop again with the
			 * same head still locked.
			 */
			ret = cleanup_ref_head(trans, locked_ref);
			if (ret > 0 ) {
				/* We dropped our lock, we need to loop. */
				ret = 0;
				continue;
			} else if (ret) {
				return ret;
			}
		}

		/*
		 * Head fully retired (ret == 0) or deferred with -EAGAIN
		 * (already unselected): drop it and move to the next one.
		 */
		locked_ref = NULL;
		cond_resched();
	} while ((nr != -1 && count < nr) || locked_ref);

	/*
	 * Update the running average time it takes to process a delayed
	 * ref; used elsewhere to size delayed-ref batches.
	 */
	if (actual_count > 0) {
		u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
		u64 avg;

		/* Exponential moving average: 3/4 old value, 1/4 new. */
		spin_lock(&delayed_refs->lock);
		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
		fs_info->avg_delayed_ref_runtime = avg >> 2;
		spin_unlock(&delayed_refs->lock);
	}
	return 0;
}
2076
2077#ifdef SCRAMBLE_DELAYED_REFS
2078
2079
2080
2081
2082
2083static u64 find_middle(struct rb_root *root)
2084{
2085 struct rb_node *n = root->rb_node;
2086 struct btrfs_delayed_ref_node *entry;
2087 int alt = 1;
2088 u64 middle;
2089 u64 first = 0, last = 0;
2090
2091 n = rb_first(root);
2092 if (n) {
2093 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2094 first = entry->bytenr;
2095 }
2096 n = rb_last(root);
2097 if (n) {
2098 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2099 last = entry->bytenr;
2100 }
2101 n = root->rb_node;
2102
2103 while (n) {
2104 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2105 WARN_ON(!entry->in_tree);
2106
2107 middle = entry->bytenr;
2108
2109 if (alt)
2110 n = n->rb_left;
2111 else
2112 n = n->rb_right;
2113
2114 alt = 1 - alt;
2115 }
2116 return middle;
2117}
2118#endif
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
/*
 * Run @count delayed ref heads.  @count == 0 means "however many are
 * currently ready"; (unsigned long)-1 means run until no delayed refs
 * remain, including heads created while running.
 *
 * Returns 0 on success; on failure the transaction is aborted and the
 * negative errno returned.
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   unsigned long count)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	int ret;
	int run_all = count == (unsigned long)-1;

	/* An aborted transaction is cleaned up elsewhere. */
	if (TRANS_ABORTED(trans))
		return 0;

	/* Delayed refs are deferred while the free space tree is built. */
	if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
		return 0;

	delayed_refs = &trans->transaction->delayed_refs;
	if (count == 0)
		count = delayed_refs->num_heads_ready;

again:
#ifdef SCRAMBLE_DELAYED_REFS
	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
	ret = __btrfs_run_delayed_refs(trans, count);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	if (run_all) {
		/* New block groups can queue more delayed refs. */
		btrfs_create_pending_block_groups(trans);

		spin_lock(&delayed_refs->lock);
		node = rb_first_cached(&delayed_refs->href_root);
		if (!node) {
			spin_unlock(&delayed_refs->lock);
			goto out;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);

		/* Wait for whoever is processing this head, then rescan. */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);

		btrfs_put_delayed_ref_head(head);
		cond_resched();
		goto again;
	}
out:
	return 0;
}
2186
2187int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2188 struct extent_buffer *eb, u64 flags,
2189 int level)
2190{
2191 struct btrfs_delayed_extent_op *extent_op;
2192 int ret;
2193
2194 extent_op = btrfs_alloc_delayed_extent_op();
2195 if (!extent_op)
2196 return -ENOMEM;
2197
2198 extent_op->flags_to_set = flags;
2199 extent_op->update_flags = true;
2200 extent_op->update_key = false;
2201 extent_op->level = level;
2202
2203 ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
2204 if (ret)
2205 btrfs_free_delayed_extent_op(extent_op);
2206 return ret;
2207}
2208
/*
 * Check whether the running transaction holds delayed refs on @bytenr
 * from anyone other than (@root, @objectid, @offset).
 *
 * Returns 0 when no foreign reference is pending, 1 when one is, and
 * -EAGAIN when the head was contended; in that case @path has been
 * released and the caller must redo its search.
 */
static noinline int check_delayed_ref(struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_transaction *cur_trans;
	struct rb_node *node;
	int ret = 0;

	/* Pin the running transaction so it cannot go away under us. */
	spin_lock(&root->fs_info->trans_lock);
	cur_trans = root->fs_info->running_transaction;
	if (cur_trans)
		refcount_inc(&cur_trans->use_count);
	spin_unlock(&root->fs_info->trans_lock);
	if (!cur_trans)
		return 0;

	delayed_refs = &cur_trans->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		btrfs_put_transaction(cur_trans);
		return 0;
	}

	if (!mutex_trylock(&head->mutex)) {
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/*
		 * The head is busy: wait for its current owner to finish,
		 * then tell the caller to retry from scratch.
		 */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		btrfs_put_transaction(cur_trans);
		return -EAGAIN;
	}
	spin_unlock(&delayed_refs->lock);

	spin_lock(&head->lock);
	/*
	 * Walk every queued ref: any shared ref, or any data ref owned by
	 * someone other than us, is a cross reference.
	 */
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);

		/* A shared ref means a cross reference by definition. */
		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			ret = 1;
			break;
		}

		data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If the ref does not match the one we are looking for then
		 * we have a cross reference.
		 */
		if (data_ref->root != root->root_key.objectid ||
		    data_ref->objectid != objectid ||
		    data_ref->offset != offset) {
			ret = 1;
			break;
		}
	}
	spin_unlock(&head->lock);
	mutex_unlock(&head->mutex);
	btrfs_put_transaction(cur_trans);
	return ret;
}
2288
/*
 * Check the committed extent tree for references on @bytenr other than a
 * single inline data ref from (@root, @objectid, @offset).
 *
 * Returns 0 when the extent is exclusively ours, 1 when it is (or might
 * be) shared, -ENOENT when no matching extent item exists, or another
 * negative errno on search failure.  With @strict false, extents created
 * at or before the root's last snapshot are conservatively treated as
 * shared.
 */
static noinline int check_committed_ref(struct btrfs_root *root,
					struct btrfs_path *path,
					u64 objectid, u64 offset, u64 bytenr,
					bool strict)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 item_size;
	int type;
	int ret;

	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* offset (u64)-1 can never be an exact match. */
	BUG_ON(ret == 0);

	ret = -ENOENT;
	if (path->slots[0] == 0)
		goto out;

	/* The extent item, if present, sits in the previous slot. */
	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
		goto out;

	ret = 1;
	item_size = btrfs_item_size(leaf, path->slots[0]);
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);

	/* More than one inline ref cannot fit this size: it is shared. */
	if (item_size != sizeof(*ei) +
	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
		goto out;

	/*
	 * Extents created at or before the last snapshot may be referenced
	 * by that snapshot; treat them as shared unless @strict was
	 * explicitly requested to skip this heuristic.
	 */
	if (!strict &&
	    (btrfs_extent_generation(leaf, ei) <=
	     btrfs_root_last_snapshot(&root->root_item)))
		goto out;

	iref = (struct btrfs_extent_inline_ref *)(ei + 1);

	/* Only a plain EXTENT_DATA_REF can be exclusive. */
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
	if (type != BTRFS_EXTENT_DATA_REF_KEY)
		goto out;

	/* The single ref must match us exactly and account for all refs. */
	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
	if (btrfs_extent_refs(leaf, ei) !=
	    btrfs_extent_data_ref_count(leaf, ref) ||
	    btrfs_extent_data_ref_root(leaf, ref) !=
	    root->root_key.objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		goto out;

	ret = 0;
out:
	return ret;
}
2363
2364int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2365 u64 bytenr, bool strict, struct btrfs_path *path)
2366{
2367 int ret;
2368
2369 do {
2370 ret = check_committed_ref(root, path, objectid,
2371 offset, bytenr, strict);
2372 if (ret && ret != -ENOENT)
2373 goto out;
2374
2375 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2376 } while (ret == -EAGAIN);
2377
2378out:
2379 btrfs_release_path(path);
2380 if (btrfs_is_data_reloc_root(root))
2381 WARN_ON(ret > 0);
2382 return ret;
2383}
2384
/*
 * Add or drop one reference for every extent pointed to by @buf: regular
 * file extents when @buf is a leaf, child blocks when it is a node.
 *
 * @full_backref: use @buf's own bytenr as shared-backref parent
 * @inc:          nonzero to add references, zero to drop them
 *
 * Returns 0 on success or the first error from queuing a delayed ref.
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int full_backref, int inc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_ref generic_ref = { 0 };
	bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
	int i;
	int action;
	int level;
	int ret = 0;

	if (btrfs_is_testing(fs_info))
		return 0;

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* Non-shareable roots have nothing to re-reference in leaves. */
	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
		return 0;

	if (full_backref)
		parent = buf->start;
	else
		parent = 0;
	if (inc)
		action = BTRFS_ADD_DELAYED_REF;
	else
		action = BTRFS_DROP_DELAYED_REF;

	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			/* Leaf: reference each on-disk file extent. */
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)	/* hole */
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/* Data refs are keyed by the original file offset. */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			btrfs_init_generic_ref(&generic_ref, action, bytenr,
					       num_bytes, parent);
			btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
					    key.offset, root->root_key.objectid,
					    for_reloc);
			if (inc)
				ret = btrfs_inc_extent_ref(trans, &generic_ref);
			else
				ret = btrfs_free_extent(trans, &generic_ref);
			if (ret)
				goto fail;
		} else {
			/* Node: reference each child tree block. */
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = fs_info->nodesize;
			btrfs_init_generic_ref(&generic_ref, action, bytenr,
					       num_bytes, parent);
			btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
					    root->root_key.objectid, for_reloc);
			if (inc)
				ret = btrfs_inc_extent_ref(trans, &generic_ref);
			else
				ret = btrfs_free_extent(trans, &generic_ref);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	return ret;
}
2470
/* Add one reference to every extent referenced from @buf. */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}
2476
/* Drop one reference from every extent referenced from @buf. */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}
2482
2483static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2484{
2485 struct btrfs_fs_info *fs_info = root->fs_info;
2486 u64 flags;
2487 u64 ret;
2488
2489 if (data)
2490 flags = BTRFS_BLOCK_GROUP_DATA;
2491 else if (root == fs_info->chunk_root)
2492 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2493 else
2494 flags = BTRFS_BLOCK_GROUP_METADATA;
2495
2496 ret = btrfs_get_alloc_profile(fs_info, flags);
2497 return ret;
2498}
2499
2500static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
2501{
2502 struct rb_node *leftmost;
2503 u64 bytenr = 0;
2504
2505 read_lock(&fs_info->block_group_cache_lock);
2506
2507 leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
2508 if (leftmost) {
2509 struct btrfs_block_group *bg;
2510
2511 bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
2512 bytenr = bg->start;
2513 }
2514 read_unlock(&fs_info->block_group_cache_lock);
2515
2516 return bytenr;
2517}
2518
/*
 * Pin @num_bytes at @bytenr in @cache: move the range to pinned
 * accounting (out of reserved when @reserved is set) and mark it dirty in
 * the transaction's pinned-extent tree so the space is only returned
 * after the transaction commits.
 */
static int pin_down_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_block_group *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;

	/* space_info lock nests outside the block group lock. */
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
					     num_bytes);
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	return 0;
}
2541
2542int btrfs_pin_extent(struct btrfs_trans_handle *trans,
2543 u64 bytenr, u64 num_bytes, int reserved)
2544{
2545 struct btrfs_block_group *cache;
2546
2547 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2548 BUG_ON(!cache);
2549
2550 pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
2551
2552 btrfs_put_block_group(cache);
2553 return 0;
2554}
2555
2556
2557
2558
/*
 * Pin an extent referenced during tree-log replay.  Unlike
 * btrfs_pin_extent() the block group may legitimately be absent (returns
 * -EINVAL), and the range is also removed from the free space cache so
 * replay cannot allocate over it.
 */
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group *cache;
	int ret;

	cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
	if (!cache)
		return -EINVAL;

	/*
	 * Fully load the free space cache first so removing our range
	 * below cannot race with the async caching thread.
	 */
	btrfs_cache_block_group(cache, 1);
	/*
	 * Wait until the cache is completely built in case it is missing or
	 * invalid and therefore needs to be rebuilt.
	 */
	ret = btrfs_wait_block_group_cache_done(cache);
	if (ret)
		goto out;

	pin_down_extent(trans, cache, bytenr, num_bytes, 0);

	/* Remove us from the free space cache so we don't allocate over it. */
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
out:
	btrfs_put_block_group(cache);
	return ret;
}
2592
/*
 * Remove the range [@start, @start + @num_bytes) from its block group's free
 * space cache, waiting for the cache to be fully built first.
 *
 * Helper for btrfs_exclude_logged_extents() during log replay.  Returns 0 on
 * success, -EINVAL if no block group contains @start, or another negative
 * errno from caching/free-space removal.
 */
static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
				   u64 start, u64 num_bytes)
{
	int ret;
	struct btrfs_block_group *block_group;

	block_group = btrfs_lookup_block_group(fs_info, start);
	if (!block_group)
		return -EINVAL;

	btrfs_cache_block_group(block_group, 1);
	/*
	 * Make sure we wait until the cache is completely built in case it is
	 * missing or is invalid and therefore needs to be rebuilt.
	 */
	ret = btrfs_wait_block_group_cache_done(block_group);
	if (ret)
		goto out;

	ret = btrfs_remove_free_space(block_group, start, num_bytes);
out:
	btrfs_put_block_group(block_group);
	return ret;
}
2617
/*
 * Exclude all data extents referenced by log tree leaf @eb from the free
 * space caches, so log replay does not allocate over them.
 *
 * Only needed with mixed block groups, where data and metadata share block
 * groups and the free space accounting could otherwise hand these ranges
 * out.  Inline extents and holes (disk_bytenr == 0) have no on-disk extent
 * and are skipped.  Returns 0 on success or the first error encountered.
 */
int btrfs_exclude_logged_extents(struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	struct btrfs_file_extent_item *item;
	struct btrfs_key key;
	int found_type;
	int i;
	int ret = 0;

	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
		return 0;

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(eb, item);
		if (found_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
			continue;
		/* Reuse @key as scratch: objectid = bytenr, offset = length. */
		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
		ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
		if (ret)
			break;
	}

	return ret;
}
2649
/* Take a reservation reference on @bg; paired with a later dec elsewhere. */
static void
btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
{
	atomic_inc(&bg->reservations);
}
2655
2656
2657
2658
2659
2660static struct btrfs_free_cluster *
2661fetch_cluster_info(struct btrfs_fs_info *fs_info,
2662 struct btrfs_space_info *space_info, u64 *empty_cluster)
2663{
2664 struct btrfs_free_cluster *ret = NULL;
2665
2666 *empty_cluster = 0;
2667 if (btrfs_mixed_space_info(space_info))
2668 return ret;
2669
2670 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2671 ret = &fs_info->meta_alloc_cluster;
2672 if (btrfs_test_opt(fs_info, SSD))
2673 *empty_cluster = SZ_2M;
2674 else
2675 *empty_cluster = SZ_64K;
2676 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2677 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2678 *empty_cluster = SZ_2M;
2679 ret = &fs_info->data_alloc_cluster;
2680 }
2681
2682 return ret;
2683}
2684
/*
 * Unpin all extents in the range [@start, @end], returning the space to the
 * free space caches (when @return_free_space is true) and updating the
 * pinned accounting.
 *
 * Walks block group by block group.  Bytes in read-only groups become
 * bytes_readonly; on zoned filesystems unpinned bytes become
 * zone_unusable until the zone is reset.  Otherwise the freed bytes are
 * first offered to the global block reserve and any remainder is used to
 * satisfy pending space tickets.  Always returns 0.
 */
static int unpin_extent_range(struct btrfs_fs_info *fs_info,
			      u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_block_group *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_free_cluster *cluster = NULL;
	u64 len;
	u64 total_unpinned = 0;
	u64 empty_cluster = 0;
	bool readonly;

	while (start <= end) {
		readonly = false;
		if (!cache ||
		    start >= cache->start + cache->length) {
			/* Crossed into the next block group; swap caches. */
			if (cache)
				btrfs_put_block_group(cache);
			total_unpinned = 0;
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache);

			cluster = fetch_cluster_info(fs_info,
						     cache->space_info,
						     &empty_cluster);
			empty_cluster <<= 1;
		}

		/* Unpin at most up to the end of this block group. */
		len = cache->start + cache->length - start;
		len = min(len, end + 1 - start);

		down_read(&fs_info->commit_root_sem);
		if (start < cache->last_byte_to_unpin && return_free_space) {
			u64 add_len = min(len, cache->last_byte_to_unpin - start);

			btrfs_add_free_space(cache, start, add_len);
		}
		up_read(&fs_info->commit_root_sem);

		start += len;
		total_unpinned += len;
		space_info = cache->space_info;

		/*
		 * If this space cluster has been marked as fragmented and
		 * we've unpinned enough in this block group to potentially
		 * allow a cluster to be created inside of it, go ahead and
		 * clear the fragmented flag.
		 */
		if (cluster && cluster->fragmented &&
		    total_unpinned > empty_cluster) {
			spin_lock(&cluster->lock);
			cluster->fragmented = 0;
			spin_unlock(&cluster->lock);
		}

		spin_lock(&space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
		space_info->max_extent_size = 0;
		if (cache->ro) {
			space_info->bytes_readonly += len;
			readonly = true;
		} else if (btrfs_is_zoned(fs_info)) {
			/* Need zone reset before reusing in a zoned block group. */
			space_info->bytes_zone_unusable += len;
			readonly = true;
		}
		spin_unlock(&cache->lock);
		if (!readonly && return_free_space &&
		    global_rsv->space_info == space_info) {
			/* Refill the global reserve first. */
			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				u64 to_add = min(len, global_rsv->size -
						      global_rsv->reserved);

				global_rsv->reserved += to_add;
				btrfs_space_info_update_bytes_may_use(fs_info,
						space_info, to_add);
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
				len -= to_add;
			}
			spin_unlock(&global_rsv->lock);
		}
		/* Add what's left to any tickets we may have. */
		if (!readonly && return_free_space && len)
			btrfs_try_granting_tickets(fs_info, space_info);
		spin_unlock(&space_info->lock);
	}

	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}
2782
/*
 * Called at transaction commit time to unpin all extents pinned during the
 * transaction, optionally discarding them, and to discard the space of block
 * groups that were deleted in this transaction.
 *
 * Always returns 0; discard failures are only logged.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *block_group, *tmp;
	struct list_head *deleted_bgs;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	unpin = &trans->transaction->pinned_extents;

	/* Walk all EXTENT_DIRTY ranges; skip the loop if trans aborted. */
	while (!TRANS_ABORTED(trans)) {
		struct extent_state *cached_state = NULL;

		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, &cached_state);
		if (ret) {
			/* No more pinned ranges. */
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
			ret = btrfs_discard_extent(fs_info, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end, &cached_state);
		unpin_extent_range(fs_info, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		free_extent_state(cached_state);
		cond_resched();
	}

	if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_calc_delay(&fs_info->discard_ctl);
		btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We do
	 * need to clean up the block groups in case of a transaction abort,
	 * which is why we run this even when TRANS_ABORTED().
	 */
	deleted_bgs = &trans->transaction->deleted_bgs;
	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
		u64 trimmed = 0;

		ret = -EROFS;
		if (!TRANS_ABORTED(trans))
			ret = btrfs_discard_extent(fs_info,
						   block_group->start,
						   block_group->length,
						   &trimmed);

		list_del_init(&block_group->bg_list);
		btrfs_unfreeze_block_group(block_group);
		btrfs_put_block_group(block_group);

		if (ret) {
			const char *errstr = btrfs_decode_error(ret);
			btrfs_warn(fs_info,
			   "discard failed while removing blockgroup: errno=%d %s",
				   ret, errstr);
		}
	}

	return 0;
}
2852
/*
 * Do the bookkeeping that follows deleting an extent item: drop csums (for
 * data extents), add the range back to the free space tree, and update the
 * block group counters.
 *
 * Returns 0 on success; on error the transaction is aborted and the error
 * returned.
 */
static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
				     u64 bytenr, u64 num_bytes, bool is_data)
{
	int ret;

	if (is_data) {
		struct btrfs_root *csum_root;

		/* Metadata has no checksums; only data extents carry csums. */
		csum_root = btrfs_csum_root(trans->fs_info, bytenr);
		ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
	}

	ret = add_to_free_space_tree(trans, bytenr, num_bytes);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
/*
 * Drop @refs_to_drop references from the extent described by @node.
 *
 * Finds the backref (inline or keyed) for the given owner/parent, removes
 * it, and if that was the last reference deletes the EXTENT_ITEM or
 * METADATA_ITEM itself and releases the space.  Detected metadata
 * inconsistencies return -EUCLEAN and abort the transaction; err_dump
 * additionally prints the leaf on debug builds.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);

	extent_root = btrfs_extent_root(info, bytenr);
	ASSERT(extent_root);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;

	/* Tree blocks can only ever drop a single reference at a time. */
	if (!is_data && refs_to_drop != 1) {
		btrfs_crit(info,
"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
			   node->bytenr, refs_to_drop);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	if (is_data)
		skinny_metadata = false;

	ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
				    parent, root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * Either the inline backref or the SHARED_DATA_REF /
		 * SHARED_BLOCK_REF is found.
		 *
		 * Here is a quick path to locate EXTENT/METADATA_ITEM: it is
		 * usually only a few slots before the backref item, so scan
		 * backwards from the current slot.
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}

			/* Quick path didn't find the EXTENT/METADATA_ITEM */
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}

		if (!found_extent) {
			/* An inline ref must live in the extent item itself. */
			if (iref) {
				btrfs_crit(info,
"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
				btrfs_abort_transaction(trans, -EUCLEAN);
				goto err_dump;
			}
			/* Must be a keyed SHARED_* item; drop the backref first. */
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop, is_data);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			btrfs_release_path(path);

			/* Slow path: search for the EXTENT/METADATA_ITEM by key. */
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item in the
				 * previous slot.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* Retry the search with the non-skinny key. */
				skinny_metadata = false;
				key.objectid = bytenr;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				btrfs_err(info,
					  "umm, got %d back from search, was looking for %llu",
					  ret, bytenr);
				if (ret > 0)
					btrfs_print_leaf(path->nodes[0]);
			}
			if (ret < 0) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (WARN_ON(ret == -ENOENT)) {
		btrfs_print_leaf(path->nodes[0]);
		btrfs_err(info,
			"unable to find ref byte nr %llu parent %llu root %llu  owner %llu offset %llu",
			bytenr, parent, root_objectid, owner_objectid,
			owner_offset);
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, extent_slot);
	if (unlikely(item_size < sizeof(*ei))) {
		/* Pre-v0 extent item format is no longer supported. */
		ret = -EINVAL;
		btrfs_print_v0_err(info);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* Non-skinny tree block: a tree_block_info follows the item. */
		struct btrfs_tree_block_info *bi;
		if (item_size < sizeof(*ei) + sizeof(*bi)) {
			btrfs_crit(info,
"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
				   key.objectid, key.type, key.offset,
				   owner_objectid, item_size,
				   sizeof(*ei) + sizeof(*bi));
			btrfs_abort_transaction(trans, -EUCLEAN);
			goto err_dump;
		}
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		btrfs_crit(info,
		"trying to drop %d refs but we only have %llu for bytenr %llu",
			  refs_to_drop, refs, bytenr);
		btrfs_abort_transaction(trans, -EUCLEAN);
		goto err_dump;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of an inline back ref, the reference count will
		 * be updated by remove_extent_backref().
		 */
		if (iref) {
			if (!found_extent) {
				btrfs_crit(info,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
				btrfs_abort_transaction(trans, -EUCLEAN);
				goto err_dump;
			}
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop, is_data);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
		}
	} else {
		/* In this branch refs == 1; the extent item goes away. */
		if (found_extent) {
			if (is_data && refs_to_drop !=
			    extent_data_ref_count(path, iref)) {
				btrfs_crit(info,
		"invalid refs_to_drop, current refs %u refs_to_drop %u",
					   extent_data_ref_count(path, iref),
					   refs_to_drop);
				btrfs_abort_transaction(trans, -EUCLEAN);
				goto err_dump;
			}
			if (iref) {
				if (path->slots[0] != extent_slot) {
					btrfs_crit(info,
"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
						   key.objectid, key.type,
						   key.offset);
					btrfs_abort_transaction(trans, -EUCLEAN);
					goto err_dump;
				}
			} else {
				/*
				 * No inline ref, we must be at a SHARED_* item,
				 * and with a single ref the layout must be:
				 * |	extent_slot	  ||extent_slot + 1|
				 * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
				 * so delete both items in one go.
				 */
				if (path->slots[0] != extent_slot + 1) {
					btrfs_crit(info,
	"invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
					btrfs_abort_transaction(trans, -EUCLEAN);
					goto err_dump;
				}
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);

		ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
	}
	btrfs_release_path(path);

out:
	btrfs_free_path(path);
	return ret;
err_dump:
	/*
	 * Leaf dump can take up a lot of log buffer, so we only do a full
	 * leaf dump on debug builds.
	 */
	if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
		btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
			   path->slots[0], extent_slot);
		btrfs_print_leaf(path->nodes[0]);
	}

	btrfs_free_path(path);
	return -EUCLEAN;
}
3228
3229
3230
3231
3232
3233
3234
/*
 * When we free a block it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree
 * for a given extent, and if there are no other delayed refs to be processed,
 * removes the head from the tree.
 *
 * Returns 1 if the head was removed and its reserved space must be inserted
 * by the caller, 0 otherwise.
 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head)
		goto out_delayed_unlock;

	spin_lock(&head->lock);
	/* Still pending refs for this extent: leave the head alone. */
	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		goto out;

	if (cleanup_extent_op(head) != NULL)
		goto out;

	/*
	 * Waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway.
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	btrfs_delete_ref_head(delayed_refs, head);
	head->processing = 0;

	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	BUG_ON(head->extent_op);
	if (head->must_insert_reserved)
		ret = 1;

	btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
	mutex_unlock(&head->mutex);
	btrfs_put_delayed_ref_head(head);
	return ret;
out:
	spin_unlock(&head->lock);

out_delayed_unlock:
	spin_unlock(&delayed_refs->lock);
	return 0;
}
3283
/*
 * Free the tree block @buf: queue a DROP delayed ref for it and, if this is
 * the last reference and the block was allocated in the current transaction,
 * reclaim its space immediately (or pin it when it cannot be safely reused).
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   u64 root_id,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_ref generic_ref = { 0 };
	int ret;

	btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
			       buf->start, buf->len, parent);
	btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
			    root_id, 0, false);

	/* Log tree blocks are not refcounted; they are pinned elsewhere. */
	if (root_id != BTRFS_TREE_LOG_OBJECTID) {
		btrfs_ref_tree_mod(fs_info, &generic_ref);
		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
		BUG_ON(ret);
	}

	if (last_ref && btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group *cache;
		bool must_pin = false;

		if (root_id != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, buf->start);
			if (!ret) {
				btrfs_redirty_list_add(trans->transaction, buf);
				goto out;
			}
		}

		cache = btrfs_lookup_block_group(fs_info, buf->start);

		/* Already written this transaction: must stay pinned. */
		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(trans, cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		/*
		 * If this is a leaf and there are tree mod log users, we may
		 * have recorded mod log operations that point to this leaf.
		 * So we must make sure no one reuses this leaf's extent before
		 * mod log operations are applied to a node, otherwise after
		 * rewinding a node using the mod log operations we get an
		 * inconsistent btree, as the leaf's extent may now be used as
		 * a node or leaf for another different btree.
		 * We are safe from races here because at this point no other
		 * node or root points to this extent buffer, so if after this
		 * check a new tree mod log user joins, it will not be able to
		 * find a node pointing to this leaf and record operations that
		 * point to this leaf.
		 */
		if (btrfs_header_level(buf) == 0 &&
		    test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
			must_pin = true;

		if (must_pin || btrfs_is_zoned(fs_info)) {
			btrfs_redirty_list_add(trans->transaction, buf);
			pin_down_extent(trans, cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		/* Safe to reuse right away: return the space directly. */
		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_free_reserved_bytes(cache, buf->len, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
	}
out:
	if (last_ref) {
		/*
		 * Deleting the buffer, clear the corrupt flag since it
		 * doesn't matter anymore.
		 */
		clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
	}
}
3365
3366
3367int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3368{
3369 struct btrfs_fs_info *fs_info = trans->fs_info;
3370 int ret;
3371
3372 if (btrfs_is_testing(fs_info))
3373 return 0;
3374
3375
3376
3377
3378
3379 if ((ref->type == BTRFS_REF_METADATA &&
3380 ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3381 (ref->type == BTRFS_REF_DATA &&
3382 ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
3383
3384 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
3385 ret = 0;
3386 } else if (ref->type == BTRFS_REF_METADATA) {
3387 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
3388 } else {
3389 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
3390 }
3391
3392 if (!((ref->type == BTRFS_REF_METADATA &&
3393 ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3394 (ref->type == BTRFS_REF_DATA &&
3395 ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
3396 btrfs_ref_tree_mod(fs_info, ref);
3397
3398 return ret;
3399}
3400
/*
 * Escalating passes of the find_free_extent() search loop; each stage tries
 * progressively harder (and more expensive) ways to satisfy the allocation.
 */
enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT,	/* Use only already-cached block groups. */
	LOOP_CACHING_WAIT,	/* Wait for block group caching to progress. */
	LOOP_ALLOC_CHUNK,	/* Try allocating a brand new chunk. */
	LOOP_NO_EMPTY_SIZE,	/* Last resort: drop the empty-size padding. */
};
3407
/*
 * For delalloc allocations, take the block group's data_rwsem read-side to
 * keep the group from being switched out from under us; no-op otherwise.
 */
static inline void
btrfs_lock_block_group(struct btrfs_block_group *cache,
		       int delalloc)
{
	if (delalloc)
		down_read(&cache->data_rwsem);
}
3415
/*
 * Take a reference on @cache and, for delalloc allocations, its data_rwsem.
 * Paired with btrfs_release_block_group().
 */
static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
					  int delalloc)
{
	btrfs_get_block_group(cache);
	if (delalloc)
		down_read(&cache->data_rwsem);
}
3423
/*
 * Lock the cluster's refill_lock and return the block group currently
 * backing @cluster (with a reference and, for delalloc, its data_rwsem
 * held), or NULL if the cluster has no block group.
 *
 * When the cluster's group is not @block_group and delalloc needs its
 * data_rwsem, we may have to drop the refill_lock to block on the rwsem;
 * the loop re-checks that the cluster still points at the same group
 * afterwards and retries if it changed.  Always returns with
 * cluster->refill_lock held.
 */
static struct btrfs_block_group *btrfs_lock_cluster(
		   struct btrfs_block_group *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
	__acquires(&cluster->refill_lock)
{
	struct btrfs_block_group *used_bg = NULL;

	spin_lock(&cluster->refill_lock);
	while (1) {
		used_bg = cluster->block_group;
		if (!used_bg)
			return NULL;

		/* Caller already holds the needed locks on its own group. */
		if (used_bg == block_group)
			return used_bg;

		btrfs_get_block_group(used_bg);

		if (!delalloc)
			return used_bg;

		if (down_read_trylock(&used_bg->data_rwsem))
			return used_bg;

		/* Must sleep for the rwsem: drop the spinlock first. */
		spin_unlock(&cluster->refill_lock);

		/* We should only have one-level nested. */
		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);

		spin_lock(&cluster->refill_lock);
		if (used_bg == cluster->block_group)
			return used_bg;

		/* Cluster moved to another group while we slept; retry. */
		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}
}
3462
/*
 * Drop the reference (and, for delalloc, the data_rwsem) taken by
 * btrfs_grab_block_group()/btrfs_lock_block_group().
 */
static inline void
btrfs_release_block_group(struct btrfs_block_group *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}
3471
/* How find_free_extent() picks space inside a block group. */
enum btrfs_extent_allocation_policy {
	BTRFS_EXTENT_ALLOC_CLUSTERED,	/* Regular clustered/unclustered allocator. */
	BTRFS_EXTENT_ALLOC_ZONED,	/* Sequential allocator for zoned devices. */
};
3476
3477
3478
3479
3480
/*
 * Bundled state for the find_free_extent() search, shared between the main
 * loop and the policy-specific helpers.
 */
struct find_free_extent_ctl {
	/* Basic allocation info. */
	u64 ram_bytes;
	u64 num_bytes;
	u64 min_alloc_size;
	u64 empty_size;
	u64 flags;
	int delalloc;

	/* Where to start the search inside a block group. */
	u64 search_start;

	/* For clustered allocation. */
	u64 empty_cluster;
	struct btrfs_free_cluster *last_ptr;
	bool use_cluster;

	/* Whether we saw a block group that is still caching. */
	bool have_caching_bg;
	bool orig_have_caching_bg;

	/* Allocation is for a tree-log tree block. */
	bool for_treelog;

	/* Allocation is for data relocation. */
	bool for_data_reloc;

	/* RAID index (to start at), derived from @flags. */
	int index;

	/*
	 * Current loop number (see enum btrfs_loop_type); the search gets
	 * more aggressive each pass.
	 */
	int loop;

	/*
	 * Whether we're refilling a cluster; if true we already waited for
	 * caching progress on this block group once, don't wait again.
	 */
	bool retry_clustered;

	/*
	 * Whether we're updating free space cache; if true we already waited
	 * on this block group once for unclustered allocation.
	 */
	bool retry_unclustered;

	/* If current block group is cached. */
	int cached;

	/* Largest contiguous free extent seen, reported on ENOSPC. */
	u64 max_extent_size;

	/* Total free space of the searched block groups. */
	u64 total_free_space;

	/* Offset within the block group where the extent was found. */
	u64 found_offset;

	/* Hint where to start looking for an empty space. */
	u64 hint_byte;

	/* Allocation policy (clustered vs. zoned). */
	enum btrfs_extent_allocation_policy policy;
};
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
/*
 * Try to allocate from the free space cluster, refilling it from @bg if
 * needed.
 *
 * Return values:
 *   0       - found the space (ffe_ctl->found_offset set; *cluster_bg_ret
 *             points at the cluster's group when it differs from @bg)
 *   -EAGAIN - caller should retry this block group (caching in progress)
 *   -ENOENT - caller should move on to the next block group
 *   1       - fall back to unclustered allocation in @bg
 */
static int find_free_extent_clustered(struct btrfs_block_group *bg,
				      struct find_free_extent_ctl *ffe_ctl,
				      struct btrfs_block_group **cluster_bg_ret)
{
	struct btrfs_block_group *cluster_bg;
	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
	u64 aligned_cluster;
	u64 offset;
	int ret;

	cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
	if (!cluster_bg)
		goto refill_cluster;
	if (cluster_bg != bg && (cluster_bg->ro ||
	    !block_group_bits(cluster_bg, ffe_ctl->flags)))
		goto release_cluster;

	offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
			ffe_ctl->num_bytes, cluster_bg->start,
			&ffe_ctl->max_extent_size);
	if (offset) {
		/* We have a block, we're done. */
		spin_unlock(&last_ptr->refill_lock);
		trace_btrfs_reserve_extent_cluster(cluster_bg,
				ffe_ctl->search_start, ffe_ctl->num_bytes);
		*cluster_bg_ret = cluster_bg;
		ffe_ctl->found_offset = offset;
		return 0;
	}
	WARN_ON(last_ptr->block_group != cluster_bg);

release_cluster:
	/*
	 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so
	 * lets just skip it and let the allocator find whatever block it can
	 * find.  If we reach this point, we will have tried the cluster
	 * allocator plenty of times and not have found anything, so we are
	 * likely way too fragmented for the clustering stuff to find
	 * anything.
	 *
	 * However, if the cluster is taken from the current block group,
	 * release the cluster first, so that we stand a better chance of
	 * succeeding in the unclustered allocation.
	 */
	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
		spin_unlock(&last_ptr->refill_lock);
		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
		return -ENOENT;
	}

	/* This cluster didn't work out, free it and start over. */
	btrfs_return_cluster_to_free_space(NULL, last_ptr);

	if (cluster_bg != bg)
		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);

refill_cluster:
	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
		spin_unlock(&last_ptr->refill_lock);
		return -ENOENT;
	}

	aligned_cluster = max_t(u64,
			ffe_ctl->empty_cluster + ffe_ctl->empty_size,
			bg->full_stripe_len);
	ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
			ffe_ctl->num_bytes, aligned_cluster);
	if (ret == 0) {
		/* Now pull our allocation out of this cluster. */
		offset = btrfs_alloc_from_cluster(bg, last_ptr,
				ffe_ctl->num_bytes, ffe_ctl->search_start,
				&ffe_ctl->max_extent_size);
		if (offset) {
			/* We found one, proceed. */
			spin_unlock(&last_ptr->refill_lock);
			trace_btrfs_reserve_extent_cluster(bg,
					ffe_ctl->search_start,
					ffe_ctl->num_bytes);
			ffe_ctl->found_offset = offset;
			return 0;
		}
	} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
		   !ffe_ctl->retry_clustered) {
		spin_unlock(&last_ptr->refill_lock);

		/* Wait for more of the group to cache, then retry once. */
		ffe_ctl->retry_clustered = true;
		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
				ffe_ctl->empty_cluster + ffe_ctl->empty_size);
		return -EAGAIN;
	}
	/*
	 * At this point we either didn't find a cluster or we weren't able to
	 * allocate a block from our cluster.  Free the cluster we've been
	 * trying to use, and go to the next block group.
	 */
	btrfs_return_cluster_to_free_space(NULL, last_ptr);
	spin_unlock(&last_ptr->refill_lock);
	return 1;
}
3653
3654
3655
3656
3657
3658
/*
 * Allocate directly from the free space cache of @bg (no cluster).
 *
 * Return 0 when we found the space (ffe_ctl->found_offset set), -EAGAIN to
 * retry this block group after caching progresses, or 1 to move on to the
 * next block group.
 */
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
					struct find_free_extent_ctl *ffe_ctl)
{
	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
	u64 offset;

	/*
	 * We are doing an unclustered allocation, set the fragmented flag so
	 * we don't bother trying to set up a cluster again until we get more
	 * space.
	 */
	if (unlikely(last_ptr)) {
		spin_lock(&last_ptr->lock);
		last_ptr->fragmented = 1;
		spin_unlock(&last_ptr->lock);
	}
	if (ffe_ctl->cached) {
		struct btrfs_free_space_ctl *free_space_ctl;

		/* Cheap early-out: not enough total free space in this group. */
		free_space_ctl = bg->free_space_ctl;
		spin_lock(&free_space_ctl->tree_lock);
		if (free_space_ctl->free_space <
		    ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
		    ffe_ctl->empty_size) {
			ffe_ctl->total_free_space = max_t(u64,
					ffe_ctl->total_free_space,
					free_space_ctl->free_space);
			spin_unlock(&free_space_ctl->tree_lock);
			return 1;
		}
		spin_unlock(&free_space_ctl->tree_lock);
	}

	offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
			ffe_ctl->num_bytes, ffe_ctl->empty_size,
			&ffe_ctl->max_extent_size);

	/*
	 * If we didn't find a chunk, and we haven't failed on this block
	 * group before, and this block group is in the middle of caching and
	 * we are ok with waiting, then go ahead and wait for progress to be
	 * made, and set @retry_unclustered to true.
	 *
	 * If @retry_unclustered is true then we've already waited on this
	 * block group once and should move on to the next block group.
	 */
	if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
	    ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
						      ffe_ctl->empty_size);
		ffe_ctl->retry_unclustered = true;
		return -EAGAIN;
	} else if (!offset) {
		return 1;
	}
	ffe_ctl->found_offset = offset;
	return 0;
}
3717
/*
 * Clustered allocation policy: try the cluster allocator first and fall back
 * to unclustered allocation when it returns 1 ("try without cluster").
 * Return values follow find_free_extent_clustered()/_unclustered().
 */
static int do_allocation_clustered(struct btrfs_block_group *block_group,
				   struct find_free_extent_ctl *ffe_ctl,
				   struct btrfs_block_group **bg_ret)
{
	int ret;

	/* We want to try and use the cluster allocator, so lets look there. */
	if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
		ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
		if (ret >= 0 || ret == -EAGAIN)
			return ret;
		/* ret == -ENOENT case falls through to the unclustered path. */
	}

	return find_free_extent_unclustered(block_group, ffe_ctl);
}
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
/*
 * Simple sequential allocator for zoned filesystems: hand out space at the
 * block group's write pointer (alloc_offset).
 *
 * Tree-log blocks and data-relocation extents each get a dedicated block
 * group (fs_info->treelog_bg / fs_info->data_reloc_bg) so their writes are
 * not interleaved with other writes in the same zone.
 *
 * Return 0 when an extent was reserved (ffe_ctl->found_offset/search_start
 * set), or 1 to try the next block group.
 */
static int do_allocation_zoned(struct btrfs_block_group *block_group,
			       struct find_free_extent_ctl *ffe_ctl,
			       struct btrfs_block_group **bg_ret)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_space_info *space_info = block_group->space_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	u64 start = block_group->start;
	u64 num_bytes = ffe_ctl->num_bytes;
	u64 avail;
	u64 bytenr = block_group->start;
	u64 log_bytenr;
	u64 data_reloc_bytenr;
	int ret = 0;
	bool skip = false;

	ASSERT(btrfs_is_zoned(block_group->fs_info));

	/*
	 * Do not allow non-tree-log blocks in the dedicated tree-log block
	 * group, and vice versa.
	 */
	spin_lock(&fs_info->treelog_bg_lock);
	log_bytenr = fs_info->treelog_bg;
	if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
			   (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
		skip = true;
	spin_unlock(&fs_info->treelog_bg_lock);
	if (skip)
		return 1;

	/*
	 * Do not allow non-relocation blocks in the dedicated relocation
	 * block group, and vice versa.
	 */
	spin_lock(&fs_info->relocation_bg_lock);
	data_reloc_bytenr = fs_info->data_reloc_bg;
	if (data_reloc_bytenr &&
	    ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
	     (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
		skip = true;
	spin_unlock(&fs_info->relocation_bg_lock);
	if (skip)
		return 1;

	/* Check RO and no space case before trying to activate it. */
	spin_lock(&block_group->lock);
	if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
		ret = 1;
		/*
		 * May need to clear fs_info->{treelog,data_reloc}_bg.
		 * Return the error after taking the locks.
		 */
	}
	spin_unlock(&block_group->lock);

	if (!ret && !btrfs_zone_activate(block_group)) {
		ret = 1;
		/*
		 * May need to clear fs_info->{treelog,data_reloc}_bg.
		 * Return the error after taking the locks.
		 */
	}

	/* Lock order: space_info, block group, treelog, relocation. */
	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	spin_lock(&fs_info->treelog_bg_lock);
	spin_lock(&fs_info->relocation_bg_lock);

	if (ret)
		goto out;

	ASSERT(!ffe_ctl->for_treelog ||
	       block_group->start == fs_info->treelog_bg ||
	       fs_info->treelog_bg == 0);
	ASSERT(!ffe_ctl->for_data_reloc ||
	       block_group->start == fs_info->data_reloc_bg ||
	       fs_info->data_reloc_bg == 0);

	if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
		ret = 1;
		goto out;
	}

	/*
	 * Do not allow currently used block group to be the tree-log
	 * dedicated block group.
	 */
	if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
	    (block_group->used || block_group->reserved)) {
		ret = 1;
		goto out;
	}

	/*
	 * Do not allow currently used block group to be the data relocation
	 * dedicated block group.
	 */
	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
	    (block_group->used || block_group->reserved)) {
		ret = 1;
		goto out;
	}

	WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
	avail = block_group->zone_capacity - block_group->alloc_offset;
	if (avail < num_bytes) {
		if (ffe_ctl->max_extent_size < avail) {
			/*
			 * With sequential allocation, free space is always
			 * contiguous.
			 */
			ffe_ctl->max_extent_size = avail;
			ffe_ctl->total_free_space = avail;
		}
		ret = 1;
		goto out;
	}

	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
		fs_info->treelog_bg = block_group->start;

	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
		fs_info->data_reloc_bg = block_group->start;

	ffe_ctl->found_offset = start + block_group->alloc_offset;
	block_group->alloc_offset += num_bytes;
	spin_lock(&ctl->tree_lock);
	ctl->free_space -= num_bytes;
	spin_unlock(&ctl->tree_lock);

	/*
	 * We do not check if found_offset is aligned to stripesize. The
	 * address is anyway rewritten when using zone append writing.
	 */
	ffe_ctl->search_start = ffe_ctl->found_offset;

out:
	if (ret && ffe_ctl->for_treelog)
		fs_info->treelog_bg = 0;
	if (ret && ffe_ctl->for_data_reloc &&
	    fs_info->data_reloc_bg == block_group->start) {
		/*
		 * Do not allow further allocations from this block group.
		 * Compared to increasing the ->ro, setting the
		 * ->zoned_data_reloc_ongoing flag still allows nocow
		 * writers to come in.  See btrfs_inc_nocow_writers().
		 *
		 * We need to disable an allocation to avoid an allocation of
		 * regular (non-relocation data) extent.  With a mix of
		 * relocation extents and regular extents, we can dispatch
		 * WRITE commands (for relocation extents) and ZONE APPEND
		 * commands (for regular extents) at the same time to the same
		 * zone, which easily breaks the write pointer.
		 */
		block_group->zoned_data_reloc_ongoing = 1;
		fs_info->data_reloc_bg = 0;
	}
	spin_unlock(&fs_info->relocation_bg_lock);
	spin_unlock(&fs_info->treelog_bg_lock);
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);
	return ret;
}
3921
3922static int do_allocation(struct btrfs_block_group *block_group,
3923 struct find_free_extent_ctl *ffe_ctl,
3924 struct btrfs_block_group **bg_ret)
3925{
3926 switch (ffe_ctl->policy) {
3927 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3928 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
3929 case BTRFS_EXTENT_ALLOC_ZONED:
3930 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
3931 default:
3932 BUG();
3933 }
3934}
3935
/*
 * Done with @block_group for this search pass: reset per-group retry state
 * (policy dependent) and drop the locks/reference taken when it was grabbed.
 */
static void release_block_group(struct btrfs_block_group *block_group,
				struct find_free_extent_ctl *ffe_ctl,
				int delalloc)
{
	switch (ffe_ctl->policy) {
	case BTRFS_EXTENT_ALLOC_CLUSTERED:
		ffe_ctl->retry_clustered = false;
		ffe_ctl->retry_unclustered = false;
		break;
	case BTRFS_EXTENT_ALLOC_ZONED:
		/* Nothing to do. */
		break;
	default:
		BUG();
	}

	/* The group's RAID profile must match the index we searched under. */
	BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
	       ffe_ctl->index);
	btrfs_release_block_group(block_group, delalloc);
}
3956
3957static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
3958 struct btrfs_key *ins)
3959{
3960 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3961
3962 if (!ffe_ctl->use_cluster && last_ptr) {
3963 spin_lock(&last_ptr->lock);
3964 last_ptr->window_start = ins->objectid;
3965 spin_unlock(&last_ptr->lock);
3966 }
3967}
3968
3969static void found_extent(struct find_free_extent_ctl *ffe_ctl,
3970 struct btrfs_key *ins)
3971{
3972 switch (ffe_ctl->policy) {
3973 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3974 found_extent_clustered(ffe_ctl, ins);
3975 break;
3976 case BTRFS_EXTENT_ALLOC_ZONED:
3977
3978 break;
3979 default:
3980 BUG();
3981 }
3982}
3983
3984static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
3985 struct find_free_extent_ctl *ffe_ctl)
3986{
3987 switch (ffe_ctl->policy) {
3988 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3989 return true;
3990 case BTRFS_EXTENT_ALLOC_ZONED:
3991
3992
3993
3994
3995
3996
3997 if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
3998 !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
3999 return false;
4000 return true;
4001 default:
4002 BUG();
4003 }
4004}
4005
4006static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
4007{
4008 switch (ffe_ctl->policy) {
4009 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4010
4011
4012
4013
4014 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
4015 return 0;
4016 case BTRFS_EXTENT_ALLOC_ZONED:
4017
4018 return -ENOSPC;
4019 default:
4020 BUG();
4021 }
4022}
4023
4024
4025
4026
4027
4028
/*
 * Decide how find_free_extent() proceeds after one full pass over the block
 * groups of the current raid index.
 *
 * Return 0 when an extent was found (ins->objectid is set), 1 when the
 * caller should loop again (possibly at the next raid index or next loop
 * stage), or a negative errno (typically -ENOSPC) to give up.
 */
static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
					struct btrfs_key *ins,
					struct find_free_extent_ctl *ffe_ctl,
					bool full_search)
{
	struct btrfs_root *root = fs_info->chunk_root;
	int ret;

	/* Remember if we ever saw an uncached block group in the first stage. */
	if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
	    ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
		ffe_ctl->orig_have_caching_bg = true;

	if (ins->objectid) {
		found_extent(ffe_ctl, ins);
		return 0;
	}

	/* There are still caching block groups worth waiting on; retry. */
	if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
		return 1;

	/* Try the next raid index before escalating the loop stage. */
	ffe_ctl->index++;
	if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
		return 1;

	/*
	 * All raid indexes exhausted; escalate through the loop stages:
	 * LOOP_CACHING_NOWAIT -> LOOP_CACHING_WAIT (or straight to
	 * LOOP_ALLOC_CHUNK) -> LOOP_ALLOC_CHUNK -> LOOP_NO_EMPTY_SIZE,
	 * restarting the raid index scan each time.
	 */
	if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
		ffe_ctl->index = 0;
		if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
			/*
			 * Skip the LOOP_CACHING_WAIT stage only when we did a
			 * full search and never saw an uncached block group.
			 */
			if (ffe_ctl->orig_have_caching_bg || !full_search)
				ffe_ctl->loop = LOOP_CACHING_WAIT;
			else
				ffe_ctl->loop = LOOP_ALLOC_CHUNK;
		} else {
			ffe_ctl->loop++;
		}

		if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
			struct btrfs_trans_handle *trans;
			int exist = 0;

			/* Bail early if the policy says a new chunk can't help. */
			if (!can_allocate_chunk(fs_info, ffe_ctl))
				return -ENOSPC;

			/* Reuse a running transaction if we are inside one. */
			trans = current->journal_info;
			if (trans)
				exist = 1;
			else
				trans = btrfs_join_transaction(root);

			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				return ret;
			}

			ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
						CHUNK_ALLOC_FORCE_FOR_EXTENT);

			/* ENOSPC here may still allow a relaxed retry. */
			if (ret == -ENOSPC)
				ret = chunk_allocation_failed(ffe_ctl);
			else if (ret < 0)
				btrfs_abort_transaction(trans, ret);
			else
				ret = 0;
			if (!exist)
				btrfs_end_transaction(trans);
			if (ret)
				return ret;
		}

		if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
			/* This last-resort stage only exists for clustered allocation. */
			if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
				return -ENOSPC;

			/*
			 * Don't loop again if we already have no empty_size
			 * and no empty_cluster - dropping them is the only
			 * thing this stage changes.
			 */
			if (ffe_ctl->empty_size == 0 &&
			    ffe_ctl->empty_cluster == 0)
				return -ENOSPC;
			ffe_ctl->empty_size = 0;
			ffe_ctl->empty_cluster = 0;
		}
		return 1;
	}
	return -ENOSPC;
}
4130
/*
 * Prepare a clustered allocation attempt: fail fast using the space_info's
 * cached max_extent_size and seed the search hint from the free cluster,
 * if one exists.  Returns 0 or -ENOSPC.
 */
static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
					struct find_free_extent_ctl *ffe_ctl,
					struct btrfs_space_info *space_info,
					struct btrfs_key *ins)
{
	/*
	 * If a previous search recorded the largest contiguous hole in
	 * max_extent_size and our request is bigger than that, there is no
	 * point searching: report ENOSPC right away (with ins->offset set to
	 * the largest hole so the caller can shrink its request).  The field
	 * is checked unlocked first and re-checked under the lock.
	 */
	if (space_info->max_extent_size) {
		spin_lock(&space_info->lock);
		if (space_info->max_extent_size &&
		    ffe_ctl->num_bytes > space_info->max_extent_size) {
			ins->offset = space_info->max_extent_size;
			spin_unlock(&space_info->lock);
			return -ENOSPC;
		} else if (space_info->max_extent_size) {
			/* Space is tight, don't bother with cluster allocation. */
			ffe_ctl->use_cluster = false;
		}
		spin_unlock(&space_info->lock);
	}

	ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
					       &ffe_ctl->empty_cluster);
	if (ffe_ctl->last_ptr) {
		struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;

		spin_lock(&last_ptr->lock);
		if (last_ptr->block_group)
			ffe_ctl->hint_byte = last_ptr->window_start;
		if (last_ptr->fragmented) {
			/*
			 * The cluster is marked fragmented, so skip cluster
			 * allocation, but still use window_start as the hint
			 * to remember where we last found an allocation.
			 */
			ffe_ctl->hint_byte = last_ptr->window_start;
			ffe_ctl->use_cluster = false;
		}
		spin_unlock(&last_ptr->lock);
	}

	return 0;
}
4181
4182static int prepare_allocation(struct btrfs_fs_info *fs_info,
4183 struct find_free_extent_ctl *ffe_ctl,
4184 struct btrfs_space_info *space_info,
4185 struct btrfs_key *ins)
4186{
4187 switch (ffe_ctl->policy) {
4188 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4189 return prepare_allocation_clustered(fs_info, ffe_ctl,
4190 space_info, ins);
4191 case BTRFS_EXTENT_ALLOC_ZONED:
4192 if (ffe_ctl->for_treelog) {
4193 spin_lock(&fs_info->treelog_bg_lock);
4194 if (fs_info->treelog_bg)
4195 ffe_ctl->hint_byte = fs_info->treelog_bg;
4196 spin_unlock(&fs_info->treelog_bg_lock);
4197 }
4198 if (ffe_ctl->for_data_reloc) {
4199 spin_lock(&fs_info->relocation_bg_lock);
4200 if (fs_info->data_reloc_bg)
4201 ffe_ctl->hint_byte = fs_info->data_reloc_bg;
4202 spin_unlock(&fs_info->relocation_bg_lock);
4203 }
4204 return 0;
4205 default:
4206 BUG();
4207 }
4208}
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
/*
 * Core of the extent allocator: walk the block groups of the matching
 * space_info, raid index by raid index, looking for a free hole of
 * ffe_ctl->num_bytes, optionally starting at ffe_ctl->hint_byte.
 *
 * On success returns 0 with ins->objectid = start of the reserved range and
 * ins->offset = its length.  On -ENOSPC, ins->offset is set to the largest
 * hole found so the caller can retry with a smaller size.  Loop staging and
 * retries are driven by find_free_extent_update_loop().
 */
static noinline int find_free_extent(struct btrfs_root *root,
				     struct btrfs_key *ins,
				     struct find_free_extent_ctl *ffe_ctl)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret = 0;
	int cache_block_group_error = 0;
	struct btrfs_block_group *block_group = NULL;
	struct btrfs_space_info *space_info;
	bool full_search = false;

	WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);

	/* Reset all search state; the ctl may be reused across retries. */
	ffe_ctl->search_start = 0;
	/* For clustered allocation */
	ffe_ctl->empty_cluster = 0;
	ffe_ctl->last_ptr = NULL;
	ffe_ctl->use_cluster = true;
	ffe_ctl->have_caching_bg = false;
	ffe_ctl->orig_have_caching_bg = false;
	ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
	ffe_ctl->loop = 0;
	/* For clustered allocation */
	ffe_ctl->retry_clustered = false;
	ffe_ctl->retry_unclustered = false;
	ffe_ctl->cached = 0;
	ffe_ctl->max_extent_size = 0;
	ffe_ctl->total_free_space = 0;
	ffe_ctl->found_offset = 0;
	ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;

	if (btrfs_is_zoned(fs_info))
		ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;

	ins->type = BTRFS_EXTENT_ITEM_KEY;
	ins->objectid = 0;
	ins->offset = 0;

	trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size,
			       ffe_ctl->flags);

	space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
	if (!space_info) {
		btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
		return -ENOSPC;
	}

	ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
	if (ret < 0)
		return ret;

	ffe_ctl->search_start = max(ffe_ctl->search_start,
				    first_logical_byte(fs_info));
	ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
	if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
		block_group = btrfs_lookup_block_group(fs_info,
						       ffe_ctl->search_start);
		/*
		 * Only use the hinted block group if it matches our
		 * allocation bits and caching has at least been started
		 * (cached != BTRFS_CACHE_NO).
		 */
		if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
		    block_group->cached != BTRFS_CACHE_NO) {
			down_read(&space_info->groups_sem);
			if (list_empty(&block_group->list) ||
			    block_group->ro) {
				/*
				 * The block group was removed from its list or
				 * made read-only; fall through to the normal
				 * search instead.
				 */
				btrfs_put_block_group(block_group);
				up_read(&space_info->groups_sem);
			} else {
				/* Jump straight into the search at this group. */
				ffe_ctl->index = btrfs_bg_flags_to_raid_index(
							block_group->flags);
				btrfs_lock_block_group(block_group,
						       ffe_ctl->delalloc);
				goto have_block_group;
			}
		} else if (block_group) {
			btrfs_put_block_group(block_group);
		}
	}
search:
	ffe_ctl->have_caching_bg = false;
	if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
	    ffe_ctl->index == 0)
		full_search = true;
	down_read(&space_info->groups_sem);
	list_for_each_entry(block_group,
			    &space_info->block_groups[ffe_ctl->index], list) {
		struct btrfs_block_group *bg_ret;

		/* Skip read-only block groups, clearing any stale hints. */
		if (unlikely(block_group->ro)) {
			if (ffe_ctl->for_treelog)
				btrfs_clear_treelog_bg(block_group);
			if (ffe_ctl->for_data_reloc)
				btrfs_clear_data_reloc_bg(block_group);
			continue;
		}

		btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
		ffe_ctl->search_start = block_group->start;

		/*
		 * The group's flags may not match ours, e.g. when we jumped
		 * here via the hint path above.
		 */
		if (!block_group_bits(block_group, ffe_ctl->flags)) {
			u64 extra = BTRFS_BLOCK_GROUP_DUP |
				BTRFS_BLOCK_GROUP_RAID1_MASK |
				BTRFS_BLOCK_GROUP_RAID56_MASK |
				BTRFS_BLOCK_GROUP_RAID10;

			/*
			 * If redundancy (extra copies/parity) was requested
			 * and this group doesn't provide it, skip the group
			 * via the normal loop path.
			 */
			if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
				goto loop;

			/*
			 * Flags differ in some other way; release the group
			 * directly and move on.
			 */
			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
			continue;
		}

have_block_group:
		ffe_ctl->cached = btrfs_block_group_done(block_group);
		if (unlikely(!ffe_ctl->cached)) {
			ffe_ctl->have_caching_bg = true;
			ret = btrfs_cache_block_group(block_group, 0);

			/*
			 * A caching error here need not be fatal - other
			 * groups may still work - but remember the first one
			 * so that, if nothing is found, we return the real
			 * cause instead of ENOSPC.
			 */
			if (ret < 0) {
				if (!cache_block_group_error)
					cache_block_group_error = ret;
				ret = 0;
				goto loop;
			}
			ret = 0;
		}

		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
			goto loop;

		bg_ret = NULL;
		ret = do_allocation(block_group, ffe_ctl, &bg_ret);
		if (ret == 0) {
			/* The policy may hand back a different group to use. */
			if (bg_ret && bg_ret != block_group) {
				btrfs_release_block_group(block_group,
							  ffe_ctl->delalloc);
				block_group = bg_ret;
			}
		} else if (ret == -EAGAIN) {
			goto have_block_group;
		} else if (ret > 0) {
			goto loop;
		}

		/* Align the found offset to the stripe size. */
		ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
						 fs_info->stripesize);

		/* The aligned allocation would spill past the group; move on. */
		if (ffe_ctl->search_start + ffe_ctl->num_bytes >
		    block_group->start + block_group->length) {
			btrfs_add_free_space_unused(block_group,
					    ffe_ctl->found_offset,
					    ffe_ctl->num_bytes);
			goto loop;
		}

		/* Return any alignment slack to the free space cache. */
		if (ffe_ctl->found_offset < ffe_ctl->search_start)
			btrfs_add_free_space_unused(block_group,
					ffe_ctl->found_offset,
					ffe_ctl->search_start - ffe_ctl->found_offset);

		ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
					       ffe_ctl->num_bytes,
					       ffe_ctl->delalloc);
		if (ret == -EAGAIN) {
			btrfs_add_free_space_unused(block_group,
					ffe_ctl->found_offset,
					ffe_ctl->num_bytes);
			goto loop;
		}
		btrfs_inc_block_group_reservations(block_group);

		/* Success: report the reserved range to the caller. */
		ins->objectid = ffe_ctl->search_start;
		ins->offset = ffe_ctl->num_bytes;

		trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start,
					   ffe_ctl->num_bytes);
		btrfs_release_block_group(block_group, ffe_ctl->delalloc);
		break;
loop:
		release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
		cond_resched();
	}
	up_read(&space_info->groups_sem);

	ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
	if (ret > 0)
		goto search;

	if (ret == -ENOSPC && !cache_block_group_error) {
		/*
		 * Cache the largest hole we saw in the space_info so future
		 * requests can fail fast, and report it via ins->offset.
		 * Fall back to total_free_space if no hole was recorded.
		 */
		if (!ffe_ctl->max_extent_size)
			ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
		spin_lock(&space_info->lock);
		space_info->max_extent_size = ffe_ctl->max_extent_size;
		spin_unlock(&space_info->lock);
		ins->offset = ffe_ctl->max_extent_size;
	} else if (ret == -ENOSPC) {
		/* Prefer the saved caching error over a bare ENOSPC. */
		ret = cache_block_group_error;
	}
	return ret;
}
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
/*
 * Entry point for reserving an extent of at least @min_alloc_size and at
 * most @num_bytes for @root.  On ENOSPC the requested size is repeatedly
 * halved (capped by the largest hole reported in ins->offset, rounded down
 * to the sector size, never below @min_alloc_size) until the minimum has
 * been tried.  On success ins->objectid/ins->offset describe the
 * reservation.
 */
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct find_free_extent_ctl ffe_ctl = {};
	bool final_tried = num_bytes == min_alloc_size;
	u64 flags;
	int ret;
	bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
	bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);

	flags = get_alloc_profile_by_root(root, is_data);
again:
	WARN_ON(num_bytes < fs_info->sectorsize);

	ffe_ctl.ram_bytes = ram_bytes;
	ffe_ctl.num_bytes = num_bytes;
	ffe_ctl.min_alloc_size = min_alloc_size;
	ffe_ctl.empty_size = empty_size;
	ffe_ctl.flags = flags;
	ffe_ctl.delalloc = delalloc;
	ffe_ctl.hint_byte = hint_byte;
	ffe_ctl.for_treelog = for_treelog;
	ffe_ctl.for_data_reloc = for_data_reloc;

	ret = find_free_extent(root, ins, &ffe_ctl);
	if (!ret && !is_data) {
		btrfs_dec_block_group_reservations(fs_info, ins->objectid);
	} else if (ret == -ENOSPC) {
		if (!final_tried && ins->offset) {
			/* Shrink the request and retry. */
			num_bytes = min(num_bytes >> 1, ins->offset);
			num_bytes = round_down(num_bytes,
					       fs_info->sectorsize);
			num_bytes = max(num_bytes, min_alloc_size);
			ram_bytes = num_bytes;
			if (num_bytes == min_alloc_size)
				final_tried = true;
			goto again;
		} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
			struct btrfs_space_info *sinfo;

			sinfo = btrfs_find_space_info(fs_info, flags);
			btrfs_err(fs_info,
			"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
				  flags, num_bytes, for_treelog, for_data_reloc);
			if (sinfo)
				btrfs_dump_space_info(fs_info, sinfo,
						      num_bytes, 1);
		}
	}

	return ret;
}
4576
4577int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4578 u64 start, u64 len, int delalloc)
4579{
4580 struct btrfs_block_group *cache;
4581
4582 cache = btrfs_lookup_block_group(fs_info, start);
4583 if (!cache) {
4584 btrfs_err(fs_info, "Unable to find block group for %llu",
4585 start);
4586 return -ENOSPC;
4587 }
4588
4589 btrfs_add_free_space(cache, start, len);
4590 btrfs_free_reserved_bytes(cache, len, delalloc);
4591 trace_btrfs_reserved_extent_free(fs_info, start, len);
4592
4593 btrfs_put_block_group(cache);
4594 return 0;
4595}
4596
4597int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
4598 u64 len)
4599{
4600 struct btrfs_block_group *cache;
4601 int ret = 0;
4602
4603 cache = btrfs_lookup_block_group(trans->fs_info, start);
4604 if (!cache) {
4605 btrfs_err(trans->fs_info, "unable to find block group for %llu",
4606 start);
4607 return -ENOSPC;
4608 }
4609
4610 ret = pin_down_extent(trans, cache, start, len, 1);
4611 btrfs_put_block_group(cache);
4612 return ret;
4613}
4614
/*
 * Common bookkeeping after inserting a new extent item: remove the range
 * from the free space tree and account it in its block group.
 */
static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				 u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;

	ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
	if (ret)
		return ret;

	ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
	if (ret) {
		/* Not expected to fail; the ASSERT flags it in debug builds. */
		ASSERT(!ret);
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  bytenr, num_bytes);
		return ret;
	}

	trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
	return 0;
}
4636
4637static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4638 u64 parent, u64 root_objectid,
4639 u64 flags, u64 owner, u64 offset,
4640 struct btrfs_key *ins, int ref_mod)
4641{
4642 struct btrfs_fs_info *fs_info = trans->fs_info;
4643 struct btrfs_root *extent_root;
4644 int ret;
4645 struct btrfs_extent_item *extent_item;
4646 struct btrfs_extent_inline_ref *iref;
4647 struct btrfs_path *path;
4648 struct extent_buffer *leaf;
4649 int type;
4650 u32 size;
4651
4652 if (parent > 0)
4653 type = BTRFS_SHARED_DATA_REF_KEY;
4654 else
4655 type = BTRFS_EXTENT_DATA_REF_KEY;
4656
4657 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4658
4659 path = btrfs_alloc_path();
4660 if (!path)
4661 return -ENOMEM;
4662
4663 extent_root = btrfs_extent_root(fs_info, ins->objectid);
4664 ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
4665 if (ret) {
4666 btrfs_free_path(path);
4667 return ret;
4668 }
4669
4670 leaf = path->nodes[0];
4671 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4672 struct btrfs_extent_item);
4673 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4674 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4675 btrfs_set_extent_flags(leaf, extent_item,
4676 flags | BTRFS_EXTENT_FLAG_DATA);
4677
4678 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4679 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4680 if (parent > 0) {
4681 struct btrfs_shared_data_ref *ref;
4682 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4683 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4684 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4685 } else {
4686 struct btrfs_extent_data_ref *ref;
4687 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4688 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4689 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4690 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4691 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4692 }
4693
4694 btrfs_mark_buffer_dirty(path->nodes[0]);
4695 btrfs_free_path(path);
4696
4697 return alloc_reserved_extent(trans, ins->objectid, ins->offset);
4698}
4699
/*
 * Insert the extent/metadata item plus one inline ref for a newly
 * allocated tree block described by the delayed ref @node, then finish the
 * allocation bookkeeping via alloc_reserved_extent().
 *
 * With the SKINNY_METADATA incompat feature the item is a
 * BTRFS_METADATA_ITEM_KEY keyed on the tree level; otherwise it is a
 * BTRFS_EXTENT_ITEM_KEY keyed on the byte count and carries an extra
 * btrfs_tree_block_info.
 */
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_node *node,
				     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *extent_root;
	int ret;
	struct btrfs_extent_item *extent_item;
	struct btrfs_key extent_key;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_delayed_tree_ref *ref;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	u64 flags = extent_op->flags_to_set;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);

	extent_key.objectid = node->bytenr;
	if (skinny_metadata) {
		/* Skinny metadata: key offset holds the level, no block_info. */
		extent_key.offset = ref->level;
		extent_key.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		extent_key.offset = node->num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		size += sizeof(*block_info);
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
	ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
				      size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
		btrfs_set_tree_block_level(leaf, block_info, ref->level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	/* Shared refs are keyed on the parent block, normal refs on the root. */
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
}
4774
/*
 * Queue a delayed ref that will insert the extent item for a newly
 * allocated data extent @ins owned by (@root, @owner, @offset).  The
 * actual extent item insertion happens when delayed refs are run.
 */
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins)
{
	struct btrfs_ref generic_ref = { 0 };

	/* Tree-log extents are handled by btrfs_alloc_logged_file_extent(). */
	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
			       ins->objectid, ins->offset, 0);
	btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
			    offset, 0, false);
	btrfs_ref_tree_mod(root->fs_info, &generic_ref);

	return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
}
4792
4793
4794
4795
4796
4797
/*
 * Re-create the extent item for a data extent found while replaying the
 * tree log.  The space is accounted as reserved directly in the block
 * group (no prior reservation exists during replay) and the extent item
 * is inserted immediately, not via delayed refs.
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;
	struct btrfs_block_group *block_group;
	struct btrfs_space_info *space_info;

	/*
	 * Mixed block groups may contain metadata in the same range; only
	 * exclude the range when groups are not mixed.
	 */
	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(fs_info, ins->objectid,
					      ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	/* Account the replayed extent as reserved space. */
	space_info = block_group->space_info;
	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	space_info->bytes_reserved += ins->offset;
	block_group->reserved += ins->offset;
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
					 offset, ins, 1);
	if (ret)
		/* On failure, pin the extent so the reservation is released. */
		btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
	btrfs_put_block_group(block_group);
	return ret;
}
4837
/*
 * Initialize the in-memory extent buffer for a freshly allocated tree
 * block at @bytenr: lock it, zero its header area and fill in level,
 * generation, owner and fsids, and mark it dirty in the appropriate
 * dirty-pages tree (log or transaction).  Returns the locked buffer or an
 * ERR_PTR.
 */
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, int level, u64 owner,
		      enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
	if (IS_ERR(buf))
		return buf;

	/*
	 * A freshly allocated block should never already be locked by this
	 * task; if it is, the extent tree handed out a block that is still
	 * in use - corruption.
	 */
	if (buf->lock_owner == current->pid) {
		btrfs_err_rl(fs_info,
"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
			buf->start, btrfs_header_owner(buf), current->pid);
		free_extent_buffer(buf);
		return ERR_PTR(-EUCLEAN);
	}

	/*
	 * Set the lockdep class before taking the lock so the buffer is
	 * annotated for the tree it now belongs to.
	 */
	btrfs_set_buffer_lockdep_class(owner, buf, level);
	__btrfs_tree_lock(buf, nest);
	btrfs_clean_tree_block(buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
	clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);

	set_extent_buffer_uptodate(buf);

	memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(buf, level);
	btrfs_set_header_bytenr(buf, buf->start);
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(buf, owner);
	write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
	write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		buf->log_index = root->log_transid % 2;
		/*
		 * Log trees alternate between two dirty trees; index 0 uses
		 * the dirty bit, index 1 the new bit.
		 */
		if (buf->log_index == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					 buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
				       buf->start + buf->len - 1);
	} else {
		buf->log_index = -1;
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
				 buf->start + buf->len - 1, GFP_NOFS);
	}

	/* The returned buffer is locked; the caller owns the lock. */
	return buf;
}
4904
4905
4906
4907
4908
/*
 * Allocate a new tree block for @root: reserve space from the block rsv,
 * reserve an extent, initialize the buffer, and (except for log trees)
 * queue the delayed ref that inserts its extent item.  Returns the locked
 * buffer or an ERR_PTR; on error all reservations are unwound via the
 * goto chain.
 */
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u64 parent, u64 root_objectid,
					     const struct btrfs_disk_key *key,
					     int level, u64 hint,
					     u64 empty_size,
					     enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key ins;
	struct btrfs_block_rsv *block_rsv;
	struct extent_buffer *buf;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_ref generic_ref = { 0 };
	u64 flags = 0;
	int ret;
	u32 blocksize = fs_info->nodesize;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	/* Self-tests hand out blocks sequentially without reservations. */
	if (btrfs_is_testing(fs_info)) {
		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
					    level, root_objectid, nest);
		if (!IS_ERR(buf))
			root->alloc_bytenr += blocksize;
		return buf;
	}
#endif

	block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
	if (IS_ERR(block_rsv))
		return ERR_CAST(block_rsv);

	ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
				   empty_size, hint, &ins, 0, 0);
	if (ret)
		goto out_unuse;

	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
				    root_objectid, nest);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_free_reserved;
	}

	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		/* Relocation tree blocks use full backrefs. */
		if (parent == 0)
			parent = ins.objectid;
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else
		BUG_ON(parent > 0);

	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		/* Queue the delayed ref that will insert the extent item. */
		extent_op = btrfs_alloc_delayed_extent_op();
		if (!extent_op) {
			ret = -ENOMEM;
			goto out_free_buf;
		}
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		extent_op->update_key = skinny_metadata ? false : true;
		extent_op->update_flags = true;
		extent_op->level = level;

		btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
				       ins.objectid, ins.offset, parent);
		btrfs_init_tree_ref(&generic_ref, level, root_objectid,
				    root->root_key.objectid, false);
		btrfs_ref_tree_mod(fs_info, &generic_ref);
		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
		if (ret)
			goto out_free_delayed;
	}
	return buf;

out_free_delayed:
	btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
	btrfs_tree_unlock(buf);
	free_extent_buffer(buf);
out_free_reserved:
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
out_unuse:
	btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
	return ERR_PTR(ret);
}
4998
/*
 * State shared by the tree-walk helpers below (reada_walk_down(),
 * walk_down_proc(), do_walk_down(), ...).
 */
struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];	/* ref count of the node at each level */
	u64 flags[BTRFS_MAX_LEVEL];	/* extent flags of the node at each level */
	struct btrfs_key update_progress;	/* progress key for UPDATE_BACKREF */
	struct btrfs_key drop_progress;	/* progress key for DROP_REFERENCE */
	int drop_level;
	int stage;		/* DROP_REFERENCE or UPDATE_BACKREF */
	int level;		/* current level in the walk */
	int shared_level;
	int update_ref;
	int keep_locks;		/* don't unlock nodes as we descend */
	int reada_slot;		/* next slot to start readahead from */
	int reada_count;	/* adaptive readahead batch size */
	int restarted;
};

/* Walk stages: dropping references vs converting to full backrefs. */
#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2
5017
/*
 * Trigger readahead on child blocks of the node at wc->level that the walk
 * is likely to visit next.  The batch size (wc->reada_count) adapts: it
 * shrinks while we are still before the last readahead slot and grows
 * otherwise, capped at the node's pointer capacity.  Best effort only -
 * lookup errors are skipped.
 */
static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct walk_control *wc,
				     struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 refs;
	u64 flags;
	u32 nritems;
	struct btrfs_key key;
	struct extent_buffer *eb;
	int ret;
	int slot;
	int nread = 0;

	if (path->slots[wc->level] < wc->reada_slot) {
		wc->reada_count = wc->reada_count * 2 / 3;
		wc->reada_count = max(wc->reada_count, 2);
	} else {
		wc->reada_count = wc->reada_count * 3 / 2;
		wc->reada_count = min_t(int, wc->reada_count,
					BTRFS_NODEPTRS_PER_BLOCK(fs_info));
	}

	eb = path->nodes[wc->level];
	nritems = btrfs_header_nritems(eb);

	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
		if (nread >= wc->reada_count)
			break;

		cond_resched();
		bytenr = btrfs_node_blockptr(eb, slot);
		generation = btrfs_node_ptr_generation(eb, slot);

		/* Always read ahead the block at the current slot. */
		if (slot == path->slots[wc->level])
			goto reada;

		if (wc->stage == UPDATE_BACKREF &&
		    generation <= root->root_key.offset)
			continue;

		/* Apply the same filters do_walk_down() will apply. */
		ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
					       wc->level - 1, 1, &refs,
					       &flags);
		/* We don't care about errors in readahead. */
		if (ret < 0)
			continue;
		BUG_ON(refs == 0);

		if (wc->stage == DROP_REFERENCE) {
			if (refs == 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		btrfs_readahead_node_child(eb, slot);
		nread++;
	}
	wc->reada_slot = slot;
}
5097
5098
5099
5100
5101
5102
5103
5104
5105
/*
 * Process the node at the current walk level before descending.
 *
 * Looks up the node's ref count and flags when needed, decides whether the
 * walk should stop descending here (return 1), and in the UPDATE_BACKREF
 * stage converts the block to a full backref (inc own refs, dec child
 * refs, set the FULL_BACKREF flag).  Returns 0 to continue descending,
 * 1 to stop at this level, or a negative errno.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	/* UPDATE_BACKREF only touches blocks owned by this root. */
	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * Refresh wc->refs/wc->flags for this level when the cached values
	 * may be stale: shared blocks in DROP_REFERENCE, or blocks not yet
	 * converted to full backref in UPDATE_BACKREF.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, fs_info,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		/* The block is shared; don't descend into it. */
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret);
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret);
		ret = btrfs_set_disk_extent_flags(trans, eb, flag,
						  btrfs_header_level(eb));
		BUG_ON(ret);
		wc->flags[level] |= flag;
	}

	/*
	 * The block is now fully backrefed; internal nodes can be unlocked
	 * (leaves keep their lock).
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}
5173
5174
5175
5176
5177
5178static int check_ref_exists(struct btrfs_trans_handle *trans,
5179 struct btrfs_root *root, u64 bytenr, u64 parent,
5180 int level)
5181{
5182 struct btrfs_path *path;
5183 struct btrfs_extent_inline_ref *iref;
5184 int ret;
5185
5186 path = btrfs_alloc_path();
5187 if (!path)
5188 return -ENOMEM;
5189
5190 ret = lookup_extent_backref(trans, path, &iref, bytenr,
5191 root->fs_info->nodesize, parent,
5192 root->root_key.objectid, level, 0);
5193 btrfs_free_path(path);
5194 if (ret == -ENOENT)
5195 return 0;
5196 if (ret < 0)
5197 return ret;
5198 return 1;
5199}
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
/*
 * Helper to process a tree block pointer.
 *
 * When wc->stage == DROP_REFERENCE, this function checks the reference
 * count of the block pointed to.  If the block is shared and we need to
 * update back refs for the subtree rooted at the block, this function
 * changes wc->stage to UPDATE_BACKREF.  If the block is shared and there
 * is no need to update back refs for the subtree, the pointer is dropped
 * via the skip path below.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 parent;
	struct btrfs_key key;
	struct btrfs_key first_key;
	struct btrfs_ref ref = { 0 };
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);

	/*
	 * If the lower level block was created before the snapshot was
	 * created (generation <= the snapshot generation stored in
	 * root_key.offset), we know there is no need to update back refs
	 * for the subtree.
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	btrfs_node_key_to_cpu(path->nodes[level], &first_key,
			      path->slots[level]);

	next = find_extent_buffer(fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(fs_info, bytenr,
				root->root_key.objectid, level - 1);
		if (IS_ERR(next))
			return PTR_ERR(next);
		/* Buffer wasn't cached; worth issuing readahead below. */
		reada = 1;
	}
	btrfs_tree_lock(next);

	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0)
		goto out_unlock;

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(fs_info, "Missing references.");
		ret = -EIO;
		goto out_unlock;
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			/* Shared subtree: qgroup accounting may be needed. */
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			/* Switch to updating backrefs for this subtree. */
			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
				       generation, level - 1, &first_key);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
	}

	level--;
	ASSERT(level == btrfs_header_level(next));
	if (level != btrfs_header_level(next)) {
		btrfs_err(root->fs_info, "mismatched level");
		ret = -EIO;
		goto out_unlock;
	}
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			ASSERT(root->root_key.objectid ==
			       btrfs_header_owner(path->nodes[level]));
			if (root->root_key.objectid !=
			    btrfs_header_owner(path->nodes[level])) {
				btrfs_err(root->fs_info,
						"mismatched block owner");
				ret = -EIO;
				goto out_unlock;
			}
			parent = 0;
		}

		/*
		 * If we had a drop_progress we need to verify the refs are set
		 * as expected.  If we find our ref then we know that from here
		 * on out everything should be actually be deleted, so we can
		 * stop deleting and set restarted to 0.
		 */
		if (wc->restarted) {
			ret = check_ref_exists(trans, root, bytenr, parent,
					       level - 1);
			if (ret < 0)
				goto out_unlock;
			if (ret == 0)
				goto no_delete;
			ret = 0;
			wc->restarted = 0;
		}

		/*
		 * Reloc tree doesn't contribute to qgroup numbers, and we have
		 * already accounted them at merge time (replace_path), thus we
		 * could skip the expensive subtree trace here.
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    need_account) {
			ret = btrfs_qgroup_trace_subtree(trans, next,
							 generation, level - 1);
			if (ret) {
				btrfs_err_rl(fs_info,
					     "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
					     ret);
			}
		}

		/*
		 * We need to update the next key in our walk control so we can
		 * update the drop_progress key accordingly.  We don't care if
		 * find_next_key doesn't find a key because that means we're at
		 * the end and are going to clean up now.
		 */
		wc->drop_level = level;
		find_next_key(path, level, &wc->drop_progress);

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
				       fs_info->nodesize, parent);
		btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
				    0, false);
		ret = btrfs_free_extent(trans, &ref);
		if (ret)
			goto out_unlock;
	}
no_delete:
	*lookup_info = 1;
	ret = 1;

out_unlock:
	btrfs_tree_unlock(next);
	free_extent_buffer(next);

	return ret;
}
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
/*
 * Helper to process a tree block while walking up the tree.
 *
 * When wc->stage == DROP_REFERENCE, this function drops the reference
 * count on the block.
 *
 * When wc->stage == UPDATE_BACKREF, this function changes wc->stage back
 * to DROP_REFERENCE if we changed wc->stage to UPDATE_BACKREF previously
 * while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * Check the reference count again if the block isn't locked.
		 * We should start walking down the tree again if the
		 * reference count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			path->locks[level] = BTRFS_WRITE_LOCK;

			ret = btrfs_lookup_extent_info(trans, fs_info,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret);
			if (is_fstree(root->root_key.objectid)) {
				ret = btrfs_qgroup_trace_leaf_items(trans, eb);
				if (ret) {
					btrfs_err_rl(fs_info,
	"error %d accounting leaf items, quota is out of sync, rescan required",
						     ret);
				}
			}
		}
		/* Take the lock so btrfs_clean_tree_block() can run safely. */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			path->locks[level] = BTRFS_WRITE_LOCK;
		}
		btrfs_clean_tree_block(eb);
	}

	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else if (root->root_key.objectid != btrfs_header_owner(eb))
			goto owner_mismatch;
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else if (root->root_key.objectid !=
			 btrfs_header_owner(path->nodes[level + 1]))
			goto owner_mismatch;
	}

	btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
			      wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;

owner_mismatch:
	btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
		     btrfs_header_owner(eb), root->root_key.objectid);
	return -EUCLEAN;
}
5530
5531static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5532 struct btrfs_root *root,
5533 struct btrfs_path *path,
5534 struct walk_control *wc)
5535{
5536 int level = wc->level;
5537 int lookup_info = 1;
5538 int ret;
5539
5540 while (level >= 0) {
5541 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5542 if (ret > 0)
5543 break;
5544
5545 if (level == 0)
5546 break;
5547
5548 if (path->slots[level] >=
5549 btrfs_header_nritems(path->nodes[level]))
5550 break;
5551
5552 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5553 if (ret > 0) {
5554 path->slots[level]++;
5555 continue;
5556 } else if (ret < 0)
5557 return ret;
5558 level = wc->level;
5559 }
5560 return 0;
5561}
5562
5563static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5564 struct btrfs_root *root,
5565 struct btrfs_path *path,
5566 struct walk_control *wc, int max_level)
5567{
5568 int level = wc->level;
5569 int ret;
5570
5571 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5572 while (level < max_level && path->nodes[level]) {
5573 wc->level = level;
5574 if (path->slots[level] + 1 <
5575 btrfs_header_nritems(path->nodes[level])) {
5576 path->slots[level]++;
5577 return 0;
5578 } else {
5579 ret = walk_up_proc(trans, root, path, wc);
5580 if (ret > 0)
5581 return 0;
5582 if (ret < 0)
5583 return ret;
5584
5585 if (path->locks[level]) {
5586 btrfs_tree_unlock_rw(path->nodes[level],
5587 path->locks[level]);
5588 path->locks[level] = 0;
5589 }
5590 free_extent_buffer(path->nodes[level]);
5591 path->nodes[level] = NULL;
5592 level++;
5593 }
5594 }
5595 return 1;
5596}
5597
5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
/*
 * Drop a subvolume tree.
 *
 * This function traverses the tree freeing any blocks that are only
 * referenced by the tree.
 *
 * When a shared tree block is found, this function decreases its
 * reference count by one.  If update_ref is true, this function also
 * makes sure backrefs for the shared block and all lower level blocks
 * are properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN.
 */
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;
	bool root_dropped = false;
	bool unfinished_drop = false;

	btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		err = -ENOMEM;
		goto out;
	}

	/*
	 * For relocation, join the current transaction instead of starting a
	 * new one; for a normal drop, start a fresh transaction.
	 */
	if (for_reloc)
		trans = btrfs_join_transaction(tree_root);
	else
		trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	err = btrfs_run_delayed_items(trans);
	if (err)
		goto out_end_trans;

	/*
	 * This will help us catch people modifying the fs tree while we're
	 * dropping it.  It is unsafe to mess with the fs tree while it's being
	 * dropped as we unlock the root node and parent nodes as we walk down
	 * the tree, assuming nothing will change.  If something does change
	 * then we'll have stale information and drop references to blocks
	 * we've already dropped.
	 */
	set_bit(BTRFS_ROOT_DELETING, &root->state);
	unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* No saved drop progress: start from the root node. */
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		path->slots[level] = 0;
		path->locks[level] = BTRFS_WRITE_LOCK;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		/* Resume a previously interrupted drop at drop_progress. */
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = btrfs_root_drop_level(root_item);
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out_end_trans;
		}
		WARN_ON(ret > 0);

		/*
		 * Unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot.
		 */
		btrfs_unlock_up_safe(path, 0);

		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			path->locks[level] = BTRFS_WRITE_LOCK;

			ret = btrfs_lookup_extent_info(trans, fs_info,
						path->nodes[level]->start,
						level, 1, &wc->refs[level],
						&wc->flags[level]);
			if (ret < 0) {
				err = ret;
				goto out_end_trans;
			}
			BUG_ON(wc->refs[level] == 0);

			if (level == btrfs_root_drop_level(root_item))
				break;

			btrfs_tree_unlock(path->nodes[level]);
			path->locks[level] = 0;
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);

	while (1) {

		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0) {
			err = ret;
			break;
		}

		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret < 0) {
			err = ret;
			break;
		}

		if (ret > 0) {
			/* walk_up_tree() == 1 means the whole tree is done. */
			BUG_ON(wc->stage != DROP_REFERENCE);
			break;
		}

		if (wc->stage == DROP_REFERENCE) {
			wc->drop_level = wc->level;
			btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
					      &wc->drop_progress,
					      path->slots[wc->drop_level]);
		}
		/* Persist progress so an interrupted drop can be resumed. */
		btrfs_cpu_key_to_disk(&root_item->drop_progress,
				      &wc->drop_progress);
		btrfs_set_root_drop_level(root_item, wc->drop_level);

		BUG_ON(wc->level == 0);
		if (btrfs_should_end_transaction(trans) ||
		    (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
			ret = btrfs_update_root(trans, tree_root,
						&root->root_key,
						root_item);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				err = ret;
				goto out_end_trans;
			}

			btrfs_end_transaction_throttle(trans);
			if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
				btrfs_debug(fs_info,
					    "drop snapshot early exit");
				err = -EAGAIN;
				goto out_free;
			}

			/*
			 * Re-start/join a transaction for the next batch of
			 * work, mirroring the initial transaction setup.
			 */
			if (for_reloc)
				trans = btrfs_join_transaction(tree_root);
			else
				trans = btrfs_start_transaction(tree_root, 0);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				goto out_free;
			}
		}
	}
	btrfs_release_path(path);
	if (err)
		goto out_end_trans;

	ret = btrfs_del_root(trans, &root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		err = ret;
		goto out_end_trans;
	}

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		ret = btrfs_find_root(tree_root, &root->root_key, path,
				      NULL, NULL);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		} else if (ret > 0) {
			/*
			 * If we fail to delete the orphan item this time
			 * around, it'll get picked up the next time.
			 *
			 * The most common failure here is just -ENOENT.
			 */
			btrfs_del_orphan_item(trans, tree_root,
					      root->root_key.objectid);
		}
	}

	/*
	 * This subvolume is going to be completely dropped, and won't be
	 * recorded as dirty roots, thus pertrans meta rsv will not be freed at
	 * commit transaction time.  So free it here manually.
	 */
	btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
	btrfs_qgroup_free_meta_all_pertrans(root);

	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
		btrfs_add_dropped_root(trans, root);
	else
		btrfs_put_root(root);
	root_dropped = true;
out_end_trans:
	btrfs_end_transaction_throttle(trans);
out_free:
	kfree(wc);
	btrfs_free_path(path);
out:
	/*
	 * If we were an unfinished drop root, check to see if there are any
	 * pending, and if not clear and wake up any waiters.
	 */
	if (!err && unfinished_drop)
		btrfs_maybe_wake_unfinished_drop(fs_info);

	/*
	 * So if we need to stop dropping the snapshot for whatever reason we
	 * need to make sure to add it back to the dead root list so that we
	 * keep trying to do the work later.  This also cleans up roots if we
	 * don't have it in the radix (like when we recover after a power fail
	 * or unmount) so we don't leak memory.
	 */
	if (!for_reloc && !root_dropped)
		btrfs_add_dead_root(root);
	return err;
}
5861
5862
5863
5864
5865
5866
5867
/*
 * Drop the subtree rooted at tree block 'node'.
 *
 * NOTE: only used by the relocation code (the root must be the reloc
 * tree, enforced by the BUG_ON below).  'node' and 'parent' must be
 * write-locked by the caller.
 */
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct extent_buffer *node,
			struct extent_buffer *parent)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct walk_control *wc;
	int level;
	int parent_level;
	int ret = 0;
	int wret;

	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	btrfs_assert_tree_write_locked(parent);
	parent_level = btrfs_header_level(parent);
	/* Path takes its own reference on the parent buffer. */
	atomic_inc(&parent->refs);
	path->nodes[parent_level] = parent;
	path->slots[parent_level] = btrfs_header_nritems(parent);

	btrfs_assert_tree_write_locked(node);
	level = btrfs_header_level(node);
	path->nodes[level] = node;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK;

	wc->refs[parent_level] = 1;
	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = 0;
	wc->keep_locks = 1;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);

	/* Walk down and back up until the subtree below parent is gone. */
	while (1) {
		wret = walk_down_tree(trans, root, path, wc);
		if (wret < 0) {
			ret = wret;
			break;
		}

		wret = walk_up_tree(trans, root, path, wc, parent_level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}

	kfree(wc);
	btrfs_free_path(path);
	return ret;
}
5932
5933
5934
5935
5936
5937u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5938{
5939 struct btrfs_block_group *block_group;
5940 u64 free_bytes = 0;
5941 int factor;
5942
5943
5944 if (list_empty(&sinfo->ro_bgs))
5945 return 0;
5946
5947 spin_lock(&sinfo->lock);
5948 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5949 spin_lock(&block_group->lock);
5950
5951 if (!block_group->ro) {
5952 spin_unlock(&block_group->lock);
5953 continue;
5954 }
5955
5956 factor = btrfs_bg_type_to_factor(block_group->flags);
5957 free_bytes += (block_group->length -
5958 block_group->used) * factor;
5959
5960 spin_unlock(&block_group->lock);
5961 }
5962 spin_unlock(&sinfo->lock);
5963
5964 return free_bytes;
5965}
5966
/*
 * Error-path helper: unpin the given extent range [start, end].  The final
 * 'false' argument is passed through to unpin_extent_range(); see that
 * function for its exact semantics.
 */
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				   u64 start, u64 end)
{
	return unpin_extent_range(fs_info, start, end, false);
}
5972
5973
5974
5975
5976
5977
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
/*
 * Trim the unallocated space on a single device, skipping the reserved
 * first 1MiB.  The total number of bytes actually discarded is returned
 * through *trimmed.  Each iteration takes and drops chunk_mutex so chunk
 * allocation is excluded only while a range is being looked up/discarded.
 */
static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
{
	u64 start = SZ_1M, len = 0, end = 0;
	int ret;

	*trimmed = 0;

	/* Discard not supported = nothing to do. */
	if (!bdev_max_discard_sectors(device->bdev))
		return 0;

	/* Not writable = nothing to do. */
	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		return 0;

	/* No free space = nothing to do. */
	if (device->total_bytes <= device->bytes_used)
		return 0;

	ret = 0;

	while (1) {
		struct btrfs_fs_info *fs_info = device->fs_info;
		u64 bytes;

		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
		if (ret)
			break;

		find_first_clear_extent_bit(&device->alloc_state, start,
					    &start, &end,
					    CHUNK_TRIMMED | CHUNK_ALLOCATED);

		/* Check if the entire remaining device has been searched. */
		if (start > device->total_bytes) {
			WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
			btrfs_warn_in_rcu(fs_info,
"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
					  start, end - start + 1,
					  rcu_str_deref(device->name),
					  device->total_bytes);
			mutex_unlock(&fs_info->chunk_mutex);
			ret = 0;
			break;
		}

		/* Ensure we skip the reserved space in the first 1M. */
		start = max_t(u64, start, SZ_1M);

		/*
		 * If find_first_clear_extent_bit finds a range that spans the
		 * end of the device it will set end to -1, in this case it's
		 * up to the caller to trim the value to the size of the
		 * device.
		 */
		end = min(end, device->total_bytes - 1);

		len = end - start + 1;

		/* We didn't find any extents. */
		if (!len) {
			mutex_unlock(&fs_info->chunk_mutex);
			ret = 0;
			break;
		}

		ret = btrfs_issue_discard(device->bdev, start, len,
					  &bytes);
		if (!ret)
			/* Remember the range as trimmed to avoid re-trimming. */
			set_extent_bits(&device->alloc_state, start,
					start + bytes - 1,
					CHUNK_TRIMMED);
		mutex_unlock(&fs_info->chunk_mutex);

		if (ret)
			break;

		start += len;
		*trimmed += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
/*
 * Trim the whole filesystem by:
 * 1) trimming the free space in each block group
 * 2) trimming the unallocated space on each device
 *
 * This will also continue trimming even if a block group or device
 * encounters an error.  The return value will be the last error, or 0 if
 * nothing bad happened.
 */
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_block_group *cache = NULL;
	struct btrfs_device *device;
	u64 group_trimmed;
	u64 range_end = U64_MAX;
	u64 start;
	u64 end;
	u64 trimmed = 0;
	u64 bg_failed = 0;
	u64 dev_failed = 0;
	int bg_ret = 0;
	int dev_ret = 0;
	int ret = 0;

	if (range->start == U64_MAX)
		return -EINVAL;

	/*
	 * Check range overflow if range->len is set.
	 * The default range->len is U64_MAX.
	 */
	if (range->len != U64_MAX &&
	    check_add_overflow(range->start, range->len, &range_end))
		return -EINVAL;

	/* Pass 1: trim the free space inside each block group in range. */
	cache = btrfs_lookup_first_block_group(fs_info, range->start);
	for (; cache; cache = btrfs_next_block_group(cache)) {
		if (cache->start >= range_end) {
			btrfs_put_block_group(cache);
			break;
		}

		start = max(range->start, cache->start);
		end = min(range_end, cache->start + cache->length);

		if (end - start >= range->minlen) {
			/* Free space caching must be done before trimming. */
			if (!btrfs_block_group_done(cache)) {
				ret = btrfs_cache_block_group(cache, 0);
				if (ret) {
					bg_failed++;
					bg_ret = ret;
					continue;
				}
				ret = btrfs_wait_block_group_cache_done(cache);
				if (ret) {
					bg_failed++;
					bg_ret = ret;
					continue;
				}
			}
			ret = btrfs_trim_block_group(cache,
						     &group_trimmed,
						     start,
						     end,
						     range->minlen);

			trimmed += group_trimmed;
			if (ret) {
				bg_failed++;
				bg_ret = ret;
				continue;
			}
		}
	}

	if (bg_failed)
		btrfs_warn(fs_info,
			"failed to trim %llu block group(s), last error %d",
			bg_failed, bg_ret);

	/* Pass 2: trim the unallocated space on each present device. */
	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
			continue;

		ret = btrfs_trim_free_extents(device, &group_trimmed);
		if (ret) {
			dev_failed++;
			dev_ret = ret;
			break;
		}

		trimmed += group_trimmed;
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	if (dev_failed)
		btrfs_warn(fs_info,
			"failed to trim %llu device(s), last error %d",
			dev_failed, dev_ret);
	range->len = trimmed;
	if (bg_ret)
		return bg_ret;
	return dev_ret;
}
6189