1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35#include "discard.h"
36#include "rcu-string.h"
37#include "zoned.h"
38#include "dev-replace.h"
39
40#undef SCRAMBLE_DELAYED_REFS
41
42
43static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
44 struct btrfs_delayed_ref_node *node, u64 parent,
45 u64 root_objectid, u64 owner_objectid,
46 u64 owner_offset, int refs_to_drop,
47 struct btrfs_delayed_extent_op *extra_op);
48static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
49 struct extent_buffer *leaf,
50 struct btrfs_extent_item *ei);
51static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
52 u64 parent, u64 root_objectid,
53 u64 flags, u64 owner, u64 offset,
54 struct btrfs_key *ins, int ref_mod);
55static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
56 struct btrfs_delayed_ref_node *node,
57 struct btrfs_delayed_extent_op *extent_op);
58static int find_next_key(struct btrfs_path *path, int level,
59 struct btrfs_key *key);
60
61static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
62{
63 return (cache->flags & bits) == bits;
64}
65
66int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
67 u64 start, u64 num_bytes)
68{
69 u64 end = start + num_bytes - 1;
70 set_extent_bits(&fs_info->excluded_extents, start, end,
71 EXTENT_UPTODATE);
72 return 0;
73}
74
75void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
76{
77 struct btrfs_fs_info *fs_info = cache->fs_info;
78 u64 start, end;
79
80 start = cache->start;
81 end = start + cache->length - 1;
82
83 clear_extent_bits(&fs_info->excluded_extents, start, end,
84 EXTENT_UPTODATE);
85}
86
87
88int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
89{
90 int ret;
91 struct btrfs_key key;
92 struct btrfs_path *path;
93
94 path = btrfs_alloc_path();
95 if (!path)
96 return -ENOMEM;
97
98 key.objectid = start;
99 key.offset = len;
100 key.type = BTRFS_EXTENT_ITEM_KEY;
101 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
102 btrfs_free_path(path);
103 return ret;
104}
105
106
107
108
109
110
111
112
113
114
115int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
116 struct btrfs_fs_info *fs_info, u64 bytenr,
117 u64 offset, int metadata, u64 *refs, u64 *flags)
118{
119 struct btrfs_delayed_ref_head *head;
120 struct btrfs_delayed_ref_root *delayed_refs;
121 struct btrfs_path *path;
122 struct btrfs_extent_item *ei;
123 struct extent_buffer *leaf;
124 struct btrfs_key key;
125 u32 item_size;
126 u64 num_refs;
127 u64 extent_flags;
128 int ret;
129
130
131
132
133
134 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
135 offset = fs_info->nodesize;
136 metadata = 0;
137 }
138
139 path = btrfs_alloc_path();
140 if (!path)
141 return -ENOMEM;
142
143 if (!trans) {
144 path->skip_locking = 1;
145 path->search_commit_root = 1;
146 }
147
148search_again:
149 key.objectid = bytenr;
150 key.offset = offset;
151 if (metadata)
152 key.type = BTRFS_METADATA_ITEM_KEY;
153 else
154 key.type = BTRFS_EXTENT_ITEM_KEY;
155
156 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
157 if (ret < 0)
158 goto out_free;
159
160 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
161 if (path->slots[0]) {
162 path->slots[0]--;
163 btrfs_item_key_to_cpu(path->nodes[0], &key,
164 path->slots[0]);
165 if (key.objectid == bytenr &&
166 key.type == BTRFS_EXTENT_ITEM_KEY &&
167 key.offset == fs_info->nodesize)
168 ret = 0;
169 }
170 }
171
172 if (ret == 0) {
173 leaf = path->nodes[0];
174 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
175 if (item_size >= sizeof(*ei)) {
176 ei = btrfs_item_ptr(leaf, path->slots[0],
177 struct btrfs_extent_item);
178 num_refs = btrfs_extent_refs(leaf, ei);
179 extent_flags = btrfs_extent_flags(leaf, ei);
180 } else {
181 ret = -EINVAL;
182 btrfs_print_v0_err(fs_info);
183 if (trans)
184 btrfs_abort_transaction(trans, ret);
185 else
186 btrfs_handle_fs_error(fs_info, ret, NULL);
187
188 goto out_free;
189 }
190
191 BUG_ON(num_refs == 0);
192 } else {
193 num_refs = 0;
194 extent_flags = 0;
195 ret = 0;
196 }
197
198 if (!trans)
199 goto out;
200
201 delayed_refs = &trans->transaction->delayed_refs;
202 spin_lock(&delayed_refs->lock);
203 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
204 if (head) {
205 if (!mutex_trylock(&head->mutex)) {
206 refcount_inc(&head->refs);
207 spin_unlock(&delayed_refs->lock);
208
209 btrfs_release_path(path);
210
211
212
213
214
215 mutex_lock(&head->mutex);
216 mutex_unlock(&head->mutex);
217 btrfs_put_delayed_ref_head(head);
218 goto search_again;
219 }
220 spin_lock(&head->lock);
221 if (head->extent_op && head->extent_op->update_flags)
222 extent_flags |= head->extent_op->flags_to_set;
223 else
224 BUG_ON(num_refs == 0);
225
226 num_refs += head->ref_mod;
227 spin_unlock(&head->lock);
228 mutex_unlock(&head->mutex);
229 }
230 spin_unlock(&delayed_refs->lock);
231out:
232 WARN_ON(num_refs == 0);
233 if (refs)
234 *refs = num_refs;
235 if (flags)
236 *flags = extent_flags;
237out_free:
238 btrfs_free_path(path);
239 return ret;
240}
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
354 struct btrfs_extent_inline_ref *iref,
355 enum btrfs_inline_ref_type is_data)
356{
357 int type = btrfs_extent_inline_ref_type(eb, iref);
358 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
359
360 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
361 type == BTRFS_SHARED_BLOCK_REF_KEY ||
362 type == BTRFS_SHARED_DATA_REF_KEY ||
363 type == BTRFS_EXTENT_DATA_REF_KEY) {
364 if (is_data == BTRFS_REF_TYPE_BLOCK) {
365 if (type == BTRFS_TREE_BLOCK_REF_KEY)
366 return type;
367 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
368 ASSERT(eb->fs_info);
369
370
371
372
373 if (offset &&
374 IS_ALIGNED(offset, eb->fs_info->sectorsize))
375 return type;
376 }
377 } else if (is_data == BTRFS_REF_TYPE_DATA) {
378 if (type == BTRFS_EXTENT_DATA_REF_KEY)
379 return type;
380 if (type == BTRFS_SHARED_DATA_REF_KEY) {
381 ASSERT(eb->fs_info);
382
383
384
385
386 if (offset &&
387 IS_ALIGNED(offset, eb->fs_info->sectorsize))
388 return type;
389 }
390 } else {
391 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
392 return type;
393 }
394 }
395
396 btrfs_print_leaf((struct extent_buffer *)eb);
397 btrfs_err(eb->fs_info,
398 "eb %llu iref 0x%lx invalid extent inline ref type %d",
399 eb->start, (unsigned long)iref, type);
400 WARN_ON(1);
401
402 return BTRFS_REF_TYPE_INVALID;
403}
404
405u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
406{
407 u32 high_crc = ~(u32)0;
408 u32 low_crc = ~(u32)0;
409 __le64 lenum;
410
411 lenum = cpu_to_le64(root_objectid);
412 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
413 lenum = cpu_to_le64(owner);
414 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
415 lenum = cpu_to_le64(offset);
416 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
417
418 return ((u64)high_crc << 31) ^ (u64)low_crc;
419}
420
421static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
422 struct btrfs_extent_data_ref *ref)
423{
424 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
425 btrfs_extent_data_ref_objectid(leaf, ref),
426 btrfs_extent_data_ref_offset(leaf, ref));
427}
428
429static int match_extent_data_ref(struct extent_buffer *leaf,
430 struct btrfs_extent_data_ref *ref,
431 u64 root_objectid, u64 owner, u64 offset)
432{
433 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
434 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
435 btrfs_extent_data_ref_offset(leaf, ref) != offset)
436 return 0;
437 return 1;
438}
439
440static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
441 struct btrfs_path *path,
442 u64 bytenr, u64 parent,
443 u64 root_objectid,
444 u64 owner, u64 offset)
445{
446 struct btrfs_root *root = trans->fs_info->extent_root;
447 struct btrfs_key key;
448 struct btrfs_extent_data_ref *ref;
449 struct extent_buffer *leaf;
450 u32 nritems;
451 int ret;
452 int recow;
453 int err = -ENOENT;
454
455 key.objectid = bytenr;
456 if (parent) {
457 key.type = BTRFS_SHARED_DATA_REF_KEY;
458 key.offset = parent;
459 } else {
460 key.type = BTRFS_EXTENT_DATA_REF_KEY;
461 key.offset = hash_extent_data_ref(root_objectid,
462 owner, offset);
463 }
464again:
465 recow = 0;
466 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
467 if (ret < 0) {
468 err = ret;
469 goto fail;
470 }
471
472 if (parent) {
473 if (!ret)
474 return 0;
475 goto fail;
476 }
477
478 leaf = path->nodes[0];
479 nritems = btrfs_header_nritems(leaf);
480 while (1) {
481 if (path->slots[0] >= nritems) {
482 ret = btrfs_next_leaf(root, path);
483 if (ret < 0)
484 err = ret;
485 if (ret)
486 goto fail;
487
488 leaf = path->nodes[0];
489 nritems = btrfs_header_nritems(leaf);
490 recow = 1;
491 }
492
493 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
494 if (key.objectid != bytenr ||
495 key.type != BTRFS_EXTENT_DATA_REF_KEY)
496 goto fail;
497
498 ref = btrfs_item_ptr(leaf, path->slots[0],
499 struct btrfs_extent_data_ref);
500
501 if (match_extent_data_ref(leaf, ref, root_objectid,
502 owner, offset)) {
503 if (recow) {
504 btrfs_release_path(path);
505 goto again;
506 }
507 err = 0;
508 break;
509 }
510 path->slots[0]++;
511 }
512fail:
513 return err;
514}
515
516static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
517 struct btrfs_path *path,
518 u64 bytenr, u64 parent,
519 u64 root_objectid, u64 owner,
520 u64 offset, int refs_to_add)
521{
522 struct btrfs_root *root = trans->fs_info->extent_root;
523 struct btrfs_key key;
524 struct extent_buffer *leaf;
525 u32 size;
526 u32 num_refs;
527 int ret;
528
529 key.objectid = bytenr;
530 if (parent) {
531 key.type = BTRFS_SHARED_DATA_REF_KEY;
532 key.offset = parent;
533 size = sizeof(struct btrfs_shared_data_ref);
534 } else {
535 key.type = BTRFS_EXTENT_DATA_REF_KEY;
536 key.offset = hash_extent_data_ref(root_objectid,
537 owner, offset);
538 size = sizeof(struct btrfs_extent_data_ref);
539 }
540
541 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
542 if (ret && ret != -EEXIST)
543 goto fail;
544
545 leaf = path->nodes[0];
546 if (parent) {
547 struct btrfs_shared_data_ref *ref;
548 ref = btrfs_item_ptr(leaf, path->slots[0],
549 struct btrfs_shared_data_ref);
550 if (ret == 0) {
551 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
552 } else {
553 num_refs = btrfs_shared_data_ref_count(leaf, ref);
554 num_refs += refs_to_add;
555 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
556 }
557 } else {
558 struct btrfs_extent_data_ref *ref;
559 while (ret == -EEXIST) {
560 ref = btrfs_item_ptr(leaf, path->slots[0],
561 struct btrfs_extent_data_ref);
562 if (match_extent_data_ref(leaf, ref, root_objectid,
563 owner, offset))
564 break;
565 btrfs_release_path(path);
566 key.offset++;
567 ret = btrfs_insert_empty_item(trans, root, path, &key,
568 size);
569 if (ret && ret != -EEXIST)
570 goto fail;
571
572 leaf = path->nodes[0];
573 }
574 ref = btrfs_item_ptr(leaf, path->slots[0],
575 struct btrfs_extent_data_ref);
576 if (ret == 0) {
577 btrfs_set_extent_data_ref_root(leaf, ref,
578 root_objectid);
579 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
580 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
581 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
582 } else {
583 num_refs = btrfs_extent_data_ref_count(leaf, ref);
584 num_refs += refs_to_add;
585 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
586 }
587 }
588 btrfs_mark_buffer_dirty(leaf);
589 ret = 0;
590fail:
591 btrfs_release_path(path);
592 return ret;
593}
594
595static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
596 struct btrfs_path *path,
597 int refs_to_drop, int *last_ref)
598{
599 struct btrfs_key key;
600 struct btrfs_extent_data_ref *ref1 = NULL;
601 struct btrfs_shared_data_ref *ref2 = NULL;
602 struct extent_buffer *leaf;
603 u32 num_refs = 0;
604 int ret = 0;
605
606 leaf = path->nodes[0];
607 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
608
609 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
610 ref1 = btrfs_item_ptr(leaf, path->slots[0],
611 struct btrfs_extent_data_ref);
612 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
613 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
614 ref2 = btrfs_item_ptr(leaf, path->slots[0],
615 struct btrfs_shared_data_ref);
616 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
617 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
618 btrfs_print_v0_err(trans->fs_info);
619 btrfs_abort_transaction(trans, -EINVAL);
620 return -EINVAL;
621 } else {
622 BUG();
623 }
624
625 BUG_ON(num_refs < refs_to_drop);
626 num_refs -= refs_to_drop;
627
628 if (num_refs == 0) {
629 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
630 *last_ref = 1;
631 } else {
632 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
633 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
634 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
635 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
636 btrfs_mark_buffer_dirty(leaf);
637 }
638 return ret;
639}
640
641static noinline u32 extent_data_ref_count(struct btrfs_path *path,
642 struct btrfs_extent_inline_ref *iref)
643{
644 struct btrfs_key key;
645 struct extent_buffer *leaf;
646 struct btrfs_extent_data_ref *ref1;
647 struct btrfs_shared_data_ref *ref2;
648 u32 num_refs = 0;
649 int type;
650
651 leaf = path->nodes[0];
652 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
653
654 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
655 if (iref) {
656
657
658
659
660 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
661 ASSERT(type != BTRFS_REF_TYPE_INVALID);
662 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
663 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
664 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
665 } else {
666 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
667 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
668 }
669 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
670 ref1 = btrfs_item_ptr(leaf, path->slots[0],
671 struct btrfs_extent_data_ref);
672 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
673 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
674 ref2 = btrfs_item_ptr(leaf, path->slots[0],
675 struct btrfs_shared_data_ref);
676 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
677 } else {
678 WARN_ON(1);
679 }
680 return num_refs;
681}
682
683static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
684 struct btrfs_path *path,
685 u64 bytenr, u64 parent,
686 u64 root_objectid)
687{
688 struct btrfs_root *root = trans->fs_info->extent_root;
689 struct btrfs_key key;
690 int ret;
691
692 key.objectid = bytenr;
693 if (parent) {
694 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
695 key.offset = parent;
696 } else {
697 key.type = BTRFS_TREE_BLOCK_REF_KEY;
698 key.offset = root_objectid;
699 }
700
701 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
702 if (ret > 0)
703 ret = -ENOENT;
704 return ret;
705}
706
707static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
708 struct btrfs_path *path,
709 u64 bytenr, u64 parent,
710 u64 root_objectid)
711{
712 struct btrfs_key key;
713 int ret;
714
715 key.objectid = bytenr;
716 if (parent) {
717 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
718 key.offset = parent;
719 } else {
720 key.type = BTRFS_TREE_BLOCK_REF_KEY;
721 key.offset = root_objectid;
722 }
723
724 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
725 path, &key, 0);
726 btrfs_release_path(path);
727 return ret;
728}
729
730static inline int extent_ref_type(u64 parent, u64 owner)
731{
732 int type;
733 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
734 if (parent > 0)
735 type = BTRFS_SHARED_BLOCK_REF_KEY;
736 else
737 type = BTRFS_TREE_BLOCK_REF_KEY;
738 } else {
739 if (parent > 0)
740 type = BTRFS_SHARED_DATA_REF_KEY;
741 else
742 type = BTRFS_EXTENT_DATA_REF_KEY;
743 }
744 return type;
745}
746
747static int find_next_key(struct btrfs_path *path, int level,
748 struct btrfs_key *key)
749
750{
751 for (; level < BTRFS_MAX_LEVEL; level++) {
752 if (!path->nodes[level])
753 break;
754 if (path->slots[level] + 1 >=
755 btrfs_header_nritems(path->nodes[level]))
756 continue;
757 if (level == 0)
758 btrfs_item_key_to_cpu(path->nodes[level], key,
759 path->slots[level] + 1);
760 else
761 btrfs_node_key_to_cpu(path->nodes[level], key,
762 path->slots[level] + 1);
763 return 0;
764 }
765 return 1;
766}
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781static noinline_for_stack
782int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
783 struct btrfs_path *path,
784 struct btrfs_extent_inline_ref **ref_ret,
785 u64 bytenr, u64 num_bytes,
786 u64 parent, u64 root_objectid,
787 u64 owner, u64 offset, int insert)
788{
789 struct btrfs_fs_info *fs_info = trans->fs_info;
790 struct btrfs_root *root = fs_info->extent_root;
791 struct btrfs_key key;
792 struct extent_buffer *leaf;
793 struct btrfs_extent_item *ei;
794 struct btrfs_extent_inline_ref *iref;
795 u64 flags;
796 u64 item_size;
797 unsigned long ptr;
798 unsigned long end;
799 int extra_size;
800 int type;
801 int want;
802 int ret;
803 int err = 0;
804 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
805 int needed;
806
807 key.objectid = bytenr;
808 key.type = BTRFS_EXTENT_ITEM_KEY;
809 key.offset = num_bytes;
810
811 want = extent_ref_type(parent, owner);
812 if (insert) {
813 extra_size = btrfs_extent_inline_ref_size(want);
814 path->search_for_extension = 1;
815 path->keep_locks = 1;
816 } else
817 extra_size = -1;
818
819
820
821
822
823 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
824 key.type = BTRFS_METADATA_ITEM_KEY;
825 key.offset = owner;
826 }
827
828again:
829 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
830 if (ret < 0) {
831 err = ret;
832 goto out;
833 }
834
835
836
837
838
839 if (ret > 0 && skinny_metadata) {
840 skinny_metadata = false;
841 if (path->slots[0]) {
842 path->slots[0]--;
843 btrfs_item_key_to_cpu(path->nodes[0], &key,
844 path->slots[0]);
845 if (key.objectid == bytenr &&
846 key.type == BTRFS_EXTENT_ITEM_KEY &&
847 key.offset == num_bytes)
848 ret = 0;
849 }
850 if (ret) {
851 key.objectid = bytenr;
852 key.type = BTRFS_EXTENT_ITEM_KEY;
853 key.offset = num_bytes;
854 btrfs_release_path(path);
855 goto again;
856 }
857 }
858
859 if (ret && !insert) {
860 err = -ENOENT;
861 goto out;
862 } else if (WARN_ON(ret)) {
863 err = -EIO;
864 goto out;
865 }
866
867 leaf = path->nodes[0];
868 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
869 if (unlikely(item_size < sizeof(*ei))) {
870 err = -EINVAL;
871 btrfs_print_v0_err(fs_info);
872 btrfs_abort_transaction(trans, err);
873 goto out;
874 }
875
876 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
877 flags = btrfs_extent_flags(leaf, ei);
878
879 ptr = (unsigned long)(ei + 1);
880 end = (unsigned long)ei + item_size;
881
882 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
883 ptr += sizeof(struct btrfs_tree_block_info);
884 BUG_ON(ptr > end);
885 }
886
887 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
888 needed = BTRFS_REF_TYPE_DATA;
889 else
890 needed = BTRFS_REF_TYPE_BLOCK;
891
892 err = -ENOENT;
893 while (1) {
894 if (ptr >= end) {
895 WARN_ON(ptr > end);
896 break;
897 }
898 iref = (struct btrfs_extent_inline_ref *)ptr;
899 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
900 if (type == BTRFS_REF_TYPE_INVALID) {
901 err = -EUCLEAN;
902 goto out;
903 }
904
905 if (want < type)
906 break;
907 if (want > type) {
908 ptr += btrfs_extent_inline_ref_size(type);
909 continue;
910 }
911
912 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
913 struct btrfs_extent_data_ref *dref;
914 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
915 if (match_extent_data_ref(leaf, dref, root_objectid,
916 owner, offset)) {
917 err = 0;
918 break;
919 }
920 if (hash_extent_data_ref_item(leaf, dref) <
921 hash_extent_data_ref(root_objectid, owner, offset))
922 break;
923 } else {
924 u64 ref_offset;
925 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
926 if (parent > 0) {
927 if (parent == ref_offset) {
928 err = 0;
929 break;
930 }
931 if (ref_offset < parent)
932 break;
933 } else {
934 if (root_objectid == ref_offset) {
935 err = 0;
936 break;
937 }
938 if (ref_offset < root_objectid)
939 break;
940 }
941 }
942 ptr += btrfs_extent_inline_ref_size(type);
943 }
944 if (err == -ENOENT && insert) {
945 if (item_size + extra_size >=
946 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
947 err = -EAGAIN;
948 goto out;
949 }
950
951
952
953
954
955
956 if (find_next_key(path, 0, &key) == 0 &&
957 key.objectid == bytenr &&
958 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
959 err = -EAGAIN;
960 goto out;
961 }
962 }
963 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
964out:
965 if (insert) {
966 path->keep_locks = 0;
967 path->search_for_extension = 0;
968 btrfs_unlock_up_safe(path, 1);
969 }
970 return err;
971}
972
973
974
975
976static noinline_for_stack
977void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
978 struct btrfs_path *path,
979 struct btrfs_extent_inline_ref *iref,
980 u64 parent, u64 root_objectid,
981 u64 owner, u64 offset, int refs_to_add,
982 struct btrfs_delayed_extent_op *extent_op)
983{
984 struct extent_buffer *leaf;
985 struct btrfs_extent_item *ei;
986 unsigned long ptr;
987 unsigned long end;
988 unsigned long item_offset;
989 u64 refs;
990 int size;
991 int type;
992
993 leaf = path->nodes[0];
994 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
995 item_offset = (unsigned long)iref - (unsigned long)ei;
996
997 type = extent_ref_type(parent, owner);
998 size = btrfs_extent_inline_ref_size(type);
999
1000 btrfs_extend_item(path, size);
1001
1002 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1003 refs = btrfs_extent_refs(leaf, ei);
1004 refs += refs_to_add;
1005 btrfs_set_extent_refs(leaf, ei, refs);
1006 if (extent_op)
1007 __run_delayed_extent_op(extent_op, leaf, ei);
1008
1009 ptr = (unsigned long)ei + item_offset;
1010 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1011 if (ptr < end - size)
1012 memmove_extent_buffer(leaf, ptr + size, ptr,
1013 end - size - ptr);
1014
1015 iref = (struct btrfs_extent_inline_ref *)ptr;
1016 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1017 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1018 struct btrfs_extent_data_ref *dref;
1019 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1020 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1021 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1022 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1023 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1024 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1025 struct btrfs_shared_data_ref *sref;
1026 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1027 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1028 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1029 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1030 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1031 } else {
1032 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1033 }
1034 btrfs_mark_buffer_dirty(leaf);
1035}
1036
1037static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1038 struct btrfs_path *path,
1039 struct btrfs_extent_inline_ref **ref_ret,
1040 u64 bytenr, u64 num_bytes, u64 parent,
1041 u64 root_objectid, u64 owner, u64 offset)
1042{
1043 int ret;
1044
1045 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1046 num_bytes, parent, root_objectid,
1047 owner, offset, 0);
1048 if (ret != -ENOENT)
1049 return ret;
1050
1051 btrfs_release_path(path);
1052 *ref_ret = NULL;
1053
1054 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1055 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1056 root_objectid);
1057 } else {
1058 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1059 root_objectid, owner, offset);
1060 }
1061 return ret;
1062}
1063
1064
1065
1066
1067static noinline_for_stack
1068void update_inline_extent_backref(struct btrfs_path *path,
1069 struct btrfs_extent_inline_ref *iref,
1070 int refs_to_mod,
1071 struct btrfs_delayed_extent_op *extent_op,
1072 int *last_ref)
1073{
1074 struct extent_buffer *leaf = path->nodes[0];
1075 struct btrfs_extent_item *ei;
1076 struct btrfs_extent_data_ref *dref = NULL;
1077 struct btrfs_shared_data_ref *sref = NULL;
1078 unsigned long ptr;
1079 unsigned long end;
1080 u32 item_size;
1081 int size;
1082 int type;
1083 u64 refs;
1084
1085 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1086 refs = btrfs_extent_refs(leaf, ei);
1087 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1088 refs += refs_to_mod;
1089 btrfs_set_extent_refs(leaf, ei, refs);
1090 if (extent_op)
1091 __run_delayed_extent_op(extent_op, leaf, ei);
1092
1093
1094
1095
1096
1097 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1098 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1099
1100 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1101 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1102 refs = btrfs_extent_data_ref_count(leaf, dref);
1103 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1104 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1105 refs = btrfs_shared_data_ref_count(leaf, sref);
1106 } else {
1107 refs = 1;
1108 BUG_ON(refs_to_mod != -1);
1109 }
1110
1111 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1112 refs += refs_to_mod;
1113
1114 if (refs > 0) {
1115 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1116 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1117 else
1118 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1119 } else {
1120 *last_ref = 1;
1121 size = btrfs_extent_inline_ref_size(type);
1122 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1123 ptr = (unsigned long)iref;
1124 end = (unsigned long)ei + item_size;
1125 if (ptr + size < end)
1126 memmove_extent_buffer(leaf, ptr, ptr + size,
1127 end - ptr - size);
1128 item_size -= size;
1129 btrfs_truncate_item(path, item_size, 1);
1130 }
1131 btrfs_mark_buffer_dirty(leaf);
1132}
1133
1134static noinline_for_stack
1135int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1136 struct btrfs_path *path,
1137 u64 bytenr, u64 num_bytes, u64 parent,
1138 u64 root_objectid, u64 owner,
1139 u64 offset, int refs_to_add,
1140 struct btrfs_delayed_extent_op *extent_op)
1141{
1142 struct btrfs_extent_inline_ref *iref;
1143 int ret;
1144
1145 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1146 num_bytes, parent, root_objectid,
1147 owner, offset, 1);
1148 if (ret == 0) {
1149
1150
1151
1152
1153 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1154 btrfs_crit(trans->fs_info,
1155"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
1156 bytenr, num_bytes, root_objectid);
1157 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
1158 WARN_ON(1);
1159 btrfs_crit(trans->fs_info,
1160 "path->slots[0]=%d path->nodes[0]:", path->slots[0]);
1161 btrfs_print_leaf(path->nodes[0]);
1162 }
1163 return -EUCLEAN;
1164 }
1165 update_inline_extent_backref(path, iref, refs_to_add,
1166 extent_op, NULL);
1167 } else if (ret == -ENOENT) {
1168 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1169 root_objectid, owner, offset,
1170 refs_to_add, extent_op);
1171 ret = 0;
1172 }
1173 return ret;
1174}
1175
1176static int remove_extent_backref(struct btrfs_trans_handle *trans,
1177 struct btrfs_path *path,
1178 struct btrfs_extent_inline_ref *iref,
1179 int refs_to_drop, int is_data, int *last_ref)
1180{
1181 int ret = 0;
1182
1183 BUG_ON(!is_data && refs_to_drop != 1);
1184 if (iref) {
1185 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1186 last_ref);
1187 } else if (is_data) {
1188 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1189 last_ref);
1190 } else {
1191 *last_ref = 1;
1192 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1193 }
1194 return ret;
1195}
1196
1197static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1198 u64 *discarded_bytes)
1199{
1200 int j, ret = 0;
1201 u64 bytes_left, end;
1202 u64 aligned_start = ALIGN(start, 1 << 9);
1203
1204 if (WARN_ON(start != aligned_start)) {
1205 len -= aligned_start - start;
1206 len = round_down(len, 1 << 9);
1207 start = aligned_start;
1208 }
1209
1210 *discarded_bytes = 0;
1211
1212 if (!len)
1213 return 0;
1214
1215 end = start + len;
1216 bytes_left = len;
1217
1218
1219 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1220 u64 sb_start = btrfs_sb_offset(j);
1221 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1222 u64 size = sb_start - start;
1223
1224 if (!in_range(sb_start, start, bytes_left) &&
1225 !in_range(sb_end, start, bytes_left) &&
1226 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1227 continue;
1228
1229
1230
1231
1232
1233 if (sb_start <= start) {
1234 start += sb_end - start;
1235 if (start > end) {
1236 bytes_left = 0;
1237 break;
1238 }
1239 bytes_left = end - start;
1240 continue;
1241 }
1242
1243 if (size) {
1244 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1245 GFP_NOFS, 0);
1246 if (!ret)
1247 *discarded_bytes += size;
1248 else if (ret != -EOPNOTSUPP)
1249 return ret;
1250 }
1251
1252 start = sb_end;
1253 if (start > end) {
1254 bytes_left = 0;
1255 break;
1256 }
1257 bytes_left = end - start;
1258 }
1259
1260 if (bytes_left) {
1261 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1262 GFP_NOFS, 0);
1263 if (!ret)
1264 *discarded_bytes += bytes_left;
1265 }
1266 return ret;
1267}
1268
1269static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes)
1270{
1271 struct btrfs_device *dev = stripe->dev;
1272 struct btrfs_fs_info *fs_info = dev->fs_info;
1273 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
1274 u64 phys = stripe->physical;
1275 u64 len = stripe->length;
1276 u64 discarded = 0;
1277 int ret = 0;
1278
1279
1280 if (btrfs_can_zone_reset(dev, phys, len)) {
1281 u64 src_disc;
1282
1283 ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
1284 if (ret)
1285 goto out;
1286
1287 if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
1288 dev != dev_replace->srcdev)
1289 goto out;
1290
1291 src_disc = discarded;
1292
1293
1294 ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
1295 &discarded);
1296 discarded += src_disc;
1297 } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
1298 ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
1299 } else {
1300 ret = 0;
1301 *bytes = 0;
1302 }
1303
1304out:
1305 *bytes = discarded;
1306 return ret;
1307}
1308
1309int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1310 u64 num_bytes, u64 *actual_bytes)
1311{
1312 int ret = 0;
1313 u64 discarded_bytes = 0;
1314 u64 end = bytenr + num_bytes;
1315 u64 cur = bytenr;
1316 struct btrfs_bio *bbio = NULL;
1317
1318
1319
1320
1321
1322
1323 btrfs_bio_counter_inc_blocked(fs_info);
1324 while (cur < end) {
1325 struct btrfs_bio_stripe *stripe;
1326 int i;
1327
1328 num_bytes = end - cur;
1329
1330 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
1331 &num_bytes, &bbio, 0);
1332
1333
1334
1335
1336
1337 if (ret < 0)
1338 goto out;
1339
1340 stripe = bbio->stripes;
1341 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1342 u64 bytes;
1343 struct btrfs_device *device = stripe->dev;
1344
1345 if (!device->bdev) {
1346 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1347 continue;
1348 }
1349
1350 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
1351 continue;
1352
1353 ret = do_discard_extent(stripe, &bytes);
1354 if (!ret) {
1355 discarded_bytes += bytes;
1356 } else if (ret != -EOPNOTSUPP) {
1357
1358
1359
1360
1361
1362
1363
1364 btrfs_put_bbio(bbio);
1365 goto out;
1366 }
1367
1368
1369
1370
1371
1372
1373 ret = 0;
1374 }
1375 btrfs_put_bbio(bbio);
1376 cur += num_bytes;
1377 }
1378out:
1379 btrfs_bio_counter_dec(fs_info);
1380
1381 if (actual_bytes)
1382 *actual_bytes = discarded_bytes;
1383
1384
1385 if (ret == -EOPNOTSUPP)
1386 ret = 0;
1387 return ret;
1388}
1389
1390
1391int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1392 struct btrfs_ref *generic_ref)
1393{
1394 struct btrfs_fs_info *fs_info = trans->fs_info;
1395 int ret;
1396
1397 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1398 generic_ref->action);
1399 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1400 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1401
1402 if (generic_ref->type == BTRFS_REF_METADATA)
1403 ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
1404 else
1405 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
1406
1407 btrfs_ref_tree_mod(fs_info, generic_ref);
1408
1409 return ret;
1410}
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1450 struct btrfs_delayed_ref_node *node,
1451 u64 parent, u64 root_objectid,
1452 u64 owner, u64 offset, int refs_to_add,
1453 struct btrfs_delayed_extent_op *extent_op)
1454{
1455 struct btrfs_path *path;
1456 struct extent_buffer *leaf;
1457 struct btrfs_extent_item *item;
1458 struct btrfs_key key;
1459 u64 bytenr = node->bytenr;
1460 u64 num_bytes = node->num_bytes;
1461 u64 refs;
1462 int ret;
1463
1464 path = btrfs_alloc_path();
1465 if (!path)
1466 return -ENOMEM;
1467
1468
1469 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1470 parent, root_objectid, owner,
1471 offset, refs_to_add, extent_op);
1472 if ((ret < 0 && ret != -EAGAIN) || !ret)
1473 goto out;
1474
1475
1476
1477
1478
1479
1480 leaf = path->nodes[0];
1481 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1482 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1483 refs = btrfs_extent_refs(leaf, item);
1484 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1485 if (extent_op)
1486 __run_delayed_extent_op(extent_op, leaf, item);
1487
1488 btrfs_mark_buffer_dirty(leaf);
1489 btrfs_release_path(path);
1490
1491
1492 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1493 BUG_ON(refs_to_add != 1);
1494 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1495 root_objectid);
1496 } else {
1497 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1498 root_objectid, owner, offset,
1499 refs_to_add);
1500 }
1501 if (ret)
1502 btrfs_abort_transaction(trans, ret);
1503out:
1504 btrfs_free_path(path);
1505 return ret;
1506}
1507
1508static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1509 struct btrfs_delayed_ref_node *node,
1510 struct btrfs_delayed_extent_op *extent_op,
1511 int insert_reserved)
1512{
1513 int ret = 0;
1514 struct btrfs_delayed_data_ref *ref;
1515 struct btrfs_key ins;
1516 u64 parent = 0;
1517 u64 ref_root = 0;
1518 u64 flags = 0;
1519
1520 ins.objectid = node->bytenr;
1521 ins.offset = node->num_bytes;
1522 ins.type = BTRFS_EXTENT_ITEM_KEY;
1523
1524 ref = btrfs_delayed_node_to_data_ref(node);
1525 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1526
1527 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1528 parent = ref->parent;
1529 ref_root = ref->root;
1530
1531 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1532 if (extent_op)
1533 flags |= extent_op->flags_to_set;
1534 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1535 flags, ref->objectid,
1536 ref->offset, &ins,
1537 node->ref_mod);
1538 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1539 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1540 ref->objectid, ref->offset,
1541 node->ref_mod, extent_op);
1542 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1543 ret = __btrfs_free_extent(trans, node, parent,
1544 ref_root, ref->objectid,
1545 ref->offset, node->ref_mod,
1546 extent_op);
1547 } else {
1548 BUG();
1549 }
1550 return ret;
1551}
1552
1553static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1554 struct extent_buffer *leaf,
1555 struct btrfs_extent_item *ei)
1556{
1557 u64 flags = btrfs_extent_flags(leaf, ei);
1558 if (extent_op->update_flags) {
1559 flags |= extent_op->flags_to_set;
1560 btrfs_set_extent_flags(leaf, ei, flags);
1561 }
1562
1563 if (extent_op->update_key) {
1564 struct btrfs_tree_block_info *bi;
1565 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1566 bi = (struct btrfs_tree_block_info *)(ei + 1);
1567 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1568 }
1569}
1570
1571static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1572 struct btrfs_delayed_ref_head *head,
1573 struct btrfs_delayed_extent_op *extent_op)
1574{
1575 struct btrfs_fs_info *fs_info = trans->fs_info;
1576 struct btrfs_key key;
1577 struct btrfs_path *path;
1578 struct btrfs_extent_item *ei;
1579 struct extent_buffer *leaf;
1580 u32 item_size;
1581 int ret;
1582 int err = 0;
1583 int metadata = !extent_op->is_data;
1584
1585 if (TRANS_ABORTED(trans))
1586 return 0;
1587
1588 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1589 metadata = 0;
1590
1591 path = btrfs_alloc_path();
1592 if (!path)
1593 return -ENOMEM;
1594
1595 key.objectid = head->bytenr;
1596
1597 if (metadata) {
1598 key.type = BTRFS_METADATA_ITEM_KEY;
1599 key.offset = extent_op->level;
1600 } else {
1601 key.type = BTRFS_EXTENT_ITEM_KEY;
1602 key.offset = head->num_bytes;
1603 }
1604
1605again:
1606 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1607 if (ret < 0) {
1608 err = ret;
1609 goto out;
1610 }
1611 if (ret > 0) {
1612 if (metadata) {
1613 if (path->slots[0] > 0) {
1614 path->slots[0]--;
1615 btrfs_item_key_to_cpu(path->nodes[0], &key,
1616 path->slots[0]);
1617 if (key.objectid == head->bytenr &&
1618 key.type == BTRFS_EXTENT_ITEM_KEY &&
1619 key.offset == head->num_bytes)
1620 ret = 0;
1621 }
1622 if (ret > 0) {
1623 btrfs_release_path(path);
1624 metadata = 0;
1625
1626 key.objectid = head->bytenr;
1627 key.offset = head->num_bytes;
1628 key.type = BTRFS_EXTENT_ITEM_KEY;
1629 goto again;
1630 }
1631 } else {
1632 err = -EIO;
1633 goto out;
1634 }
1635 }
1636
1637 leaf = path->nodes[0];
1638 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1639
1640 if (unlikely(item_size < sizeof(*ei))) {
1641 err = -EINVAL;
1642 btrfs_print_v0_err(fs_info);
1643 btrfs_abort_transaction(trans, err);
1644 goto out;
1645 }
1646
1647 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1648 __run_delayed_extent_op(extent_op, leaf, ei);
1649
1650 btrfs_mark_buffer_dirty(leaf);
1651out:
1652 btrfs_free_path(path);
1653 return err;
1654}
1655
1656static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1657 struct btrfs_delayed_ref_node *node,
1658 struct btrfs_delayed_extent_op *extent_op,
1659 int insert_reserved)
1660{
1661 int ret = 0;
1662 struct btrfs_delayed_tree_ref *ref;
1663 u64 parent = 0;
1664 u64 ref_root = 0;
1665
1666 ref = btrfs_delayed_node_to_tree_ref(node);
1667 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1668
1669 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1670 parent = ref->parent;
1671 ref_root = ref->root;
1672
1673 if (node->ref_mod != 1) {
1674 btrfs_err(trans->fs_info,
1675 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1676 node->bytenr, node->ref_mod, node->action, ref_root,
1677 parent);
1678 return -EIO;
1679 }
1680 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1681 BUG_ON(!extent_op || !extent_op->update_flags);
1682 ret = alloc_reserved_tree_block(trans, node, extent_op);
1683 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1684 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1685 ref->level, 0, 1, extent_op);
1686 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1687 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1688 ref->level, 0, 1, extent_op);
1689 } else {
1690 BUG();
1691 }
1692 return ret;
1693}
1694
1695
1696static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1697 struct btrfs_delayed_ref_node *node,
1698 struct btrfs_delayed_extent_op *extent_op,
1699 int insert_reserved)
1700{
1701 int ret = 0;
1702
1703 if (TRANS_ABORTED(trans)) {
1704 if (insert_reserved)
1705 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1706 return 0;
1707 }
1708
1709 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1710 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1711 ret = run_delayed_tree_ref(trans, node, extent_op,
1712 insert_reserved);
1713 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1714 node->type == BTRFS_SHARED_DATA_REF_KEY)
1715 ret = run_delayed_data_ref(trans, node, extent_op,
1716 insert_reserved);
1717 else
1718 BUG();
1719 if (ret && insert_reserved)
1720 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1721 return ret;
1722}
1723
1724static inline struct btrfs_delayed_ref_node *
1725select_delayed_ref(struct btrfs_delayed_ref_head *head)
1726{
1727 struct btrfs_delayed_ref_node *ref;
1728
1729 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1730 return NULL;
1731
1732
1733
1734
1735
1736
1737
1738 if (!list_empty(&head->ref_add_list))
1739 return list_first_entry(&head->ref_add_list,
1740 struct btrfs_delayed_ref_node, add_list);
1741
1742 ref = rb_entry(rb_first_cached(&head->ref_tree),
1743 struct btrfs_delayed_ref_node, ref_node);
1744 ASSERT(list_empty(&ref->add_list));
1745 return ref;
1746}
1747
1748static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1749 struct btrfs_delayed_ref_head *head)
1750{
1751 spin_lock(&delayed_refs->lock);
1752 head->processing = 0;
1753 delayed_refs->num_heads_ready++;
1754 spin_unlock(&delayed_refs->lock);
1755 btrfs_delayed_ref_unlock(head);
1756}
1757
1758static struct btrfs_delayed_extent_op *cleanup_extent_op(
1759 struct btrfs_delayed_ref_head *head)
1760{
1761 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1762
1763 if (!extent_op)
1764 return NULL;
1765
1766 if (head->must_insert_reserved) {
1767 head->extent_op = NULL;
1768 btrfs_free_delayed_extent_op(extent_op);
1769 return NULL;
1770 }
1771 return extent_op;
1772}
1773
1774static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1775 struct btrfs_delayed_ref_head *head)
1776{
1777 struct btrfs_delayed_extent_op *extent_op;
1778 int ret;
1779
1780 extent_op = cleanup_extent_op(head);
1781 if (!extent_op)
1782 return 0;
1783 head->extent_op = NULL;
1784 spin_unlock(&head->lock);
1785 ret = run_delayed_extent_op(trans, head, extent_op);
1786 btrfs_free_delayed_extent_op(extent_op);
1787 return ret ? ret : 1;
1788}
1789
1790void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1791 struct btrfs_delayed_ref_root *delayed_refs,
1792 struct btrfs_delayed_ref_head *head)
1793{
1794 int nr_items = 1;
1795
1796
1797
1798
1799
1800 if (head->total_ref_mod < 0 && head->is_data) {
1801 spin_lock(&delayed_refs->lock);
1802 delayed_refs->pending_csums -= head->num_bytes;
1803 spin_unlock(&delayed_refs->lock);
1804 nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
1805 }
1806
1807 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1808}
1809
1810static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1811 struct btrfs_delayed_ref_head *head)
1812{
1813
1814 struct btrfs_fs_info *fs_info = trans->fs_info;
1815 struct btrfs_delayed_ref_root *delayed_refs;
1816 int ret;
1817
1818 delayed_refs = &trans->transaction->delayed_refs;
1819
1820 ret = run_and_cleanup_extent_op(trans, head);
1821 if (ret < 0) {
1822 unselect_delayed_ref_head(delayed_refs, head);
1823 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1824 return ret;
1825 } else if (ret) {
1826 return ret;
1827 }
1828
1829
1830
1831
1832
1833 spin_unlock(&head->lock);
1834 spin_lock(&delayed_refs->lock);
1835 spin_lock(&head->lock);
1836 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1837 spin_unlock(&head->lock);
1838 spin_unlock(&delayed_refs->lock);
1839 return 1;
1840 }
1841 btrfs_delete_ref_head(delayed_refs, head);
1842 spin_unlock(&head->lock);
1843 spin_unlock(&delayed_refs->lock);
1844
1845 if (head->must_insert_reserved) {
1846 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
1847 if (head->is_data) {
1848 ret = btrfs_del_csums(trans, fs_info->csum_root,
1849 head->bytenr, head->num_bytes);
1850 }
1851 }
1852
1853 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1854
1855 trace_run_delayed_ref_head(fs_info, head, 0);
1856 btrfs_delayed_ref_unlock(head);
1857 btrfs_put_delayed_ref_head(head);
1858 return ret;
1859}
1860
1861static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1862 struct btrfs_trans_handle *trans)
1863{
1864 struct btrfs_delayed_ref_root *delayed_refs =
1865 &trans->transaction->delayed_refs;
1866 struct btrfs_delayed_ref_head *head = NULL;
1867 int ret;
1868
1869 spin_lock(&delayed_refs->lock);
1870 head = btrfs_select_ref_head(delayed_refs);
1871 if (!head) {
1872 spin_unlock(&delayed_refs->lock);
1873 return head;
1874 }
1875
1876
1877
1878
1879
1880 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1881 spin_unlock(&delayed_refs->lock);
1882
1883
1884
1885
1886
1887
1888 if (ret == -EAGAIN)
1889 head = ERR_PTR(-EAGAIN);
1890
1891 return head;
1892}
1893
1894static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1895 struct btrfs_delayed_ref_head *locked_ref,
1896 unsigned long *run_refs)
1897{
1898 struct btrfs_fs_info *fs_info = trans->fs_info;
1899 struct btrfs_delayed_ref_root *delayed_refs;
1900 struct btrfs_delayed_extent_op *extent_op;
1901 struct btrfs_delayed_ref_node *ref;
1902 int must_insert_reserved = 0;
1903 int ret;
1904
1905 delayed_refs = &trans->transaction->delayed_refs;
1906
1907 lockdep_assert_held(&locked_ref->mutex);
1908 lockdep_assert_held(&locked_ref->lock);
1909
1910 while ((ref = select_delayed_ref(locked_ref))) {
1911 if (ref->seq &&
1912 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1913 spin_unlock(&locked_ref->lock);
1914 unselect_delayed_ref_head(delayed_refs, locked_ref);
1915 return -EAGAIN;
1916 }
1917
1918 (*run_refs)++;
1919 ref->in_tree = 0;
1920 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1921 RB_CLEAR_NODE(&ref->ref_node);
1922 if (!list_empty(&ref->add_list))
1923 list_del(&ref->add_list);
1924
1925
1926
1927
1928 switch (ref->action) {
1929 case BTRFS_ADD_DELAYED_REF:
1930 case BTRFS_ADD_DELAYED_EXTENT:
1931 locked_ref->ref_mod -= ref->ref_mod;
1932 break;
1933 case BTRFS_DROP_DELAYED_REF:
1934 locked_ref->ref_mod += ref->ref_mod;
1935 break;
1936 default:
1937 WARN_ON(1);
1938 }
1939 atomic_dec(&delayed_refs->num_entries);
1940
1941
1942
1943
1944
1945 must_insert_reserved = locked_ref->must_insert_reserved;
1946 locked_ref->must_insert_reserved = 0;
1947
1948 extent_op = locked_ref->extent_op;
1949 locked_ref->extent_op = NULL;
1950 spin_unlock(&locked_ref->lock);
1951
1952 ret = run_one_delayed_ref(trans, ref, extent_op,
1953 must_insert_reserved);
1954
1955 btrfs_free_delayed_extent_op(extent_op);
1956 if (ret) {
1957 unselect_delayed_ref_head(delayed_refs, locked_ref);
1958 btrfs_put_delayed_ref(ref);
1959 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1960 ret);
1961 return ret;
1962 }
1963
1964 btrfs_put_delayed_ref(ref);
1965 cond_resched();
1966
1967 spin_lock(&locked_ref->lock);
1968 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1969 }
1970
1971 return 0;
1972}
1973
1974
1975
1976
1977
1978static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1979 unsigned long nr)
1980{
1981 struct btrfs_fs_info *fs_info = trans->fs_info;
1982 struct btrfs_delayed_ref_root *delayed_refs;
1983 struct btrfs_delayed_ref_head *locked_ref = NULL;
1984 ktime_t start = ktime_get();
1985 int ret;
1986 unsigned long count = 0;
1987 unsigned long actual_count = 0;
1988
1989 delayed_refs = &trans->transaction->delayed_refs;
1990 do {
1991 if (!locked_ref) {
1992 locked_ref = btrfs_obtain_ref_head(trans);
1993 if (IS_ERR_OR_NULL(locked_ref)) {
1994 if (PTR_ERR(locked_ref) == -EAGAIN) {
1995 continue;
1996 } else {
1997 break;
1998 }
1999 }
2000 count++;
2001 }
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014 spin_lock(&locked_ref->lock);
2015 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2016
2017 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2018 &actual_count);
2019 if (ret < 0 && ret != -EAGAIN) {
2020
2021
2022
2023
2024 return ret;
2025 } else if (!ret) {
2026
2027
2028
2029
2030 ret = cleanup_ref_head(trans, locked_ref);
2031 if (ret > 0 ) {
2032
2033 ret = 0;
2034 continue;
2035 } else if (ret) {
2036 return ret;
2037 }
2038 }
2039
2040
2041
2042
2043
2044
2045 locked_ref = NULL;
2046 cond_resched();
2047 } while ((nr != -1 && count < nr) || locked_ref);
2048
2049
2050
2051
2052
2053
2054 if (actual_count > 0) {
2055 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2056 u64 avg;
2057
2058
2059
2060
2061
2062 spin_lock(&delayed_refs->lock);
2063 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2064 fs_info->avg_delayed_ref_runtime = avg >> 2;
2065 spin_unlock(&delayed_refs->lock);
2066 }
2067 return 0;
2068}
2069
2070#ifdef SCRAMBLE_DELAYED_REFS
2071
2072
2073
2074
2075
2076static u64 find_middle(struct rb_root *root)
2077{
2078 struct rb_node *n = root->rb_node;
2079 struct btrfs_delayed_ref_node *entry;
2080 int alt = 1;
2081 u64 middle;
2082 u64 first = 0, last = 0;
2083
2084 n = rb_first(root);
2085 if (n) {
2086 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2087 first = entry->bytenr;
2088 }
2089 n = rb_last(root);
2090 if (n) {
2091 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2092 last = entry->bytenr;
2093 }
2094 n = root->rb_node;
2095
2096 while (n) {
2097 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2098 WARN_ON(!entry->in_tree);
2099
2100 middle = entry->bytenr;
2101
2102 if (alt)
2103 n = n->rb_left;
2104 else
2105 n = n->rb_right;
2106
2107 alt = 1 - alt;
2108 }
2109 return middle;
2110}
2111#endif
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2124 unsigned long count)
2125{
2126 struct btrfs_fs_info *fs_info = trans->fs_info;
2127 struct rb_node *node;
2128 struct btrfs_delayed_ref_root *delayed_refs;
2129 struct btrfs_delayed_ref_head *head;
2130 int ret;
2131 int run_all = count == (unsigned long)-1;
2132
2133
2134 if (TRANS_ABORTED(trans))
2135 return 0;
2136
2137 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2138 return 0;
2139
2140 delayed_refs = &trans->transaction->delayed_refs;
2141 if (count == 0)
2142 count = delayed_refs->num_heads_ready;
2143
2144again:
2145#ifdef SCRAMBLE_DELAYED_REFS
2146 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2147#endif
2148 ret = __btrfs_run_delayed_refs(trans, count);
2149 if (ret < 0) {
2150 btrfs_abort_transaction(trans, ret);
2151 return ret;
2152 }
2153
2154 if (run_all) {
2155 btrfs_create_pending_block_groups(trans);
2156
2157 spin_lock(&delayed_refs->lock);
2158 node = rb_first_cached(&delayed_refs->href_root);
2159 if (!node) {
2160 spin_unlock(&delayed_refs->lock);
2161 goto out;
2162 }
2163 head = rb_entry(node, struct btrfs_delayed_ref_head,
2164 href_node);
2165 refcount_inc(&head->refs);
2166 spin_unlock(&delayed_refs->lock);
2167
2168
2169 mutex_lock(&head->mutex);
2170 mutex_unlock(&head->mutex);
2171
2172 btrfs_put_delayed_ref_head(head);
2173 cond_resched();
2174 goto again;
2175 }
2176out:
2177 return 0;
2178}
2179
2180int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2181 struct extent_buffer *eb, u64 flags,
2182 int level, int is_data)
2183{
2184 struct btrfs_delayed_extent_op *extent_op;
2185 int ret;
2186
2187 extent_op = btrfs_alloc_delayed_extent_op();
2188 if (!extent_op)
2189 return -ENOMEM;
2190
2191 extent_op->flags_to_set = flags;
2192 extent_op->update_flags = true;
2193 extent_op->update_key = false;
2194 extent_op->is_data = is_data ? true : false;
2195 extent_op->level = level;
2196
2197 ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
2198 if (ret)
2199 btrfs_free_delayed_extent_op(extent_op);
2200 return ret;
2201}
2202
2203static noinline int check_delayed_ref(struct btrfs_root *root,
2204 struct btrfs_path *path,
2205 u64 objectid, u64 offset, u64 bytenr)
2206{
2207 struct btrfs_delayed_ref_head *head;
2208 struct btrfs_delayed_ref_node *ref;
2209 struct btrfs_delayed_data_ref *data_ref;
2210 struct btrfs_delayed_ref_root *delayed_refs;
2211 struct btrfs_transaction *cur_trans;
2212 struct rb_node *node;
2213 int ret = 0;
2214
2215 spin_lock(&root->fs_info->trans_lock);
2216 cur_trans = root->fs_info->running_transaction;
2217 if (cur_trans)
2218 refcount_inc(&cur_trans->use_count);
2219 spin_unlock(&root->fs_info->trans_lock);
2220 if (!cur_trans)
2221 return 0;
2222
2223 delayed_refs = &cur_trans->delayed_refs;
2224 spin_lock(&delayed_refs->lock);
2225 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2226 if (!head) {
2227 spin_unlock(&delayed_refs->lock);
2228 btrfs_put_transaction(cur_trans);
2229 return 0;
2230 }
2231
2232 if (!mutex_trylock(&head->mutex)) {
2233 refcount_inc(&head->refs);
2234 spin_unlock(&delayed_refs->lock);
2235
2236 btrfs_release_path(path);
2237
2238
2239
2240
2241
2242 mutex_lock(&head->mutex);
2243 mutex_unlock(&head->mutex);
2244 btrfs_put_delayed_ref_head(head);
2245 btrfs_put_transaction(cur_trans);
2246 return -EAGAIN;
2247 }
2248 spin_unlock(&delayed_refs->lock);
2249
2250 spin_lock(&head->lock);
2251
2252
2253
2254
2255 for (node = rb_first_cached(&head->ref_tree); node;
2256 node = rb_next(node)) {
2257 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2258
2259 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2260 ret = 1;
2261 break;
2262 }
2263
2264 data_ref = btrfs_delayed_node_to_data_ref(ref);
2265
2266
2267
2268
2269
2270 if (data_ref->root != root->root_key.objectid ||
2271 data_ref->objectid != objectid ||
2272 data_ref->offset != offset) {
2273 ret = 1;
2274 break;
2275 }
2276 }
2277 spin_unlock(&head->lock);
2278 mutex_unlock(&head->mutex);
2279 btrfs_put_transaction(cur_trans);
2280 return ret;
2281}
2282
2283static noinline int check_committed_ref(struct btrfs_root *root,
2284 struct btrfs_path *path,
2285 u64 objectid, u64 offset, u64 bytenr,
2286 bool strict)
2287{
2288 struct btrfs_fs_info *fs_info = root->fs_info;
2289 struct btrfs_root *extent_root = fs_info->extent_root;
2290 struct extent_buffer *leaf;
2291 struct btrfs_extent_data_ref *ref;
2292 struct btrfs_extent_inline_ref *iref;
2293 struct btrfs_extent_item *ei;
2294 struct btrfs_key key;
2295 u32 item_size;
2296 int type;
2297 int ret;
2298
2299 key.objectid = bytenr;
2300 key.offset = (u64)-1;
2301 key.type = BTRFS_EXTENT_ITEM_KEY;
2302
2303 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2304 if (ret < 0)
2305 goto out;
2306 BUG_ON(ret == 0);
2307
2308 ret = -ENOENT;
2309 if (path->slots[0] == 0)
2310 goto out;
2311
2312 path->slots[0]--;
2313 leaf = path->nodes[0];
2314 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2315
2316 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2317 goto out;
2318
2319 ret = 1;
2320 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2321 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2322
2323
2324 if (item_size != sizeof(*ei) +
2325 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2326 goto out;
2327
2328
2329
2330
2331
2332 if (!strict &&
2333 (btrfs_extent_generation(leaf, ei) <=
2334 btrfs_root_last_snapshot(&root->root_item)))
2335 goto out;
2336
2337 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2338
2339
2340 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2341 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2342 goto out;
2343
2344 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2345 if (btrfs_extent_refs(leaf, ei) !=
2346 btrfs_extent_data_ref_count(leaf, ref) ||
2347 btrfs_extent_data_ref_root(leaf, ref) !=
2348 root->root_key.objectid ||
2349 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2350 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2351 goto out;
2352
2353 ret = 0;
2354out:
2355 return ret;
2356}
2357
2358int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2359 u64 bytenr, bool strict)
2360{
2361 struct btrfs_path *path;
2362 int ret;
2363
2364 path = btrfs_alloc_path();
2365 if (!path)
2366 return -ENOMEM;
2367
2368 do {
2369 ret = check_committed_ref(root, path, objectid,
2370 offset, bytenr, strict);
2371 if (ret && ret != -ENOENT)
2372 goto out;
2373
2374 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2375 } while (ret == -EAGAIN);
2376
2377out:
2378 btrfs_free_path(path);
2379 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2380 WARN_ON(ret > 0);
2381 return ret;
2382}
2383
2384static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2385 struct btrfs_root *root,
2386 struct extent_buffer *buf,
2387 int full_backref, int inc)
2388{
2389 struct btrfs_fs_info *fs_info = root->fs_info;
2390 u64 bytenr;
2391 u64 num_bytes;
2392 u64 parent;
2393 u64 ref_root;
2394 u32 nritems;
2395 struct btrfs_key key;
2396 struct btrfs_file_extent_item *fi;
2397 struct btrfs_ref generic_ref = { 0 };
2398 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2399 int i;
2400 int action;
2401 int level;
2402 int ret = 0;
2403
2404 if (btrfs_is_testing(fs_info))
2405 return 0;
2406
2407 ref_root = btrfs_header_owner(buf);
2408 nritems = btrfs_header_nritems(buf);
2409 level = btrfs_header_level(buf);
2410
2411 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
2412 return 0;
2413
2414 if (full_backref)
2415 parent = buf->start;
2416 else
2417 parent = 0;
2418 if (inc)
2419 action = BTRFS_ADD_DELAYED_REF;
2420 else
2421 action = BTRFS_DROP_DELAYED_REF;
2422
2423 for (i = 0; i < nritems; i++) {
2424 if (level == 0) {
2425 btrfs_item_key_to_cpu(buf, &key, i);
2426 if (key.type != BTRFS_EXTENT_DATA_KEY)
2427 continue;
2428 fi = btrfs_item_ptr(buf, i,
2429 struct btrfs_file_extent_item);
2430 if (btrfs_file_extent_type(buf, fi) ==
2431 BTRFS_FILE_EXTENT_INLINE)
2432 continue;
2433 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2434 if (bytenr == 0)
2435 continue;
2436
2437 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2438 key.offset -= btrfs_file_extent_offset(buf, fi);
2439 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2440 num_bytes, parent);
2441 generic_ref.real_root = root->root_key.objectid;
2442 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2443 key.offset);
2444 generic_ref.skip_qgroup = for_reloc;
2445 if (inc)
2446 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2447 else
2448 ret = btrfs_free_extent(trans, &generic_ref);
2449 if (ret)
2450 goto fail;
2451 } else {
2452 bytenr = btrfs_node_blockptr(buf, i);
2453 num_bytes = fs_info->nodesize;
2454 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2455 num_bytes, parent);
2456 generic_ref.real_root = root->root_key.objectid;
2457 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2458 generic_ref.skip_qgroup = for_reloc;
2459 if (inc)
2460 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2461 else
2462 ret = btrfs_free_extent(trans, &generic_ref);
2463 if (ret)
2464 goto fail;
2465 }
2466 }
2467 return 0;
2468fail:
2469 return ret;
2470}
2471
2472int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2473 struct extent_buffer *buf, int full_backref)
2474{
2475 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2476}
2477
2478int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2479 struct extent_buffer *buf, int full_backref)
2480{
2481 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2482}
2483
2484static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2485{
2486 struct btrfs_fs_info *fs_info = root->fs_info;
2487 u64 flags;
2488 u64 ret;
2489
2490 if (data)
2491 flags = BTRFS_BLOCK_GROUP_DATA;
2492 else if (root == fs_info->chunk_root)
2493 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2494 else
2495 flags = BTRFS_BLOCK_GROUP_METADATA;
2496
2497 ret = btrfs_get_alloc_profile(fs_info, flags);
2498 return ret;
2499}
2500
2501static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2502{
2503 struct btrfs_block_group *cache;
2504 u64 bytenr;
2505
2506 spin_lock(&fs_info->block_group_cache_lock);
2507 bytenr = fs_info->first_logical_byte;
2508 spin_unlock(&fs_info->block_group_cache_lock);
2509
2510 if (bytenr < (u64)-1)
2511 return bytenr;
2512
2513 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2514 if (!cache)
2515 return 0;
2516
2517 bytenr = cache->start;
2518 btrfs_put_block_group(cache);
2519
2520 return bytenr;
2521}
2522
2523static int pin_down_extent(struct btrfs_trans_handle *trans,
2524 struct btrfs_block_group *cache,
2525 u64 bytenr, u64 num_bytes, int reserved)
2526{
2527 struct btrfs_fs_info *fs_info = cache->fs_info;
2528
2529 spin_lock(&cache->space_info->lock);
2530 spin_lock(&cache->lock);
2531 cache->pinned += num_bytes;
2532 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2533 num_bytes);
2534 if (reserved) {
2535 cache->reserved -= num_bytes;
2536 cache->space_info->bytes_reserved -= num_bytes;
2537 }
2538 spin_unlock(&cache->lock);
2539 spin_unlock(&cache->space_info->lock);
2540
2541 set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
2542 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2543 return 0;
2544}
2545
2546int btrfs_pin_extent(struct btrfs_trans_handle *trans,
2547 u64 bytenr, u64 num_bytes, int reserved)
2548{
2549 struct btrfs_block_group *cache;
2550
2551 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2552 BUG_ON(!cache);
2553
2554 pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
2555
2556 btrfs_put_block_group(cache);
2557 return 0;
2558}
2559
2560
2561
2562
2563int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2564 u64 bytenr, u64 num_bytes)
2565{
2566 struct btrfs_block_group *cache;
2567 int ret;
2568
2569 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2570 if (!cache)
2571 return -EINVAL;
2572
2573
2574
2575
2576
2577
2578
2579 btrfs_cache_block_group(cache, 1);
2580
2581
2582
2583
2584 ret = btrfs_wait_block_group_cache_done(cache);
2585 if (ret)
2586 goto out;
2587
2588 pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2589
2590
2591 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2592out:
2593 btrfs_put_block_group(cache);
2594 return ret;
2595}
2596
2597static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2598 u64 start, u64 num_bytes)
2599{
2600 int ret;
2601 struct btrfs_block_group *block_group;
2602
2603 block_group = btrfs_lookup_block_group(fs_info, start);
2604 if (!block_group)
2605 return -EINVAL;
2606
2607 btrfs_cache_block_group(block_group, 1);
2608
2609
2610
2611
2612 ret = btrfs_wait_block_group_cache_done(block_group);
2613 if (ret)
2614 goto out;
2615
2616 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2617out:
2618 btrfs_put_block_group(block_group);
2619 return ret;
2620}
2621
2622int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2623{
2624 struct btrfs_fs_info *fs_info = eb->fs_info;
2625 struct btrfs_file_extent_item *item;
2626 struct btrfs_key key;
2627 int found_type;
2628 int i;
2629 int ret = 0;
2630
2631 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2632 return 0;
2633
2634 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2635 btrfs_item_key_to_cpu(eb, &key, i);
2636 if (key.type != BTRFS_EXTENT_DATA_KEY)
2637 continue;
2638 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2639 found_type = btrfs_file_extent_type(eb, item);
2640 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2641 continue;
2642 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2643 continue;
2644 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2645 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2646 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2647 if (ret)
2648 break;
2649 }
2650
2651 return ret;
2652}
2653
2654static void
2655btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
2656{
2657 atomic_inc(&bg->reservations);
2658}
2659
2660
2661
2662
2663
2664static struct btrfs_free_cluster *
2665fetch_cluster_info(struct btrfs_fs_info *fs_info,
2666 struct btrfs_space_info *space_info, u64 *empty_cluster)
2667{
2668 struct btrfs_free_cluster *ret = NULL;
2669
2670 *empty_cluster = 0;
2671 if (btrfs_mixed_space_info(space_info))
2672 return ret;
2673
2674 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2675 ret = &fs_info->meta_alloc_cluster;
2676 if (btrfs_test_opt(fs_info, SSD))
2677 *empty_cluster = SZ_2M;
2678 else
2679 *empty_cluster = SZ_64K;
2680 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2681 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2682 *empty_cluster = SZ_2M;
2683 ret = &fs_info->data_alloc_cluster;
2684 }
2685
2686 return ret;
2687}
2688
2689static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2690 u64 start, u64 end,
2691 const bool return_free_space)
2692{
2693 struct btrfs_block_group *cache = NULL;
2694 struct btrfs_space_info *space_info;
2695 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2696 struct btrfs_free_cluster *cluster = NULL;
2697 u64 len;
2698 u64 total_unpinned = 0;
2699 u64 empty_cluster = 0;
2700 bool readonly;
2701
2702 while (start <= end) {
2703 readonly = false;
2704 if (!cache ||
2705 start >= cache->start + cache->length) {
2706 if (cache)
2707 btrfs_put_block_group(cache);
2708 total_unpinned = 0;
2709 cache = btrfs_lookup_block_group(fs_info, start);
2710 BUG_ON(!cache);
2711
2712 cluster = fetch_cluster_info(fs_info,
2713 cache->space_info,
2714 &empty_cluster);
2715 empty_cluster <<= 1;
2716 }
2717
2718 len = cache->start + cache->length - start;
2719 len = min(len, end + 1 - start);
2720
2721 down_read(&fs_info->commit_root_sem);
2722 if (start < cache->last_byte_to_unpin && return_free_space) {
2723 u64 add_len = min(len, cache->last_byte_to_unpin - start);
2724
2725 btrfs_add_free_space(cache, start, add_len);
2726 }
2727 up_read(&fs_info->commit_root_sem);
2728
2729 start += len;
2730 total_unpinned += len;
2731 space_info = cache->space_info;
2732
2733
2734
2735
2736
2737
2738
2739 if (cluster && cluster->fragmented &&
2740 total_unpinned > empty_cluster) {
2741 spin_lock(&cluster->lock);
2742 cluster->fragmented = 0;
2743 spin_unlock(&cluster->lock);
2744 }
2745
2746 spin_lock(&space_info->lock);
2747 spin_lock(&cache->lock);
2748 cache->pinned -= len;
2749 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2750 space_info->max_extent_size = 0;
2751 if (cache->ro) {
2752 space_info->bytes_readonly += len;
2753 readonly = true;
2754 } else if (btrfs_is_zoned(fs_info)) {
2755
2756 space_info->bytes_zone_unusable += len;
2757 readonly = true;
2758 }
2759 spin_unlock(&cache->lock);
2760 if (!readonly && return_free_space &&
2761 global_rsv->space_info == space_info) {
2762 u64 to_add = len;
2763
2764 spin_lock(&global_rsv->lock);
2765 if (!global_rsv->full) {
2766 to_add = min(len, global_rsv->size -
2767 global_rsv->reserved);
2768 global_rsv->reserved += to_add;
2769 btrfs_space_info_update_bytes_may_use(fs_info,
2770 space_info, to_add);
2771 if (global_rsv->reserved >= global_rsv->size)
2772 global_rsv->full = 1;
2773 len -= to_add;
2774 }
2775 spin_unlock(&global_rsv->lock);
2776 }
2777
2778 if (!readonly && return_free_space && len)
2779 btrfs_try_granting_tickets(fs_info, space_info);
2780 spin_unlock(&space_info->lock);
2781 }
2782
2783 if (cache)
2784 btrfs_put_block_group(cache);
2785 return 0;
2786}
2787
2788int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2789{
2790 struct btrfs_fs_info *fs_info = trans->fs_info;
2791 struct btrfs_block_group *block_group, *tmp;
2792 struct list_head *deleted_bgs;
2793 struct extent_io_tree *unpin;
2794 u64 start;
2795 u64 end;
2796 int ret;
2797
2798 unpin = &trans->transaction->pinned_extents;
2799
2800 while (!TRANS_ABORTED(trans)) {
2801 struct extent_state *cached_state = NULL;
2802
2803 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2804 ret = find_first_extent_bit(unpin, 0, &start, &end,
2805 EXTENT_DIRTY, &cached_state);
2806 if (ret) {
2807 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2808 break;
2809 }
2810
2811 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
2812 ret = btrfs_discard_extent(fs_info, start,
2813 end + 1 - start, NULL);
2814
2815 clear_extent_dirty(unpin, start, end, &cached_state);
2816 unpin_extent_range(fs_info, start, end, true);
2817 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2818 free_extent_state(cached_state);
2819 cond_resched();
2820 }
2821
2822 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2823 btrfs_discard_calc_delay(&fs_info->discard_ctl);
2824 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2825 }
2826
2827
2828
2829
2830
2831
2832 deleted_bgs = &trans->transaction->deleted_bgs;
2833 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2834 u64 trimmed = 0;
2835
2836 ret = -EROFS;
2837 if (!TRANS_ABORTED(trans))
2838 ret = btrfs_discard_extent(fs_info,
2839 block_group->start,
2840 block_group->length,
2841 &trimmed);
2842
2843 list_del_init(&block_group->bg_list);
2844 btrfs_unfreeze_block_group(block_group);
2845 btrfs_put_block_group(block_group);
2846
2847 if (ret) {
2848 const char *errstr = btrfs_decode_error(ret);
2849 btrfs_warn(fs_info,
2850 "discard failed while removing blockgroup: errno=%d %s",
2851 ret, errstr);
2852 }
2853 }
2854
2855 return 0;
2856}
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2918 struct btrfs_delayed_ref_node *node, u64 parent,
2919 u64 root_objectid, u64 owner_objectid,
2920 u64 owner_offset, int refs_to_drop,
2921 struct btrfs_delayed_extent_op *extent_op)
2922{
2923 struct btrfs_fs_info *info = trans->fs_info;
2924 struct btrfs_key key;
2925 struct btrfs_path *path;
2926 struct btrfs_root *extent_root = info->extent_root;
2927 struct extent_buffer *leaf;
2928 struct btrfs_extent_item *ei;
2929 struct btrfs_extent_inline_ref *iref;
2930 int ret;
2931 int is_data;
2932 int extent_slot = 0;
2933 int found_extent = 0;
2934 int num_to_del = 1;
2935 u32 item_size;
2936 u64 refs;
2937 u64 bytenr = node->bytenr;
2938 u64 num_bytes = node->num_bytes;
2939 int last_ref = 0;
2940 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2941
2942 path = btrfs_alloc_path();
2943 if (!path)
2944 return -ENOMEM;
2945
2946 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2947
2948 if (!is_data && refs_to_drop != 1) {
2949 btrfs_crit(info,
2950"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
2951 node->bytenr, refs_to_drop);
2952 ret = -EINVAL;
2953 btrfs_abort_transaction(trans, ret);
2954 goto out;
2955 }
2956
2957 if (is_data)
2958 skinny_metadata = false;
2959
2960 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2961 parent, root_objectid, owner_objectid,
2962 owner_offset);
2963 if (ret == 0) {
2964
2965
2966
2967
2968
2969
2970
2971 extent_slot = path->slots[0];
2972 while (extent_slot >= 0) {
2973 btrfs_item_key_to_cpu(path->nodes[0], &key,
2974 extent_slot);
2975 if (key.objectid != bytenr)
2976 break;
2977 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2978 key.offset == num_bytes) {
2979 found_extent = 1;
2980 break;
2981 }
2982 if (key.type == BTRFS_METADATA_ITEM_KEY &&
2983 key.offset == owner_objectid) {
2984 found_extent = 1;
2985 break;
2986 }
2987
2988
2989 if (path->slots[0] - extent_slot > 5)
2990 break;
2991 extent_slot--;
2992 }
2993
2994 if (!found_extent) {
2995 if (iref) {
2996 btrfs_crit(info,
2997"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
2998 btrfs_abort_transaction(trans, -EUCLEAN);
2999 goto err_dump;
3000 }
3001
3002 ret = remove_extent_backref(trans, path, NULL,
3003 refs_to_drop,
3004 is_data, &last_ref);
3005 if (ret) {
3006 btrfs_abort_transaction(trans, ret);
3007 goto out;
3008 }
3009 btrfs_release_path(path);
3010
3011
3012 key.objectid = bytenr;
3013 key.type = BTRFS_EXTENT_ITEM_KEY;
3014 key.offset = num_bytes;
3015
3016 if (!is_data && skinny_metadata) {
3017 key.type = BTRFS_METADATA_ITEM_KEY;
3018 key.offset = owner_objectid;
3019 }
3020
3021 ret = btrfs_search_slot(trans, extent_root,
3022 &key, path, -1, 1);
3023 if (ret > 0 && skinny_metadata && path->slots[0]) {
3024
3025
3026
3027
3028 path->slots[0]--;
3029 btrfs_item_key_to_cpu(path->nodes[0], &key,
3030 path->slots[0]);
3031 if (key.objectid == bytenr &&
3032 key.type == BTRFS_EXTENT_ITEM_KEY &&
3033 key.offset == num_bytes)
3034 ret = 0;
3035 }
3036
3037 if (ret > 0 && skinny_metadata) {
3038 skinny_metadata = false;
3039 key.objectid = bytenr;
3040 key.type = BTRFS_EXTENT_ITEM_KEY;
3041 key.offset = num_bytes;
3042 btrfs_release_path(path);
3043 ret = btrfs_search_slot(trans, extent_root,
3044 &key, path, -1, 1);
3045 }
3046
3047 if (ret) {
3048 btrfs_err(info,
3049 "umm, got %d back from search, was looking for %llu",
3050 ret, bytenr);
3051 if (ret > 0)
3052 btrfs_print_leaf(path->nodes[0]);
3053 }
3054 if (ret < 0) {
3055 btrfs_abort_transaction(trans, ret);
3056 goto out;
3057 }
3058 extent_slot = path->slots[0];
3059 }
3060 } else if (WARN_ON(ret == -ENOENT)) {
3061 btrfs_print_leaf(path->nodes[0]);
3062 btrfs_err(info,
3063 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3064 bytenr, parent, root_objectid, owner_objectid,
3065 owner_offset);
3066 btrfs_abort_transaction(trans, ret);
3067 goto out;
3068 } else {
3069 btrfs_abort_transaction(trans, ret);
3070 goto out;
3071 }
3072
3073 leaf = path->nodes[0];
3074 item_size = btrfs_item_size_nr(leaf, extent_slot);
3075 if (unlikely(item_size < sizeof(*ei))) {
3076 ret = -EINVAL;
3077 btrfs_print_v0_err(info);
3078 btrfs_abort_transaction(trans, ret);
3079 goto out;
3080 }
3081 ei = btrfs_item_ptr(leaf, extent_slot,
3082 struct btrfs_extent_item);
3083 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3084 key.type == BTRFS_EXTENT_ITEM_KEY) {
3085 struct btrfs_tree_block_info *bi;
3086 if (item_size < sizeof(*ei) + sizeof(*bi)) {
3087 btrfs_crit(info,
3088"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
3089 key.objectid, key.type, key.offset,
3090 owner_objectid, item_size,
3091 sizeof(*ei) + sizeof(*bi));
3092 btrfs_abort_transaction(trans, -EUCLEAN);
3093 goto err_dump;
3094 }
3095 bi = (struct btrfs_tree_block_info *)(ei + 1);
3096 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3097 }
3098
3099 refs = btrfs_extent_refs(leaf, ei);
3100 if (refs < refs_to_drop) {
3101 btrfs_crit(info,
3102 "trying to drop %d refs but we only have %llu for bytenr %llu",
3103 refs_to_drop, refs, bytenr);
3104 btrfs_abort_transaction(trans, -EUCLEAN);
3105 goto err_dump;
3106 }
3107 refs -= refs_to_drop;
3108
3109 if (refs > 0) {
3110 if (extent_op)
3111 __run_delayed_extent_op(extent_op, leaf, ei);
3112
3113
3114
3115
3116 if (iref) {
3117 if (!found_extent) {
3118 btrfs_crit(info,
3119"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
3120 btrfs_abort_transaction(trans, -EUCLEAN);
3121 goto err_dump;
3122 }
3123 } else {
3124 btrfs_set_extent_refs(leaf, ei, refs);
3125 btrfs_mark_buffer_dirty(leaf);
3126 }
3127 if (found_extent) {
3128 ret = remove_extent_backref(trans, path, iref,
3129 refs_to_drop, is_data,
3130 &last_ref);
3131 if (ret) {
3132 btrfs_abort_transaction(trans, ret);
3133 goto out;
3134 }
3135 }
3136 } else {
3137
3138 if (found_extent) {
3139 if (is_data && refs_to_drop !=
3140 extent_data_ref_count(path, iref)) {
3141 btrfs_crit(info,
3142 "invalid refs_to_drop, current refs %u refs_to_drop %u",
3143 extent_data_ref_count(path, iref),
3144 refs_to_drop);
3145 btrfs_abort_transaction(trans, -EUCLEAN);
3146 goto err_dump;
3147 }
3148 if (iref) {
3149 if (path->slots[0] != extent_slot) {
3150 btrfs_crit(info,
3151"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
3152 key.objectid, key.type,
3153 key.offset);
3154 btrfs_abort_transaction(trans, -EUCLEAN);
3155 goto err_dump;
3156 }
3157 } else {
3158
3159
3160
3161
3162
3163
3164 if (path->slots[0] != extent_slot + 1) {
3165 btrfs_crit(info,
3166 "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
3167 btrfs_abort_transaction(trans, -EUCLEAN);
3168 goto err_dump;
3169 }
3170 path->slots[0] = extent_slot;
3171 num_to_del = 2;
3172 }
3173 }
3174
3175 last_ref = 1;
3176 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3177 num_to_del);
3178 if (ret) {
3179 btrfs_abort_transaction(trans, ret);
3180 goto out;
3181 }
3182 btrfs_release_path(path);
3183
3184 if (is_data) {
3185 ret = btrfs_del_csums(trans, info->csum_root, bytenr,
3186 num_bytes);
3187 if (ret) {
3188 btrfs_abort_transaction(trans, ret);
3189 goto out;
3190 }
3191 }
3192
3193 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3194 if (ret) {
3195 btrfs_abort_transaction(trans, ret);
3196 goto out;
3197 }
3198
3199 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3200 if (ret) {
3201 btrfs_abort_transaction(trans, ret);
3202 goto out;
3203 }
3204 }
3205 btrfs_release_path(path);
3206
3207out:
3208 btrfs_free_path(path);
3209 return ret;
3210err_dump:
3211
3212
3213
3214
3215 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
3216 btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
3217 path->slots[0], extent_slot);
3218 btrfs_print_leaf(path->nodes[0]);
3219 }
3220
3221 btrfs_free_path(path);
3222 return -EUCLEAN;
3223}
3224
3225
3226
3227
3228
3229
3230
3231static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3232 u64 bytenr)
3233{
3234 struct btrfs_delayed_ref_head *head;
3235 struct btrfs_delayed_ref_root *delayed_refs;
3236 int ret = 0;
3237
3238 delayed_refs = &trans->transaction->delayed_refs;
3239 spin_lock(&delayed_refs->lock);
3240 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3241 if (!head)
3242 goto out_delayed_unlock;
3243
3244 spin_lock(&head->lock);
3245 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3246 goto out;
3247
3248 if (cleanup_extent_op(head) != NULL)
3249 goto out;
3250
3251
3252
3253
3254
3255 if (!mutex_trylock(&head->mutex))
3256 goto out;
3257
3258 btrfs_delete_ref_head(delayed_refs, head);
3259 head->processing = 0;
3260
3261 spin_unlock(&head->lock);
3262 spin_unlock(&delayed_refs->lock);
3263
3264 BUG_ON(head->extent_op);
3265 if (head->must_insert_reserved)
3266 ret = 1;
3267
3268 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3269 mutex_unlock(&head->mutex);
3270 btrfs_put_delayed_ref_head(head);
3271 return ret;
3272out:
3273 spin_unlock(&head->lock);
3274
3275out_delayed_unlock:
3276 spin_unlock(&delayed_refs->lock);
3277 return 0;
3278}
3279
3280void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3281 struct btrfs_root *root,
3282 struct extent_buffer *buf,
3283 u64 parent, int last_ref)
3284{
3285 struct btrfs_fs_info *fs_info = root->fs_info;
3286 struct btrfs_ref generic_ref = { 0 };
3287 int ret;
3288
3289 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3290 buf->start, buf->len, parent);
3291 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3292 root->root_key.objectid);
3293
3294 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3295 btrfs_ref_tree_mod(fs_info, &generic_ref);
3296 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
3297 BUG_ON(ret);
3298 }
3299
3300 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3301 struct btrfs_block_group *cache;
3302 bool must_pin = false;
3303
3304 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3305 ret = check_ref_cleanup(trans, buf->start);
3306 if (!ret) {
3307 btrfs_redirty_list_add(trans->transaction, buf);
3308 goto out;
3309 }
3310 }
3311
3312 cache = btrfs_lookup_block_group(fs_info, buf->start);
3313
3314 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3315 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3316 btrfs_put_block_group(cache);
3317 goto out;
3318 }
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334 if (btrfs_header_level(buf) == 0 &&
3335 test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
3336 must_pin = true;
3337
3338 if (must_pin || btrfs_is_zoned(fs_info)) {
3339 btrfs_redirty_list_add(trans->transaction, buf);
3340 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3341 btrfs_put_block_group(cache);
3342 goto out;
3343 }
3344
3345 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3346
3347 btrfs_add_free_space(cache, buf->start, buf->len);
3348 btrfs_free_reserved_bytes(cache, buf->len, 0);
3349 btrfs_put_block_group(cache);
3350 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3351 }
3352out:
3353 if (last_ref) {
3354
3355
3356
3357
3358 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3359 }
3360}
3361
3362
3363int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3364{
3365 struct btrfs_fs_info *fs_info = trans->fs_info;
3366 int ret;
3367
3368 if (btrfs_is_testing(fs_info))
3369 return 0;
3370
3371
3372
3373
3374
3375 if ((ref->type == BTRFS_REF_METADATA &&
3376 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3377 (ref->type == BTRFS_REF_DATA &&
3378 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3379
3380 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
3381 ret = 0;
3382 } else if (ref->type == BTRFS_REF_METADATA) {
3383 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
3384 } else {
3385 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
3386 }
3387
3388 if (!((ref->type == BTRFS_REF_METADATA &&
3389 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3390 (ref->type == BTRFS_REF_DATA &&
3391 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3392 btrfs_ref_tree_mod(fs_info, ref);
3393
3394 return ret;
3395}
3396
3397enum btrfs_loop_type {
3398 LOOP_CACHING_NOWAIT,
3399 LOOP_CACHING_WAIT,
3400 LOOP_ALLOC_CHUNK,
3401 LOOP_NO_EMPTY_SIZE,
3402};
3403
3404static inline void
3405btrfs_lock_block_group(struct btrfs_block_group *cache,
3406 int delalloc)
3407{
3408 if (delalloc)
3409 down_read(&cache->data_rwsem);
3410}
3411
3412static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3413 int delalloc)
3414{
3415 btrfs_get_block_group(cache);
3416 if (delalloc)
3417 down_read(&cache->data_rwsem);
3418}
3419
3420static struct btrfs_block_group *btrfs_lock_cluster(
3421 struct btrfs_block_group *block_group,
3422 struct btrfs_free_cluster *cluster,
3423 int delalloc)
3424 __acquires(&cluster->refill_lock)
3425{
3426 struct btrfs_block_group *used_bg = NULL;
3427
3428 spin_lock(&cluster->refill_lock);
3429 while (1) {
3430 used_bg = cluster->block_group;
3431 if (!used_bg)
3432 return NULL;
3433
3434 if (used_bg == block_group)
3435 return used_bg;
3436
3437 btrfs_get_block_group(used_bg);
3438
3439 if (!delalloc)
3440 return used_bg;
3441
3442 if (down_read_trylock(&used_bg->data_rwsem))
3443 return used_bg;
3444
3445 spin_unlock(&cluster->refill_lock);
3446
3447
3448 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3449
3450 spin_lock(&cluster->refill_lock);
3451 if (used_bg == cluster->block_group)
3452 return used_bg;
3453
3454 up_read(&used_bg->data_rwsem);
3455 btrfs_put_block_group(used_bg);
3456 }
3457}
3458
3459static inline void
3460btrfs_release_block_group(struct btrfs_block_group *cache,
3461 int delalloc)
3462{
3463 if (delalloc)
3464 up_read(&cache->data_rwsem);
3465 btrfs_put_block_group(cache);
3466}
3467
3468enum btrfs_extent_allocation_policy {
3469 BTRFS_EXTENT_ALLOC_CLUSTERED,
3470 BTRFS_EXTENT_ALLOC_ZONED,
3471};
3472
3473
3474
3475
3476
3477struct find_free_extent_ctl {
3478
3479 u64 num_bytes;
3480 u64 empty_size;
3481 u64 flags;
3482 int delalloc;
3483
3484
3485 u64 search_start;
3486
3487
3488 u64 empty_cluster;
3489 struct btrfs_free_cluster *last_ptr;
3490 bool use_cluster;
3491
3492 bool have_caching_bg;
3493 bool orig_have_caching_bg;
3494
3495
3496 bool for_treelog;
3497
3498
3499 int index;
3500
3501
3502
3503
3504 int loop;
3505
3506
3507
3508
3509
3510 bool retry_clustered;
3511
3512
3513
3514
3515
3516 bool retry_unclustered;
3517
3518
3519 int cached;
3520
3521
3522 u64 max_extent_size;
3523
3524
3525 u64 total_free_space;
3526
3527
3528 u64 found_offset;
3529
3530
3531 u64 hint_byte;
3532
3533
3534 enum btrfs_extent_allocation_policy policy;
3535};
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546static int find_free_extent_clustered(struct btrfs_block_group *bg,
3547 struct find_free_extent_ctl *ffe_ctl,
3548 struct btrfs_block_group **cluster_bg_ret)
3549{
3550 struct btrfs_block_group *cluster_bg;
3551 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3552 u64 aligned_cluster;
3553 u64 offset;
3554 int ret;
3555
3556 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3557 if (!cluster_bg)
3558 goto refill_cluster;
3559 if (cluster_bg != bg && (cluster_bg->ro ||
3560 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3561 goto release_cluster;
3562
3563 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3564 ffe_ctl->num_bytes, cluster_bg->start,
3565 &ffe_ctl->max_extent_size);
3566 if (offset) {
3567
3568 spin_unlock(&last_ptr->refill_lock);
3569 trace_btrfs_reserve_extent_cluster(cluster_bg,
3570 ffe_ctl->search_start, ffe_ctl->num_bytes);
3571 *cluster_bg_ret = cluster_bg;
3572 ffe_ctl->found_offset = offset;
3573 return 0;
3574 }
3575 WARN_ON(last_ptr->block_group != cluster_bg);
3576
3577release_cluster:
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3590 spin_unlock(&last_ptr->refill_lock);
3591 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3592 return -ENOENT;
3593 }
3594
3595
3596 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3597
3598 if (cluster_bg != bg)
3599 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3600
3601refill_cluster:
3602 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3603 spin_unlock(&last_ptr->refill_lock);
3604 return -ENOENT;
3605 }
3606
3607 aligned_cluster = max_t(u64,
3608 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3609 bg->full_stripe_len);
3610 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3611 ffe_ctl->num_bytes, aligned_cluster);
3612 if (ret == 0) {
3613
3614 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3615 ffe_ctl->num_bytes, ffe_ctl->search_start,
3616 &ffe_ctl->max_extent_size);
3617 if (offset) {
3618
3619 spin_unlock(&last_ptr->refill_lock);
3620 trace_btrfs_reserve_extent_cluster(bg,
3621 ffe_ctl->search_start,
3622 ffe_ctl->num_bytes);
3623 ffe_ctl->found_offset = offset;
3624 return 0;
3625 }
3626 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3627 !ffe_ctl->retry_clustered) {
3628 spin_unlock(&last_ptr->refill_lock);
3629
3630 ffe_ctl->retry_clustered = true;
3631 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3632 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3633 return -EAGAIN;
3634 }
3635
3636
3637
3638
3639
3640 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3641 spin_unlock(&last_ptr->refill_lock);
3642 return 1;
3643}
3644
3645
3646
3647
3648
3649
3650static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3651 struct find_free_extent_ctl *ffe_ctl)
3652{
3653 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3654 u64 offset;
3655
3656
3657
3658
3659
3660
3661 if (unlikely(last_ptr)) {
3662 spin_lock(&last_ptr->lock);
3663 last_ptr->fragmented = 1;
3664 spin_unlock(&last_ptr->lock);
3665 }
3666 if (ffe_ctl->cached) {
3667 struct btrfs_free_space_ctl *free_space_ctl;
3668
3669 free_space_ctl = bg->free_space_ctl;
3670 spin_lock(&free_space_ctl->tree_lock);
3671 if (free_space_ctl->free_space <
3672 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3673 ffe_ctl->empty_size) {
3674 ffe_ctl->total_free_space = max_t(u64,
3675 ffe_ctl->total_free_space,
3676 free_space_ctl->free_space);
3677 spin_unlock(&free_space_ctl->tree_lock);
3678 return 1;
3679 }
3680 spin_unlock(&free_space_ctl->tree_lock);
3681 }
3682
3683 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3684 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3685 &ffe_ctl->max_extent_size);
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3697 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3698 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3699 ffe_ctl->empty_size);
3700 ffe_ctl->retry_unclustered = true;
3701 return -EAGAIN;
3702 } else if (!offset) {
3703 return 1;
3704 }
3705 ffe_ctl->found_offset = offset;
3706 return 0;
3707}
3708
3709static int do_allocation_clustered(struct btrfs_block_group *block_group,
3710 struct find_free_extent_ctl *ffe_ctl,
3711 struct btrfs_block_group **bg_ret)
3712{
3713 int ret;
3714
3715
3716 if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3717 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3718 if (ret >= 0 || ret == -EAGAIN)
3719 return ret;
3720
3721 }
3722
3723 return find_free_extent_unclustered(block_group, ffe_ctl);
3724}
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747static int do_allocation_zoned(struct btrfs_block_group *block_group,
3748 struct find_free_extent_ctl *ffe_ctl,
3749 struct btrfs_block_group **bg_ret)
3750{
3751 struct btrfs_fs_info *fs_info = block_group->fs_info;
3752 struct btrfs_space_info *space_info = block_group->space_info;
3753 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3754 u64 start = block_group->start;
3755 u64 num_bytes = ffe_ctl->num_bytes;
3756 u64 avail;
3757 u64 bytenr = block_group->start;
3758 u64 log_bytenr;
3759 int ret = 0;
3760 bool skip;
3761
3762 ASSERT(btrfs_is_zoned(block_group->fs_info));
3763
3764
3765
3766
3767
3768 spin_lock(&fs_info->treelog_bg_lock);
3769 log_bytenr = fs_info->treelog_bg;
3770 skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
3771 (!ffe_ctl->for_treelog && bytenr == log_bytenr));
3772 spin_unlock(&fs_info->treelog_bg_lock);
3773 if (skip)
3774 return 1;
3775
3776 spin_lock(&space_info->lock);
3777 spin_lock(&block_group->lock);
3778 spin_lock(&fs_info->treelog_bg_lock);
3779
3780 ASSERT(!ffe_ctl->for_treelog ||
3781 block_group->start == fs_info->treelog_bg ||
3782 fs_info->treelog_bg == 0);
3783
3784 if (block_group->ro) {
3785 ret = 1;
3786 goto out;
3787 }
3788
3789
3790
3791
3792
3793 if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
3794 (block_group->used || block_group->reserved)) {
3795 ret = 1;
3796 goto out;
3797 }
3798
3799 avail = block_group->length - block_group->alloc_offset;
3800 if (avail < num_bytes) {
3801 if (ffe_ctl->max_extent_size < avail) {
3802
3803
3804
3805
3806 ffe_ctl->max_extent_size = avail;
3807 ffe_ctl->total_free_space = avail;
3808 }
3809 ret = 1;
3810 goto out;
3811 }
3812
3813 if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
3814 fs_info->treelog_bg = block_group->start;
3815
3816 ffe_ctl->found_offset = start + block_group->alloc_offset;
3817 block_group->alloc_offset += num_bytes;
3818 spin_lock(&ctl->tree_lock);
3819 ctl->free_space -= num_bytes;
3820 spin_unlock(&ctl->tree_lock);
3821
3822
3823
3824
3825
3826
3827 ffe_ctl->search_start = ffe_ctl->found_offset;
3828
3829out:
3830 if (ret && ffe_ctl->for_treelog)
3831 fs_info->treelog_bg = 0;
3832 spin_unlock(&fs_info->treelog_bg_lock);
3833 spin_unlock(&block_group->lock);
3834 spin_unlock(&space_info->lock);
3835 return ret;
3836}
3837
3838static int do_allocation(struct btrfs_block_group *block_group,
3839 struct find_free_extent_ctl *ffe_ctl,
3840 struct btrfs_block_group **bg_ret)
3841{
3842 switch (ffe_ctl->policy) {
3843 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3844 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
3845 case BTRFS_EXTENT_ALLOC_ZONED:
3846 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
3847 default:
3848 BUG();
3849 }
3850}
3851
3852static void release_block_group(struct btrfs_block_group *block_group,
3853 struct find_free_extent_ctl *ffe_ctl,
3854 int delalloc)
3855{
3856 switch (ffe_ctl->policy) {
3857 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3858 ffe_ctl->retry_clustered = false;
3859 ffe_ctl->retry_unclustered = false;
3860 break;
3861 case BTRFS_EXTENT_ALLOC_ZONED:
3862
3863 break;
3864 default:
3865 BUG();
3866 }
3867
3868 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3869 ffe_ctl->index);
3870 btrfs_release_block_group(block_group, delalloc);
3871}
3872
3873static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
3874 struct btrfs_key *ins)
3875{
3876 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3877
3878 if (!ffe_ctl->use_cluster && last_ptr) {
3879 spin_lock(&last_ptr->lock);
3880 last_ptr->window_start = ins->objectid;
3881 spin_unlock(&last_ptr->lock);
3882 }
3883}
3884
3885static void found_extent(struct find_free_extent_ctl *ffe_ctl,
3886 struct btrfs_key *ins)
3887{
3888 switch (ffe_ctl->policy) {
3889 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3890 found_extent_clustered(ffe_ctl, ins);
3891 break;
3892 case BTRFS_EXTENT_ALLOC_ZONED:
3893
3894 break;
3895 default:
3896 BUG();
3897 }
3898}
3899
3900static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
3901{
3902 switch (ffe_ctl->policy) {
3903 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3904
3905
3906
3907
3908 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3909 return 0;
3910 case BTRFS_EXTENT_ALLOC_ZONED:
3911
3912 return -ENOSPC;
3913 default:
3914 BUG();
3915 }
3916}
3917
3918
3919
3920
3921
3922
3923static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3924 struct btrfs_key *ins,
3925 struct find_free_extent_ctl *ffe_ctl,
3926 bool full_search)
3927{
3928 struct btrfs_root *root = fs_info->extent_root;
3929 int ret;
3930
3931 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3932 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3933 ffe_ctl->orig_have_caching_bg = true;
3934
3935 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3936 ffe_ctl->have_caching_bg)
3937 return 1;
3938
3939 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3940 return 1;
3941
3942 if (ins->objectid) {
3943 found_extent(ffe_ctl, ins);
3944 return 0;
3945 }
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3956 ffe_ctl->index = 0;
3957 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3958
3959
3960
3961
3962
3963 if (ffe_ctl->orig_have_caching_bg || !full_search)
3964 ffe_ctl->loop = LOOP_CACHING_WAIT;
3965 else
3966 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3967 } else {
3968 ffe_ctl->loop++;
3969 }
3970
3971 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3972 struct btrfs_trans_handle *trans;
3973 int exist = 0;
3974
3975 trans = current->journal_info;
3976 if (trans)
3977 exist = 1;
3978 else
3979 trans = btrfs_join_transaction(root);
3980
3981 if (IS_ERR(trans)) {
3982 ret = PTR_ERR(trans);
3983 return ret;
3984 }
3985
3986 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3987 CHUNK_ALLOC_FORCE);
3988
3989
3990 if (ret == -ENOSPC)
3991 ret = chunk_allocation_failed(ffe_ctl);
3992 else if (ret < 0)
3993 btrfs_abort_transaction(trans, ret);
3994 else
3995 ret = 0;
3996 if (!exist)
3997 btrfs_end_transaction(trans);
3998 if (ret)
3999 return ret;
4000 }
4001
4002 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
4003 if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
4004 return -ENOSPC;
4005
4006
4007
4008
4009
4010 if (ffe_ctl->empty_size == 0 &&
4011 ffe_ctl->empty_cluster == 0)
4012 return -ENOSPC;
4013 ffe_ctl->empty_size = 0;
4014 ffe_ctl->empty_cluster = 0;
4015 }
4016 return 1;
4017 }
4018 return -ENOSPC;
4019}
4020
4021static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
4022 struct find_free_extent_ctl *ffe_ctl,
4023 struct btrfs_space_info *space_info,
4024 struct btrfs_key *ins)
4025{
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036 if (space_info->max_extent_size) {
4037 spin_lock(&space_info->lock);
4038 if (space_info->max_extent_size &&
4039 ffe_ctl->num_bytes > space_info->max_extent_size) {
4040 ins->offset = space_info->max_extent_size;
4041 spin_unlock(&space_info->lock);
4042 return -ENOSPC;
4043 } else if (space_info->max_extent_size) {
4044 ffe_ctl->use_cluster = false;
4045 }
4046 spin_unlock(&space_info->lock);
4047 }
4048
4049 ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
4050 &ffe_ctl->empty_cluster);
4051 if (ffe_ctl->last_ptr) {
4052 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
4053
4054 spin_lock(&last_ptr->lock);
4055 if (last_ptr->block_group)
4056 ffe_ctl->hint_byte = last_ptr->window_start;
4057 if (last_ptr->fragmented) {
4058
4059
4060
4061
4062
4063 ffe_ctl->hint_byte = last_ptr->window_start;
4064 ffe_ctl->use_cluster = false;
4065 }
4066 spin_unlock(&last_ptr->lock);
4067 }
4068
4069 return 0;
4070}
4071
4072static int prepare_allocation(struct btrfs_fs_info *fs_info,
4073 struct find_free_extent_ctl *ffe_ctl,
4074 struct btrfs_space_info *space_info,
4075 struct btrfs_key *ins)
4076{
4077 switch (ffe_ctl->policy) {
4078 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4079 return prepare_allocation_clustered(fs_info, ffe_ctl,
4080 space_info, ins);
4081 case BTRFS_EXTENT_ALLOC_ZONED:
4082 if (ffe_ctl->for_treelog) {
4083 spin_lock(&fs_info->treelog_bg_lock);
4084 if (fs_info->treelog_bg)
4085 ffe_ctl->hint_byte = fs_info->treelog_bg;
4086 spin_unlock(&fs_info->treelog_bg_lock);
4087 }
4088 return 0;
4089 default:
4090 BUG();
4091 }
4092}
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119static noinline int find_free_extent(struct btrfs_root *root,
4120 u64 ram_bytes, u64 num_bytes, u64 empty_size,
4121 u64 hint_byte_orig, struct btrfs_key *ins,
4122 u64 flags, int delalloc)
4123{
4124 struct btrfs_fs_info *fs_info = root->fs_info;
4125 int ret = 0;
4126 int cache_block_group_error = 0;
4127 struct btrfs_block_group *block_group = NULL;
4128 struct find_free_extent_ctl ffe_ctl = {0};
4129 struct btrfs_space_info *space_info;
4130 bool full_search = false;
4131 bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4132
4133 WARN_ON(num_bytes < fs_info->sectorsize);
4134
4135 ffe_ctl.num_bytes = num_bytes;
4136 ffe_ctl.empty_size = empty_size;
4137 ffe_ctl.flags = flags;
4138 ffe_ctl.search_start = 0;
4139 ffe_ctl.delalloc = delalloc;
4140 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
4141 ffe_ctl.have_caching_bg = false;
4142 ffe_ctl.orig_have_caching_bg = false;
4143 ffe_ctl.found_offset = 0;
4144 ffe_ctl.hint_byte = hint_byte_orig;
4145 ffe_ctl.for_treelog = for_treelog;
4146 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
4147
4148
4149 ffe_ctl.retry_clustered = false;
4150 ffe_ctl.retry_unclustered = false;
4151 ffe_ctl.last_ptr = NULL;
4152 ffe_ctl.use_cluster = true;
4153
4154 if (btrfs_is_zoned(fs_info))
4155 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_ZONED;
4156
4157 ins->type = BTRFS_EXTENT_ITEM_KEY;
4158 ins->objectid = 0;
4159 ins->offset = 0;
4160
4161 trace_find_free_extent(root, num_bytes, empty_size, flags);
4162
4163 space_info = btrfs_find_space_info(fs_info, flags);
4164 if (!space_info) {
4165 btrfs_err(fs_info, "No space info for %llu", flags);
4166 return -ENOSPC;
4167 }
4168
4169 ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
4170 if (ret < 0)
4171 return ret;
4172
4173 ffe_ctl.search_start = max(ffe_ctl.search_start,
4174 first_logical_byte(fs_info, 0));
4175 ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
4176 if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
4177 block_group = btrfs_lookup_block_group(fs_info,
4178 ffe_ctl.search_start);
4179
4180
4181
4182
4183
4184
4185
4186 if (block_group && block_group_bits(block_group, flags) &&
4187 block_group->cached != BTRFS_CACHE_NO) {
4188 down_read(&space_info->groups_sem);
4189 if (list_empty(&block_group->list) ||
4190 block_group->ro) {
4191
4192
4193
4194
4195
4196
4197 btrfs_put_block_group(block_group);
4198 up_read(&space_info->groups_sem);
4199 } else {
4200 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
4201 block_group->flags);
4202 btrfs_lock_block_group(block_group, delalloc);
4203 goto have_block_group;
4204 }
4205 } else if (block_group) {
4206 btrfs_put_block_group(block_group);
4207 }
4208 }
4209search:
4210 ffe_ctl.have_caching_bg = false;
4211 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
4212 ffe_ctl.index == 0)
4213 full_search = true;
4214 down_read(&space_info->groups_sem);
4215 list_for_each_entry(block_group,
4216 &space_info->block_groups[ffe_ctl.index], list) {
4217 struct btrfs_block_group *bg_ret;
4218
4219
4220 if (unlikely(block_group->ro)) {
4221 if (for_treelog)
4222 btrfs_clear_treelog_bg(block_group);
4223 continue;
4224 }
4225
4226 btrfs_grab_block_group(block_group, delalloc);
4227 ffe_ctl.search_start = block_group->start;
4228
4229
4230
4231
4232
4233
4234 if (!block_group_bits(block_group, flags)) {
4235 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4236 BTRFS_BLOCK_GROUP_RAID1_MASK |
4237 BTRFS_BLOCK_GROUP_RAID56_MASK |
4238 BTRFS_BLOCK_GROUP_RAID10;
4239
4240
4241
4242
4243
4244
4245 if ((flags & extra) && !(block_group->flags & extra))
4246 goto loop;
4247
4248
4249
4250
4251
4252
4253 btrfs_release_block_group(block_group, delalloc);
4254 continue;
4255 }
4256
4257have_block_group:
4258 ffe_ctl.cached = btrfs_block_group_done(block_group);
4259 if (unlikely(!ffe_ctl.cached)) {
4260 ffe_ctl.have_caching_bg = true;
4261 ret = btrfs_cache_block_group(block_group, 0);
4262
4263
4264
4265
4266
4267
4268
4269
4270 if (ret < 0) {
4271 if (!cache_block_group_error)
4272 cache_block_group_error = ret;
4273 ret = 0;
4274 goto loop;
4275 }
4276 ret = 0;
4277 }
4278
4279 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
4280 goto loop;
4281
4282 bg_ret = NULL;
4283 ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
4284 if (ret == 0) {
4285 if (bg_ret && bg_ret != block_group) {
4286 btrfs_release_block_group(block_group, delalloc);
4287 block_group = bg_ret;
4288 }
4289 } else if (ret == -EAGAIN) {
4290 goto have_block_group;
4291 } else if (ret > 0) {
4292 goto loop;
4293 }
4294
4295
4296 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
4297 fs_info->stripesize);
4298
4299
4300 if (ffe_ctl.search_start + num_bytes >
4301 block_group->start + block_group->length) {
4302 btrfs_add_free_space_unused(block_group,
4303 ffe_ctl.found_offset, num_bytes);
4304 goto loop;
4305 }
4306
4307 if (ffe_ctl.found_offset < ffe_ctl.search_start)
4308 btrfs_add_free_space_unused(block_group,
4309 ffe_ctl.found_offset,
4310 ffe_ctl.search_start - ffe_ctl.found_offset);
4311
4312 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
4313 num_bytes, delalloc);
4314 if (ret == -EAGAIN) {
4315 btrfs_add_free_space_unused(block_group,
4316 ffe_ctl.found_offset, num_bytes);
4317 goto loop;
4318 }
4319 btrfs_inc_block_group_reservations(block_group);
4320
4321
4322 ins->objectid = ffe_ctl.search_start;
4323 ins->offset = num_bytes;
4324
4325 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4326 num_bytes);
4327 btrfs_release_block_group(block_group, delalloc);
4328 break;
4329loop:
4330 release_block_group(block_group, &ffe_ctl, delalloc);
4331 cond_resched();
4332 }
4333 up_read(&space_info->groups_sem);
4334
4335 ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
4336 if (ret > 0)
4337 goto search;
4338
4339 if (ret == -ENOSPC && !cache_block_group_error) {
4340
4341
4342
4343
4344 if (!ffe_ctl.max_extent_size)
4345 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4346 spin_lock(&space_info->lock);
4347 space_info->max_extent_size = ffe_ctl.max_extent_size;
4348 spin_unlock(&space_info->lock);
4349 ins->offset = ffe_ctl.max_extent_size;
4350 } else if (ret == -ENOSPC) {
4351 ret = cache_block_group_error;
4352 }
4353 return ret;
4354}
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4402 u64 num_bytes, u64 min_alloc_size,
4403 u64 empty_size, u64 hint_byte,
4404 struct btrfs_key *ins, int is_data, int delalloc)
4405{
4406 struct btrfs_fs_info *fs_info = root->fs_info;
4407 bool final_tried = num_bytes == min_alloc_size;
4408 u64 flags;
4409 int ret;
4410 bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4411
4412 flags = get_alloc_profile_by_root(root, is_data);
4413again:
4414 WARN_ON(num_bytes < fs_info->sectorsize);
4415 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
4416 hint_byte, ins, flags, delalloc);
4417 if (!ret && !is_data) {
4418 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4419 } else if (ret == -ENOSPC) {
4420 if (!final_tried && ins->offset) {
4421 num_bytes = min(num_bytes >> 1, ins->offset);
4422 num_bytes = round_down(num_bytes,
4423 fs_info->sectorsize);
4424 num_bytes = max(num_bytes, min_alloc_size);
4425 ram_bytes = num_bytes;
4426 if (num_bytes == min_alloc_size)
4427 final_tried = true;
4428 goto again;
4429 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4430 struct btrfs_space_info *sinfo;
4431
4432 sinfo = btrfs_find_space_info(fs_info, flags);
4433 btrfs_err(fs_info,
4434 "allocation failed flags %llu, wanted %llu tree-log %d",
4435 flags, num_bytes, for_treelog);
4436 if (sinfo)
4437 btrfs_dump_space_info(fs_info, sinfo,
4438 num_bytes, 1);
4439 }
4440 }
4441
4442 return ret;
4443}
4444
4445int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4446 u64 start, u64 len, int delalloc)
4447{
4448 struct btrfs_block_group *cache;
4449
4450 cache = btrfs_lookup_block_group(fs_info, start);
4451 if (!cache) {
4452 btrfs_err(fs_info, "Unable to find block group for %llu",
4453 start);
4454 return -ENOSPC;
4455 }
4456
4457 btrfs_add_free_space(cache, start, len);
4458 btrfs_free_reserved_bytes(cache, len, delalloc);
4459 trace_btrfs_reserved_extent_free(fs_info, start, len);
4460
4461 btrfs_put_block_group(cache);
4462 return 0;
4463}
4464
4465int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
4466 u64 len)
4467{
4468 struct btrfs_block_group *cache;
4469 int ret = 0;
4470
4471 cache = btrfs_lookup_block_group(trans->fs_info, start);
4472 if (!cache) {
4473 btrfs_err(trans->fs_info, "unable to find block group for %llu",
4474 start);
4475 return -ENOSPC;
4476 }
4477
4478 ret = pin_down_extent(trans, cache, start, len, 1);
4479 btrfs_put_block_group(cache);
4480 return ret;
4481}
4482
4483static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4484 u64 parent, u64 root_objectid,
4485 u64 flags, u64 owner, u64 offset,
4486 struct btrfs_key *ins, int ref_mod)
4487{
4488 struct btrfs_fs_info *fs_info = trans->fs_info;
4489 int ret;
4490 struct btrfs_extent_item *extent_item;
4491 struct btrfs_extent_inline_ref *iref;
4492 struct btrfs_path *path;
4493 struct extent_buffer *leaf;
4494 int type;
4495 u32 size;
4496
4497 if (parent > 0)
4498 type = BTRFS_SHARED_DATA_REF_KEY;
4499 else
4500 type = BTRFS_EXTENT_DATA_REF_KEY;
4501
4502 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4503
4504 path = btrfs_alloc_path();
4505 if (!path)
4506 return -ENOMEM;
4507
4508 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4509 ins, size);
4510 if (ret) {
4511 btrfs_free_path(path);
4512 return ret;
4513 }
4514
4515 leaf = path->nodes[0];
4516 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4517 struct btrfs_extent_item);
4518 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4519 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4520 btrfs_set_extent_flags(leaf, extent_item,
4521 flags | BTRFS_EXTENT_FLAG_DATA);
4522
4523 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4524 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4525 if (parent > 0) {
4526 struct btrfs_shared_data_ref *ref;
4527 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4528 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4529 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4530 } else {
4531 struct btrfs_extent_data_ref *ref;
4532 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4533 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4534 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4535 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4536 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4537 }
4538
4539 btrfs_mark_buffer_dirty(path->nodes[0]);
4540 btrfs_free_path(path);
4541
4542 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4543 if (ret)
4544 return ret;
4545
4546 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4547 if (ret) {
4548 btrfs_err(fs_info, "update block group failed for %llu %llu",
4549 ins->objectid, ins->offset);
4550 BUG();
4551 }
4552 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4553 return ret;
4554}
4555
4556static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4557 struct btrfs_delayed_ref_node *node,
4558 struct btrfs_delayed_extent_op *extent_op)
4559{
4560 struct btrfs_fs_info *fs_info = trans->fs_info;
4561 int ret;
4562 struct btrfs_extent_item *extent_item;
4563 struct btrfs_key extent_key;
4564 struct btrfs_tree_block_info *block_info;
4565 struct btrfs_extent_inline_ref *iref;
4566 struct btrfs_path *path;
4567 struct extent_buffer *leaf;
4568 struct btrfs_delayed_tree_ref *ref;
4569 u32 size = sizeof(*extent_item) + sizeof(*iref);
4570 u64 num_bytes;
4571 u64 flags = extent_op->flags_to_set;
4572 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4573
4574 ref = btrfs_delayed_node_to_tree_ref(node);
4575
4576 extent_key.objectid = node->bytenr;
4577 if (skinny_metadata) {
4578 extent_key.offset = ref->level;
4579 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4580 num_bytes = fs_info->nodesize;
4581 } else {
4582 extent_key.offset = node->num_bytes;
4583 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4584 size += sizeof(*block_info);
4585 num_bytes = node->num_bytes;
4586 }
4587
4588 path = btrfs_alloc_path();
4589 if (!path)
4590 return -ENOMEM;
4591
4592 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4593 &extent_key, size);
4594 if (ret) {
4595 btrfs_free_path(path);
4596 return ret;
4597 }
4598
4599 leaf = path->nodes[0];
4600 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4601 struct btrfs_extent_item);
4602 btrfs_set_extent_refs(leaf, extent_item, 1);
4603 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4604 btrfs_set_extent_flags(leaf, extent_item,
4605 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4606
4607 if (skinny_metadata) {
4608 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4609 } else {
4610 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4611 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4612 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4613 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4614 }
4615
4616 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4617 btrfs_set_extent_inline_ref_type(leaf, iref,
4618 BTRFS_SHARED_BLOCK_REF_KEY);
4619 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4620 } else {
4621 btrfs_set_extent_inline_ref_type(leaf, iref,
4622 BTRFS_TREE_BLOCK_REF_KEY);
4623 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4624 }
4625
4626 btrfs_mark_buffer_dirty(leaf);
4627 btrfs_free_path(path);
4628
4629 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4630 num_bytes);
4631 if (ret)
4632 return ret;
4633
4634 ret = btrfs_update_block_group(trans, extent_key.objectid,
4635 fs_info->nodesize, 1);
4636 if (ret) {
4637 btrfs_err(fs_info, "update block group failed for %llu %llu",
4638 extent_key.objectid, extent_key.offset);
4639 BUG();
4640 }
4641
4642 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4643 fs_info->nodesize);
4644 return ret;
4645}
4646
4647int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4648 struct btrfs_root *root, u64 owner,
4649 u64 offset, u64 ram_bytes,
4650 struct btrfs_key *ins)
4651{
4652 struct btrfs_ref generic_ref = { 0 };
4653
4654 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4655
4656 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4657 ins->objectid, ins->offset, 0);
4658 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4659 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4660
4661 return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
4662}
4663
4664
4665
4666
4667
4668
4669int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4670 u64 root_objectid, u64 owner, u64 offset,
4671 struct btrfs_key *ins)
4672{
4673 struct btrfs_fs_info *fs_info = trans->fs_info;
4674 int ret;
4675 struct btrfs_block_group *block_group;
4676 struct btrfs_space_info *space_info;
4677
4678
4679
4680
4681
4682 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4683 ret = __exclude_logged_extent(fs_info, ins->objectid,
4684 ins->offset);
4685 if (ret)
4686 return ret;
4687 }
4688
4689 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4690 if (!block_group)
4691 return -EINVAL;
4692
4693 space_info = block_group->space_info;
4694 spin_lock(&space_info->lock);
4695 spin_lock(&block_group->lock);
4696 space_info->bytes_reserved += ins->offset;
4697 block_group->reserved += ins->offset;
4698 spin_unlock(&block_group->lock);
4699 spin_unlock(&space_info->lock);
4700
4701 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4702 offset, ins, 1);
4703 if (ret)
4704 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4705 btrfs_put_block_group(block_group);
4706 return ret;
4707}
4708
4709static struct extent_buffer *
4710btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4711 u64 bytenr, int level, u64 owner,
4712 enum btrfs_lock_nesting nest)
4713{
4714 struct btrfs_fs_info *fs_info = root->fs_info;
4715 struct extent_buffer *buf;
4716
4717 buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
4718 if (IS_ERR(buf))
4719 return buf;
4720
4721
4722
4723
4724
4725
4726 if (buf->lock_owner == current->pid) {
4727 btrfs_err_rl(fs_info,
4728"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4729 buf->start, btrfs_header_owner(buf), current->pid);
4730 free_extent_buffer(buf);
4731 return ERR_PTR(-EUCLEAN);
4732 }
4733
4734
4735
4736
4737
4738
4739 btrfs_set_buffer_lockdep_class(owner, buf, level);
4740 __btrfs_tree_lock(buf, nest);
4741 btrfs_clean_tree_block(buf);
4742 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4743 clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
4744
4745 set_extent_buffer_uptodate(buf);
4746
4747 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4748 btrfs_set_header_level(buf, level);
4749 btrfs_set_header_bytenr(buf, buf->start);
4750 btrfs_set_header_generation(buf, trans->transid);
4751 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4752 btrfs_set_header_owner(buf, owner);
4753 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4754 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4755 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4756 buf->log_index = root->log_transid % 2;
4757
4758
4759
4760
4761 if (buf->log_index == 0)
4762 set_extent_dirty(&root->dirty_log_pages, buf->start,
4763 buf->start + buf->len - 1, GFP_NOFS);
4764 else
4765 set_extent_new(&root->dirty_log_pages, buf->start,
4766 buf->start + buf->len - 1);
4767 } else {
4768 buf->log_index = -1;
4769 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4770 buf->start + buf->len - 1, GFP_NOFS);
4771 }
4772
4773 return buf;
4774}
4775
4776
4777
4778
4779
4780struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4781 struct btrfs_root *root,
4782 u64 parent, u64 root_objectid,
4783 const struct btrfs_disk_key *key,
4784 int level, u64 hint,
4785 u64 empty_size,
4786 enum btrfs_lock_nesting nest)
4787{
4788 struct btrfs_fs_info *fs_info = root->fs_info;
4789 struct btrfs_key ins;
4790 struct btrfs_block_rsv *block_rsv;
4791 struct extent_buffer *buf;
4792 struct btrfs_delayed_extent_op *extent_op;
4793 struct btrfs_ref generic_ref = { 0 };
4794 u64 flags = 0;
4795 int ret;
4796 u32 blocksize = fs_info->nodesize;
4797 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4798
4799#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4800 if (btrfs_is_testing(fs_info)) {
4801 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4802 level, root_objectid, nest);
4803 if (!IS_ERR(buf))
4804 root->alloc_bytenr += blocksize;
4805 return buf;
4806 }
4807#endif
4808
4809 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4810 if (IS_ERR(block_rsv))
4811 return ERR_CAST(block_rsv);
4812
4813 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4814 empty_size, hint, &ins, 0, 0);
4815 if (ret)
4816 goto out_unuse;
4817
4818 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4819 root_objectid, nest);
4820 if (IS_ERR(buf)) {
4821 ret = PTR_ERR(buf);
4822 goto out_free_reserved;
4823 }
4824
4825 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4826 if (parent == 0)
4827 parent = ins.objectid;
4828 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4829 } else
4830 BUG_ON(parent > 0);
4831
4832 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4833 extent_op = btrfs_alloc_delayed_extent_op();
4834 if (!extent_op) {
4835 ret = -ENOMEM;
4836 goto out_free_buf;
4837 }
4838 if (key)
4839 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4840 else
4841 memset(&extent_op->key, 0, sizeof(extent_op->key));
4842 extent_op->flags_to_set = flags;
4843 extent_op->update_key = skinny_metadata ? false : true;
4844 extent_op->update_flags = true;
4845 extent_op->is_data = false;
4846 extent_op->level = level;
4847
4848 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4849 ins.objectid, ins.offset, parent);
4850 generic_ref.real_root = root->root_key.objectid;
4851 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4852 btrfs_ref_tree_mod(fs_info, &generic_ref);
4853 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
4854 if (ret)
4855 goto out_free_delayed;
4856 }
4857 return buf;
4858
4859out_free_delayed:
4860 btrfs_free_delayed_extent_op(extent_op);
4861out_free_buf:
4862 free_extent_buffer(buf);
4863out_free_reserved:
4864 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4865out_unuse:
4866 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4867 return ERR_PTR(ret);
4868}
4869
4870struct walk_control {
4871 u64 refs[BTRFS_MAX_LEVEL];
4872 u64 flags[BTRFS_MAX_LEVEL];
4873 struct btrfs_key update_progress;
4874 struct btrfs_key drop_progress;
4875 int drop_level;
4876 int stage;
4877 int level;
4878 int shared_level;
4879 int update_ref;
4880 int keep_locks;
4881 int reada_slot;
4882 int reada_count;
4883 int restarted;
4884};
4885
4886#define DROP_REFERENCE 1
4887#define UPDATE_BACKREF 2
4888
4889static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4890 struct btrfs_root *root,
4891 struct walk_control *wc,
4892 struct btrfs_path *path)
4893{
4894 struct btrfs_fs_info *fs_info = root->fs_info;
4895 u64 bytenr;
4896 u64 generation;
4897 u64 refs;
4898 u64 flags;
4899 u32 nritems;
4900 struct btrfs_key key;
4901 struct extent_buffer *eb;
4902 int ret;
4903 int slot;
4904 int nread = 0;
4905
4906 if (path->slots[wc->level] < wc->reada_slot) {
4907 wc->reada_count = wc->reada_count * 2 / 3;
4908 wc->reada_count = max(wc->reada_count, 2);
4909 } else {
4910 wc->reada_count = wc->reada_count * 3 / 2;
4911 wc->reada_count = min_t(int, wc->reada_count,
4912 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4913 }
4914
4915 eb = path->nodes[wc->level];
4916 nritems = btrfs_header_nritems(eb);
4917
4918 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4919 if (nread >= wc->reada_count)
4920 break;
4921
4922 cond_resched();
4923 bytenr = btrfs_node_blockptr(eb, slot);
4924 generation = btrfs_node_ptr_generation(eb, slot);
4925
4926 if (slot == path->slots[wc->level])
4927 goto reada;
4928
4929 if (wc->stage == UPDATE_BACKREF &&
4930 generation <= root->root_key.offset)
4931 continue;
4932
4933
4934 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4935 wc->level - 1, 1, &refs,
4936 &flags);
4937
4938 if (ret < 0)
4939 continue;
4940 BUG_ON(refs == 0);
4941
4942 if (wc->stage == DROP_REFERENCE) {
4943 if (refs == 1)
4944 goto reada;
4945
4946 if (wc->level == 1 &&
4947 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4948 continue;
4949 if (!wc->update_ref ||
4950 generation <= root->root_key.offset)
4951 continue;
4952 btrfs_node_key_to_cpu(eb, &key, slot);
4953 ret = btrfs_comp_cpu_keys(&key,
4954 &wc->update_progress);
4955 if (ret < 0)
4956 continue;
4957 } else {
4958 if (wc->level == 1 &&
4959 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4960 continue;
4961 }
4962reada:
4963 btrfs_readahead_node_child(eb, slot);
4964 nread++;
4965 }
4966 wc->reada_slot = slot;
4967}
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4978 struct btrfs_root *root,
4979 struct btrfs_path *path,
4980 struct walk_control *wc, int lookup_info)
4981{
4982 struct btrfs_fs_info *fs_info = root->fs_info;
4983 int level = wc->level;
4984 struct extent_buffer *eb = path->nodes[level];
4985 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4986 int ret;
4987
4988 if (wc->stage == UPDATE_BACKREF &&
4989 btrfs_header_owner(eb) != root->root_key.objectid)
4990 return 1;
4991
4992
4993
4994
4995
4996 if (lookup_info &&
4997 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4998 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4999 BUG_ON(!path->locks[level]);
5000 ret = btrfs_lookup_extent_info(trans, fs_info,
5001 eb->start, level, 1,
5002 &wc->refs[level],
5003 &wc->flags[level]);
5004 BUG_ON(ret == -ENOMEM);
5005 if (ret)
5006 return ret;
5007 BUG_ON(wc->refs[level] == 0);
5008 }
5009
5010 if (wc->stage == DROP_REFERENCE) {
5011 if (wc->refs[level] > 1)
5012 return 1;
5013
5014 if (path->locks[level] && !wc->keep_locks) {
5015 btrfs_tree_unlock_rw(eb, path->locks[level]);
5016 path->locks[level] = 0;
5017 }
5018 return 0;
5019 }
5020
5021
5022 if (!(wc->flags[level] & flag)) {
5023 BUG_ON(!path->locks[level]);
5024 ret = btrfs_inc_ref(trans, root, eb, 1);
5025 BUG_ON(ret);
5026 ret = btrfs_dec_ref(trans, root, eb, 0);
5027 BUG_ON(ret);
5028 ret = btrfs_set_disk_extent_flags(trans, eb, flag,
5029 btrfs_header_level(eb), 0);
5030 BUG_ON(ret);
5031 wc->flags[level] |= flag;
5032 }
5033
5034
5035
5036
5037
5038 if (path->locks[level] && level > 0) {
5039 btrfs_tree_unlock_rw(eb, path->locks[level]);
5040 path->locks[level] = 0;
5041 }
5042 return 0;
5043}
5044
5045
5046
5047
5048
5049static int check_ref_exists(struct btrfs_trans_handle *trans,
5050 struct btrfs_root *root, u64 bytenr, u64 parent,
5051 int level)
5052{
5053 struct btrfs_path *path;
5054 struct btrfs_extent_inline_ref *iref;
5055 int ret;
5056
5057 path = btrfs_alloc_path();
5058 if (!path)
5059 return -ENOMEM;
5060
5061 ret = lookup_extent_backref(trans, path, &iref, bytenr,
5062 root->fs_info->nodesize, parent,
5063 root->root_key.objectid, level, 0);
5064 btrfs_free_path(path);
5065 if (ret == -ENOENT)
5066 return 0;
5067 if (ret < 0)
5068 return ret;
5069 return 1;
5070}
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5086 struct btrfs_root *root,
5087 struct btrfs_path *path,
5088 struct walk_control *wc, int *lookup_info)
5089{
5090 struct btrfs_fs_info *fs_info = root->fs_info;
5091 u64 bytenr;
5092 u64 generation;
5093 u64 parent;
5094 struct btrfs_key key;
5095 struct btrfs_key first_key;
5096 struct btrfs_ref ref = { 0 };
5097 struct extent_buffer *next;
5098 int level = wc->level;
5099 int reada = 0;
5100 int ret = 0;
5101 bool need_account = false;
5102
5103 generation = btrfs_node_ptr_generation(path->nodes[level],
5104 path->slots[level]);
5105
5106
5107
5108
5109
5110 if (wc->stage == UPDATE_BACKREF &&
5111 generation <= root->root_key.offset) {
5112 *lookup_info = 1;
5113 return 1;
5114 }
5115
5116 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5117 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
5118 path->slots[level]);
5119
5120 next = find_extent_buffer(fs_info, bytenr);
5121 if (!next) {
5122 next = btrfs_find_create_tree_block(fs_info, bytenr,
5123 root->root_key.objectid, level - 1);
5124 if (IS_ERR(next))
5125 return PTR_ERR(next);
5126 reada = 1;
5127 }
5128 btrfs_tree_lock(next);
5129
5130 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
5131 &wc->refs[level - 1],
5132 &wc->flags[level - 1]);
5133 if (ret < 0)
5134 goto out_unlock;
5135
5136 if (unlikely(wc->refs[level - 1] == 0)) {
5137 btrfs_err(fs_info, "Missing references.");
5138 ret = -EIO;
5139 goto out_unlock;
5140 }
5141 *lookup_info = 0;
5142
5143 if (wc->stage == DROP_REFERENCE) {
5144 if (wc->refs[level - 1] > 1) {
5145 need_account = true;
5146 if (level == 1 &&
5147 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5148 goto skip;
5149
5150 if (!wc->update_ref ||
5151 generation <= root->root_key.offset)
5152 goto skip;
5153
5154 btrfs_node_key_to_cpu(path->nodes[level], &key,
5155 path->slots[level]);
5156 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5157 if (ret < 0)
5158 goto skip;
5159
5160 wc->stage = UPDATE_BACKREF;
5161 wc->shared_level = level - 1;
5162 }
5163 } else {
5164 if (level == 1 &&
5165 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5166 goto skip;
5167 }
5168
5169 if (!btrfs_buffer_uptodate(next, generation, 0)) {
5170 btrfs_tree_unlock(next);
5171 free_extent_buffer(next);
5172 next = NULL;
5173 *lookup_info = 1;
5174 }
5175
5176 if (!next) {
5177 if (reada && level == 1)
5178 reada_walk_down(trans, root, wc, path);
5179 next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
5180 generation, level - 1, &first_key);
5181 if (IS_ERR(next)) {
5182 return PTR_ERR(next);
5183 } else if (!extent_buffer_uptodate(next)) {
5184 free_extent_buffer(next);
5185 return -EIO;
5186 }
5187 btrfs_tree_lock(next);
5188 }
5189
5190 level--;
5191 ASSERT(level == btrfs_header_level(next));
5192 if (level != btrfs_header_level(next)) {
5193 btrfs_err(root->fs_info, "mismatched level");
5194 ret = -EIO;
5195 goto out_unlock;
5196 }
5197 path->nodes[level] = next;
5198 path->slots[level] = 0;
5199 path->locks[level] = BTRFS_WRITE_LOCK;
5200 wc->level = level;
5201 if (wc->level == 1)
5202 wc->reada_slot = 0;
5203 return 0;
5204skip:
5205 wc->refs[level - 1] = 0;
5206 wc->flags[level - 1] = 0;
5207 if (wc->stage == DROP_REFERENCE) {
5208 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5209 parent = path->nodes[level]->start;
5210 } else {
5211 ASSERT(root->root_key.objectid ==
5212 btrfs_header_owner(path->nodes[level]));
5213 if (root->root_key.objectid !=
5214 btrfs_header_owner(path->nodes[level])) {
5215 btrfs_err(root->fs_info,
5216 "mismatched block owner");
5217 ret = -EIO;
5218 goto out_unlock;
5219 }
5220 parent = 0;
5221 }
5222
5223
5224
5225
5226
5227
5228
5229 if (wc->restarted) {
5230 ret = check_ref_exists(trans, root, bytenr, parent,
5231 level - 1);
5232 if (ret < 0)
5233 goto out_unlock;
5234 if (ret == 0)
5235 goto no_delete;
5236 ret = 0;
5237 wc->restarted = 0;
5238 }
5239
5240
5241
5242
5243
5244
5245 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
5246 need_account) {
5247 ret = btrfs_qgroup_trace_subtree(trans, next,
5248 generation, level - 1);
5249 if (ret) {
5250 btrfs_err_rl(fs_info,
5251 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
5252 ret);
5253 }
5254 }
5255
5256
5257
5258
5259
5260
5261
5262 wc->drop_level = level;
5263 find_next_key(path, level, &wc->drop_progress);
5264
5265 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5266 fs_info->nodesize, parent);
5267 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
5268 ret = btrfs_free_extent(trans, &ref);
5269 if (ret)
5270 goto out_unlock;
5271 }
5272no_delete:
5273 *lookup_info = 1;
5274 ret = 1;
5275
5276out_unlock:
5277 btrfs_tree_unlock(next);
5278 free_extent_buffer(next);
5279
5280 return ret;
5281}
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5296 struct btrfs_root *root,
5297 struct btrfs_path *path,
5298 struct walk_control *wc)
5299{
5300 struct btrfs_fs_info *fs_info = root->fs_info;
5301 int ret;
5302 int level = wc->level;
5303 struct extent_buffer *eb = path->nodes[level];
5304 u64 parent = 0;
5305
5306 if (wc->stage == UPDATE_BACKREF) {
5307 BUG_ON(wc->shared_level < level);
5308 if (level < wc->shared_level)
5309 goto out;
5310
5311 ret = find_next_key(path, level + 1, &wc->update_progress);
5312 if (ret > 0)
5313 wc->update_ref = 0;
5314
5315 wc->stage = DROP_REFERENCE;
5316 wc->shared_level = -1;
5317 path->slots[level] = 0;
5318
5319
5320
5321
5322
5323
5324 if (!path->locks[level]) {
5325 BUG_ON(level == 0);
5326 btrfs_tree_lock(eb);
5327 path->locks[level] = BTRFS_WRITE_LOCK;
5328
5329 ret = btrfs_lookup_extent_info(trans, fs_info,
5330 eb->start, level, 1,
5331 &wc->refs[level],
5332 &wc->flags[level]);
5333 if (ret < 0) {
5334 btrfs_tree_unlock_rw(eb, path->locks[level]);
5335 path->locks[level] = 0;
5336 return ret;
5337 }
5338 BUG_ON(wc->refs[level] == 0);
5339 if (wc->refs[level] == 1) {
5340 btrfs_tree_unlock_rw(eb, path->locks[level]);
5341 path->locks[level] = 0;
5342 return 1;
5343 }
5344 }
5345 }
5346
5347
5348 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5349
5350 if (wc->refs[level] == 1) {
5351 if (level == 0) {
5352 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5353 ret = btrfs_dec_ref(trans, root, eb, 1);
5354 else
5355 ret = btrfs_dec_ref(trans, root, eb, 0);
5356 BUG_ON(ret);
5357 if (is_fstree(root->root_key.objectid)) {
5358 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5359 if (ret) {
5360 btrfs_err_rl(fs_info,
5361 "error %d accounting leaf items, quota is out of sync, rescan required",
5362 ret);
5363 }
5364 }
5365 }
5366
5367 if (!path->locks[level] &&
5368 btrfs_header_generation(eb) == trans->transid) {
5369 btrfs_tree_lock(eb);
5370 path->locks[level] = BTRFS_WRITE_LOCK;
5371 }
5372 btrfs_clean_tree_block(eb);
5373 }
5374
5375 if (eb == root->node) {
5376 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5377 parent = eb->start;
5378 else if (root->root_key.objectid != btrfs_header_owner(eb))
5379 goto owner_mismatch;
5380 } else {
5381 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5382 parent = path->nodes[level + 1]->start;
5383 else if (root->root_key.objectid !=
5384 btrfs_header_owner(path->nodes[level + 1]))
5385 goto owner_mismatch;
5386 }
5387
5388 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5389out:
5390 wc->refs[level] = 0;
5391 wc->flags[level] = 0;
5392 return 0;
5393
5394owner_mismatch:
5395 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5396 btrfs_header_owner(eb), root->root_key.objectid);
5397 return -EUCLEAN;
5398}
5399
5400static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5401 struct btrfs_root *root,
5402 struct btrfs_path *path,
5403 struct walk_control *wc)
5404{
5405 int level = wc->level;
5406 int lookup_info = 1;
5407 int ret;
5408
5409 while (level >= 0) {
5410 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5411 if (ret > 0)
5412 break;
5413
5414 if (level == 0)
5415 break;
5416
5417 if (path->slots[level] >=
5418 btrfs_header_nritems(path->nodes[level]))
5419 break;
5420
5421 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5422 if (ret > 0) {
5423 path->slots[level]++;
5424 continue;
5425 } else if (ret < 0)
5426 return ret;
5427 level = wc->level;
5428 }
5429 return 0;
5430}
5431
5432static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5433 struct btrfs_root *root,
5434 struct btrfs_path *path,
5435 struct walk_control *wc, int max_level)
5436{
5437 int level = wc->level;
5438 int ret;
5439
5440 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5441 while (level < max_level && path->nodes[level]) {
5442 wc->level = level;
5443 if (path->slots[level] + 1 <
5444 btrfs_header_nritems(path->nodes[level])) {
5445 path->slots[level]++;
5446 return 0;
5447 } else {
5448 ret = walk_up_proc(trans, root, path, wc);
5449 if (ret > 0)
5450 return 0;
5451 if (ret < 0)
5452 return ret;
5453
5454 if (path->locks[level]) {
5455 btrfs_tree_unlock_rw(path->nodes[level],
5456 path->locks[level]);
5457 path->locks[level] = 0;
5458 }
5459 free_extent_buffer(path->nodes[level]);
5460 path->nodes[level] = NULL;
5461 level++;
5462 }
5463 }
5464 return 1;
5465}
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
5481{
5482 struct btrfs_fs_info *fs_info = root->fs_info;
5483 struct btrfs_path *path;
5484 struct btrfs_trans_handle *trans;
5485 struct btrfs_root *tree_root = fs_info->tree_root;
5486 struct btrfs_root_item *root_item = &root->root_item;
5487 struct walk_control *wc;
5488 struct btrfs_key key;
5489 int err = 0;
5490 int ret;
5491 int level;
5492 bool root_dropped = false;
5493
5494 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5495
5496 path = btrfs_alloc_path();
5497 if (!path) {
5498 err = -ENOMEM;
5499 goto out;
5500 }
5501
5502 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5503 if (!wc) {
5504 btrfs_free_path(path);
5505 err = -ENOMEM;
5506 goto out;
5507 }
5508
5509
5510
5511
5512
5513 if (for_reloc)
5514 trans = btrfs_join_transaction(tree_root);
5515 else
5516 trans = btrfs_start_transaction(tree_root, 0);
5517 if (IS_ERR(trans)) {
5518 err = PTR_ERR(trans);
5519 goto out_free;
5520 }
5521
5522 err = btrfs_run_delayed_items(trans);
5523 if (err)
5524 goto out_end_trans;
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534 set_bit(BTRFS_ROOT_DELETING, &root->state);
5535 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5536 level = btrfs_header_level(root->node);
5537 path->nodes[level] = btrfs_lock_root_node(root);
5538 path->slots[level] = 0;
5539 path->locks[level] = BTRFS_WRITE_LOCK;
5540 memset(&wc->update_progress, 0,
5541 sizeof(wc->update_progress));
5542 } else {
5543 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5544 memcpy(&wc->update_progress, &key,
5545 sizeof(wc->update_progress));
5546
5547 level = btrfs_root_drop_level(root_item);
5548 BUG_ON(level == 0);
5549 path->lowest_level = level;
5550 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5551 path->lowest_level = 0;
5552 if (ret < 0) {
5553 err = ret;
5554 goto out_end_trans;
5555 }
5556 WARN_ON(ret > 0);
5557
5558
5559
5560
5561
5562 btrfs_unlock_up_safe(path, 0);
5563
5564 level = btrfs_header_level(root->node);
5565 while (1) {
5566 btrfs_tree_lock(path->nodes[level]);
5567 path->locks[level] = BTRFS_WRITE_LOCK;
5568
5569 ret = btrfs_lookup_extent_info(trans, fs_info,
5570 path->nodes[level]->start,
5571 level, 1, &wc->refs[level],
5572 &wc->flags[level]);
5573 if (ret < 0) {
5574 err = ret;
5575 goto out_end_trans;
5576 }
5577 BUG_ON(wc->refs[level] == 0);
5578
5579 if (level == btrfs_root_drop_level(root_item))
5580 break;
5581
5582 btrfs_tree_unlock(path->nodes[level]);
5583 path->locks[level] = 0;
5584 WARN_ON(wc->refs[level] != 1);
5585 level--;
5586 }
5587 }
5588
5589 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5590 wc->level = level;
5591 wc->shared_level = -1;
5592 wc->stage = DROP_REFERENCE;
5593 wc->update_ref = update_ref;
5594 wc->keep_locks = 0;
5595 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5596
5597 while (1) {
5598
5599 ret = walk_down_tree(trans, root, path, wc);
5600 if (ret < 0) {
5601 err = ret;
5602 break;
5603 }
5604
5605 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5606 if (ret < 0) {
5607 err = ret;
5608 break;
5609 }
5610
5611 if (ret > 0) {
5612 BUG_ON(wc->stage != DROP_REFERENCE);
5613 break;
5614 }
5615
5616 if (wc->stage == DROP_REFERENCE) {
5617 wc->drop_level = wc->level;
5618 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5619 &wc->drop_progress,
5620 path->slots[wc->drop_level]);
5621 }
5622 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5623 &wc->drop_progress);
5624 btrfs_set_root_drop_level(root_item, wc->drop_level);
5625
5626 BUG_ON(wc->level == 0);
5627 if (btrfs_should_end_transaction(trans) ||
5628 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5629 ret = btrfs_update_root(trans, tree_root,
5630 &root->root_key,
5631 root_item);
5632 if (ret) {
5633 btrfs_abort_transaction(trans, ret);
5634 err = ret;
5635 goto out_end_trans;
5636 }
5637
5638 btrfs_end_transaction_throttle(trans);
5639 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5640 btrfs_debug(fs_info,
5641 "drop snapshot early exit");
5642 err = -EAGAIN;
5643 goto out_free;
5644 }
5645
5646
5647
5648
5649
5650
5651 if (for_reloc)
5652 trans = btrfs_join_transaction(tree_root);
5653 else
5654 trans = btrfs_start_transaction(tree_root, 0);
5655 if (IS_ERR(trans)) {
5656 err = PTR_ERR(trans);
5657 goto out_free;
5658 }
5659 }
5660 }
5661 btrfs_release_path(path);
5662 if (err)
5663 goto out_end_trans;
5664
5665 ret = btrfs_del_root(trans, &root->root_key);
5666 if (ret) {
5667 btrfs_abort_transaction(trans, ret);
5668 err = ret;
5669 goto out_end_trans;
5670 }
5671
5672 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5673 ret = btrfs_find_root(tree_root, &root->root_key, path,
5674 NULL, NULL);
5675 if (ret < 0) {
5676 btrfs_abort_transaction(trans, ret);
5677 err = ret;
5678 goto out_end_trans;
5679 } else if (ret > 0) {
5680
5681
5682
5683
5684
5685 btrfs_del_orphan_item(trans, tree_root,
5686 root->root_key.objectid);
5687 }
5688 }
5689
5690
5691
5692
5693
5694
5695 btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
5696 btrfs_qgroup_free_meta_all_pertrans(root);
5697
5698 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
5699 btrfs_add_dropped_root(trans, root);
5700 else
5701 btrfs_put_root(root);
5702 root_dropped = true;
5703out_end_trans:
5704 btrfs_end_transaction_throttle(trans);
5705out_free:
5706 kfree(wc);
5707 btrfs_free_path(path);
5708out:
5709
5710
5711
5712
5713
5714
5715
5716 if (!for_reloc && !root_dropped)
5717 btrfs_add_dead_root(root);
5718 return err;
5719}
5720
5721
5722
5723
5724
5725
5726
5727int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5728 struct btrfs_root *root,
5729 struct extent_buffer *node,
5730 struct extent_buffer *parent)
5731{
5732 struct btrfs_fs_info *fs_info = root->fs_info;
5733 struct btrfs_path *path;
5734 struct walk_control *wc;
5735 int level;
5736 int parent_level;
5737 int ret = 0;
5738 int wret;
5739
5740 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5741
5742 path = btrfs_alloc_path();
5743 if (!path)
5744 return -ENOMEM;
5745
5746 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5747 if (!wc) {
5748 btrfs_free_path(path);
5749 return -ENOMEM;
5750 }
5751
5752 btrfs_assert_tree_locked(parent);
5753 parent_level = btrfs_header_level(parent);
5754 atomic_inc(&parent->refs);
5755 path->nodes[parent_level] = parent;
5756 path->slots[parent_level] = btrfs_header_nritems(parent);
5757
5758 btrfs_assert_tree_locked(node);
5759 level = btrfs_header_level(node);
5760 path->nodes[level] = node;
5761 path->slots[level] = 0;
5762 path->locks[level] = BTRFS_WRITE_LOCK;
5763
5764 wc->refs[parent_level] = 1;
5765 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5766 wc->level = level;
5767 wc->shared_level = -1;
5768 wc->stage = DROP_REFERENCE;
5769 wc->update_ref = 0;
5770 wc->keep_locks = 1;
5771 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5772
5773 while (1) {
5774 wret = walk_down_tree(trans, root, path, wc);
5775 if (wret < 0) {
5776 ret = wret;
5777 break;
5778 }
5779
5780 wret = walk_up_tree(trans, root, path, wc, parent_level);
5781 if (wret < 0)
5782 ret = wret;
5783 if (wret != 0)
5784 break;
5785 }
5786
5787 kfree(wc);
5788 btrfs_free_path(path);
5789 return ret;
5790}
5791
5792
5793
5794
5795
5796u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5797{
5798 struct btrfs_block_group *block_group;
5799 u64 free_bytes = 0;
5800 int factor;
5801
5802
5803 if (list_empty(&sinfo->ro_bgs))
5804 return 0;
5805
5806 spin_lock(&sinfo->lock);
5807 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5808 spin_lock(&block_group->lock);
5809
5810 if (!block_group->ro) {
5811 spin_unlock(&block_group->lock);
5812 continue;
5813 }
5814
5815 factor = btrfs_bg_type_to_factor(block_group->flags);
5816 free_bytes += (block_group->length -
5817 block_group->used) * factor;
5818
5819 spin_unlock(&block_group->lock);
5820 }
5821 spin_unlock(&sinfo->lock);
5822
5823 return free_bytes;
5824}
5825
5826int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5827 u64 start, u64 end)
5828{
5829 return unpin_extent_range(fs_info, start, end, false);
5830}
5831
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
5852static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5853{
5854 u64 start = SZ_1M, len = 0, end = 0;
5855 int ret;
5856
5857 *trimmed = 0;
5858
5859
5860 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5861 return 0;
5862
5863
5864 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5865 return 0;
5866
5867
5868 if (device->total_bytes <= device->bytes_used)
5869 return 0;
5870
5871 ret = 0;
5872
5873 while (1) {
5874 struct btrfs_fs_info *fs_info = device->fs_info;
5875 u64 bytes;
5876
5877 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5878 if (ret)
5879 break;
5880
5881 find_first_clear_extent_bit(&device->alloc_state, start,
5882 &start, &end,
5883 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5884
5885
5886 if (start > device->total_bytes) {
5887 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
5888 btrfs_warn_in_rcu(fs_info,
5889"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
5890 start, end - start + 1,
5891 rcu_str_deref(device->name),
5892 device->total_bytes);
5893 mutex_unlock(&fs_info->chunk_mutex);
5894 ret = 0;
5895 break;
5896 }
5897
5898
5899 start = max_t(u64, start, SZ_1M);
5900
5901
5902
5903
5904
5905
5906 end = min(end, device->total_bytes - 1);
5907
5908 len = end - start + 1;
5909
5910
5911 if (!len) {
5912 mutex_unlock(&fs_info->chunk_mutex);
5913 ret = 0;
5914 break;
5915 }
5916
5917 ret = btrfs_issue_discard(device->bdev, start, len,
5918 &bytes);
5919 if (!ret)
5920 set_extent_bits(&device->alloc_state, start,
5921 start + bytes - 1,
5922 CHUNK_TRIMMED);
5923 mutex_unlock(&fs_info->chunk_mutex);
5924
5925 if (ret)
5926 break;
5927
5928 start += len;
5929 *trimmed += bytes;
5930
5931 if (fatal_signal_pending(current)) {
5932 ret = -ERESTARTSYS;
5933 break;
5934 }
5935
5936 cond_resched();
5937 }
5938
5939 return ret;
5940}
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5952{
5953 struct btrfs_block_group *cache = NULL;
5954 struct btrfs_device *device;
5955 struct list_head *devices;
5956 u64 group_trimmed;
5957 u64 range_end = U64_MAX;
5958 u64 start;
5959 u64 end;
5960 u64 trimmed = 0;
5961 u64 bg_failed = 0;
5962 u64 dev_failed = 0;
5963 int bg_ret = 0;
5964 int dev_ret = 0;
5965 int ret = 0;
5966
5967
5968
5969
5970
5971 if (range->len != U64_MAX &&
5972 check_add_overflow(range->start, range->len, &range_end))
5973 return -EINVAL;
5974
5975 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5976 for (; cache; cache = btrfs_next_block_group(cache)) {
5977 if (cache->start >= range_end) {
5978 btrfs_put_block_group(cache);
5979 break;
5980 }
5981
5982 start = max(range->start, cache->start);
5983 end = min(range_end, cache->start + cache->length);
5984
5985 if (end - start >= range->minlen) {
5986 if (!btrfs_block_group_done(cache)) {
5987 ret = btrfs_cache_block_group(cache, 0);
5988 if (ret) {
5989 bg_failed++;
5990 bg_ret = ret;
5991 continue;
5992 }
5993 ret = btrfs_wait_block_group_cache_done(cache);
5994 if (ret) {
5995 bg_failed++;
5996 bg_ret = ret;
5997 continue;
5998 }
5999 }
6000 ret = btrfs_trim_block_group(cache,
6001 &group_trimmed,
6002 start,
6003 end,
6004 range->minlen);
6005
6006 trimmed += group_trimmed;
6007 if (ret) {
6008 bg_failed++;
6009 bg_ret = ret;
6010 continue;
6011 }
6012 }
6013 }
6014
6015 if (bg_failed)
6016 btrfs_warn(fs_info,
6017 "failed to trim %llu block group(s), last error %d",
6018 bg_failed, bg_ret);
6019 mutex_lock(&fs_info->fs_devices->device_list_mutex);
6020 devices = &fs_info->fs_devices->devices;
6021 list_for_each_entry(device, devices, dev_list) {
6022 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
6023 continue;
6024
6025 ret = btrfs_trim_free_extents(device, &group_trimmed);
6026 if (ret) {
6027 dev_failed++;
6028 dev_ret = ret;
6029 break;
6030 }
6031
6032 trimmed += group_trimmed;
6033 }
6034 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
6035
6036 if (dev_failed)
6037 btrfs_warn(fs_info,
6038 "failed to trim %llu device(s), last error %d",
6039 dev_failed, dev_ret);
6040 range->len = trimmed;
6041 if (bg_ret)
6042 return bg_ret;
6043 return dev_ret;
6044}
6045