1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35#include "discard.h"
36#include "rcu-string.h"
37#include "zoned.h"
38#include "dev-replace.h"
39
40#undef SCRAMBLE_DELAYED_REFS
41
42
43static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
44 struct btrfs_delayed_ref_node *node, u64 parent,
45 u64 root_objectid, u64 owner_objectid,
46 u64 owner_offset, int refs_to_drop,
47 struct btrfs_delayed_extent_op *extra_op);
48static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
49 struct extent_buffer *leaf,
50 struct btrfs_extent_item *ei);
51static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
52 u64 parent, u64 root_objectid,
53 u64 flags, u64 owner, u64 offset,
54 struct btrfs_key *ins, int ref_mod);
55static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
56 struct btrfs_delayed_ref_node *node,
57 struct btrfs_delayed_extent_op *extent_op);
58static int find_next_key(struct btrfs_path *path, int level,
59 struct btrfs_key *key);
60
61static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
62{
63 return (cache->flags & bits) == bits;
64}
65
66int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
67 u64 start, u64 num_bytes)
68{
69 u64 end = start + num_bytes - 1;
70 set_extent_bits(&fs_info->excluded_extents, start, end,
71 EXTENT_UPTODATE);
72 return 0;
73}
74
75void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
76{
77 struct btrfs_fs_info *fs_info = cache->fs_info;
78 u64 start, end;
79
80 start = cache->start;
81 end = start + cache->length - 1;
82
83 clear_extent_bits(&fs_info->excluded_extents, start, end,
84 EXTENT_UPTODATE);
85}
86
87
88int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
89{
90 int ret;
91 struct btrfs_key key;
92 struct btrfs_path *path;
93
94 path = btrfs_alloc_path();
95 if (!path)
96 return -ENOMEM;
97
98 key.objectid = start;
99 key.offset = len;
100 key.type = BTRFS_EXTENT_ITEM_KEY;
101 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
102 btrfs_free_path(path);
103 return ret;
104}
105
106
107
108
109
110
111
112
113
114
115int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
116 struct btrfs_fs_info *fs_info, u64 bytenr,
117 u64 offset, int metadata, u64 *refs, u64 *flags)
118{
119 struct btrfs_delayed_ref_head *head;
120 struct btrfs_delayed_ref_root *delayed_refs;
121 struct btrfs_path *path;
122 struct btrfs_extent_item *ei;
123 struct extent_buffer *leaf;
124 struct btrfs_key key;
125 u32 item_size;
126 u64 num_refs;
127 u64 extent_flags;
128 int ret;
129
130
131
132
133
134 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
135 offset = fs_info->nodesize;
136 metadata = 0;
137 }
138
139 path = btrfs_alloc_path();
140 if (!path)
141 return -ENOMEM;
142
143 if (!trans) {
144 path->skip_locking = 1;
145 path->search_commit_root = 1;
146 }
147
148search_again:
149 key.objectid = bytenr;
150 key.offset = offset;
151 if (metadata)
152 key.type = BTRFS_METADATA_ITEM_KEY;
153 else
154 key.type = BTRFS_EXTENT_ITEM_KEY;
155
156 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
157 if (ret < 0)
158 goto out_free;
159
160 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
161 if (path->slots[0]) {
162 path->slots[0]--;
163 btrfs_item_key_to_cpu(path->nodes[0], &key,
164 path->slots[0]);
165 if (key.objectid == bytenr &&
166 key.type == BTRFS_EXTENT_ITEM_KEY &&
167 key.offset == fs_info->nodesize)
168 ret = 0;
169 }
170 }
171
172 if (ret == 0) {
173 leaf = path->nodes[0];
174 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
175 if (item_size >= sizeof(*ei)) {
176 ei = btrfs_item_ptr(leaf, path->slots[0],
177 struct btrfs_extent_item);
178 num_refs = btrfs_extent_refs(leaf, ei);
179 extent_flags = btrfs_extent_flags(leaf, ei);
180 } else {
181 ret = -EINVAL;
182 btrfs_print_v0_err(fs_info);
183 if (trans)
184 btrfs_abort_transaction(trans, ret);
185 else
186 btrfs_handle_fs_error(fs_info, ret, NULL);
187
188 goto out_free;
189 }
190
191 BUG_ON(num_refs == 0);
192 } else {
193 num_refs = 0;
194 extent_flags = 0;
195 ret = 0;
196 }
197
198 if (!trans)
199 goto out;
200
201 delayed_refs = &trans->transaction->delayed_refs;
202 spin_lock(&delayed_refs->lock);
203 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
204 if (head) {
205 if (!mutex_trylock(&head->mutex)) {
206 refcount_inc(&head->refs);
207 spin_unlock(&delayed_refs->lock);
208
209 btrfs_release_path(path);
210
211
212
213
214
215 mutex_lock(&head->mutex);
216 mutex_unlock(&head->mutex);
217 btrfs_put_delayed_ref_head(head);
218 goto search_again;
219 }
220 spin_lock(&head->lock);
221 if (head->extent_op && head->extent_op->update_flags)
222 extent_flags |= head->extent_op->flags_to_set;
223 else
224 BUG_ON(num_refs == 0);
225
226 num_refs += head->ref_mod;
227 spin_unlock(&head->lock);
228 mutex_unlock(&head->mutex);
229 }
230 spin_unlock(&delayed_refs->lock);
231out:
232 WARN_ON(num_refs == 0);
233 if (refs)
234 *refs = num_refs;
235 if (flags)
236 *flags = extent_flags;
237out_free:
238 btrfs_free_path(path);
239 return ret;
240}
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
354 struct btrfs_extent_inline_ref *iref,
355 enum btrfs_inline_ref_type is_data)
356{
357 int type = btrfs_extent_inline_ref_type(eb, iref);
358 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
359
360 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
361 type == BTRFS_SHARED_BLOCK_REF_KEY ||
362 type == BTRFS_SHARED_DATA_REF_KEY ||
363 type == BTRFS_EXTENT_DATA_REF_KEY) {
364 if (is_data == BTRFS_REF_TYPE_BLOCK) {
365 if (type == BTRFS_TREE_BLOCK_REF_KEY)
366 return type;
367 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
368 ASSERT(eb->fs_info);
369
370
371
372
373 if (offset &&
374 IS_ALIGNED(offset, eb->fs_info->sectorsize))
375 return type;
376 }
377 } else if (is_data == BTRFS_REF_TYPE_DATA) {
378 if (type == BTRFS_EXTENT_DATA_REF_KEY)
379 return type;
380 if (type == BTRFS_SHARED_DATA_REF_KEY) {
381 ASSERT(eb->fs_info);
382
383
384
385
386 if (offset &&
387 IS_ALIGNED(offset, eb->fs_info->sectorsize))
388 return type;
389 }
390 } else {
391 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
392 return type;
393 }
394 }
395
396 btrfs_print_leaf((struct extent_buffer *)eb);
397 btrfs_err(eb->fs_info,
398 "eb %llu iref 0x%lx invalid extent inline ref type %d",
399 eb->start, (unsigned long)iref, type);
400 WARN_ON(1);
401
402 return BTRFS_REF_TYPE_INVALID;
403}
404
405u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
406{
407 u32 high_crc = ~(u32)0;
408 u32 low_crc = ~(u32)0;
409 __le64 lenum;
410
411 lenum = cpu_to_le64(root_objectid);
412 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
413 lenum = cpu_to_le64(owner);
414 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
415 lenum = cpu_to_le64(offset);
416 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
417
418 return ((u64)high_crc << 31) ^ (u64)low_crc;
419}
420
421static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
422 struct btrfs_extent_data_ref *ref)
423{
424 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
425 btrfs_extent_data_ref_objectid(leaf, ref),
426 btrfs_extent_data_ref_offset(leaf, ref));
427}
428
429static int match_extent_data_ref(struct extent_buffer *leaf,
430 struct btrfs_extent_data_ref *ref,
431 u64 root_objectid, u64 owner, u64 offset)
432{
433 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
434 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
435 btrfs_extent_data_ref_offset(leaf, ref) != offset)
436 return 0;
437 return 1;
438}
439
440static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
441 struct btrfs_path *path,
442 u64 bytenr, u64 parent,
443 u64 root_objectid,
444 u64 owner, u64 offset)
445{
446 struct btrfs_root *root = trans->fs_info->extent_root;
447 struct btrfs_key key;
448 struct btrfs_extent_data_ref *ref;
449 struct extent_buffer *leaf;
450 u32 nritems;
451 int ret;
452 int recow;
453 int err = -ENOENT;
454
455 key.objectid = bytenr;
456 if (parent) {
457 key.type = BTRFS_SHARED_DATA_REF_KEY;
458 key.offset = parent;
459 } else {
460 key.type = BTRFS_EXTENT_DATA_REF_KEY;
461 key.offset = hash_extent_data_ref(root_objectid,
462 owner, offset);
463 }
464again:
465 recow = 0;
466 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
467 if (ret < 0) {
468 err = ret;
469 goto fail;
470 }
471
472 if (parent) {
473 if (!ret)
474 return 0;
475 goto fail;
476 }
477
478 leaf = path->nodes[0];
479 nritems = btrfs_header_nritems(leaf);
480 while (1) {
481 if (path->slots[0] >= nritems) {
482 ret = btrfs_next_leaf(root, path);
483 if (ret < 0)
484 err = ret;
485 if (ret)
486 goto fail;
487
488 leaf = path->nodes[0];
489 nritems = btrfs_header_nritems(leaf);
490 recow = 1;
491 }
492
493 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
494 if (key.objectid != bytenr ||
495 key.type != BTRFS_EXTENT_DATA_REF_KEY)
496 goto fail;
497
498 ref = btrfs_item_ptr(leaf, path->slots[0],
499 struct btrfs_extent_data_ref);
500
501 if (match_extent_data_ref(leaf, ref, root_objectid,
502 owner, offset)) {
503 if (recow) {
504 btrfs_release_path(path);
505 goto again;
506 }
507 err = 0;
508 break;
509 }
510 path->slots[0]++;
511 }
512fail:
513 return err;
514}
515
516static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
517 struct btrfs_path *path,
518 u64 bytenr, u64 parent,
519 u64 root_objectid, u64 owner,
520 u64 offset, int refs_to_add)
521{
522 struct btrfs_root *root = trans->fs_info->extent_root;
523 struct btrfs_key key;
524 struct extent_buffer *leaf;
525 u32 size;
526 u32 num_refs;
527 int ret;
528
529 key.objectid = bytenr;
530 if (parent) {
531 key.type = BTRFS_SHARED_DATA_REF_KEY;
532 key.offset = parent;
533 size = sizeof(struct btrfs_shared_data_ref);
534 } else {
535 key.type = BTRFS_EXTENT_DATA_REF_KEY;
536 key.offset = hash_extent_data_ref(root_objectid,
537 owner, offset);
538 size = sizeof(struct btrfs_extent_data_ref);
539 }
540
541 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
542 if (ret && ret != -EEXIST)
543 goto fail;
544
545 leaf = path->nodes[0];
546 if (parent) {
547 struct btrfs_shared_data_ref *ref;
548 ref = btrfs_item_ptr(leaf, path->slots[0],
549 struct btrfs_shared_data_ref);
550 if (ret == 0) {
551 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
552 } else {
553 num_refs = btrfs_shared_data_ref_count(leaf, ref);
554 num_refs += refs_to_add;
555 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
556 }
557 } else {
558 struct btrfs_extent_data_ref *ref;
559 while (ret == -EEXIST) {
560 ref = btrfs_item_ptr(leaf, path->slots[0],
561 struct btrfs_extent_data_ref);
562 if (match_extent_data_ref(leaf, ref, root_objectid,
563 owner, offset))
564 break;
565 btrfs_release_path(path);
566 key.offset++;
567 ret = btrfs_insert_empty_item(trans, root, path, &key,
568 size);
569 if (ret && ret != -EEXIST)
570 goto fail;
571
572 leaf = path->nodes[0];
573 }
574 ref = btrfs_item_ptr(leaf, path->slots[0],
575 struct btrfs_extent_data_ref);
576 if (ret == 0) {
577 btrfs_set_extent_data_ref_root(leaf, ref,
578 root_objectid);
579 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
580 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
581 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
582 } else {
583 num_refs = btrfs_extent_data_ref_count(leaf, ref);
584 num_refs += refs_to_add;
585 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
586 }
587 }
588 btrfs_mark_buffer_dirty(leaf);
589 ret = 0;
590fail:
591 btrfs_release_path(path);
592 return ret;
593}
594
595static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
596 struct btrfs_path *path,
597 int refs_to_drop, int *last_ref)
598{
599 struct btrfs_key key;
600 struct btrfs_extent_data_ref *ref1 = NULL;
601 struct btrfs_shared_data_ref *ref2 = NULL;
602 struct extent_buffer *leaf;
603 u32 num_refs = 0;
604 int ret = 0;
605
606 leaf = path->nodes[0];
607 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
608
609 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
610 ref1 = btrfs_item_ptr(leaf, path->slots[0],
611 struct btrfs_extent_data_ref);
612 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
613 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
614 ref2 = btrfs_item_ptr(leaf, path->slots[0],
615 struct btrfs_shared_data_ref);
616 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
617 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
618 btrfs_print_v0_err(trans->fs_info);
619 btrfs_abort_transaction(trans, -EINVAL);
620 return -EINVAL;
621 } else {
622 BUG();
623 }
624
625 BUG_ON(num_refs < refs_to_drop);
626 num_refs -= refs_to_drop;
627
628 if (num_refs == 0) {
629 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
630 *last_ref = 1;
631 } else {
632 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
633 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
634 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
635 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
636 btrfs_mark_buffer_dirty(leaf);
637 }
638 return ret;
639}
640
641static noinline u32 extent_data_ref_count(struct btrfs_path *path,
642 struct btrfs_extent_inline_ref *iref)
643{
644 struct btrfs_key key;
645 struct extent_buffer *leaf;
646 struct btrfs_extent_data_ref *ref1;
647 struct btrfs_shared_data_ref *ref2;
648 u32 num_refs = 0;
649 int type;
650
651 leaf = path->nodes[0];
652 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
653
654 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
655 if (iref) {
656
657
658
659
660 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
661 ASSERT(type != BTRFS_REF_TYPE_INVALID);
662 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
663 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
664 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
665 } else {
666 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
667 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
668 }
669 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
670 ref1 = btrfs_item_ptr(leaf, path->slots[0],
671 struct btrfs_extent_data_ref);
672 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
673 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
674 ref2 = btrfs_item_ptr(leaf, path->slots[0],
675 struct btrfs_shared_data_ref);
676 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
677 } else {
678 WARN_ON(1);
679 }
680 return num_refs;
681}
682
683static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
684 struct btrfs_path *path,
685 u64 bytenr, u64 parent,
686 u64 root_objectid)
687{
688 struct btrfs_root *root = trans->fs_info->extent_root;
689 struct btrfs_key key;
690 int ret;
691
692 key.objectid = bytenr;
693 if (parent) {
694 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
695 key.offset = parent;
696 } else {
697 key.type = BTRFS_TREE_BLOCK_REF_KEY;
698 key.offset = root_objectid;
699 }
700
701 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
702 if (ret > 0)
703 ret = -ENOENT;
704 return ret;
705}
706
707static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
708 struct btrfs_path *path,
709 u64 bytenr, u64 parent,
710 u64 root_objectid)
711{
712 struct btrfs_key key;
713 int ret;
714
715 key.objectid = bytenr;
716 if (parent) {
717 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
718 key.offset = parent;
719 } else {
720 key.type = BTRFS_TREE_BLOCK_REF_KEY;
721 key.offset = root_objectid;
722 }
723
724 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
725 path, &key, 0);
726 btrfs_release_path(path);
727 return ret;
728}
729
730static inline int extent_ref_type(u64 parent, u64 owner)
731{
732 int type;
733 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
734 if (parent > 0)
735 type = BTRFS_SHARED_BLOCK_REF_KEY;
736 else
737 type = BTRFS_TREE_BLOCK_REF_KEY;
738 } else {
739 if (parent > 0)
740 type = BTRFS_SHARED_DATA_REF_KEY;
741 else
742 type = BTRFS_EXTENT_DATA_REF_KEY;
743 }
744 return type;
745}
746
747static int find_next_key(struct btrfs_path *path, int level,
748 struct btrfs_key *key)
749
750{
751 for (; level < BTRFS_MAX_LEVEL; level++) {
752 if (!path->nodes[level])
753 break;
754 if (path->slots[level] + 1 >=
755 btrfs_header_nritems(path->nodes[level]))
756 continue;
757 if (level == 0)
758 btrfs_item_key_to_cpu(path->nodes[level], key,
759 path->slots[level] + 1);
760 else
761 btrfs_node_key_to_cpu(path->nodes[level], key,
762 path->slots[level] + 1);
763 return 0;
764 }
765 return 1;
766}
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781static noinline_for_stack
782int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
783 struct btrfs_path *path,
784 struct btrfs_extent_inline_ref **ref_ret,
785 u64 bytenr, u64 num_bytes,
786 u64 parent, u64 root_objectid,
787 u64 owner, u64 offset, int insert)
788{
789 struct btrfs_fs_info *fs_info = trans->fs_info;
790 struct btrfs_root *root = fs_info->extent_root;
791 struct btrfs_key key;
792 struct extent_buffer *leaf;
793 struct btrfs_extent_item *ei;
794 struct btrfs_extent_inline_ref *iref;
795 u64 flags;
796 u64 item_size;
797 unsigned long ptr;
798 unsigned long end;
799 int extra_size;
800 int type;
801 int want;
802 int ret;
803 int err = 0;
804 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
805 int needed;
806
807 key.objectid = bytenr;
808 key.type = BTRFS_EXTENT_ITEM_KEY;
809 key.offset = num_bytes;
810
811 want = extent_ref_type(parent, owner);
812 if (insert) {
813 extra_size = btrfs_extent_inline_ref_size(want);
814 path->search_for_extension = 1;
815 path->keep_locks = 1;
816 } else
817 extra_size = -1;
818
819
820
821
822
823 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
824 key.type = BTRFS_METADATA_ITEM_KEY;
825 key.offset = owner;
826 }
827
828again:
829 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
830 if (ret < 0) {
831 err = ret;
832 goto out;
833 }
834
835
836
837
838
839 if (ret > 0 && skinny_metadata) {
840 skinny_metadata = false;
841 if (path->slots[0]) {
842 path->slots[0]--;
843 btrfs_item_key_to_cpu(path->nodes[0], &key,
844 path->slots[0]);
845 if (key.objectid == bytenr &&
846 key.type == BTRFS_EXTENT_ITEM_KEY &&
847 key.offset == num_bytes)
848 ret = 0;
849 }
850 if (ret) {
851 key.objectid = bytenr;
852 key.type = BTRFS_EXTENT_ITEM_KEY;
853 key.offset = num_bytes;
854 btrfs_release_path(path);
855 goto again;
856 }
857 }
858
859 if (ret && !insert) {
860 err = -ENOENT;
861 goto out;
862 } else if (WARN_ON(ret)) {
863 err = -EIO;
864 goto out;
865 }
866
867 leaf = path->nodes[0];
868 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
869 if (unlikely(item_size < sizeof(*ei))) {
870 err = -EINVAL;
871 btrfs_print_v0_err(fs_info);
872 btrfs_abort_transaction(trans, err);
873 goto out;
874 }
875
876 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
877 flags = btrfs_extent_flags(leaf, ei);
878
879 ptr = (unsigned long)(ei + 1);
880 end = (unsigned long)ei + item_size;
881
882 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
883 ptr += sizeof(struct btrfs_tree_block_info);
884 BUG_ON(ptr > end);
885 }
886
887 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
888 needed = BTRFS_REF_TYPE_DATA;
889 else
890 needed = BTRFS_REF_TYPE_BLOCK;
891
892 err = -ENOENT;
893 while (1) {
894 if (ptr >= end) {
895 WARN_ON(ptr > end);
896 break;
897 }
898 iref = (struct btrfs_extent_inline_ref *)ptr;
899 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
900 if (type == BTRFS_REF_TYPE_INVALID) {
901 err = -EUCLEAN;
902 goto out;
903 }
904
905 if (want < type)
906 break;
907 if (want > type) {
908 ptr += btrfs_extent_inline_ref_size(type);
909 continue;
910 }
911
912 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
913 struct btrfs_extent_data_ref *dref;
914 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
915 if (match_extent_data_ref(leaf, dref, root_objectid,
916 owner, offset)) {
917 err = 0;
918 break;
919 }
920 if (hash_extent_data_ref_item(leaf, dref) <
921 hash_extent_data_ref(root_objectid, owner, offset))
922 break;
923 } else {
924 u64 ref_offset;
925 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
926 if (parent > 0) {
927 if (parent == ref_offset) {
928 err = 0;
929 break;
930 }
931 if (ref_offset < parent)
932 break;
933 } else {
934 if (root_objectid == ref_offset) {
935 err = 0;
936 break;
937 }
938 if (ref_offset < root_objectid)
939 break;
940 }
941 }
942 ptr += btrfs_extent_inline_ref_size(type);
943 }
944 if (err == -ENOENT && insert) {
945 if (item_size + extra_size >=
946 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
947 err = -EAGAIN;
948 goto out;
949 }
950
951
952
953
954
955
956 if (find_next_key(path, 0, &key) == 0 &&
957 key.objectid == bytenr &&
958 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
959 err = -EAGAIN;
960 goto out;
961 }
962 }
963 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
964out:
965 if (insert) {
966 path->keep_locks = 0;
967 path->search_for_extension = 0;
968 btrfs_unlock_up_safe(path, 1);
969 }
970 return err;
971}
972
973
974
975
976static noinline_for_stack
977void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
978 struct btrfs_path *path,
979 struct btrfs_extent_inline_ref *iref,
980 u64 parent, u64 root_objectid,
981 u64 owner, u64 offset, int refs_to_add,
982 struct btrfs_delayed_extent_op *extent_op)
983{
984 struct extent_buffer *leaf;
985 struct btrfs_extent_item *ei;
986 unsigned long ptr;
987 unsigned long end;
988 unsigned long item_offset;
989 u64 refs;
990 int size;
991 int type;
992
993 leaf = path->nodes[0];
994 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
995 item_offset = (unsigned long)iref - (unsigned long)ei;
996
997 type = extent_ref_type(parent, owner);
998 size = btrfs_extent_inline_ref_size(type);
999
1000 btrfs_extend_item(path, size);
1001
1002 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1003 refs = btrfs_extent_refs(leaf, ei);
1004 refs += refs_to_add;
1005 btrfs_set_extent_refs(leaf, ei, refs);
1006 if (extent_op)
1007 __run_delayed_extent_op(extent_op, leaf, ei);
1008
1009 ptr = (unsigned long)ei + item_offset;
1010 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1011 if (ptr < end - size)
1012 memmove_extent_buffer(leaf, ptr + size, ptr,
1013 end - size - ptr);
1014
1015 iref = (struct btrfs_extent_inline_ref *)ptr;
1016 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1017 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1018 struct btrfs_extent_data_ref *dref;
1019 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1020 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1021 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1022 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1023 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1024 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1025 struct btrfs_shared_data_ref *sref;
1026 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1027 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1028 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1029 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1030 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1031 } else {
1032 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1033 }
1034 btrfs_mark_buffer_dirty(leaf);
1035}
1036
1037static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1038 struct btrfs_path *path,
1039 struct btrfs_extent_inline_ref **ref_ret,
1040 u64 bytenr, u64 num_bytes, u64 parent,
1041 u64 root_objectid, u64 owner, u64 offset)
1042{
1043 int ret;
1044
1045 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1046 num_bytes, parent, root_objectid,
1047 owner, offset, 0);
1048 if (ret != -ENOENT)
1049 return ret;
1050
1051 btrfs_release_path(path);
1052 *ref_ret = NULL;
1053
1054 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1055 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1056 root_objectid);
1057 } else {
1058 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1059 root_objectid, owner, offset);
1060 }
1061 return ret;
1062}
1063
1064
1065
1066
1067static noinline_for_stack
1068void update_inline_extent_backref(struct btrfs_path *path,
1069 struct btrfs_extent_inline_ref *iref,
1070 int refs_to_mod,
1071 struct btrfs_delayed_extent_op *extent_op,
1072 int *last_ref)
1073{
1074 struct extent_buffer *leaf = path->nodes[0];
1075 struct btrfs_extent_item *ei;
1076 struct btrfs_extent_data_ref *dref = NULL;
1077 struct btrfs_shared_data_ref *sref = NULL;
1078 unsigned long ptr;
1079 unsigned long end;
1080 u32 item_size;
1081 int size;
1082 int type;
1083 u64 refs;
1084
1085 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1086 refs = btrfs_extent_refs(leaf, ei);
1087 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1088 refs += refs_to_mod;
1089 btrfs_set_extent_refs(leaf, ei, refs);
1090 if (extent_op)
1091 __run_delayed_extent_op(extent_op, leaf, ei);
1092
1093
1094
1095
1096
1097 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1098 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1099
1100 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1101 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1102 refs = btrfs_extent_data_ref_count(leaf, dref);
1103 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1104 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1105 refs = btrfs_shared_data_ref_count(leaf, sref);
1106 } else {
1107 refs = 1;
1108 BUG_ON(refs_to_mod != -1);
1109 }
1110
1111 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1112 refs += refs_to_mod;
1113
1114 if (refs > 0) {
1115 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1116 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1117 else
1118 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1119 } else {
1120 *last_ref = 1;
1121 size = btrfs_extent_inline_ref_size(type);
1122 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1123 ptr = (unsigned long)iref;
1124 end = (unsigned long)ei + item_size;
1125 if (ptr + size < end)
1126 memmove_extent_buffer(leaf, ptr, ptr + size,
1127 end - ptr - size);
1128 item_size -= size;
1129 btrfs_truncate_item(path, item_size, 1);
1130 }
1131 btrfs_mark_buffer_dirty(leaf);
1132}
1133
1134static noinline_for_stack
1135int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1136 struct btrfs_path *path,
1137 u64 bytenr, u64 num_bytes, u64 parent,
1138 u64 root_objectid, u64 owner,
1139 u64 offset, int refs_to_add,
1140 struct btrfs_delayed_extent_op *extent_op)
1141{
1142 struct btrfs_extent_inline_ref *iref;
1143 int ret;
1144
1145 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1146 num_bytes, parent, root_objectid,
1147 owner, offset, 1);
1148 if (ret == 0) {
1149
1150
1151
1152
1153 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1154 btrfs_crit(trans->fs_info,
1155"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
1156 bytenr, num_bytes, root_objectid);
1157 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
1158 WARN_ON(1);
1159 btrfs_crit(trans->fs_info,
1160 "path->slots[0]=%d path->nodes[0]:", path->slots[0]);
1161 btrfs_print_leaf(path->nodes[0]);
1162 }
1163 return -EUCLEAN;
1164 }
1165 update_inline_extent_backref(path, iref, refs_to_add,
1166 extent_op, NULL);
1167 } else if (ret == -ENOENT) {
1168 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1169 root_objectid, owner, offset,
1170 refs_to_add, extent_op);
1171 ret = 0;
1172 }
1173 return ret;
1174}
1175
1176static int remove_extent_backref(struct btrfs_trans_handle *trans,
1177 struct btrfs_path *path,
1178 struct btrfs_extent_inline_ref *iref,
1179 int refs_to_drop, int is_data, int *last_ref)
1180{
1181 int ret = 0;
1182
1183 BUG_ON(!is_data && refs_to_drop != 1);
1184 if (iref) {
1185 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1186 last_ref);
1187 } else if (is_data) {
1188 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1189 last_ref);
1190 } else {
1191 *last_ref = 1;
1192 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1193 }
1194 return ret;
1195}
1196
1197static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1198 u64 *discarded_bytes)
1199{
1200 int j, ret = 0;
1201 u64 bytes_left, end;
1202 u64 aligned_start = ALIGN(start, 1 << 9);
1203
1204 if (WARN_ON(start != aligned_start)) {
1205 len -= aligned_start - start;
1206 len = round_down(len, 1 << 9);
1207 start = aligned_start;
1208 }
1209
1210 *discarded_bytes = 0;
1211
1212 if (!len)
1213 return 0;
1214
1215 end = start + len;
1216 bytes_left = len;
1217
1218
1219 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1220 u64 sb_start = btrfs_sb_offset(j);
1221 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1222 u64 size = sb_start - start;
1223
1224 if (!in_range(sb_start, start, bytes_left) &&
1225 !in_range(sb_end, start, bytes_left) &&
1226 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1227 continue;
1228
1229
1230
1231
1232
1233 if (sb_start <= start) {
1234 start += sb_end - start;
1235 if (start > end) {
1236 bytes_left = 0;
1237 break;
1238 }
1239 bytes_left = end - start;
1240 continue;
1241 }
1242
1243 if (size) {
1244 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1245 GFP_NOFS, 0);
1246 if (!ret)
1247 *discarded_bytes += size;
1248 else if (ret != -EOPNOTSUPP)
1249 return ret;
1250 }
1251
1252 start = sb_end;
1253 if (start > end) {
1254 bytes_left = 0;
1255 break;
1256 }
1257 bytes_left = end - start;
1258 }
1259
1260 if (bytes_left) {
1261 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1262 GFP_NOFS, 0);
1263 if (!ret)
1264 *discarded_bytes += bytes_left;
1265 }
1266 return ret;
1267}
1268
1269static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes)
1270{
1271 struct btrfs_device *dev = stripe->dev;
1272 struct btrfs_fs_info *fs_info = dev->fs_info;
1273 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
1274 u64 phys = stripe->physical;
1275 u64 len = stripe->length;
1276 u64 discarded = 0;
1277 int ret = 0;
1278
1279
1280 if (btrfs_can_zone_reset(dev, phys, len)) {
1281 u64 src_disc;
1282
1283 ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
1284 if (ret)
1285 goto out;
1286
1287 if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
1288 dev != dev_replace->srcdev)
1289 goto out;
1290
1291 src_disc = discarded;
1292
1293
1294 ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
1295 &discarded);
1296 discarded += src_disc;
1297 } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
1298 ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
1299 } else {
1300 ret = 0;
1301 *bytes = 0;
1302 }
1303
1304out:
1305 *bytes = discarded;
1306 return ret;
1307}
1308
1309int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1310 u64 num_bytes, u64 *actual_bytes)
1311{
1312 int ret = 0;
1313 u64 discarded_bytes = 0;
1314 u64 end = bytenr + num_bytes;
1315 u64 cur = bytenr;
1316 struct btrfs_bio *bbio = NULL;
1317
1318
1319
1320
1321
1322
1323 btrfs_bio_counter_inc_blocked(fs_info);
1324 while (cur < end) {
1325 struct btrfs_bio_stripe *stripe;
1326 int i;
1327
1328 num_bytes = end - cur;
1329
1330 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
1331 &num_bytes, &bbio, 0);
1332
1333
1334
1335
1336
1337 if (ret < 0)
1338 goto out;
1339
1340 stripe = bbio->stripes;
1341 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1342 u64 bytes;
1343 struct btrfs_device *device = stripe->dev;
1344
1345 if (!device->bdev) {
1346 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1347 continue;
1348 }
1349
1350 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
1351 continue;
1352
1353 ret = do_discard_extent(stripe, &bytes);
1354 if (!ret) {
1355 discarded_bytes += bytes;
1356 } else if (ret != -EOPNOTSUPP) {
1357
1358
1359
1360
1361
1362
1363
1364 btrfs_put_bbio(bbio);
1365 goto out;
1366 }
1367
1368
1369
1370
1371
1372
1373 ret = 0;
1374 }
1375 btrfs_put_bbio(bbio);
1376 cur += num_bytes;
1377 }
1378out:
1379 btrfs_bio_counter_dec(fs_info);
1380
1381 if (actual_bytes)
1382 *actual_bytes = discarded_bytes;
1383
1384
1385 if (ret == -EOPNOTSUPP)
1386 ret = 0;
1387 return ret;
1388}
1389
1390
1391int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1392 struct btrfs_ref *generic_ref)
1393{
1394 struct btrfs_fs_info *fs_info = trans->fs_info;
1395 int ret;
1396
1397 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1398 generic_ref->action);
1399 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1400 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1401
1402 if (generic_ref->type == BTRFS_REF_METADATA)
1403 ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
1404 else
1405 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
1406
1407 btrfs_ref_tree_mod(fs_info, generic_ref);
1408
1409 return ret;
1410}
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1450 struct btrfs_delayed_ref_node *node,
1451 u64 parent, u64 root_objectid,
1452 u64 owner, u64 offset, int refs_to_add,
1453 struct btrfs_delayed_extent_op *extent_op)
1454{
1455 struct btrfs_path *path;
1456 struct extent_buffer *leaf;
1457 struct btrfs_extent_item *item;
1458 struct btrfs_key key;
1459 u64 bytenr = node->bytenr;
1460 u64 num_bytes = node->num_bytes;
1461 u64 refs;
1462 int ret;
1463
1464 path = btrfs_alloc_path();
1465 if (!path)
1466 return -ENOMEM;
1467
1468
1469 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1470 parent, root_objectid, owner,
1471 offset, refs_to_add, extent_op);
1472 if ((ret < 0 && ret != -EAGAIN) || !ret)
1473 goto out;
1474
1475
1476
1477
1478
1479
1480 leaf = path->nodes[0];
1481 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1482 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1483 refs = btrfs_extent_refs(leaf, item);
1484 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1485 if (extent_op)
1486 __run_delayed_extent_op(extent_op, leaf, item);
1487
1488 btrfs_mark_buffer_dirty(leaf);
1489 btrfs_release_path(path);
1490
1491
1492 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1493 BUG_ON(refs_to_add != 1);
1494 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1495 root_objectid);
1496 } else {
1497 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1498 root_objectid, owner, offset,
1499 refs_to_add);
1500 }
1501 if (ret)
1502 btrfs_abort_transaction(trans, ret);
1503out:
1504 btrfs_free_path(path);
1505 return ret;
1506}
1507
1508static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1509 struct btrfs_delayed_ref_node *node,
1510 struct btrfs_delayed_extent_op *extent_op,
1511 int insert_reserved)
1512{
1513 int ret = 0;
1514 struct btrfs_delayed_data_ref *ref;
1515 struct btrfs_key ins;
1516 u64 parent = 0;
1517 u64 ref_root = 0;
1518 u64 flags = 0;
1519
1520 ins.objectid = node->bytenr;
1521 ins.offset = node->num_bytes;
1522 ins.type = BTRFS_EXTENT_ITEM_KEY;
1523
1524 ref = btrfs_delayed_node_to_data_ref(node);
1525 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1526
1527 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1528 parent = ref->parent;
1529 ref_root = ref->root;
1530
1531 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1532 if (extent_op)
1533 flags |= extent_op->flags_to_set;
1534 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1535 flags, ref->objectid,
1536 ref->offset, &ins,
1537 node->ref_mod);
1538 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1539 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1540 ref->objectid, ref->offset,
1541 node->ref_mod, extent_op);
1542 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1543 ret = __btrfs_free_extent(trans, node, parent,
1544 ref_root, ref->objectid,
1545 ref->offset, node->ref_mod,
1546 extent_op);
1547 } else {
1548 BUG();
1549 }
1550 return ret;
1551}
1552
1553static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1554 struct extent_buffer *leaf,
1555 struct btrfs_extent_item *ei)
1556{
1557 u64 flags = btrfs_extent_flags(leaf, ei);
1558 if (extent_op->update_flags) {
1559 flags |= extent_op->flags_to_set;
1560 btrfs_set_extent_flags(leaf, ei, flags);
1561 }
1562
1563 if (extent_op->update_key) {
1564 struct btrfs_tree_block_info *bi;
1565 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1566 bi = (struct btrfs_tree_block_info *)(ei + 1);
1567 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1568 }
1569}
1570
1571static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1572 struct btrfs_delayed_ref_head *head,
1573 struct btrfs_delayed_extent_op *extent_op)
1574{
1575 struct btrfs_fs_info *fs_info = trans->fs_info;
1576 struct btrfs_key key;
1577 struct btrfs_path *path;
1578 struct btrfs_extent_item *ei;
1579 struct extent_buffer *leaf;
1580 u32 item_size;
1581 int ret;
1582 int err = 0;
1583 int metadata = !extent_op->is_data;
1584
1585 if (TRANS_ABORTED(trans))
1586 return 0;
1587
1588 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1589 metadata = 0;
1590
1591 path = btrfs_alloc_path();
1592 if (!path)
1593 return -ENOMEM;
1594
1595 key.objectid = head->bytenr;
1596
1597 if (metadata) {
1598 key.type = BTRFS_METADATA_ITEM_KEY;
1599 key.offset = extent_op->level;
1600 } else {
1601 key.type = BTRFS_EXTENT_ITEM_KEY;
1602 key.offset = head->num_bytes;
1603 }
1604
1605again:
1606 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1607 if (ret < 0) {
1608 err = ret;
1609 goto out;
1610 }
1611 if (ret > 0) {
1612 if (metadata) {
1613 if (path->slots[0] > 0) {
1614 path->slots[0]--;
1615 btrfs_item_key_to_cpu(path->nodes[0], &key,
1616 path->slots[0]);
1617 if (key.objectid == head->bytenr &&
1618 key.type == BTRFS_EXTENT_ITEM_KEY &&
1619 key.offset == head->num_bytes)
1620 ret = 0;
1621 }
1622 if (ret > 0) {
1623 btrfs_release_path(path);
1624 metadata = 0;
1625
1626 key.objectid = head->bytenr;
1627 key.offset = head->num_bytes;
1628 key.type = BTRFS_EXTENT_ITEM_KEY;
1629 goto again;
1630 }
1631 } else {
1632 err = -EIO;
1633 goto out;
1634 }
1635 }
1636
1637 leaf = path->nodes[0];
1638 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1639
1640 if (unlikely(item_size < sizeof(*ei))) {
1641 err = -EINVAL;
1642 btrfs_print_v0_err(fs_info);
1643 btrfs_abort_transaction(trans, err);
1644 goto out;
1645 }
1646
1647 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1648 __run_delayed_extent_op(extent_op, leaf, ei);
1649
1650 btrfs_mark_buffer_dirty(leaf);
1651out:
1652 btrfs_free_path(path);
1653 return err;
1654}
1655
1656static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1657 struct btrfs_delayed_ref_node *node,
1658 struct btrfs_delayed_extent_op *extent_op,
1659 int insert_reserved)
1660{
1661 int ret = 0;
1662 struct btrfs_delayed_tree_ref *ref;
1663 u64 parent = 0;
1664 u64 ref_root = 0;
1665
1666 ref = btrfs_delayed_node_to_tree_ref(node);
1667 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1668
1669 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1670 parent = ref->parent;
1671 ref_root = ref->root;
1672
1673 if (node->ref_mod != 1) {
1674 btrfs_err(trans->fs_info,
1675 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1676 node->bytenr, node->ref_mod, node->action, ref_root,
1677 parent);
1678 return -EIO;
1679 }
1680 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1681 BUG_ON(!extent_op || !extent_op->update_flags);
1682 ret = alloc_reserved_tree_block(trans, node, extent_op);
1683 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1684 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1685 ref->level, 0, 1, extent_op);
1686 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1687 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1688 ref->level, 0, 1, extent_op);
1689 } else {
1690 BUG();
1691 }
1692 return ret;
1693}
1694
1695
1696static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1697 struct btrfs_delayed_ref_node *node,
1698 struct btrfs_delayed_extent_op *extent_op,
1699 int insert_reserved)
1700{
1701 int ret = 0;
1702
1703 if (TRANS_ABORTED(trans)) {
1704 if (insert_reserved)
1705 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1706 return 0;
1707 }
1708
1709 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1710 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1711 ret = run_delayed_tree_ref(trans, node, extent_op,
1712 insert_reserved);
1713 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1714 node->type == BTRFS_SHARED_DATA_REF_KEY)
1715 ret = run_delayed_data_ref(trans, node, extent_op,
1716 insert_reserved);
1717 else
1718 BUG();
1719 if (ret && insert_reserved)
1720 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1721 return ret;
1722}
1723
1724static inline struct btrfs_delayed_ref_node *
1725select_delayed_ref(struct btrfs_delayed_ref_head *head)
1726{
1727 struct btrfs_delayed_ref_node *ref;
1728
1729 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1730 return NULL;
1731
1732
1733
1734
1735
1736
1737
1738 if (!list_empty(&head->ref_add_list))
1739 return list_first_entry(&head->ref_add_list,
1740 struct btrfs_delayed_ref_node, add_list);
1741
1742 ref = rb_entry(rb_first_cached(&head->ref_tree),
1743 struct btrfs_delayed_ref_node, ref_node);
1744 ASSERT(list_empty(&ref->add_list));
1745 return ref;
1746}
1747
1748static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1749 struct btrfs_delayed_ref_head *head)
1750{
1751 spin_lock(&delayed_refs->lock);
1752 head->processing = 0;
1753 delayed_refs->num_heads_ready++;
1754 spin_unlock(&delayed_refs->lock);
1755 btrfs_delayed_ref_unlock(head);
1756}
1757
1758static struct btrfs_delayed_extent_op *cleanup_extent_op(
1759 struct btrfs_delayed_ref_head *head)
1760{
1761 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1762
1763 if (!extent_op)
1764 return NULL;
1765
1766 if (head->must_insert_reserved) {
1767 head->extent_op = NULL;
1768 btrfs_free_delayed_extent_op(extent_op);
1769 return NULL;
1770 }
1771 return extent_op;
1772}
1773
1774static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1775 struct btrfs_delayed_ref_head *head)
1776{
1777 struct btrfs_delayed_extent_op *extent_op;
1778 int ret;
1779
1780 extent_op = cleanup_extent_op(head);
1781 if (!extent_op)
1782 return 0;
1783 head->extent_op = NULL;
1784 spin_unlock(&head->lock);
1785 ret = run_delayed_extent_op(trans, head, extent_op);
1786 btrfs_free_delayed_extent_op(extent_op);
1787 return ret ? ret : 1;
1788}
1789
1790void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1791 struct btrfs_delayed_ref_root *delayed_refs,
1792 struct btrfs_delayed_ref_head *head)
1793{
1794 int nr_items = 1;
1795
1796
1797
1798
1799
1800 if (head->total_ref_mod < 0 && head->is_data) {
1801 spin_lock(&delayed_refs->lock);
1802 delayed_refs->pending_csums -= head->num_bytes;
1803 spin_unlock(&delayed_refs->lock);
1804 nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
1805 }
1806
1807 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1808}
1809
1810static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1811 struct btrfs_delayed_ref_head *head)
1812{
1813
1814 struct btrfs_fs_info *fs_info = trans->fs_info;
1815 struct btrfs_delayed_ref_root *delayed_refs;
1816 int ret;
1817
1818 delayed_refs = &trans->transaction->delayed_refs;
1819
1820 ret = run_and_cleanup_extent_op(trans, head);
1821 if (ret < 0) {
1822 unselect_delayed_ref_head(delayed_refs, head);
1823 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1824 return ret;
1825 } else if (ret) {
1826 return ret;
1827 }
1828
1829
1830
1831
1832
1833 spin_unlock(&head->lock);
1834 spin_lock(&delayed_refs->lock);
1835 spin_lock(&head->lock);
1836 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1837 spin_unlock(&head->lock);
1838 spin_unlock(&delayed_refs->lock);
1839 return 1;
1840 }
1841 btrfs_delete_ref_head(delayed_refs, head);
1842 spin_unlock(&head->lock);
1843 spin_unlock(&delayed_refs->lock);
1844
1845 if (head->must_insert_reserved) {
1846 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
1847 if (head->is_data) {
1848 ret = btrfs_del_csums(trans, fs_info->csum_root,
1849 head->bytenr, head->num_bytes);
1850 }
1851 }
1852
1853 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1854
1855 trace_run_delayed_ref_head(fs_info, head, 0);
1856 btrfs_delayed_ref_unlock(head);
1857 btrfs_put_delayed_ref_head(head);
1858 return ret;
1859}
1860
1861static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1862 struct btrfs_trans_handle *trans)
1863{
1864 struct btrfs_delayed_ref_root *delayed_refs =
1865 &trans->transaction->delayed_refs;
1866 struct btrfs_delayed_ref_head *head = NULL;
1867 int ret;
1868
1869 spin_lock(&delayed_refs->lock);
1870 head = btrfs_select_ref_head(delayed_refs);
1871 if (!head) {
1872 spin_unlock(&delayed_refs->lock);
1873 return head;
1874 }
1875
1876
1877
1878
1879
1880 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1881 spin_unlock(&delayed_refs->lock);
1882
1883
1884
1885
1886
1887
1888 if (ret == -EAGAIN)
1889 head = ERR_PTR(-EAGAIN);
1890
1891 return head;
1892}
1893
1894static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1895 struct btrfs_delayed_ref_head *locked_ref,
1896 unsigned long *run_refs)
1897{
1898 struct btrfs_fs_info *fs_info = trans->fs_info;
1899 struct btrfs_delayed_ref_root *delayed_refs;
1900 struct btrfs_delayed_extent_op *extent_op;
1901 struct btrfs_delayed_ref_node *ref;
1902 int must_insert_reserved = 0;
1903 int ret;
1904
1905 delayed_refs = &trans->transaction->delayed_refs;
1906
1907 lockdep_assert_held(&locked_ref->mutex);
1908 lockdep_assert_held(&locked_ref->lock);
1909
1910 while ((ref = select_delayed_ref(locked_ref))) {
1911 if (ref->seq &&
1912 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1913 spin_unlock(&locked_ref->lock);
1914 unselect_delayed_ref_head(delayed_refs, locked_ref);
1915 return -EAGAIN;
1916 }
1917
1918 (*run_refs)++;
1919 ref->in_tree = 0;
1920 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1921 RB_CLEAR_NODE(&ref->ref_node);
1922 if (!list_empty(&ref->add_list))
1923 list_del(&ref->add_list);
1924
1925
1926
1927
1928 switch (ref->action) {
1929 case BTRFS_ADD_DELAYED_REF:
1930 case BTRFS_ADD_DELAYED_EXTENT:
1931 locked_ref->ref_mod -= ref->ref_mod;
1932 break;
1933 case BTRFS_DROP_DELAYED_REF:
1934 locked_ref->ref_mod += ref->ref_mod;
1935 break;
1936 default:
1937 WARN_ON(1);
1938 }
1939 atomic_dec(&delayed_refs->num_entries);
1940
1941
1942
1943
1944
1945 must_insert_reserved = locked_ref->must_insert_reserved;
1946 locked_ref->must_insert_reserved = 0;
1947
1948 extent_op = locked_ref->extent_op;
1949 locked_ref->extent_op = NULL;
1950 spin_unlock(&locked_ref->lock);
1951
1952 ret = run_one_delayed_ref(trans, ref, extent_op,
1953 must_insert_reserved);
1954
1955 btrfs_free_delayed_extent_op(extent_op);
1956 if (ret) {
1957 unselect_delayed_ref_head(delayed_refs, locked_ref);
1958 btrfs_put_delayed_ref(ref);
1959 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1960 ret);
1961 return ret;
1962 }
1963
1964 btrfs_put_delayed_ref(ref);
1965 cond_resched();
1966
1967 spin_lock(&locked_ref->lock);
1968 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1969 }
1970
1971 return 0;
1972}
1973
1974
1975
1976
1977
1978static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1979 unsigned long nr)
1980{
1981 struct btrfs_fs_info *fs_info = trans->fs_info;
1982 struct btrfs_delayed_ref_root *delayed_refs;
1983 struct btrfs_delayed_ref_head *locked_ref = NULL;
1984 ktime_t start = ktime_get();
1985 int ret;
1986 unsigned long count = 0;
1987 unsigned long actual_count = 0;
1988
1989 delayed_refs = &trans->transaction->delayed_refs;
1990 do {
1991 if (!locked_ref) {
1992 locked_ref = btrfs_obtain_ref_head(trans);
1993 if (IS_ERR_OR_NULL(locked_ref)) {
1994 if (PTR_ERR(locked_ref) == -EAGAIN) {
1995 continue;
1996 } else {
1997 break;
1998 }
1999 }
2000 count++;
2001 }
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014 spin_lock(&locked_ref->lock);
2015 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2016
2017 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2018 &actual_count);
2019 if (ret < 0 && ret != -EAGAIN) {
2020
2021
2022
2023
2024 return ret;
2025 } else if (!ret) {
2026
2027
2028
2029
2030 ret = cleanup_ref_head(trans, locked_ref);
2031 if (ret > 0 ) {
2032
2033 ret = 0;
2034 continue;
2035 } else if (ret) {
2036 return ret;
2037 }
2038 }
2039
2040
2041
2042
2043
2044
2045 locked_ref = NULL;
2046 cond_resched();
2047 } while ((nr != -1 && count < nr) || locked_ref);
2048
2049
2050
2051
2052
2053
2054 if (actual_count > 0) {
2055 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2056 u64 avg;
2057
2058
2059
2060
2061
2062 spin_lock(&delayed_refs->lock);
2063 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2064 fs_info->avg_delayed_ref_runtime = avg >> 2;
2065 spin_unlock(&delayed_refs->lock);
2066 }
2067 return 0;
2068}
2069
2070#ifdef SCRAMBLE_DELAYED_REFS
2071
2072
2073
2074
2075
2076static u64 find_middle(struct rb_root *root)
2077{
2078 struct rb_node *n = root->rb_node;
2079 struct btrfs_delayed_ref_node *entry;
2080 int alt = 1;
2081 u64 middle;
2082 u64 first = 0, last = 0;
2083
2084 n = rb_first(root);
2085 if (n) {
2086 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2087 first = entry->bytenr;
2088 }
2089 n = rb_last(root);
2090 if (n) {
2091 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2092 last = entry->bytenr;
2093 }
2094 n = root->rb_node;
2095
2096 while (n) {
2097 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2098 WARN_ON(!entry->in_tree);
2099
2100 middle = entry->bytenr;
2101
2102 if (alt)
2103 n = n->rb_left;
2104 else
2105 n = n->rb_right;
2106
2107 alt = 1 - alt;
2108 }
2109 return middle;
2110}
2111#endif
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2124 unsigned long count)
2125{
2126 struct btrfs_fs_info *fs_info = trans->fs_info;
2127 struct rb_node *node;
2128 struct btrfs_delayed_ref_root *delayed_refs;
2129 struct btrfs_delayed_ref_head *head;
2130 int ret;
2131 int run_all = count == (unsigned long)-1;
2132
2133
2134 if (TRANS_ABORTED(trans))
2135 return 0;
2136
2137 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2138 return 0;
2139
2140 delayed_refs = &trans->transaction->delayed_refs;
2141 if (count == 0)
2142 count = delayed_refs->num_heads_ready;
2143
2144again:
2145#ifdef SCRAMBLE_DELAYED_REFS
2146 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2147#endif
2148 ret = __btrfs_run_delayed_refs(trans, count);
2149 if (ret < 0) {
2150 btrfs_abort_transaction(trans, ret);
2151 return ret;
2152 }
2153
2154 if (run_all) {
2155 btrfs_create_pending_block_groups(trans);
2156
2157 spin_lock(&delayed_refs->lock);
2158 node = rb_first_cached(&delayed_refs->href_root);
2159 if (!node) {
2160 spin_unlock(&delayed_refs->lock);
2161 goto out;
2162 }
2163 head = rb_entry(node, struct btrfs_delayed_ref_head,
2164 href_node);
2165 refcount_inc(&head->refs);
2166 spin_unlock(&delayed_refs->lock);
2167
2168
2169 mutex_lock(&head->mutex);
2170 mutex_unlock(&head->mutex);
2171
2172 btrfs_put_delayed_ref_head(head);
2173 cond_resched();
2174 goto again;
2175 }
2176out:
2177 return 0;
2178}
2179
2180int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2181 struct extent_buffer *eb, u64 flags,
2182 int level, int is_data)
2183{
2184 struct btrfs_delayed_extent_op *extent_op;
2185 int ret;
2186
2187 extent_op = btrfs_alloc_delayed_extent_op();
2188 if (!extent_op)
2189 return -ENOMEM;
2190
2191 extent_op->flags_to_set = flags;
2192 extent_op->update_flags = true;
2193 extent_op->update_key = false;
2194 extent_op->is_data = is_data ? true : false;
2195 extent_op->level = level;
2196
2197 ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
2198 if (ret)
2199 btrfs_free_delayed_extent_op(extent_op);
2200 return ret;
2201}
2202
2203static noinline int check_delayed_ref(struct btrfs_root *root,
2204 struct btrfs_path *path,
2205 u64 objectid, u64 offset, u64 bytenr)
2206{
2207 struct btrfs_delayed_ref_head *head;
2208 struct btrfs_delayed_ref_node *ref;
2209 struct btrfs_delayed_data_ref *data_ref;
2210 struct btrfs_delayed_ref_root *delayed_refs;
2211 struct btrfs_transaction *cur_trans;
2212 struct rb_node *node;
2213 int ret = 0;
2214
2215 spin_lock(&root->fs_info->trans_lock);
2216 cur_trans = root->fs_info->running_transaction;
2217 if (cur_trans)
2218 refcount_inc(&cur_trans->use_count);
2219 spin_unlock(&root->fs_info->trans_lock);
2220 if (!cur_trans)
2221 return 0;
2222
2223 delayed_refs = &cur_trans->delayed_refs;
2224 spin_lock(&delayed_refs->lock);
2225 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2226 if (!head) {
2227 spin_unlock(&delayed_refs->lock);
2228 btrfs_put_transaction(cur_trans);
2229 return 0;
2230 }
2231
2232 if (!mutex_trylock(&head->mutex)) {
2233 refcount_inc(&head->refs);
2234 spin_unlock(&delayed_refs->lock);
2235
2236 btrfs_release_path(path);
2237
2238
2239
2240
2241
2242 mutex_lock(&head->mutex);
2243 mutex_unlock(&head->mutex);
2244 btrfs_put_delayed_ref_head(head);
2245 btrfs_put_transaction(cur_trans);
2246 return -EAGAIN;
2247 }
2248 spin_unlock(&delayed_refs->lock);
2249
2250 spin_lock(&head->lock);
2251
2252
2253
2254
2255 for (node = rb_first_cached(&head->ref_tree); node;
2256 node = rb_next(node)) {
2257 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2258
2259 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2260 ret = 1;
2261 break;
2262 }
2263
2264 data_ref = btrfs_delayed_node_to_data_ref(ref);
2265
2266
2267
2268
2269
2270 if (data_ref->root != root->root_key.objectid ||
2271 data_ref->objectid != objectid ||
2272 data_ref->offset != offset) {
2273 ret = 1;
2274 break;
2275 }
2276 }
2277 spin_unlock(&head->lock);
2278 mutex_unlock(&head->mutex);
2279 btrfs_put_transaction(cur_trans);
2280 return ret;
2281}
2282
2283static noinline int check_committed_ref(struct btrfs_root *root,
2284 struct btrfs_path *path,
2285 u64 objectid, u64 offset, u64 bytenr,
2286 bool strict)
2287{
2288 struct btrfs_fs_info *fs_info = root->fs_info;
2289 struct btrfs_root *extent_root = fs_info->extent_root;
2290 struct extent_buffer *leaf;
2291 struct btrfs_extent_data_ref *ref;
2292 struct btrfs_extent_inline_ref *iref;
2293 struct btrfs_extent_item *ei;
2294 struct btrfs_key key;
2295 u32 item_size;
2296 int type;
2297 int ret;
2298
2299 key.objectid = bytenr;
2300 key.offset = (u64)-1;
2301 key.type = BTRFS_EXTENT_ITEM_KEY;
2302
2303 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2304 if (ret < 0)
2305 goto out;
2306 BUG_ON(ret == 0);
2307
2308 ret = -ENOENT;
2309 if (path->slots[0] == 0)
2310 goto out;
2311
2312 path->slots[0]--;
2313 leaf = path->nodes[0];
2314 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2315
2316 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2317 goto out;
2318
2319 ret = 1;
2320 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2321 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2322
2323
2324 if (item_size != sizeof(*ei) +
2325 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2326 goto out;
2327
2328
2329
2330
2331
2332 if (!strict &&
2333 (btrfs_extent_generation(leaf, ei) <=
2334 btrfs_root_last_snapshot(&root->root_item)))
2335 goto out;
2336
2337 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2338
2339
2340 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2341 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2342 goto out;
2343
2344 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2345 if (btrfs_extent_refs(leaf, ei) !=
2346 btrfs_extent_data_ref_count(leaf, ref) ||
2347 btrfs_extent_data_ref_root(leaf, ref) !=
2348 root->root_key.objectid ||
2349 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2350 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2351 goto out;
2352
2353 ret = 0;
2354out:
2355 return ret;
2356}
2357
2358int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2359 u64 bytenr, bool strict)
2360{
2361 struct btrfs_path *path;
2362 int ret;
2363
2364 path = btrfs_alloc_path();
2365 if (!path)
2366 return -ENOMEM;
2367
2368 do {
2369 ret = check_committed_ref(root, path, objectid,
2370 offset, bytenr, strict);
2371 if (ret && ret != -ENOENT)
2372 goto out;
2373
2374 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2375 } while (ret == -EAGAIN);
2376
2377out:
2378 btrfs_free_path(path);
2379 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2380 WARN_ON(ret > 0);
2381 return ret;
2382}
2383
2384static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2385 struct btrfs_root *root,
2386 struct extent_buffer *buf,
2387 int full_backref, int inc)
2388{
2389 struct btrfs_fs_info *fs_info = root->fs_info;
2390 u64 bytenr;
2391 u64 num_bytes;
2392 u64 parent;
2393 u64 ref_root;
2394 u32 nritems;
2395 struct btrfs_key key;
2396 struct btrfs_file_extent_item *fi;
2397 struct btrfs_ref generic_ref = { 0 };
2398 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2399 int i;
2400 int action;
2401 int level;
2402 int ret = 0;
2403
2404 if (btrfs_is_testing(fs_info))
2405 return 0;
2406
2407 ref_root = btrfs_header_owner(buf);
2408 nritems = btrfs_header_nritems(buf);
2409 level = btrfs_header_level(buf);
2410
2411 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
2412 return 0;
2413
2414 if (full_backref)
2415 parent = buf->start;
2416 else
2417 parent = 0;
2418 if (inc)
2419 action = BTRFS_ADD_DELAYED_REF;
2420 else
2421 action = BTRFS_DROP_DELAYED_REF;
2422
2423 for (i = 0; i < nritems; i++) {
2424 if (level == 0) {
2425 btrfs_item_key_to_cpu(buf, &key, i);
2426 if (key.type != BTRFS_EXTENT_DATA_KEY)
2427 continue;
2428 fi = btrfs_item_ptr(buf, i,
2429 struct btrfs_file_extent_item);
2430 if (btrfs_file_extent_type(buf, fi) ==
2431 BTRFS_FILE_EXTENT_INLINE)
2432 continue;
2433 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2434 if (bytenr == 0)
2435 continue;
2436
2437 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2438 key.offset -= btrfs_file_extent_offset(buf, fi);
2439 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2440 num_bytes, parent);
2441 generic_ref.real_root = root->root_key.objectid;
2442 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2443 key.offset);
2444 generic_ref.skip_qgroup = for_reloc;
2445 if (inc)
2446 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2447 else
2448 ret = btrfs_free_extent(trans, &generic_ref);
2449 if (ret)
2450 goto fail;
2451 } else {
2452 bytenr = btrfs_node_blockptr(buf, i);
2453 num_bytes = fs_info->nodesize;
2454 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2455 num_bytes, parent);
2456 generic_ref.real_root = root->root_key.objectid;
2457 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2458 generic_ref.skip_qgroup = for_reloc;
2459 if (inc)
2460 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2461 else
2462 ret = btrfs_free_extent(trans, &generic_ref);
2463 if (ret)
2464 goto fail;
2465 }
2466 }
2467 return 0;
2468fail:
2469 return ret;
2470}
2471
2472int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2473 struct extent_buffer *buf, int full_backref)
2474{
2475 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2476}
2477
2478int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2479 struct extent_buffer *buf, int full_backref)
2480{
2481 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2482}
2483
2484static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2485{
2486 struct btrfs_fs_info *fs_info = root->fs_info;
2487 u64 flags;
2488 u64 ret;
2489
2490 if (data)
2491 flags = BTRFS_BLOCK_GROUP_DATA;
2492 else if (root == fs_info->chunk_root)
2493 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2494 else
2495 flags = BTRFS_BLOCK_GROUP_METADATA;
2496
2497 ret = btrfs_get_alloc_profile(fs_info, flags);
2498 return ret;
2499}
2500
2501static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2502{
2503 struct btrfs_block_group *cache;
2504 u64 bytenr;
2505
2506 spin_lock(&fs_info->block_group_cache_lock);
2507 bytenr = fs_info->first_logical_byte;
2508 spin_unlock(&fs_info->block_group_cache_lock);
2509
2510 if (bytenr < (u64)-1)
2511 return bytenr;
2512
2513 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2514 if (!cache)
2515 return 0;
2516
2517 bytenr = cache->start;
2518 btrfs_put_block_group(cache);
2519
2520 return bytenr;
2521}
2522
2523static int pin_down_extent(struct btrfs_trans_handle *trans,
2524 struct btrfs_block_group *cache,
2525 u64 bytenr, u64 num_bytes, int reserved)
2526{
2527 struct btrfs_fs_info *fs_info = cache->fs_info;
2528
2529 spin_lock(&cache->space_info->lock);
2530 spin_lock(&cache->lock);
2531 cache->pinned += num_bytes;
2532 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2533 num_bytes);
2534 if (reserved) {
2535 cache->reserved -= num_bytes;
2536 cache->space_info->bytes_reserved -= num_bytes;
2537 }
2538 spin_unlock(&cache->lock);
2539 spin_unlock(&cache->space_info->lock);
2540
2541 set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
2542 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2543 return 0;
2544}
2545
2546int btrfs_pin_extent(struct btrfs_trans_handle *trans,
2547 u64 bytenr, u64 num_bytes, int reserved)
2548{
2549 struct btrfs_block_group *cache;
2550
2551 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2552 BUG_ON(!cache);
2553
2554 pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
2555
2556 btrfs_put_block_group(cache);
2557 return 0;
2558}
2559
2560
2561
2562
2563int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2564 u64 bytenr, u64 num_bytes)
2565{
2566 struct btrfs_block_group *cache;
2567 int ret;
2568
2569 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2570 if (!cache)
2571 return -EINVAL;
2572
2573
2574
2575
2576
2577
2578
2579 btrfs_cache_block_group(cache, 1);
2580
2581
2582
2583
2584 ret = btrfs_wait_block_group_cache_done(cache);
2585 if (ret)
2586 goto out;
2587
2588 pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2589
2590
2591 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2592out:
2593 btrfs_put_block_group(cache);
2594 return ret;
2595}
2596
2597static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2598 u64 start, u64 num_bytes)
2599{
2600 int ret;
2601 struct btrfs_block_group *block_group;
2602
2603 block_group = btrfs_lookup_block_group(fs_info, start);
2604 if (!block_group)
2605 return -EINVAL;
2606
2607 btrfs_cache_block_group(block_group, 1);
2608
2609
2610
2611
2612 ret = btrfs_wait_block_group_cache_done(block_group);
2613 if (ret)
2614 goto out;
2615
2616 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2617out:
2618 btrfs_put_block_group(block_group);
2619 return ret;
2620}
2621
2622int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2623{
2624 struct btrfs_fs_info *fs_info = eb->fs_info;
2625 struct btrfs_file_extent_item *item;
2626 struct btrfs_key key;
2627 int found_type;
2628 int i;
2629 int ret = 0;
2630
2631 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2632 return 0;
2633
2634 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2635 btrfs_item_key_to_cpu(eb, &key, i);
2636 if (key.type != BTRFS_EXTENT_DATA_KEY)
2637 continue;
2638 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2639 found_type = btrfs_file_extent_type(eb, item);
2640 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2641 continue;
2642 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2643 continue;
2644 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2645 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2646 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2647 if (ret)
2648 break;
2649 }
2650
2651 return ret;
2652}
2653
2654static void
2655btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
2656{
2657 atomic_inc(&bg->reservations);
2658}
2659
2660
2661
2662
2663
2664static struct btrfs_free_cluster *
2665fetch_cluster_info(struct btrfs_fs_info *fs_info,
2666 struct btrfs_space_info *space_info, u64 *empty_cluster)
2667{
2668 struct btrfs_free_cluster *ret = NULL;
2669
2670 *empty_cluster = 0;
2671 if (btrfs_mixed_space_info(space_info))
2672 return ret;
2673
2674 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2675 ret = &fs_info->meta_alloc_cluster;
2676 if (btrfs_test_opt(fs_info, SSD))
2677 *empty_cluster = SZ_2M;
2678 else
2679 *empty_cluster = SZ_64K;
2680 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2681 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2682 *empty_cluster = SZ_2M;
2683 ret = &fs_info->data_alloc_cluster;
2684 }
2685
2686 return ret;
2687}
2688
2689static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2690 u64 start, u64 end,
2691 const bool return_free_space)
2692{
2693 struct btrfs_block_group *cache = NULL;
2694 struct btrfs_space_info *space_info;
2695 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2696 struct btrfs_free_cluster *cluster = NULL;
2697 u64 len;
2698 u64 total_unpinned = 0;
2699 u64 empty_cluster = 0;
2700 bool readonly;
2701
2702 while (start <= end) {
2703 readonly = false;
2704 if (!cache ||
2705 start >= cache->start + cache->length) {
2706 if (cache)
2707 btrfs_put_block_group(cache);
2708 total_unpinned = 0;
2709 cache = btrfs_lookup_block_group(fs_info, start);
2710 BUG_ON(!cache);
2711
2712 cluster = fetch_cluster_info(fs_info,
2713 cache->space_info,
2714 &empty_cluster);
2715 empty_cluster <<= 1;
2716 }
2717
2718 len = cache->start + cache->length - start;
2719 len = min(len, end + 1 - start);
2720
2721 down_read(&fs_info->commit_root_sem);
2722 if (start < cache->last_byte_to_unpin && return_free_space) {
2723 u64 add_len = min(len, cache->last_byte_to_unpin - start);
2724
2725 btrfs_add_free_space(cache, start, add_len);
2726 }
2727 up_read(&fs_info->commit_root_sem);
2728
2729 start += len;
2730 total_unpinned += len;
2731 space_info = cache->space_info;
2732
2733
2734
2735
2736
2737
2738
2739 if (cluster && cluster->fragmented &&
2740 total_unpinned > empty_cluster) {
2741 spin_lock(&cluster->lock);
2742 cluster->fragmented = 0;
2743 spin_unlock(&cluster->lock);
2744 }
2745
2746 spin_lock(&space_info->lock);
2747 spin_lock(&cache->lock);
2748 cache->pinned -= len;
2749 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2750 space_info->max_extent_size = 0;
2751 if (cache->ro) {
2752 space_info->bytes_readonly += len;
2753 readonly = true;
2754 } else if (btrfs_is_zoned(fs_info)) {
2755
2756 space_info->bytes_zone_unusable += len;
2757 readonly = true;
2758 }
2759 spin_unlock(&cache->lock);
2760 if (!readonly && return_free_space &&
2761 global_rsv->space_info == space_info) {
2762 u64 to_add = len;
2763
2764 spin_lock(&global_rsv->lock);
2765 if (!global_rsv->full) {
2766 to_add = min(len, global_rsv->size -
2767 global_rsv->reserved);
2768 global_rsv->reserved += to_add;
2769 btrfs_space_info_update_bytes_may_use(fs_info,
2770 space_info, to_add);
2771 if (global_rsv->reserved >= global_rsv->size)
2772 global_rsv->full = 1;
2773 len -= to_add;
2774 }
2775 spin_unlock(&global_rsv->lock);
2776 }
2777
2778 if (!readonly && return_free_space && len)
2779 btrfs_try_granting_tickets(fs_info, space_info);
2780 spin_unlock(&space_info->lock);
2781 }
2782
2783 if (cache)
2784 btrfs_put_block_group(cache);
2785 return 0;
2786}
2787
2788int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2789{
2790 struct btrfs_fs_info *fs_info = trans->fs_info;
2791 struct btrfs_block_group *block_group, *tmp;
2792 struct list_head *deleted_bgs;
2793 struct extent_io_tree *unpin;
2794 u64 start;
2795 u64 end;
2796 int ret;
2797
2798 unpin = &trans->transaction->pinned_extents;
2799
2800 while (!TRANS_ABORTED(trans)) {
2801 struct extent_state *cached_state = NULL;
2802
2803 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2804 ret = find_first_extent_bit(unpin, 0, &start, &end,
2805 EXTENT_DIRTY, &cached_state);
2806 if (ret) {
2807 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2808 break;
2809 }
2810
2811 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
2812 ret = btrfs_discard_extent(fs_info, start,
2813 end + 1 - start, NULL);
2814
2815 clear_extent_dirty(unpin, start, end, &cached_state);
2816 unpin_extent_range(fs_info, start, end, true);
2817 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2818 free_extent_state(cached_state);
2819 cond_resched();
2820 }
2821
2822 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2823 btrfs_discard_calc_delay(&fs_info->discard_ctl);
2824 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2825 }
2826
2827
2828
2829
2830
2831
2832 deleted_bgs = &trans->transaction->deleted_bgs;
2833 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2834 u64 trimmed = 0;
2835
2836 ret = -EROFS;
2837 if (!TRANS_ABORTED(trans))
2838 ret = btrfs_discard_extent(fs_info,
2839 block_group->start,
2840 block_group->length,
2841 &trimmed);
2842
2843 list_del_init(&block_group->bg_list);
2844 btrfs_unfreeze_block_group(block_group);
2845 btrfs_put_block_group(block_group);
2846
2847 if (ret) {
2848 const char *errstr = btrfs_decode_error(ret);
2849 btrfs_warn(fs_info,
2850 "discard failed while removing blockgroup: errno=%d %s",
2851 ret, errstr);
2852 }
2853 }
2854
2855 return 0;
2856}
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2918 struct btrfs_delayed_ref_node *node, u64 parent,
2919 u64 root_objectid, u64 owner_objectid,
2920 u64 owner_offset, int refs_to_drop,
2921 struct btrfs_delayed_extent_op *extent_op)
2922{
2923 struct btrfs_fs_info *info = trans->fs_info;
2924 struct btrfs_key key;
2925 struct btrfs_path *path;
2926 struct btrfs_root *extent_root = info->extent_root;
2927 struct extent_buffer *leaf;
2928 struct btrfs_extent_item *ei;
2929 struct btrfs_extent_inline_ref *iref;
2930 int ret;
2931 int is_data;
2932 int extent_slot = 0;
2933 int found_extent = 0;
2934 int num_to_del = 1;
2935 u32 item_size;
2936 u64 refs;
2937 u64 bytenr = node->bytenr;
2938 u64 num_bytes = node->num_bytes;
2939 int last_ref = 0;
2940 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2941
2942 path = btrfs_alloc_path();
2943 if (!path)
2944 return -ENOMEM;
2945
2946 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2947
2948 if (!is_data && refs_to_drop != 1) {
2949 btrfs_crit(info,
2950"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
2951 node->bytenr, refs_to_drop);
2952 ret = -EINVAL;
2953 btrfs_abort_transaction(trans, ret);
2954 goto out;
2955 }
2956
2957 if (is_data)
2958 skinny_metadata = false;
2959
2960 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2961 parent, root_objectid, owner_objectid,
2962 owner_offset);
2963 if (ret == 0) {
2964
2965
2966
2967
2968
2969
2970
2971 extent_slot = path->slots[0];
2972 while (extent_slot >= 0) {
2973 btrfs_item_key_to_cpu(path->nodes[0], &key,
2974 extent_slot);
2975 if (key.objectid != bytenr)
2976 break;
2977 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2978 key.offset == num_bytes) {
2979 found_extent = 1;
2980 break;
2981 }
2982 if (key.type == BTRFS_METADATA_ITEM_KEY &&
2983 key.offset == owner_objectid) {
2984 found_extent = 1;
2985 break;
2986 }
2987
2988
2989 if (path->slots[0] - extent_slot > 5)
2990 break;
2991 extent_slot--;
2992 }
2993
2994 if (!found_extent) {
2995 if (iref) {
2996 btrfs_crit(info,
2997"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
2998 btrfs_abort_transaction(trans, -EUCLEAN);
2999 goto err_dump;
3000 }
3001
3002 ret = remove_extent_backref(trans, path, NULL,
3003 refs_to_drop,
3004 is_data, &last_ref);
3005 if (ret) {
3006 btrfs_abort_transaction(trans, ret);
3007 goto out;
3008 }
3009 btrfs_release_path(path);
3010
3011
3012 key.objectid = bytenr;
3013 key.type = BTRFS_EXTENT_ITEM_KEY;
3014 key.offset = num_bytes;
3015
3016 if (!is_data && skinny_metadata) {
3017 key.type = BTRFS_METADATA_ITEM_KEY;
3018 key.offset = owner_objectid;
3019 }
3020
3021 ret = btrfs_search_slot(trans, extent_root,
3022 &key, path, -1, 1);
3023 if (ret > 0 && skinny_metadata && path->slots[0]) {
3024
3025
3026
3027
3028 path->slots[0]--;
3029 btrfs_item_key_to_cpu(path->nodes[0], &key,
3030 path->slots[0]);
3031 if (key.objectid == bytenr &&
3032 key.type == BTRFS_EXTENT_ITEM_KEY &&
3033 key.offset == num_bytes)
3034 ret = 0;
3035 }
3036
3037 if (ret > 0 && skinny_metadata) {
3038 skinny_metadata = false;
3039 key.objectid = bytenr;
3040 key.type = BTRFS_EXTENT_ITEM_KEY;
3041 key.offset = num_bytes;
3042 btrfs_release_path(path);
3043 ret = btrfs_search_slot(trans, extent_root,
3044 &key, path, -1, 1);
3045 }
3046
3047 if (ret) {
3048 btrfs_err(info,
3049 "umm, got %d back from search, was looking for %llu",
3050 ret, bytenr);
3051 if (ret > 0)
3052 btrfs_print_leaf(path->nodes[0]);
3053 }
3054 if (ret < 0) {
3055 btrfs_abort_transaction(trans, ret);
3056 goto out;
3057 }
3058 extent_slot = path->slots[0];
3059 }
3060 } else if (WARN_ON(ret == -ENOENT)) {
3061 btrfs_print_leaf(path->nodes[0]);
3062 btrfs_err(info,
3063 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3064 bytenr, parent, root_objectid, owner_objectid,
3065 owner_offset);
3066 btrfs_abort_transaction(trans, ret);
3067 goto out;
3068 } else {
3069 btrfs_abort_transaction(trans, ret);
3070 goto out;
3071 }
3072
3073 leaf = path->nodes[0];
3074 item_size = btrfs_item_size_nr(leaf, extent_slot);
3075 if (unlikely(item_size < sizeof(*ei))) {
3076 ret = -EINVAL;
3077 btrfs_print_v0_err(info);
3078 btrfs_abort_transaction(trans, ret);
3079 goto out;
3080 }
3081 ei = btrfs_item_ptr(leaf, extent_slot,
3082 struct btrfs_extent_item);
3083 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3084 key.type == BTRFS_EXTENT_ITEM_KEY) {
3085 struct btrfs_tree_block_info *bi;
3086 if (item_size < sizeof(*ei) + sizeof(*bi)) {
3087 btrfs_crit(info,
3088"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
3089 key.objectid, key.type, key.offset,
3090 owner_objectid, item_size,
3091 sizeof(*ei) + sizeof(*bi));
3092 btrfs_abort_transaction(trans, -EUCLEAN);
3093 goto err_dump;
3094 }
3095 bi = (struct btrfs_tree_block_info *)(ei + 1);
3096 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3097 }
3098
3099 refs = btrfs_extent_refs(leaf, ei);
3100 if (refs < refs_to_drop) {
3101 btrfs_crit(info,
3102 "trying to drop %d refs but we only have %llu for bytenr %llu",
3103 refs_to_drop, refs, bytenr);
3104 btrfs_abort_transaction(trans, -EUCLEAN);
3105 goto err_dump;
3106 }
3107 refs -= refs_to_drop;
3108
3109 if (refs > 0) {
3110 if (extent_op)
3111 __run_delayed_extent_op(extent_op, leaf, ei);
3112
3113
3114
3115
3116 if (iref) {
3117 if (!found_extent) {
3118 btrfs_crit(info,
3119"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
3120 btrfs_abort_transaction(trans, -EUCLEAN);
3121 goto err_dump;
3122 }
3123 } else {
3124 btrfs_set_extent_refs(leaf, ei, refs);
3125 btrfs_mark_buffer_dirty(leaf);
3126 }
3127 if (found_extent) {
3128 ret = remove_extent_backref(trans, path, iref,
3129 refs_to_drop, is_data,
3130 &last_ref);
3131 if (ret) {
3132 btrfs_abort_transaction(trans, ret);
3133 goto out;
3134 }
3135 }
3136 } else {
3137
3138 if (found_extent) {
3139 if (is_data && refs_to_drop !=
3140 extent_data_ref_count(path, iref)) {
3141 btrfs_crit(info,
3142 "invalid refs_to_drop, current refs %u refs_to_drop %u",
3143 extent_data_ref_count(path, iref),
3144 refs_to_drop);
3145 btrfs_abort_transaction(trans, -EUCLEAN);
3146 goto err_dump;
3147 }
3148 if (iref) {
3149 if (path->slots[0] != extent_slot) {
3150 btrfs_crit(info,
3151"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
3152 key.objectid, key.type,
3153 key.offset);
3154 btrfs_abort_transaction(trans, -EUCLEAN);
3155 goto err_dump;
3156 }
3157 } else {
3158
3159
3160
3161
3162
3163
3164 if (path->slots[0] != extent_slot + 1) {
3165 btrfs_crit(info,
3166 "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
3167 btrfs_abort_transaction(trans, -EUCLEAN);
3168 goto err_dump;
3169 }
3170 path->slots[0] = extent_slot;
3171 num_to_del = 2;
3172 }
3173 }
3174
3175 last_ref = 1;
3176 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3177 num_to_del);
3178 if (ret) {
3179 btrfs_abort_transaction(trans, ret);
3180 goto out;
3181 }
3182 btrfs_release_path(path);
3183
3184 if (is_data) {
3185 ret = btrfs_del_csums(trans, info->csum_root, bytenr,
3186 num_bytes);
3187 if (ret) {
3188 btrfs_abort_transaction(trans, ret);
3189 goto out;
3190 }
3191 }
3192
3193 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3194 if (ret) {
3195 btrfs_abort_transaction(trans, ret);
3196 goto out;
3197 }
3198
3199 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3200 if (ret) {
3201 btrfs_abort_transaction(trans, ret);
3202 goto out;
3203 }
3204 }
3205 btrfs_release_path(path);
3206
3207out:
3208 btrfs_free_path(path);
3209 return ret;
3210err_dump:
3211
3212
3213
3214
3215 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
3216 btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
3217 path->slots[0], extent_slot);
3218 btrfs_print_leaf(path->nodes[0]);
3219 }
3220
3221 btrfs_free_path(path);
3222 return -EUCLEAN;
3223}
3224
3225
3226
3227
3228
3229
3230
3231static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3232 u64 bytenr)
3233{
3234 struct btrfs_delayed_ref_head *head;
3235 struct btrfs_delayed_ref_root *delayed_refs;
3236 int ret = 0;
3237
3238 delayed_refs = &trans->transaction->delayed_refs;
3239 spin_lock(&delayed_refs->lock);
3240 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3241 if (!head)
3242 goto out_delayed_unlock;
3243
3244 spin_lock(&head->lock);
3245 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3246 goto out;
3247
3248 if (cleanup_extent_op(head) != NULL)
3249 goto out;
3250
3251
3252
3253
3254
3255 if (!mutex_trylock(&head->mutex))
3256 goto out;
3257
3258 btrfs_delete_ref_head(delayed_refs, head);
3259 head->processing = 0;
3260
3261 spin_unlock(&head->lock);
3262 spin_unlock(&delayed_refs->lock);
3263
3264 BUG_ON(head->extent_op);
3265 if (head->must_insert_reserved)
3266 ret = 1;
3267
3268 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3269 mutex_unlock(&head->mutex);
3270 btrfs_put_delayed_ref_head(head);
3271 return ret;
3272out:
3273 spin_unlock(&head->lock);
3274
3275out_delayed_unlock:
3276 spin_unlock(&delayed_refs->lock);
3277 return 0;
3278}
3279
3280void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3281 struct btrfs_root *root,
3282 struct extent_buffer *buf,
3283 u64 parent, int last_ref)
3284{
3285 struct btrfs_fs_info *fs_info = root->fs_info;
3286 struct btrfs_ref generic_ref = { 0 };
3287 int ret;
3288
3289 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3290 buf->start, buf->len, parent);
3291 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3292 root->root_key.objectid);
3293
3294 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3295 btrfs_ref_tree_mod(fs_info, &generic_ref);
3296 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
3297 BUG_ON(ret);
3298 }
3299
3300 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3301 struct btrfs_block_group *cache;
3302 bool must_pin = false;
3303
3304 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3305 ret = check_ref_cleanup(trans, buf->start);
3306 if (!ret) {
3307 btrfs_redirty_list_add(trans->transaction, buf);
3308 goto out;
3309 }
3310 }
3311
3312 cache = btrfs_lookup_block_group(fs_info, buf->start);
3313
3314 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3315 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3316 btrfs_put_block_group(cache);
3317 goto out;
3318 }
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334 if (btrfs_header_level(buf) == 0 &&
3335 test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
3336 must_pin = true;
3337
3338 if (must_pin || btrfs_is_zoned(fs_info)) {
3339 btrfs_redirty_list_add(trans->transaction, buf);
3340 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3341 btrfs_put_block_group(cache);
3342 goto out;
3343 }
3344
3345 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3346
3347 btrfs_add_free_space(cache, buf->start, buf->len);
3348 btrfs_free_reserved_bytes(cache, buf->len, 0);
3349 btrfs_put_block_group(cache);
3350 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3351 }
3352out:
3353 if (last_ref) {
3354
3355
3356
3357
3358 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3359 }
3360}
3361
3362
3363int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3364{
3365 struct btrfs_fs_info *fs_info = trans->fs_info;
3366 int ret;
3367
3368 if (btrfs_is_testing(fs_info))
3369 return 0;
3370
3371
3372
3373
3374
3375 if ((ref->type == BTRFS_REF_METADATA &&
3376 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3377 (ref->type == BTRFS_REF_DATA &&
3378 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3379
3380 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
3381 ret = 0;
3382 } else if (ref->type == BTRFS_REF_METADATA) {
3383 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
3384 } else {
3385 ret = btrfs_add_delayed_data_ref(trans, ref, 0);
3386 }
3387
3388 if (!((ref->type == BTRFS_REF_METADATA &&
3389 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3390 (ref->type == BTRFS_REF_DATA &&
3391 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3392 btrfs_ref_tree_mod(fs_info, ref);
3393
3394 return ret;
3395}
3396
3397enum btrfs_loop_type {
3398 LOOP_CACHING_NOWAIT,
3399 LOOP_CACHING_WAIT,
3400 LOOP_ALLOC_CHUNK,
3401 LOOP_NO_EMPTY_SIZE,
3402};
3403
3404static inline void
3405btrfs_lock_block_group(struct btrfs_block_group *cache,
3406 int delalloc)
3407{
3408 if (delalloc)
3409 down_read(&cache->data_rwsem);
3410}
3411
3412static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3413 int delalloc)
3414{
3415 btrfs_get_block_group(cache);
3416 if (delalloc)
3417 down_read(&cache->data_rwsem);
3418}
3419
3420static struct btrfs_block_group *btrfs_lock_cluster(
3421 struct btrfs_block_group *block_group,
3422 struct btrfs_free_cluster *cluster,
3423 int delalloc)
3424 __acquires(&cluster->refill_lock)
3425{
3426 struct btrfs_block_group *used_bg = NULL;
3427
3428 spin_lock(&cluster->refill_lock);
3429 while (1) {
3430 used_bg = cluster->block_group;
3431 if (!used_bg)
3432 return NULL;
3433
3434 if (used_bg == block_group)
3435 return used_bg;
3436
3437 btrfs_get_block_group(used_bg);
3438
3439 if (!delalloc)
3440 return used_bg;
3441
3442 if (down_read_trylock(&used_bg->data_rwsem))
3443 return used_bg;
3444
3445 spin_unlock(&cluster->refill_lock);
3446
3447
3448 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3449
3450 spin_lock(&cluster->refill_lock);
3451 if (used_bg == cluster->block_group)
3452 return used_bg;
3453
3454 up_read(&used_bg->data_rwsem);
3455 btrfs_put_block_group(used_bg);
3456 }
3457}
3458
3459static inline void
3460btrfs_release_block_group(struct btrfs_block_group *cache,
3461 int delalloc)
3462{
3463 if (delalloc)
3464 up_read(&cache->data_rwsem);
3465 btrfs_put_block_group(cache);
3466}
3467
3468enum btrfs_extent_allocation_policy {
3469 BTRFS_EXTENT_ALLOC_CLUSTERED,
3470 BTRFS_EXTENT_ALLOC_ZONED,
3471};
3472
3473
3474
3475
3476
3477struct find_free_extent_ctl {
3478
3479 u64 num_bytes;
3480 u64 empty_size;
3481 u64 flags;
3482 int delalloc;
3483
3484
3485 u64 search_start;
3486
3487
3488 u64 empty_cluster;
3489 struct btrfs_free_cluster *last_ptr;
3490 bool use_cluster;
3491
3492 bool have_caching_bg;
3493 bool orig_have_caching_bg;
3494
3495
3496 bool for_treelog;
3497
3498
3499 int index;
3500
3501
3502
3503
3504 int loop;
3505
3506
3507
3508
3509
3510 bool retry_clustered;
3511
3512
3513
3514
3515
3516 bool retry_unclustered;
3517
3518
3519 int cached;
3520
3521
3522 u64 max_extent_size;
3523
3524
3525 u64 total_free_space;
3526
3527
3528 u64 found_offset;
3529
3530
3531 u64 hint_byte;
3532
3533
3534 enum btrfs_extent_allocation_policy policy;
3535};
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546static int find_free_extent_clustered(struct btrfs_block_group *bg,
3547 struct find_free_extent_ctl *ffe_ctl,
3548 struct btrfs_block_group **cluster_bg_ret)
3549{
3550 struct btrfs_block_group *cluster_bg;
3551 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3552 u64 aligned_cluster;
3553 u64 offset;
3554 int ret;
3555
3556 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3557 if (!cluster_bg)
3558 goto refill_cluster;
3559 if (cluster_bg != bg && (cluster_bg->ro ||
3560 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3561 goto release_cluster;
3562
3563 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3564 ffe_ctl->num_bytes, cluster_bg->start,
3565 &ffe_ctl->max_extent_size);
3566 if (offset) {
3567
3568 spin_unlock(&last_ptr->refill_lock);
3569 trace_btrfs_reserve_extent_cluster(cluster_bg,
3570 ffe_ctl->search_start, ffe_ctl->num_bytes);
3571 *cluster_bg_ret = cluster_bg;
3572 ffe_ctl->found_offset = offset;
3573 return 0;
3574 }
3575 WARN_ON(last_ptr->block_group != cluster_bg);
3576
3577release_cluster:
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3590 spin_unlock(&last_ptr->refill_lock);
3591 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3592 return -ENOENT;
3593 }
3594
3595
3596 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3597
3598 if (cluster_bg != bg)
3599 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3600
3601refill_cluster:
3602 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3603 spin_unlock(&last_ptr->refill_lock);
3604 return -ENOENT;
3605 }
3606
3607 aligned_cluster = max_t(u64,
3608 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3609 bg->full_stripe_len);
3610 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3611 ffe_ctl->num_bytes, aligned_cluster);
3612 if (ret == 0) {
3613
3614 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3615 ffe_ctl->num_bytes, ffe_ctl->search_start,
3616 &ffe_ctl->max_extent_size);
3617 if (offset) {
3618
3619 spin_unlock(&last_ptr->refill_lock);
3620 trace_btrfs_reserve_extent_cluster(bg,
3621 ffe_ctl->search_start,
3622 ffe_ctl->num_bytes);
3623 ffe_ctl->found_offset = offset;
3624 return 0;
3625 }
3626 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3627 !ffe_ctl->retry_clustered) {
3628 spin_unlock(&last_ptr->refill_lock);
3629
3630 ffe_ctl->retry_clustered = true;
3631 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3632 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3633 return -EAGAIN;
3634 }
3635
3636
3637
3638
3639
3640 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3641 spin_unlock(&last_ptr->refill_lock);
3642 return 1;
3643}
3644
3645
3646
3647
3648
3649
3650static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3651 struct find_free_extent_ctl *ffe_ctl)
3652{
3653 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3654 u64 offset;
3655
3656
3657
3658
3659
3660
3661 if (unlikely(last_ptr)) {
3662 spin_lock(&last_ptr->lock);
3663 last_ptr->fragmented = 1;
3664 spin_unlock(&last_ptr->lock);
3665 }
3666 if (ffe_ctl->cached) {
3667 struct btrfs_free_space_ctl *free_space_ctl;
3668
3669 free_space_ctl = bg->free_space_ctl;
3670 spin_lock(&free_space_ctl->tree_lock);
3671 if (free_space_ctl->free_space <
3672 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3673 ffe_ctl->empty_size) {
3674 ffe_ctl->total_free_space = max_t(u64,
3675 ffe_ctl->total_free_space,
3676 free_space_ctl->free_space);
3677 spin_unlock(&free_space_ctl->tree_lock);
3678 return 1;
3679 }
3680 spin_unlock(&free_space_ctl->tree_lock);
3681 }
3682
3683 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3684 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3685 &ffe_ctl->max_extent_size);
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3697 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3698 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3699 ffe_ctl->empty_size);
3700 ffe_ctl->retry_unclustered = true;
3701 return -EAGAIN;
3702 } else if (!offset) {
3703 return 1;
3704 }
3705 ffe_ctl->found_offset = offset;
3706 return 0;
3707}
3708
3709static int do_allocation_clustered(struct btrfs_block_group *block_group,
3710 struct find_free_extent_ctl *ffe_ctl,
3711 struct btrfs_block_group **bg_ret)
3712{
3713 int ret;
3714
3715
3716 if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3717 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3718 if (ret >= 0 || ret == -EAGAIN)
3719 return ret;
3720
3721 }
3722
3723 return find_free_extent_unclustered(block_group, ffe_ctl);
3724}
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747static int do_allocation_zoned(struct btrfs_block_group *block_group,
3748 struct find_free_extent_ctl *ffe_ctl,
3749 struct btrfs_block_group **bg_ret)
3750{
3751 struct btrfs_fs_info *fs_info = block_group->fs_info;
3752 struct btrfs_space_info *space_info = block_group->space_info;
3753 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3754 u64 start = block_group->start;
3755 u64 num_bytes = ffe_ctl->num_bytes;
3756 u64 avail;
3757 u64 bytenr = block_group->start;
3758 u64 log_bytenr;
3759 int ret = 0;
3760 bool skip;
3761
3762 ASSERT(btrfs_is_zoned(block_group->fs_info));
3763
3764
3765
3766
3767
3768 spin_lock(&fs_info->treelog_bg_lock);
3769 log_bytenr = fs_info->treelog_bg;
3770 skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
3771 (!ffe_ctl->for_treelog && bytenr == log_bytenr));
3772 spin_unlock(&fs_info->treelog_bg_lock);
3773 if (skip)
3774 return 1;
3775
3776 spin_lock(&space_info->lock);
3777 spin_lock(&block_group->lock);
3778 spin_lock(&fs_info->treelog_bg_lock);
3779
3780 ASSERT(!ffe_ctl->for_treelog ||
3781 block_group->start == fs_info->treelog_bg ||
3782 fs_info->treelog_bg == 0);
3783
3784 if (block_group->ro) {
3785 ret = 1;
3786 goto out;
3787 }
3788
3789
3790
3791
3792
3793 if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
3794 (block_group->used || block_group->reserved)) {
3795 ret = 1;
3796 goto out;
3797 }
3798
3799 avail = block_group->length - block_group->alloc_offset;
3800 if (avail < num_bytes) {
3801 if (ffe_ctl->max_extent_size < avail) {
3802
3803
3804
3805
3806 ffe_ctl->max_extent_size = avail;
3807 ffe_ctl->total_free_space = avail;
3808 }
3809 ret = 1;
3810 goto out;
3811 }
3812
3813 if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
3814 fs_info->treelog_bg = block_group->start;
3815
3816 ffe_ctl->found_offset = start + block_group->alloc_offset;
3817 block_group->alloc_offset += num_bytes;
3818 spin_lock(&ctl->tree_lock);
3819 ctl->free_space -= num_bytes;
3820 spin_unlock(&ctl->tree_lock);
3821
3822
3823
3824
3825
3826
3827 ffe_ctl->search_start = ffe_ctl->found_offset;
3828
3829out:
3830 if (ret && ffe_ctl->for_treelog)
3831 fs_info->treelog_bg = 0;
3832 spin_unlock(&fs_info->treelog_bg_lock);
3833 spin_unlock(&block_group->lock);
3834 spin_unlock(&space_info->lock);
3835 return ret;
3836}
3837
3838static int do_allocation(struct btrfs_block_group *block_group,
3839 struct find_free_extent_ctl *ffe_ctl,
3840 struct btrfs_block_group **bg_ret)
3841{
3842 switch (ffe_ctl->policy) {
3843 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3844 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
3845 case BTRFS_EXTENT_ALLOC_ZONED:
3846 return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
3847 default:
3848 BUG();
3849 }
3850}
3851
3852static void release_block_group(struct btrfs_block_group *block_group,
3853 struct find_free_extent_ctl *ffe_ctl,
3854 int delalloc)
3855{
3856 switch (ffe_ctl->policy) {
3857 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3858 ffe_ctl->retry_clustered = false;
3859 ffe_ctl->retry_unclustered = false;
3860 break;
3861 case BTRFS_EXTENT_ALLOC_ZONED:
3862
3863 break;
3864 default:
3865 BUG();
3866 }
3867
3868 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3869 ffe_ctl->index);
3870 btrfs_release_block_group(block_group, delalloc);
3871}
3872
3873static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
3874 struct btrfs_key *ins)
3875{
3876 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3877
3878 if (!ffe_ctl->use_cluster && last_ptr) {
3879 spin_lock(&last_ptr->lock);
3880 last_ptr->window_start = ins->objectid;
3881 spin_unlock(&last_ptr->lock);
3882 }
3883}
3884
3885static void found_extent(struct find_free_extent_ctl *ffe_ctl,
3886 struct btrfs_key *ins)
3887{
3888 switch (ffe_ctl->policy) {
3889 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3890 found_extent_clustered(ffe_ctl, ins);
3891 break;
3892 case BTRFS_EXTENT_ALLOC_ZONED:
3893
3894 break;
3895 default:
3896 BUG();
3897 }
3898}
3899
3900static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
3901{
3902 switch (ffe_ctl->policy) {
3903 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3904
3905
3906
3907
3908 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3909 return 0;
3910 case BTRFS_EXTENT_ALLOC_ZONED:
3911
3912 return -ENOSPC;
3913 default:
3914 BUG();
3915 }
3916}
3917
3918
3919
3920
3921
3922
3923static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3924 struct btrfs_key *ins,
3925 struct find_free_extent_ctl *ffe_ctl,
3926 bool full_search)
3927{
3928 struct btrfs_root *root = fs_info->extent_root;
3929 int ret;
3930
3931 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3932 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3933 ffe_ctl->orig_have_caching_bg = true;
3934
3935 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3936 ffe_ctl->have_caching_bg)
3937 return 1;
3938
3939 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3940 return 1;
3941
3942 if (ins->objectid) {
3943 found_extent(ffe_ctl, ins);
3944 return 0;
3945 }
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3956 ffe_ctl->index = 0;
3957 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3958
3959
3960
3961
3962
3963 if (ffe_ctl->orig_have_caching_bg || !full_search)
3964 ffe_ctl->loop = LOOP_CACHING_WAIT;
3965 else
3966 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3967 } else {
3968 ffe_ctl->loop++;
3969 }
3970
3971 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3972 struct btrfs_trans_handle *trans;
3973 int exist = 0;
3974
3975 trans = current->journal_info;
3976 if (trans)
3977 exist = 1;
3978 else
3979 trans = btrfs_join_transaction(root);
3980
3981 if (IS_ERR(trans)) {
3982 ret = PTR_ERR(trans);
3983 return ret;
3984 }
3985
3986 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3987 CHUNK_ALLOC_FORCE);
3988
3989
3990 if (ret == -ENOSPC)
3991 ret = chunk_allocation_failed(ffe_ctl);
3992 else if (ret < 0)
3993 btrfs_abort_transaction(trans, ret);
3994 else
3995 ret = 0;
3996 if (!exist)
3997 btrfs_end_transaction(trans);
3998 if (ret)
3999 return ret;
4000 }
4001
4002 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
4003 if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
4004 return -ENOSPC;
4005
4006
4007
4008
4009
4010 if (ffe_ctl->empty_size == 0 &&
4011 ffe_ctl->empty_cluster == 0)
4012 return -ENOSPC;
4013 ffe_ctl->empty_size = 0;
4014 ffe_ctl->empty_cluster = 0;
4015 }
4016 return 1;
4017 }
4018 return -ENOSPC;
4019}
4020
4021static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
4022 struct find_free_extent_ctl *ffe_ctl,
4023 struct btrfs_space_info *space_info,
4024 struct btrfs_key *ins)
4025{
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036 if (space_info->max_extent_size) {
4037 spin_lock(&space_info->lock);
4038 if (space_info->max_extent_size &&
4039 ffe_ctl->num_bytes > space_info->max_extent_size) {
4040 ins->offset = space_info->max_extent_size;
4041 spin_unlock(&space_info->lock);
4042 return -ENOSPC;
4043 } else if (space_info->max_extent_size) {
4044 ffe_ctl->use_cluster = false;
4045 }
4046 spin_unlock(&space_info->lock);
4047 }
4048
4049 ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
4050 &ffe_ctl->empty_cluster);
4051 if (ffe_ctl->last_ptr) {
4052 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
4053
4054 spin_lock(&last_ptr->lock);
4055 if (last_ptr->block_group)
4056 ffe_ctl->hint_byte = last_ptr->window_start;
4057 if (last_ptr->fragmented) {
4058
4059
4060
4061
4062
4063 ffe_ctl->hint_byte = last_ptr->window_start;
4064 ffe_ctl->use_cluster = false;
4065 }
4066 spin_unlock(&last_ptr->lock);
4067 }
4068
4069 return 0;
4070}
4071
4072static int prepare_allocation(struct btrfs_fs_info *fs_info,
4073 struct find_free_extent_ctl *ffe_ctl,
4074 struct btrfs_space_info *space_info,
4075 struct btrfs_key *ins)
4076{
4077 switch (ffe_ctl->policy) {
4078 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4079 return prepare_allocation_clustered(fs_info, ffe_ctl,
4080 space_info, ins);
4081 case BTRFS_EXTENT_ALLOC_ZONED:
4082 if (ffe_ctl->for_treelog) {
4083 spin_lock(&fs_info->treelog_bg_lock);
4084 if (fs_info->treelog_bg)
4085 ffe_ctl->hint_byte = fs_info->treelog_bg;
4086 spin_unlock(&fs_info->treelog_bg_lock);
4087 }
4088 return 0;
4089 default:
4090 BUG();
4091 }
4092}
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119static noinline int find_free_extent(struct btrfs_root *root,
4120 u64 ram_bytes, u64 num_bytes, u64 empty_size,
4121 u64 hint_byte_orig, struct btrfs_key *ins,
4122 u64 flags, int delalloc)
4123{
4124 struct btrfs_fs_info *fs_info = root->fs_info;
4125 int ret = 0;
4126 int cache_block_group_error = 0;
4127 struct btrfs_block_group *block_group = NULL;
4128 struct find_free_extent_ctl ffe_ctl = {0};
4129 struct btrfs_space_info *space_info;
4130 bool full_search = false;
4131 bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4132
4133 WARN_ON(num_bytes < fs_info->sectorsize);
4134
4135 ffe_ctl.num_bytes = num_bytes;
4136 ffe_ctl.empty_size = empty_size;
4137 ffe_ctl.flags = flags;
4138 ffe_ctl.search_start = 0;
4139 ffe_ctl.delalloc = delalloc;
4140 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
4141 ffe_ctl.have_caching_bg = false;
4142 ffe_ctl.orig_have_caching_bg = false;
4143 ffe_ctl.found_offset = 0;
4144 ffe_ctl.hint_byte = hint_byte_orig;
4145 ffe_ctl.for_treelog = for_treelog;
4146 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
4147
4148
4149 ffe_ctl.retry_clustered = false;
4150 ffe_ctl.retry_unclustered = false;
4151 ffe_ctl.last_ptr = NULL;
4152 ffe_ctl.use_cluster = true;
4153
4154 if (btrfs_is_zoned(fs_info))
4155 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_ZONED;
4156
4157 ins->type = BTRFS_EXTENT_ITEM_KEY;
4158 ins->objectid = 0;
4159 ins->offset = 0;
4160
4161 trace_find_free_extent(root, num_bytes, empty_size, flags);
4162
4163 space_info = btrfs_find_space_info(fs_info, flags);
4164 if (!space_info) {
4165 btrfs_err(fs_info, "No space info for %llu", flags);
4166 return -ENOSPC;
4167 }
4168
4169 ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
4170 if (ret < 0)
4171 return ret;
4172
4173 ffe_ctl.search_start = max(ffe_ctl.search_start,
4174 first_logical_byte(fs_info, 0));
4175 ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
4176 if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
4177 block_group = btrfs_lookup_block_group(fs_info,
4178 ffe_ctl.search_start);
4179
4180
4181
4182
4183
4184
4185
4186 if (block_group && block_group_bits(block_group, flags) &&
4187 block_group->cached != BTRFS_CACHE_NO) {
4188 down_read(&space_info->groups_sem);
4189 if (list_empty(&block_group->list) ||
4190 block_group->ro) {
4191
4192
4193
4194
4195
4196
4197 btrfs_put_block_group(block_group);
4198 up_read(&space_info->groups_sem);
4199 } else {
4200 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
4201 block_group->flags);
4202 btrfs_lock_block_group(block_group, delalloc);
4203 goto have_block_group;
4204 }
4205 } else if (block_group) {
4206 btrfs_put_block_group(block_group);
4207 }
4208 }
4209search:
4210 ffe_ctl.have_caching_bg = false;
4211 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
4212 ffe_ctl.index == 0)
4213 full_search = true;
4214 down_read(&space_info->groups_sem);
4215 list_for_each_entry(block_group,
4216 &space_info->block_groups[ffe_ctl.index], list) {
4217 struct btrfs_block_group *bg_ret;
4218
4219
4220 if (unlikely(block_group->ro)) {
4221 if (for_treelog)
4222 btrfs_clear_treelog_bg(block_group);
4223 continue;
4224 }
4225
4226 btrfs_grab_block_group(block_group, delalloc);
4227 ffe_ctl.search_start = block_group->start;
4228
4229
4230
4231
4232
4233
4234 if (!block_group_bits(block_group, flags)) {
4235 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4236 BTRFS_BLOCK_GROUP_RAID1_MASK |
4237 BTRFS_BLOCK_GROUP_RAID56_MASK |
4238 BTRFS_BLOCK_GROUP_RAID10;
4239
4240
4241
4242
4243
4244
4245 if ((flags & extra) && !(block_group->flags & extra))
4246 goto loop;
4247
4248
4249
4250
4251
4252
4253 btrfs_release_block_group(block_group, delalloc);
4254 continue;
4255 }
4256
4257have_block_group:
4258 ffe_ctl.cached = btrfs_block_group_done(block_group);
4259 if (unlikely(!ffe_ctl.cached)) {
4260 ffe_ctl.have_caching_bg = true;
4261 ret = btrfs_cache_block_group(block_group, 0);
4262
4263
4264
4265
4266
4267
4268
4269
4270 if (ret < 0) {
4271 if (!cache_block_group_error)
4272 cache_block_group_error = ret;
4273 ret = 0;
4274 goto loop;
4275 }
4276 ret = 0;
4277 }
4278
4279 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
4280 goto loop;
4281
4282 bg_ret = NULL;
4283 ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
4284 if (ret == 0) {
4285 if (bg_ret && bg_ret != block_group) {
4286 btrfs_release_block_group(block_group, delalloc);
4287 block_group = bg_ret;
4288 }
4289 } else if (ret == -EAGAIN) {
4290 goto have_block_group;
4291 } else if (ret > 0) {
4292 goto loop;
4293 }
4294
4295
4296 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
4297 fs_info->stripesize);
4298
4299
4300 if (ffe_ctl.search_start + num_bytes >
4301 block_group->start + block_group->length) {
4302 btrfs_add_free_space_unused(block_group,
4303 ffe_ctl.found_offset, num_bytes);
4304 goto loop;
4305 }
4306
4307 if (ffe_ctl.found_offset < ffe_ctl.search_start)
4308 btrfs_add_free_space_unused(block_group,
4309 ffe_ctl.found_offset,
4310 ffe_ctl.search_start - ffe_ctl.found_offset);
4311
4312 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
4313 num_bytes, delalloc);
4314 if (ret == -EAGAIN) {
4315 btrfs_add_free_space_unused(block_group,
4316 ffe_ctl.found_offset, num_bytes);
4317 goto loop;
4318 }
4319 btrfs_inc_block_group_reservations(block_group);
4320
4321
4322 ins->objectid = ffe_ctl.search_start;
4323 ins->offset = num_bytes;
4324
4325 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4326 num_bytes);
4327 btrfs_release_block_group(block_group, delalloc);
4328 break;
4329loop:
4330 release_block_group(block_group, &ffe_ctl, delalloc);
4331 cond_resched();
4332 }
4333 up_read(&space_info->groups_sem);
4334
4335 ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
4336 if (ret > 0)
4337 goto search;
4338
4339 if (ret == -ENOSPC && !cache_block_group_error) {
4340
4341
4342
4343
4344 if (!ffe_ctl.max_extent_size)
4345 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4346 spin_lock(&space_info->lock);
4347 space_info->max_extent_size = ffe_ctl.max_extent_size;
4348 spin_unlock(&space_info->lock);
4349 ins->offset = ffe_ctl.max_extent_size;
4350 } else if (ret == -ENOSPC) {
4351 ret = cache_block_group_error;
4352 }
4353 return ret;
4354}
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4402 u64 num_bytes, u64 min_alloc_size,
4403 u64 empty_size, u64 hint_byte,
4404 struct btrfs_key *ins, int is_data, int delalloc)
4405{
4406 struct btrfs_fs_info *fs_info = root->fs_info;
4407 bool final_tried = num_bytes == min_alloc_size;
4408 u64 flags;
4409 int ret;
4410 bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4411
4412 flags = get_alloc_profile_by_root(root, is_data);
4413again:
4414 WARN_ON(num_bytes < fs_info->sectorsize);
4415 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
4416 hint_byte, ins, flags, delalloc);
4417 if (!ret && !is_data) {
4418 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4419 } else if (ret == -ENOSPC) {
4420 if (!final_tried && ins->offset) {
4421 num_bytes = min(num_bytes >> 1, ins->offset);
4422 num_bytes = round_down(num_bytes,
4423 fs_info->sectorsize);
4424 num_bytes = max(num_bytes, min_alloc_size);
4425 ram_bytes = num_bytes;
4426 if (num_bytes == min_alloc_size)
4427 final_tried = true;
4428 goto again;
4429 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4430 struct btrfs_space_info *sinfo;
4431
4432 sinfo = btrfs_find_space_info(fs_info, flags);
4433 btrfs_err(fs_info,
4434 "allocation failed flags %llu, wanted %llu tree-log %d",
4435 flags, num_bytes, for_treelog);
4436 if (sinfo)
4437 btrfs_dump_space_info(fs_info, sinfo,
4438 num_bytes, 1);
4439 }
4440 }
4441
4442 return ret;
4443}
4444
4445int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4446 u64 start, u64 len, int delalloc)
4447{
4448 struct btrfs_block_group *cache;
4449
4450 cache = btrfs_lookup_block_group(fs_info, start);
4451 if (!cache) {
4452 btrfs_err(fs_info, "Unable to find block group for %llu",
4453 start);
4454 return -ENOSPC;
4455 }
4456
4457 btrfs_add_free_space(cache, start, len);
4458 btrfs_free_reserved_bytes(cache, len, delalloc);
4459 trace_btrfs_reserved_extent_free(fs_info, start, len);
4460
4461 btrfs_put_block_group(cache);
4462 return 0;
4463}
4464
4465int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
4466 u64 len)
4467{
4468 struct btrfs_block_group *cache;
4469 int ret = 0;
4470
4471 cache = btrfs_lookup_block_group(trans->fs_info, start);
4472 if (!cache) {
4473 btrfs_err(trans->fs_info, "unable to find block group for %llu",
4474 start);
4475 return -ENOSPC;
4476 }
4477
4478 ret = pin_down_extent(trans, cache, start, len, 1);
4479 btrfs_put_block_group(cache);
4480 return ret;
4481}
4482
4483static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4484 u64 parent, u64 root_objectid,
4485 u64 flags, u64 owner, u64 offset,
4486 struct btrfs_key *ins, int ref_mod)
4487{
4488 struct btrfs_fs_info *fs_info = trans->fs_info;
4489 int ret;
4490 struct btrfs_extent_item *extent_item;
4491 struct btrfs_extent_inline_ref *iref;
4492 struct btrfs_path *path;
4493 struct extent_buffer *leaf;
4494 int type;
4495 u32 size;
4496
4497 if (parent > 0)
4498 type = BTRFS_SHARED_DATA_REF_KEY;
4499 else
4500 type = BTRFS_EXTENT_DATA_REF_KEY;
4501
4502 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4503
4504 path = btrfs_alloc_path();
4505 if (!path)
4506 return -ENOMEM;
4507
4508 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4509 ins, size);
4510 if (ret) {
4511 btrfs_free_path(path);
4512 return ret;
4513 }
4514
4515 leaf = path->nodes[0];
4516 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4517 struct btrfs_extent_item);
4518 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4519 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4520 btrfs_set_extent_flags(leaf, extent_item,
4521 flags | BTRFS_EXTENT_FLAG_DATA);
4522
4523 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4524 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4525 if (parent > 0) {
4526 struct btrfs_shared_data_ref *ref;
4527 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4528 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4529 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4530 } else {
4531 struct btrfs_extent_data_ref *ref;
4532 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4533 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4534 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4535 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4536 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4537 }
4538
4539 btrfs_mark_buffer_dirty(path->nodes[0]);
4540 btrfs_free_path(path);
4541
4542 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4543 if (ret)
4544 return ret;
4545
4546 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4547 if (ret) {
4548 btrfs_err(fs_info, "update block group failed for %llu %llu",
4549 ins->objectid, ins->offset);
4550 BUG();
4551 }
4552 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4553 return ret;
4554}
4555
4556static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4557 struct btrfs_delayed_ref_node *node,
4558 struct btrfs_delayed_extent_op *extent_op)
4559{
4560 struct btrfs_fs_info *fs_info = trans->fs_info;
4561 int ret;
4562 struct btrfs_extent_item *extent_item;
4563 struct btrfs_key extent_key;
4564 struct btrfs_tree_block_info *block_info;
4565 struct btrfs_extent_inline_ref *iref;
4566 struct btrfs_path *path;
4567 struct extent_buffer *leaf;
4568 struct btrfs_delayed_tree_ref *ref;
4569 u32 size = sizeof(*extent_item) + sizeof(*iref);
4570 u64 num_bytes;
4571 u64 flags = extent_op->flags_to_set;
4572 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4573
4574 ref = btrfs_delayed_node_to_tree_ref(node);
4575
4576 extent_key.objectid = node->bytenr;
4577 if (skinny_metadata) {
4578 extent_key.offset = ref->level;
4579 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4580 num_bytes = fs_info->nodesize;
4581 } else {
4582 extent_key.offset = node->num_bytes;
4583 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4584 size += sizeof(*block_info);
4585 num_bytes = node->num_bytes;
4586 }
4587
4588 path = btrfs_alloc_path();
4589 if (!path)
4590 return -ENOMEM;
4591
4592 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4593 &extent_key, size);
4594 if (ret) {
4595 btrfs_free_path(path);
4596 return ret;
4597 }
4598
4599 leaf = path->nodes[0];
4600 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4601 struct btrfs_extent_item);
4602 btrfs_set_extent_refs(leaf, extent_item, 1);
4603 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4604 btrfs_set_extent_flags(leaf, extent_item,
4605 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4606
4607 if (skinny_metadata) {
4608 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4609 } else {
4610 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4611 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4612 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4613 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4614 }
4615
4616 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4617 btrfs_set_extent_inline_ref_type(leaf, iref,
4618 BTRFS_SHARED_BLOCK_REF_KEY);
4619 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4620 } else {
4621 btrfs_set_extent_inline_ref_type(leaf, iref,
4622 BTRFS_TREE_BLOCK_REF_KEY);
4623 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4624 }
4625
4626 btrfs_mark_buffer_dirty(leaf);
4627 btrfs_free_path(path);
4628
4629 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4630 num_bytes);
4631 if (ret)
4632 return ret;
4633
4634 ret = btrfs_update_block_group(trans, extent_key.objectid,
4635 fs_info->nodesize, 1);
4636 if (ret) {
4637 btrfs_err(fs_info, "update block group failed for %llu %llu",
4638 extent_key.objectid, extent_key.offset);
4639 BUG();
4640 }
4641
4642 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4643 fs_info->nodesize);
4644 return ret;
4645}
4646
4647int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4648 struct btrfs_root *root, u64 owner,
4649 u64 offset, u64 ram_bytes,
4650 struct btrfs_key *ins)
4651{
4652 struct btrfs_ref generic_ref = { 0 };
4653
4654 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4655
4656 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4657 ins->objectid, ins->offset, 0);
4658 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4659 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4660
4661 return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
4662}
4663
4664
4665
4666
4667
4668
4669int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4670 u64 root_objectid, u64 owner, u64 offset,
4671 struct btrfs_key *ins)
4672{
4673 struct btrfs_fs_info *fs_info = trans->fs_info;
4674 int ret;
4675 struct btrfs_block_group *block_group;
4676 struct btrfs_space_info *space_info;
4677
4678
4679
4680
4681
4682 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4683 ret = __exclude_logged_extent(fs_info, ins->objectid,
4684 ins->offset);
4685 if (ret)
4686 return ret;
4687 }
4688
4689 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4690 if (!block_group)
4691 return -EINVAL;
4692
4693 space_info = block_group->space_info;
4694 spin_lock(&space_info->lock);
4695 spin_lock(&block_group->lock);
4696 space_info->bytes_reserved += ins->offset;
4697 block_group->reserved += ins->offset;
4698 spin_unlock(&block_group->lock);
4699 spin_unlock(&space_info->lock);
4700
4701 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4702 offset, ins, 1);
4703 if (ret)
4704 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4705 btrfs_put_block_group(block_group);
4706 return ret;
4707}
4708
4709static struct extent_buffer *
4710btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4711 u64 bytenr, int level, u64 owner,
4712 enum btrfs_lock_nesting nest)
4713{
4714 struct btrfs_fs_info *fs_info = root->fs_info;
4715 struct extent_buffer *buf;
4716
4717 buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
4718 if (IS_ERR(buf))
4719 return buf;
4720
4721
4722
4723
4724
4725
4726 if (buf->lock_owner == current->pid) {
4727 btrfs_err_rl(fs_info,
4728"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4729 buf->start, btrfs_header_owner(buf), current->pid);
4730 free_extent_buffer(buf);
4731 return ERR_PTR(-EUCLEAN);
4732 }
4733
4734
4735
4736
4737
4738
4739 btrfs_set_buffer_lockdep_class(owner, buf, level);
4740 __btrfs_tree_lock(buf, nest);
4741 btrfs_clean_tree_block(buf);
4742 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4743 clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
4744
4745 set_extent_buffer_uptodate(buf);
4746
4747 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4748 btrfs_set_header_level(buf, level);
4749 btrfs_set_header_bytenr(buf, buf->start);
4750 btrfs_set_header_generation(buf, trans->transid);
4751 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4752 btrfs_set_header_owner(buf, owner);
4753 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4754 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4755 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4756 buf->log_index = root->log_transid % 2;
4757
4758
4759
4760
4761 if (buf->log_index == 0)
4762 set_extent_dirty(&root->dirty_log_pages, buf->start,
4763 buf->start + buf->len - 1, GFP_NOFS);
4764 else
4765 set_extent_new(&root->dirty_log_pages, buf->start,
4766 buf->start + buf->len - 1);
4767 } else {
4768 buf->log_index = -1;
4769 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4770 buf->start + buf->len - 1, GFP_NOFS);
4771 }
4772
4773 return buf;
4774}
4775
4776
4777
4778
4779
4780struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4781 struct btrfs_root *root,
4782 u64 parent, u64 root_objectid,
4783 const struct btrfs_disk_key *key,
4784 int level, u64 hint,
4785 u64 empty_size,
4786 enum btrfs_lock_nesting nest)
4787{
4788 struct btrfs_fs_info *fs_info = root->fs_info;
4789 struct btrfs_key ins;
4790 struct btrfs_block_rsv *block_rsv;
4791 struct extent_buffer *buf;
4792 struct btrfs_delayed_extent_op *extent_op;
4793 struct btrfs_ref generic_ref = { 0 };
4794 u64 flags = 0;
4795 int ret;
4796 u32 blocksize = fs_info->nodesize;
4797 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4798
4799#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4800 if (btrfs_is_testing(fs_info)) {
4801 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4802 level, root_objectid, nest);
4803 if (!IS_ERR(buf))
4804 root->alloc_bytenr += blocksize;
4805 return buf;
4806 }
4807#endif
4808
4809 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4810 if (IS_ERR(block_rsv))
4811 return ERR_CAST(block_rsv);
4812
4813 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4814 empty_size, hint, &ins, 0, 0);
4815 if (ret)
4816 goto out_unuse;
4817
4818 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4819 root_objectid, nest);
4820 if (IS_ERR(buf)) {
4821 ret = PTR_ERR(buf);
4822 goto out_free_reserved;
4823 }
4824
4825 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4826 if (parent == 0)
4827 parent = ins.objectid;
4828 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4829 } else
4830 BUG_ON(parent > 0);
4831
4832 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4833 extent_op = btrfs_alloc_delayed_extent_op();
4834 if (!extent_op) {
4835 ret = -ENOMEM;
4836 goto out_free_buf;
4837 }
4838 if (key)
4839 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4840 else
4841 memset(&extent_op->key, 0, sizeof(extent_op->key));
4842 extent_op->flags_to_set = flags;
4843 extent_op->update_key = skinny_metadata ? false : true;
4844 extent_op->update_flags = true;
4845 extent_op->is_data = false;
4846 extent_op->level = level;
4847
4848 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4849 ins.objectid, ins.offset, parent);
4850 generic_ref.real_root = root->root_key.objectid;
4851 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4852 btrfs_ref_tree_mod(fs_info, &generic_ref);
4853 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
4854 if (ret)
4855 goto out_free_delayed;
4856 }
4857 return buf;
4858
4859out_free_delayed:
4860 btrfs_free_delayed_extent_op(extent_op);
4861out_free_buf:
4862 btrfs_tree_unlock(buf);
4863 free_extent_buffer(buf);
4864out_free_reserved:
4865 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4866out_unuse:
4867 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4868 return ERR_PTR(ret);
4869}
4870
4871struct walk_control {
4872 u64 refs[BTRFS_MAX_LEVEL];
4873 u64 flags[BTRFS_MAX_LEVEL];
4874 struct btrfs_key update_progress;
4875 struct btrfs_key drop_progress;
4876 int drop_level;
4877 int stage;
4878 int level;
4879 int shared_level;
4880 int update_ref;
4881 int keep_locks;
4882 int reada_slot;
4883 int reada_count;
4884 int restarted;
4885};
4886
4887#define DROP_REFERENCE 1
4888#define UPDATE_BACKREF 2
4889
4890static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4891 struct btrfs_root *root,
4892 struct walk_control *wc,
4893 struct btrfs_path *path)
4894{
4895 struct btrfs_fs_info *fs_info = root->fs_info;
4896 u64 bytenr;
4897 u64 generation;
4898 u64 refs;
4899 u64 flags;
4900 u32 nritems;
4901 struct btrfs_key key;
4902 struct extent_buffer *eb;
4903 int ret;
4904 int slot;
4905 int nread = 0;
4906
4907 if (path->slots[wc->level] < wc->reada_slot) {
4908 wc->reada_count = wc->reada_count * 2 / 3;
4909 wc->reada_count = max(wc->reada_count, 2);
4910 } else {
4911 wc->reada_count = wc->reada_count * 3 / 2;
4912 wc->reada_count = min_t(int, wc->reada_count,
4913 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4914 }
4915
4916 eb = path->nodes[wc->level];
4917 nritems = btrfs_header_nritems(eb);
4918
4919 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4920 if (nread >= wc->reada_count)
4921 break;
4922
4923 cond_resched();
4924 bytenr = btrfs_node_blockptr(eb, slot);
4925 generation = btrfs_node_ptr_generation(eb, slot);
4926
4927 if (slot == path->slots[wc->level])
4928 goto reada;
4929
4930 if (wc->stage == UPDATE_BACKREF &&
4931 generation <= root->root_key.offset)
4932 continue;
4933
4934
4935 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4936 wc->level - 1, 1, &refs,
4937 &flags);
4938
4939 if (ret < 0)
4940 continue;
4941 BUG_ON(refs == 0);
4942
4943 if (wc->stage == DROP_REFERENCE) {
4944 if (refs == 1)
4945 goto reada;
4946
4947 if (wc->level == 1 &&
4948 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4949 continue;
4950 if (!wc->update_ref ||
4951 generation <= root->root_key.offset)
4952 continue;
4953 btrfs_node_key_to_cpu(eb, &key, slot);
4954 ret = btrfs_comp_cpu_keys(&key,
4955 &wc->update_progress);
4956 if (ret < 0)
4957 continue;
4958 } else {
4959 if (wc->level == 1 &&
4960 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4961 continue;
4962 }
4963reada:
4964 btrfs_readahead_node_child(eb, slot);
4965 nread++;
4966 }
4967 wc->reada_slot = slot;
4968}
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4979 struct btrfs_root *root,
4980 struct btrfs_path *path,
4981 struct walk_control *wc, int lookup_info)
4982{
4983 struct btrfs_fs_info *fs_info = root->fs_info;
4984 int level = wc->level;
4985 struct extent_buffer *eb = path->nodes[level];
4986 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4987 int ret;
4988
4989 if (wc->stage == UPDATE_BACKREF &&
4990 btrfs_header_owner(eb) != root->root_key.objectid)
4991 return 1;
4992
4993
4994
4995
4996
4997 if (lookup_info &&
4998 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4999 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
5000 BUG_ON(!path->locks[level]);
5001 ret = btrfs_lookup_extent_info(trans, fs_info,
5002 eb->start, level, 1,
5003 &wc->refs[level],
5004 &wc->flags[level]);
5005 BUG_ON(ret == -ENOMEM);
5006 if (ret)
5007 return ret;
5008 BUG_ON(wc->refs[level] == 0);
5009 }
5010
5011 if (wc->stage == DROP_REFERENCE) {
5012 if (wc->refs[level] > 1)
5013 return 1;
5014
5015 if (path->locks[level] && !wc->keep_locks) {
5016 btrfs_tree_unlock_rw(eb, path->locks[level]);
5017 path->locks[level] = 0;
5018 }
5019 return 0;
5020 }
5021
5022
5023 if (!(wc->flags[level] & flag)) {
5024 BUG_ON(!path->locks[level]);
5025 ret = btrfs_inc_ref(trans, root, eb, 1);
5026 BUG_ON(ret);
5027 ret = btrfs_dec_ref(trans, root, eb, 0);
5028 BUG_ON(ret);
5029 ret = btrfs_set_disk_extent_flags(trans, eb, flag,
5030 btrfs_header_level(eb), 0);
5031 BUG_ON(ret);
5032 wc->flags[level] |= flag;
5033 }
5034
5035
5036
5037
5038
5039 if (path->locks[level] && level > 0) {
5040 btrfs_tree_unlock_rw(eb, path->locks[level]);
5041 path->locks[level] = 0;
5042 }
5043 return 0;
5044}
5045
5046
5047
5048
5049
5050static int check_ref_exists(struct btrfs_trans_handle *trans,
5051 struct btrfs_root *root, u64 bytenr, u64 parent,
5052 int level)
5053{
5054 struct btrfs_path *path;
5055 struct btrfs_extent_inline_ref *iref;
5056 int ret;
5057
5058 path = btrfs_alloc_path();
5059 if (!path)
5060 return -ENOMEM;
5061
5062 ret = lookup_extent_backref(trans, path, &iref, bytenr,
5063 root->fs_info->nodesize, parent,
5064 root->root_key.objectid, level, 0);
5065 btrfs_free_path(path);
5066 if (ret == -ENOENT)
5067 return 0;
5068 if (ret < 0)
5069 return ret;
5070 return 1;
5071}
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5087 struct btrfs_root *root,
5088 struct btrfs_path *path,
5089 struct walk_control *wc, int *lookup_info)
5090{
5091 struct btrfs_fs_info *fs_info = root->fs_info;
5092 u64 bytenr;
5093 u64 generation;
5094 u64 parent;
5095 struct btrfs_key key;
5096 struct btrfs_key first_key;
5097 struct btrfs_ref ref = { 0 };
5098 struct extent_buffer *next;
5099 int level = wc->level;
5100 int reada = 0;
5101 int ret = 0;
5102 bool need_account = false;
5103
5104 generation = btrfs_node_ptr_generation(path->nodes[level],
5105 path->slots[level]);
5106
5107
5108
5109
5110
5111 if (wc->stage == UPDATE_BACKREF &&
5112 generation <= root->root_key.offset) {
5113 *lookup_info = 1;
5114 return 1;
5115 }
5116
5117 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5118 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
5119 path->slots[level]);
5120
5121 next = find_extent_buffer(fs_info, bytenr);
5122 if (!next) {
5123 next = btrfs_find_create_tree_block(fs_info, bytenr,
5124 root->root_key.objectid, level - 1);
5125 if (IS_ERR(next))
5126 return PTR_ERR(next);
5127 reada = 1;
5128 }
5129 btrfs_tree_lock(next);
5130
5131 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
5132 &wc->refs[level - 1],
5133 &wc->flags[level - 1]);
5134 if (ret < 0)
5135 goto out_unlock;
5136
5137 if (unlikely(wc->refs[level - 1] == 0)) {
5138 btrfs_err(fs_info, "Missing references.");
5139 ret = -EIO;
5140 goto out_unlock;
5141 }
5142 *lookup_info = 0;
5143
5144 if (wc->stage == DROP_REFERENCE) {
5145 if (wc->refs[level - 1] > 1) {
5146 need_account = true;
5147 if (level == 1 &&
5148 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5149 goto skip;
5150
5151 if (!wc->update_ref ||
5152 generation <= root->root_key.offset)
5153 goto skip;
5154
5155 btrfs_node_key_to_cpu(path->nodes[level], &key,
5156 path->slots[level]);
5157 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5158 if (ret < 0)
5159 goto skip;
5160
5161 wc->stage = UPDATE_BACKREF;
5162 wc->shared_level = level - 1;
5163 }
5164 } else {
5165 if (level == 1 &&
5166 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5167 goto skip;
5168 }
5169
5170 if (!btrfs_buffer_uptodate(next, generation, 0)) {
5171 btrfs_tree_unlock(next);
5172 free_extent_buffer(next);
5173 next = NULL;
5174 *lookup_info = 1;
5175 }
5176
5177 if (!next) {
5178 if (reada && level == 1)
5179 reada_walk_down(trans, root, wc, path);
5180 next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
5181 generation, level - 1, &first_key);
5182 if (IS_ERR(next)) {
5183 return PTR_ERR(next);
5184 } else if (!extent_buffer_uptodate(next)) {
5185 free_extent_buffer(next);
5186 return -EIO;
5187 }
5188 btrfs_tree_lock(next);
5189 }
5190
5191 level--;
5192 ASSERT(level == btrfs_header_level(next));
5193 if (level != btrfs_header_level(next)) {
5194 btrfs_err(root->fs_info, "mismatched level");
5195 ret = -EIO;
5196 goto out_unlock;
5197 }
5198 path->nodes[level] = next;
5199 path->slots[level] = 0;
5200 path->locks[level] = BTRFS_WRITE_LOCK;
5201 wc->level = level;
5202 if (wc->level == 1)
5203 wc->reada_slot = 0;
5204 return 0;
5205skip:
5206 wc->refs[level - 1] = 0;
5207 wc->flags[level - 1] = 0;
5208 if (wc->stage == DROP_REFERENCE) {
5209 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5210 parent = path->nodes[level]->start;
5211 } else {
5212 ASSERT(root->root_key.objectid ==
5213 btrfs_header_owner(path->nodes[level]));
5214 if (root->root_key.objectid !=
5215 btrfs_header_owner(path->nodes[level])) {
5216 btrfs_err(root->fs_info,
5217 "mismatched block owner");
5218 ret = -EIO;
5219 goto out_unlock;
5220 }
5221 parent = 0;
5222 }
5223
5224
5225
5226
5227
5228
5229
5230 if (wc->restarted) {
5231 ret = check_ref_exists(trans, root, bytenr, parent,
5232 level - 1);
5233 if (ret < 0)
5234 goto out_unlock;
5235 if (ret == 0)
5236 goto no_delete;
5237 ret = 0;
5238 wc->restarted = 0;
5239 }
5240
5241
5242
5243
5244
5245
5246 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
5247 need_account) {
5248 ret = btrfs_qgroup_trace_subtree(trans, next,
5249 generation, level - 1);
5250 if (ret) {
5251 btrfs_err_rl(fs_info,
5252 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
5253 ret);
5254 }
5255 }
5256
5257
5258
5259
5260
5261
5262
5263 wc->drop_level = level;
5264 find_next_key(path, level, &wc->drop_progress);
5265
5266 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5267 fs_info->nodesize, parent);
5268 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
5269 ret = btrfs_free_extent(trans, &ref);
5270 if (ret)
5271 goto out_unlock;
5272 }
5273no_delete:
5274 *lookup_info = 1;
5275 ret = 1;
5276
5277out_unlock:
5278 btrfs_tree_unlock(next);
5279 free_extent_buffer(next);
5280
5281 return ret;
5282}
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5297 struct btrfs_root *root,
5298 struct btrfs_path *path,
5299 struct walk_control *wc)
5300{
5301 struct btrfs_fs_info *fs_info = root->fs_info;
5302 int ret;
5303 int level = wc->level;
5304 struct extent_buffer *eb = path->nodes[level];
5305 u64 parent = 0;
5306
5307 if (wc->stage == UPDATE_BACKREF) {
5308 BUG_ON(wc->shared_level < level);
5309 if (level < wc->shared_level)
5310 goto out;
5311
5312 ret = find_next_key(path, level + 1, &wc->update_progress);
5313 if (ret > 0)
5314 wc->update_ref = 0;
5315
5316 wc->stage = DROP_REFERENCE;
5317 wc->shared_level = -1;
5318 path->slots[level] = 0;
5319
5320
5321
5322
5323
5324
5325 if (!path->locks[level]) {
5326 BUG_ON(level == 0);
5327 btrfs_tree_lock(eb);
5328 path->locks[level] = BTRFS_WRITE_LOCK;
5329
5330 ret = btrfs_lookup_extent_info(trans, fs_info,
5331 eb->start, level, 1,
5332 &wc->refs[level],
5333 &wc->flags[level]);
5334 if (ret < 0) {
5335 btrfs_tree_unlock_rw(eb, path->locks[level]);
5336 path->locks[level] = 0;
5337 return ret;
5338 }
5339 BUG_ON(wc->refs[level] == 0);
5340 if (wc->refs[level] == 1) {
5341 btrfs_tree_unlock_rw(eb, path->locks[level]);
5342 path->locks[level] = 0;
5343 return 1;
5344 }
5345 }
5346 }
5347
5348
5349 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5350
5351 if (wc->refs[level] == 1) {
5352 if (level == 0) {
5353 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5354 ret = btrfs_dec_ref(trans, root, eb, 1);
5355 else
5356 ret = btrfs_dec_ref(trans, root, eb, 0);
5357 BUG_ON(ret);
5358 if (is_fstree(root->root_key.objectid)) {
5359 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5360 if (ret) {
5361 btrfs_err_rl(fs_info,
5362 "error %d accounting leaf items, quota is out of sync, rescan required",
5363 ret);
5364 }
5365 }
5366 }
5367
5368 if (!path->locks[level] &&
5369 btrfs_header_generation(eb) == trans->transid) {
5370 btrfs_tree_lock(eb);
5371 path->locks[level] = BTRFS_WRITE_LOCK;
5372 }
5373 btrfs_clean_tree_block(eb);
5374 }
5375
5376 if (eb == root->node) {
5377 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5378 parent = eb->start;
5379 else if (root->root_key.objectid != btrfs_header_owner(eb))
5380 goto owner_mismatch;
5381 } else {
5382 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5383 parent = path->nodes[level + 1]->start;
5384 else if (root->root_key.objectid !=
5385 btrfs_header_owner(path->nodes[level + 1]))
5386 goto owner_mismatch;
5387 }
5388
5389 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5390out:
5391 wc->refs[level] = 0;
5392 wc->flags[level] = 0;
5393 return 0;
5394
5395owner_mismatch:
5396 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5397 btrfs_header_owner(eb), root->root_key.objectid);
5398 return -EUCLEAN;
5399}
5400
5401static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5402 struct btrfs_root *root,
5403 struct btrfs_path *path,
5404 struct walk_control *wc)
5405{
5406 int level = wc->level;
5407 int lookup_info = 1;
5408 int ret;
5409
5410 while (level >= 0) {
5411 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5412 if (ret > 0)
5413 break;
5414
5415 if (level == 0)
5416 break;
5417
5418 if (path->slots[level] >=
5419 btrfs_header_nritems(path->nodes[level]))
5420 break;
5421
5422 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5423 if (ret > 0) {
5424 path->slots[level]++;
5425 continue;
5426 } else if (ret < 0)
5427 return ret;
5428 level = wc->level;
5429 }
5430 return 0;
5431}
5432
5433static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5434 struct btrfs_root *root,
5435 struct btrfs_path *path,
5436 struct walk_control *wc, int max_level)
5437{
5438 int level = wc->level;
5439 int ret;
5440
5441 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5442 while (level < max_level && path->nodes[level]) {
5443 wc->level = level;
5444 if (path->slots[level] + 1 <
5445 btrfs_header_nritems(path->nodes[level])) {
5446 path->slots[level]++;
5447 return 0;
5448 } else {
5449 ret = walk_up_proc(trans, root, path, wc);
5450 if (ret > 0)
5451 return 0;
5452 if (ret < 0)
5453 return ret;
5454
5455 if (path->locks[level]) {
5456 btrfs_tree_unlock_rw(path->nodes[level],
5457 path->locks[level]);
5458 path->locks[level] = 0;
5459 }
5460 free_extent_buffer(path->nodes[level]);
5461 path->nodes[level] = NULL;
5462 level++;
5463 }
5464 }
5465 return 1;
5466}
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
5482{
5483 struct btrfs_fs_info *fs_info = root->fs_info;
5484 struct btrfs_path *path;
5485 struct btrfs_trans_handle *trans;
5486 struct btrfs_root *tree_root = fs_info->tree_root;
5487 struct btrfs_root_item *root_item = &root->root_item;
5488 struct walk_control *wc;
5489 struct btrfs_key key;
5490 int err = 0;
5491 int ret;
5492 int level;
5493 bool root_dropped = false;
5494
5495 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5496
5497 path = btrfs_alloc_path();
5498 if (!path) {
5499 err = -ENOMEM;
5500 goto out;
5501 }
5502
5503 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5504 if (!wc) {
5505 btrfs_free_path(path);
5506 err = -ENOMEM;
5507 goto out;
5508 }
5509
5510
5511
5512
5513
5514 if (for_reloc)
5515 trans = btrfs_join_transaction(tree_root);
5516 else
5517 trans = btrfs_start_transaction(tree_root, 0);
5518 if (IS_ERR(trans)) {
5519 err = PTR_ERR(trans);
5520 goto out_free;
5521 }
5522
5523 err = btrfs_run_delayed_items(trans);
5524 if (err)
5525 goto out_end_trans;
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535 set_bit(BTRFS_ROOT_DELETING, &root->state);
5536 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5537 level = btrfs_header_level(root->node);
5538 path->nodes[level] = btrfs_lock_root_node(root);
5539 path->slots[level] = 0;
5540 path->locks[level] = BTRFS_WRITE_LOCK;
5541 memset(&wc->update_progress, 0,
5542 sizeof(wc->update_progress));
5543 } else {
5544 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5545 memcpy(&wc->update_progress, &key,
5546 sizeof(wc->update_progress));
5547
5548 level = btrfs_root_drop_level(root_item);
5549 BUG_ON(level == 0);
5550 path->lowest_level = level;
5551 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5552 path->lowest_level = 0;
5553 if (ret < 0) {
5554 err = ret;
5555 goto out_end_trans;
5556 }
5557 WARN_ON(ret > 0);
5558
5559
5560
5561
5562
5563 btrfs_unlock_up_safe(path, 0);
5564
5565 level = btrfs_header_level(root->node);
5566 while (1) {
5567 btrfs_tree_lock(path->nodes[level]);
5568 path->locks[level] = BTRFS_WRITE_LOCK;
5569
5570 ret = btrfs_lookup_extent_info(trans, fs_info,
5571 path->nodes[level]->start,
5572 level, 1, &wc->refs[level],
5573 &wc->flags[level]);
5574 if (ret < 0) {
5575 err = ret;
5576 goto out_end_trans;
5577 }
5578 BUG_ON(wc->refs[level] == 0);
5579
5580 if (level == btrfs_root_drop_level(root_item))
5581 break;
5582
5583 btrfs_tree_unlock(path->nodes[level]);
5584 path->locks[level] = 0;
5585 WARN_ON(wc->refs[level] != 1);
5586 level--;
5587 }
5588 }
5589
5590 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5591 wc->level = level;
5592 wc->shared_level = -1;
5593 wc->stage = DROP_REFERENCE;
5594 wc->update_ref = update_ref;
5595 wc->keep_locks = 0;
5596 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5597
5598 while (1) {
5599
5600 ret = walk_down_tree(trans, root, path, wc);
5601 if (ret < 0) {
5602 err = ret;
5603 break;
5604 }
5605
5606 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5607 if (ret < 0) {
5608 err = ret;
5609 break;
5610 }
5611
5612 if (ret > 0) {
5613 BUG_ON(wc->stage != DROP_REFERENCE);
5614 break;
5615 }
5616
5617 if (wc->stage == DROP_REFERENCE) {
5618 wc->drop_level = wc->level;
5619 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5620 &wc->drop_progress,
5621 path->slots[wc->drop_level]);
5622 }
5623 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5624 &wc->drop_progress);
5625 btrfs_set_root_drop_level(root_item, wc->drop_level);
5626
5627 BUG_ON(wc->level == 0);
5628 if (btrfs_should_end_transaction(trans) ||
5629 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5630 ret = btrfs_update_root(trans, tree_root,
5631 &root->root_key,
5632 root_item);
5633 if (ret) {
5634 btrfs_abort_transaction(trans, ret);
5635 err = ret;
5636 goto out_end_trans;
5637 }
5638
5639 btrfs_end_transaction_throttle(trans);
5640 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5641 btrfs_debug(fs_info,
5642 "drop snapshot early exit");
5643 err = -EAGAIN;
5644 goto out_free;
5645 }
5646
5647
5648
5649
5650
5651
5652 if (for_reloc)
5653 trans = btrfs_join_transaction(tree_root);
5654 else
5655 trans = btrfs_start_transaction(tree_root, 0);
5656 if (IS_ERR(trans)) {
5657 err = PTR_ERR(trans);
5658 goto out_free;
5659 }
5660 }
5661 }
5662 btrfs_release_path(path);
5663 if (err)
5664 goto out_end_trans;
5665
5666 ret = btrfs_del_root(trans, &root->root_key);
5667 if (ret) {
5668 btrfs_abort_transaction(trans, ret);
5669 err = ret;
5670 goto out_end_trans;
5671 }
5672
5673 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5674 ret = btrfs_find_root(tree_root, &root->root_key, path,
5675 NULL, NULL);
5676 if (ret < 0) {
5677 btrfs_abort_transaction(trans, ret);
5678 err = ret;
5679 goto out_end_trans;
5680 } else if (ret > 0) {
5681
5682
5683
5684
5685
5686 btrfs_del_orphan_item(trans, tree_root,
5687 root->root_key.objectid);
5688 }
5689 }
5690
5691
5692
5693
5694
5695
5696 btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
5697 btrfs_qgroup_free_meta_all_pertrans(root);
5698
5699 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
5700 btrfs_add_dropped_root(trans, root);
5701 else
5702 btrfs_put_root(root);
5703 root_dropped = true;
5704out_end_trans:
5705 btrfs_end_transaction_throttle(trans);
5706out_free:
5707 kfree(wc);
5708 btrfs_free_path(path);
5709out:
5710
5711
5712
5713
5714
5715
5716
5717 if (!for_reloc && !root_dropped)
5718 btrfs_add_dead_root(root);
5719 return err;
5720}
5721
5722
5723
5724
5725
5726
5727
5728int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5729 struct btrfs_root *root,
5730 struct extent_buffer *node,
5731 struct extent_buffer *parent)
5732{
5733 struct btrfs_fs_info *fs_info = root->fs_info;
5734 struct btrfs_path *path;
5735 struct walk_control *wc;
5736 int level;
5737 int parent_level;
5738 int ret = 0;
5739 int wret;
5740
5741 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5742
5743 path = btrfs_alloc_path();
5744 if (!path)
5745 return -ENOMEM;
5746
5747 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5748 if (!wc) {
5749 btrfs_free_path(path);
5750 return -ENOMEM;
5751 }
5752
5753 btrfs_assert_tree_locked(parent);
5754 parent_level = btrfs_header_level(parent);
5755 atomic_inc(&parent->refs);
5756 path->nodes[parent_level] = parent;
5757 path->slots[parent_level] = btrfs_header_nritems(parent);
5758
5759 btrfs_assert_tree_locked(node);
5760 level = btrfs_header_level(node);
5761 path->nodes[level] = node;
5762 path->slots[level] = 0;
5763 path->locks[level] = BTRFS_WRITE_LOCK;
5764
5765 wc->refs[parent_level] = 1;
5766 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5767 wc->level = level;
5768 wc->shared_level = -1;
5769 wc->stage = DROP_REFERENCE;
5770 wc->update_ref = 0;
5771 wc->keep_locks = 1;
5772 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5773
5774 while (1) {
5775 wret = walk_down_tree(trans, root, path, wc);
5776 if (wret < 0) {
5777 ret = wret;
5778 break;
5779 }
5780
5781 wret = walk_up_tree(trans, root, path, wc, parent_level);
5782 if (wret < 0)
5783 ret = wret;
5784 if (wret != 0)
5785 break;
5786 }
5787
5788 kfree(wc);
5789 btrfs_free_path(path);
5790 return ret;
5791}
5792
5793
5794
5795
5796
5797u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5798{
5799 struct btrfs_block_group *block_group;
5800 u64 free_bytes = 0;
5801 int factor;
5802
5803
5804 if (list_empty(&sinfo->ro_bgs))
5805 return 0;
5806
5807 spin_lock(&sinfo->lock);
5808 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5809 spin_lock(&block_group->lock);
5810
5811 if (!block_group->ro) {
5812 spin_unlock(&block_group->lock);
5813 continue;
5814 }
5815
5816 factor = btrfs_bg_type_to_factor(block_group->flags);
5817 free_bytes += (block_group->length -
5818 block_group->used) * factor;
5819
5820 spin_unlock(&block_group->lock);
5821 }
5822 spin_unlock(&sinfo->lock);
5823
5824 return free_bytes;
5825}
5826
5827int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5828 u64 start, u64 end)
5829{
5830 return unpin_extent_range(fs_info, start, end, false);
5831}
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
5852
5853static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5854{
5855 u64 start = SZ_1M, len = 0, end = 0;
5856 int ret;
5857
5858 *trimmed = 0;
5859
5860
5861 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5862 return 0;
5863
5864
5865 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5866 return 0;
5867
5868
5869 if (device->total_bytes <= device->bytes_used)
5870 return 0;
5871
5872 ret = 0;
5873
5874 while (1) {
5875 struct btrfs_fs_info *fs_info = device->fs_info;
5876 u64 bytes;
5877
5878 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5879 if (ret)
5880 break;
5881
5882 find_first_clear_extent_bit(&device->alloc_state, start,
5883 &start, &end,
5884 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5885
5886
5887 if (start > device->total_bytes) {
5888 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
5889 btrfs_warn_in_rcu(fs_info,
5890"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
5891 start, end - start + 1,
5892 rcu_str_deref(device->name),
5893 device->total_bytes);
5894 mutex_unlock(&fs_info->chunk_mutex);
5895 ret = 0;
5896 break;
5897 }
5898
5899
5900 start = max_t(u64, start, SZ_1M);
5901
5902
5903
5904
5905
5906
5907 end = min(end, device->total_bytes - 1);
5908
5909 len = end - start + 1;
5910
5911
5912 if (!len) {
5913 mutex_unlock(&fs_info->chunk_mutex);
5914 ret = 0;
5915 break;
5916 }
5917
5918 ret = btrfs_issue_discard(device->bdev, start, len,
5919 &bytes);
5920 if (!ret)
5921 set_extent_bits(&device->alloc_state, start,
5922 start + bytes - 1,
5923 CHUNK_TRIMMED);
5924 mutex_unlock(&fs_info->chunk_mutex);
5925
5926 if (ret)
5927 break;
5928
5929 start += len;
5930 *trimmed += bytes;
5931
5932 if (fatal_signal_pending(current)) {
5933 ret = -ERESTARTSYS;
5934 break;
5935 }
5936
5937 cond_resched();
5938 }
5939
5940 return ret;
5941}
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5953{
5954 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
5955 struct btrfs_block_group *cache = NULL;
5956 struct btrfs_device *device;
5957 u64 group_trimmed;
5958 u64 range_end = U64_MAX;
5959 u64 start;
5960 u64 end;
5961 u64 trimmed = 0;
5962 u64 bg_failed = 0;
5963 u64 dev_failed = 0;
5964 int bg_ret = 0;
5965 int dev_ret = 0;
5966 int ret = 0;
5967
5968
5969
5970
5971
5972 if (range->len != U64_MAX &&
5973 check_add_overflow(range->start, range->len, &range_end))
5974 return -EINVAL;
5975
5976 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5977 for (; cache; cache = btrfs_next_block_group(cache)) {
5978 if (cache->start >= range_end) {
5979 btrfs_put_block_group(cache);
5980 break;
5981 }
5982
5983 start = max(range->start, cache->start);
5984 end = min(range_end, cache->start + cache->length);
5985
5986 if (end - start >= range->minlen) {
5987 if (!btrfs_block_group_done(cache)) {
5988 ret = btrfs_cache_block_group(cache, 0);
5989 if (ret) {
5990 bg_failed++;
5991 bg_ret = ret;
5992 continue;
5993 }
5994 ret = btrfs_wait_block_group_cache_done(cache);
5995 if (ret) {
5996 bg_failed++;
5997 bg_ret = ret;
5998 continue;
5999 }
6000 }
6001 ret = btrfs_trim_block_group(cache,
6002 &group_trimmed,
6003 start,
6004 end,
6005 range->minlen);
6006
6007 trimmed += group_trimmed;
6008 if (ret) {
6009 bg_failed++;
6010 bg_ret = ret;
6011 continue;
6012 }
6013 }
6014 }
6015
6016 if (bg_failed)
6017 btrfs_warn(fs_info,
6018 "failed to trim %llu block group(s), last error %d",
6019 bg_failed, bg_ret);
6020
6021 mutex_lock(&fs_devices->device_list_mutex);
6022 list_for_each_entry(device, &fs_devices->devices, dev_list) {
6023 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
6024 continue;
6025
6026 ret = btrfs_trim_free_extents(device, &group_trimmed);
6027 if (ret) {
6028 dev_failed++;
6029 dev_ret = ret;
6030 break;
6031 }
6032
6033 trimmed += group_trimmed;
6034 }
6035 mutex_unlock(&fs_devices->device_list_mutex);
6036
6037 if (dev_failed)
6038 btrfs_warn(fs_info,
6039 "failed to trim %llu device(s), last error %d",
6040 dev_failed, dev_ret);
6041 range->len = trimmed;
6042 if (bg_ret)
6043 return bg_ret;
6044 return dev_ret;
6045}
6046