1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35#include "discard.h"
36
37#undef SCRAMBLE_DELAYED_REFS
38
39
40static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
41 struct btrfs_delayed_ref_node *node, u64 parent,
42 u64 root_objectid, u64 owner_objectid,
43 u64 owner_offset, int refs_to_drop,
44 struct btrfs_delayed_extent_op *extra_op);
45static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
46 struct extent_buffer *leaf,
47 struct btrfs_extent_item *ei);
48static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
49 u64 parent, u64 root_objectid,
50 u64 flags, u64 owner, u64 offset,
51 struct btrfs_key *ins, int ref_mod);
52static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
53 struct btrfs_delayed_ref_node *node,
54 struct btrfs_delayed_extent_op *extent_op);
55static int find_next_key(struct btrfs_path *path, int level,
56 struct btrfs_key *key);
57
58static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
59{
60 return (cache->flags & bits) == bits;
61}
62
63int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
64 u64 start, u64 num_bytes)
65{
66 u64 end = start + num_bytes - 1;
67 set_extent_bits(&fs_info->freed_extents[0],
68 start, end, EXTENT_UPTODATE);
69 set_extent_bits(&fs_info->freed_extents[1],
70 start, end, EXTENT_UPTODATE);
71 return 0;
72}
73
74void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
75{
76 struct btrfs_fs_info *fs_info = cache->fs_info;
77 u64 start, end;
78
79 start = cache->start;
80 end = start + cache->length - 1;
81
82 clear_extent_bits(&fs_info->freed_extents[0],
83 start, end, EXTENT_UPTODATE);
84 clear_extent_bits(&fs_info->freed_extents[1],
85 start, end, EXTENT_UPTODATE);
86}
87
88static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
89{
90 if (ref->type == BTRFS_REF_METADATA) {
91 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
92 return BTRFS_BLOCK_GROUP_SYSTEM;
93 else
94 return BTRFS_BLOCK_GROUP_METADATA;
95 }
96 return BTRFS_BLOCK_GROUP_DATA;
97}
98
99static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
100 struct btrfs_ref *ref)
101{
102 struct btrfs_space_info *space_info;
103 u64 flags = generic_ref_to_space_flags(ref);
104
105 space_info = btrfs_find_space_info(fs_info, flags);
106 ASSERT(space_info);
107 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
108 BTRFS_TOTAL_BYTES_PINNED_BATCH);
109}
110
111static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
112 struct btrfs_ref *ref)
113{
114 struct btrfs_space_info *space_info;
115 u64 flags = generic_ref_to_space_flags(ref);
116
117 space_info = btrfs_find_space_info(fs_info, flags);
118 ASSERT(space_info);
119 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
120 BTRFS_TOTAL_BYTES_PINNED_BATCH);
121}
122
123
124int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
125{
126 int ret;
127 struct btrfs_key key;
128 struct btrfs_path *path;
129
130 path = btrfs_alloc_path();
131 if (!path)
132 return -ENOMEM;
133
134 key.objectid = start;
135 key.offset = len;
136 key.type = BTRFS_EXTENT_ITEM_KEY;
137 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
138 btrfs_free_path(path);
139 return ret;
140}
141
142
143
144
145
146
147
148
149
150
151int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
152 struct btrfs_fs_info *fs_info, u64 bytenr,
153 u64 offset, int metadata, u64 *refs, u64 *flags)
154{
155 struct btrfs_delayed_ref_head *head;
156 struct btrfs_delayed_ref_root *delayed_refs;
157 struct btrfs_path *path;
158 struct btrfs_extent_item *ei;
159 struct extent_buffer *leaf;
160 struct btrfs_key key;
161 u32 item_size;
162 u64 num_refs;
163 u64 extent_flags;
164 int ret;
165
166
167
168
169
170 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
171 offset = fs_info->nodesize;
172 metadata = 0;
173 }
174
175 path = btrfs_alloc_path();
176 if (!path)
177 return -ENOMEM;
178
179 if (!trans) {
180 path->skip_locking = 1;
181 path->search_commit_root = 1;
182 }
183
184search_again:
185 key.objectid = bytenr;
186 key.offset = offset;
187 if (metadata)
188 key.type = BTRFS_METADATA_ITEM_KEY;
189 else
190 key.type = BTRFS_EXTENT_ITEM_KEY;
191
192 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
193 if (ret < 0)
194 goto out_free;
195
196 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
197 if (path->slots[0]) {
198 path->slots[0]--;
199 btrfs_item_key_to_cpu(path->nodes[0], &key,
200 path->slots[0]);
201 if (key.objectid == bytenr &&
202 key.type == BTRFS_EXTENT_ITEM_KEY &&
203 key.offset == fs_info->nodesize)
204 ret = 0;
205 }
206 }
207
208 if (ret == 0) {
209 leaf = path->nodes[0];
210 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
211 if (item_size >= sizeof(*ei)) {
212 ei = btrfs_item_ptr(leaf, path->slots[0],
213 struct btrfs_extent_item);
214 num_refs = btrfs_extent_refs(leaf, ei);
215 extent_flags = btrfs_extent_flags(leaf, ei);
216 } else {
217 ret = -EINVAL;
218 btrfs_print_v0_err(fs_info);
219 if (trans)
220 btrfs_abort_transaction(trans, ret);
221 else
222 btrfs_handle_fs_error(fs_info, ret, NULL);
223
224 goto out_free;
225 }
226
227 BUG_ON(num_refs == 0);
228 } else {
229 num_refs = 0;
230 extent_flags = 0;
231 ret = 0;
232 }
233
234 if (!trans)
235 goto out;
236
237 delayed_refs = &trans->transaction->delayed_refs;
238 spin_lock(&delayed_refs->lock);
239 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
240 if (head) {
241 if (!mutex_trylock(&head->mutex)) {
242 refcount_inc(&head->refs);
243 spin_unlock(&delayed_refs->lock);
244
245 btrfs_release_path(path);
246
247
248
249
250
251 mutex_lock(&head->mutex);
252 mutex_unlock(&head->mutex);
253 btrfs_put_delayed_ref_head(head);
254 goto search_again;
255 }
256 spin_lock(&head->lock);
257 if (head->extent_op && head->extent_op->update_flags)
258 extent_flags |= head->extent_op->flags_to_set;
259 else
260 BUG_ON(num_refs == 0);
261
262 num_refs += head->ref_mod;
263 spin_unlock(&head->lock);
264 mutex_unlock(&head->mutex);
265 }
266 spin_unlock(&delayed_refs->lock);
267out:
268 WARN_ON(num_refs == 0);
269 if (refs)
270 *refs = num_refs;
271 if (flags)
272 *flags = extent_flags;
273out_free:
274 btrfs_free_path(path);
275 return ret;
276}
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
390 struct btrfs_extent_inline_ref *iref,
391 enum btrfs_inline_ref_type is_data)
392{
393 int type = btrfs_extent_inline_ref_type(eb, iref);
394 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
395
396 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
397 type == BTRFS_SHARED_BLOCK_REF_KEY ||
398 type == BTRFS_SHARED_DATA_REF_KEY ||
399 type == BTRFS_EXTENT_DATA_REF_KEY) {
400 if (is_data == BTRFS_REF_TYPE_BLOCK) {
401 if (type == BTRFS_TREE_BLOCK_REF_KEY)
402 return type;
403 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
404 ASSERT(eb->fs_info);
405
406
407
408
409
410 if (offset &&
411 IS_ALIGNED(offset, eb->fs_info->nodesize))
412 return type;
413 }
414 } else if (is_data == BTRFS_REF_TYPE_DATA) {
415 if (type == BTRFS_EXTENT_DATA_REF_KEY)
416 return type;
417 if (type == BTRFS_SHARED_DATA_REF_KEY) {
418 ASSERT(eb->fs_info);
419
420
421
422
423
424 if (offset &&
425 IS_ALIGNED(offset, eb->fs_info->nodesize))
426 return type;
427 }
428 } else {
429 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
430 return type;
431 }
432 }
433
434 btrfs_print_leaf((struct extent_buffer *)eb);
435 btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
436 eb->start, type);
437 WARN_ON(1);
438
439 return BTRFS_REF_TYPE_INVALID;
440}
441
442u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
443{
444 u32 high_crc = ~(u32)0;
445 u32 low_crc = ~(u32)0;
446 __le64 lenum;
447
448 lenum = cpu_to_le64(root_objectid);
449 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
450 lenum = cpu_to_le64(owner);
451 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
452 lenum = cpu_to_le64(offset);
453 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
454
455 return ((u64)high_crc << 31) ^ (u64)low_crc;
456}
457
458static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
459 struct btrfs_extent_data_ref *ref)
460{
461 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
462 btrfs_extent_data_ref_objectid(leaf, ref),
463 btrfs_extent_data_ref_offset(leaf, ref));
464}
465
466static int match_extent_data_ref(struct extent_buffer *leaf,
467 struct btrfs_extent_data_ref *ref,
468 u64 root_objectid, u64 owner, u64 offset)
469{
470 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
471 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
472 btrfs_extent_data_ref_offset(leaf, ref) != offset)
473 return 0;
474 return 1;
475}
476
477static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
478 struct btrfs_path *path,
479 u64 bytenr, u64 parent,
480 u64 root_objectid,
481 u64 owner, u64 offset)
482{
483 struct btrfs_root *root = trans->fs_info->extent_root;
484 struct btrfs_key key;
485 struct btrfs_extent_data_ref *ref;
486 struct extent_buffer *leaf;
487 u32 nritems;
488 int ret;
489 int recow;
490 int err = -ENOENT;
491
492 key.objectid = bytenr;
493 if (parent) {
494 key.type = BTRFS_SHARED_DATA_REF_KEY;
495 key.offset = parent;
496 } else {
497 key.type = BTRFS_EXTENT_DATA_REF_KEY;
498 key.offset = hash_extent_data_ref(root_objectid,
499 owner, offset);
500 }
501again:
502 recow = 0;
503 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
504 if (ret < 0) {
505 err = ret;
506 goto fail;
507 }
508
509 if (parent) {
510 if (!ret)
511 return 0;
512 goto fail;
513 }
514
515 leaf = path->nodes[0];
516 nritems = btrfs_header_nritems(leaf);
517 while (1) {
518 if (path->slots[0] >= nritems) {
519 ret = btrfs_next_leaf(root, path);
520 if (ret < 0)
521 err = ret;
522 if (ret)
523 goto fail;
524
525 leaf = path->nodes[0];
526 nritems = btrfs_header_nritems(leaf);
527 recow = 1;
528 }
529
530 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
531 if (key.objectid != bytenr ||
532 key.type != BTRFS_EXTENT_DATA_REF_KEY)
533 goto fail;
534
535 ref = btrfs_item_ptr(leaf, path->slots[0],
536 struct btrfs_extent_data_ref);
537
538 if (match_extent_data_ref(leaf, ref, root_objectid,
539 owner, offset)) {
540 if (recow) {
541 btrfs_release_path(path);
542 goto again;
543 }
544 err = 0;
545 break;
546 }
547 path->slots[0]++;
548 }
549fail:
550 return err;
551}
552
553static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
554 struct btrfs_path *path,
555 u64 bytenr, u64 parent,
556 u64 root_objectid, u64 owner,
557 u64 offset, int refs_to_add)
558{
559 struct btrfs_root *root = trans->fs_info->extent_root;
560 struct btrfs_key key;
561 struct extent_buffer *leaf;
562 u32 size;
563 u32 num_refs;
564 int ret;
565
566 key.objectid = bytenr;
567 if (parent) {
568 key.type = BTRFS_SHARED_DATA_REF_KEY;
569 key.offset = parent;
570 size = sizeof(struct btrfs_shared_data_ref);
571 } else {
572 key.type = BTRFS_EXTENT_DATA_REF_KEY;
573 key.offset = hash_extent_data_ref(root_objectid,
574 owner, offset);
575 size = sizeof(struct btrfs_extent_data_ref);
576 }
577
578 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
579 if (ret && ret != -EEXIST)
580 goto fail;
581
582 leaf = path->nodes[0];
583 if (parent) {
584 struct btrfs_shared_data_ref *ref;
585 ref = btrfs_item_ptr(leaf, path->slots[0],
586 struct btrfs_shared_data_ref);
587 if (ret == 0) {
588 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
589 } else {
590 num_refs = btrfs_shared_data_ref_count(leaf, ref);
591 num_refs += refs_to_add;
592 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
593 }
594 } else {
595 struct btrfs_extent_data_ref *ref;
596 while (ret == -EEXIST) {
597 ref = btrfs_item_ptr(leaf, path->slots[0],
598 struct btrfs_extent_data_ref);
599 if (match_extent_data_ref(leaf, ref, root_objectid,
600 owner, offset))
601 break;
602 btrfs_release_path(path);
603 key.offset++;
604 ret = btrfs_insert_empty_item(trans, root, path, &key,
605 size);
606 if (ret && ret != -EEXIST)
607 goto fail;
608
609 leaf = path->nodes[0];
610 }
611 ref = btrfs_item_ptr(leaf, path->slots[0],
612 struct btrfs_extent_data_ref);
613 if (ret == 0) {
614 btrfs_set_extent_data_ref_root(leaf, ref,
615 root_objectid);
616 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
617 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
618 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
619 } else {
620 num_refs = btrfs_extent_data_ref_count(leaf, ref);
621 num_refs += refs_to_add;
622 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
623 }
624 }
625 btrfs_mark_buffer_dirty(leaf);
626 ret = 0;
627fail:
628 btrfs_release_path(path);
629 return ret;
630}
631
632static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
633 struct btrfs_path *path,
634 int refs_to_drop, int *last_ref)
635{
636 struct btrfs_key key;
637 struct btrfs_extent_data_ref *ref1 = NULL;
638 struct btrfs_shared_data_ref *ref2 = NULL;
639 struct extent_buffer *leaf;
640 u32 num_refs = 0;
641 int ret = 0;
642
643 leaf = path->nodes[0];
644 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
645
646 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
647 ref1 = btrfs_item_ptr(leaf, path->slots[0],
648 struct btrfs_extent_data_ref);
649 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
650 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
651 ref2 = btrfs_item_ptr(leaf, path->slots[0],
652 struct btrfs_shared_data_ref);
653 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
654 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
655 btrfs_print_v0_err(trans->fs_info);
656 btrfs_abort_transaction(trans, -EINVAL);
657 return -EINVAL;
658 } else {
659 BUG();
660 }
661
662 BUG_ON(num_refs < refs_to_drop);
663 num_refs -= refs_to_drop;
664
665 if (num_refs == 0) {
666 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
667 *last_ref = 1;
668 } else {
669 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
670 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
671 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
672 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
673 btrfs_mark_buffer_dirty(leaf);
674 }
675 return ret;
676}
677
678static noinline u32 extent_data_ref_count(struct btrfs_path *path,
679 struct btrfs_extent_inline_ref *iref)
680{
681 struct btrfs_key key;
682 struct extent_buffer *leaf;
683 struct btrfs_extent_data_ref *ref1;
684 struct btrfs_shared_data_ref *ref2;
685 u32 num_refs = 0;
686 int type;
687
688 leaf = path->nodes[0];
689 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
690
691 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
692 if (iref) {
693
694
695
696
697 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
698 ASSERT(type != BTRFS_REF_TYPE_INVALID);
699 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
700 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
701 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
702 } else {
703 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
704 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
705 }
706 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
707 ref1 = btrfs_item_ptr(leaf, path->slots[0],
708 struct btrfs_extent_data_ref);
709 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
710 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
711 ref2 = btrfs_item_ptr(leaf, path->slots[0],
712 struct btrfs_shared_data_ref);
713 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
714 } else {
715 WARN_ON(1);
716 }
717 return num_refs;
718}
719
720static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
721 struct btrfs_path *path,
722 u64 bytenr, u64 parent,
723 u64 root_objectid)
724{
725 struct btrfs_root *root = trans->fs_info->extent_root;
726 struct btrfs_key key;
727 int ret;
728
729 key.objectid = bytenr;
730 if (parent) {
731 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
732 key.offset = parent;
733 } else {
734 key.type = BTRFS_TREE_BLOCK_REF_KEY;
735 key.offset = root_objectid;
736 }
737
738 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
739 if (ret > 0)
740 ret = -ENOENT;
741 return ret;
742}
743
744static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
745 struct btrfs_path *path,
746 u64 bytenr, u64 parent,
747 u64 root_objectid)
748{
749 struct btrfs_key key;
750 int ret;
751
752 key.objectid = bytenr;
753 if (parent) {
754 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
755 key.offset = parent;
756 } else {
757 key.type = BTRFS_TREE_BLOCK_REF_KEY;
758 key.offset = root_objectid;
759 }
760
761 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
762 path, &key, 0);
763 btrfs_release_path(path);
764 return ret;
765}
766
767static inline int extent_ref_type(u64 parent, u64 owner)
768{
769 int type;
770 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
771 if (parent > 0)
772 type = BTRFS_SHARED_BLOCK_REF_KEY;
773 else
774 type = BTRFS_TREE_BLOCK_REF_KEY;
775 } else {
776 if (parent > 0)
777 type = BTRFS_SHARED_DATA_REF_KEY;
778 else
779 type = BTRFS_EXTENT_DATA_REF_KEY;
780 }
781 return type;
782}
783
784static int find_next_key(struct btrfs_path *path, int level,
785 struct btrfs_key *key)
786
787{
788 for (; level < BTRFS_MAX_LEVEL; level++) {
789 if (!path->nodes[level])
790 break;
791 if (path->slots[level] + 1 >=
792 btrfs_header_nritems(path->nodes[level]))
793 continue;
794 if (level == 0)
795 btrfs_item_key_to_cpu(path->nodes[level], key,
796 path->slots[level] + 1);
797 else
798 btrfs_node_key_to_cpu(path->nodes[level], key,
799 path->slots[level] + 1);
800 return 0;
801 }
802 return 1;
803}
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818static noinline_for_stack
819int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
820 struct btrfs_path *path,
821 struct btrfs_extent_inline_ref **ref_ret,
822 u64 bytenr, u64 num_bytes,
823 u64 parent, u64 root_objectid,
824 u64 owner, u64 offset, int insert)
825{
826 struct btrfs_fs_info *fs_info = trans->fs_info;
827 struct btrfs_root *root = fs_info->extent_root;
828 struct btrfs_key key;
829 struct extent_buffer *leaf;
830 struct btrfs_extent_item *ei;
831 struct btrfs_extent_inline_ref *iref;
832 u64 flags;
833 u64 item_size;
834 unsigned long ptr;
835 unsigned long end;
836 int extra_size;
837 int type;
838 int want;
839 int ret;
840 int err = 0;
841 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
842 int needed;
843
844 key.objectid = bytenr;
845 key.type = BTRFS_EXTENT_ITEM_KEY;
846 key.offset = num_bytes;
847
848 want = extent_ref_type(parent, owner);
849 if (insert) {
850 extra_size = btrfs_extent_inline_ref_size(want);
851 path->keep_locks = 1;
852 } else
853 extra_size = -1;
854
855
856
857
858
859 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
860 key.type = BTRFS_METADATA_ITEM_KEY;
861 key.offset = owner;
862 }
863
864again:
865 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
866 if (ret < 0) {
867 err = ret;
868 goto out;
869 }
870
871
872
873
874
875 if (ret > 0 && skinny_metadata) {
876 skinny_metadata = false;
877 if (path->slots[0]) {
878 path->slots[0]--;
879 btrfs_item_key_to_cpu(path->nodes[0], &key,
880 path->slots[0]);
881 if (key.objectid == bytenr &&
882 key.type == BTRFS_EXTENT_ITEM_KEY &&
883 key.offset == num_bytes)
884 ret = 0;
885 }
886 if (ret) {
887 key.objectid = bytenr;
888 key.type = BTRFS_EXTENT_ITEM_KEY;
889 key.offset = num_bytes;
890 btrfs_release_path(path);
891 goto again;
892 }
893 }
894
895 if (ret && !insert) {
896 err = -ENOENT;
897 goto out;
898 } else if (WARN_ON(ret)) {
899 err = -EIO;
900 goto out;
901 }
902
903 leaf = path->nodes[0];
904 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
905 if (unlikely(item_size < sizeof(*ei))) {
906 err = -EINVAL;
907 btrfs_print_v0_err(fs_info);
908 btrfs_abort_transaction(trans, err);
909 goto out;
910 }
911
912 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
913 flags = btrfs_extent_flags(leaf, ei);
914
915 ptr = (unsigned long)(ei + 1);
916 end = (unsigned long)ei + item_size;
917
918 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
919 ptr += sizeof(struct btrfs_tree_block_info);
920 BUG_ON(ptr > end);
921 }
922
923 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
924 needed = BTRFS_REF_TYPE_DATA;
925 else
926 needed = BTRFS_REF_TYPE_BLOCK;
927
928 err = -ENOENT;
929 while (1) {
930 if (ptr >= end) {
931 WARN_ON(ptr > end);
932 break;
933 }
934 iref = (struct btrfs_extent_inline_ref *)ptr;
935 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
936 if (type == BTRFS_REF_TYPE_INVALID) {
937 err = -EUCLEAN;
938 goto out;
939 }
940
941 if (want < type)
942 break;
943 if (want > type) {
944 ptr += btrfs_extent_inline_ref_size(type);
945 continue;
946 }
947
948 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
949 struct btrfs_extent_data_ref *dref;
950 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
951 if (match_extent_data_ref(leaf, dref, root_objectid,
952 owner, offset)) {
953 err = 0;
954 break;
955 }
956 if (hash_extent_data_ref_item(leaf, dref) <
957 hash_extent_data_ref(root_objectid, owner, offset))
958 break;
959 } else {
960 u64 ref_offset;
961 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
962 if (parent > 0) {
963 if (parent == ref_offset) {
964 err = 0;
965 break;
966 }
967 if (ref_offset < parent)
968 break;
969 } else {
970 if (root_objectid == ref_offset) {
971 err = 0;
972 break;
973 }
974 if (ref_offset < root_objectid)
975 break;
976 }
977 }
978 ptr += btrfs_extent_inline_ref_size(type);
979 }
980 if (err == -ENOENT && insert) {
981 if (item_size + extra_size >=
982 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
983 err = -EAGAIN;
984 goto out;
985 }
986
987
988
989
990
991
992 if (find_next_key(path, 0, &key) == 0 &&
993 key.objectid == bytenr &&
994 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
995 err = -EAGAIN;
996 goto out;
997 }
998 }
999 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1000out:
1001 if (insert) {
1002 path->keep_locks = 0;
1003 btrfs_unlock_up_safe(path, 1);
1004 }
1005 return err;
1006}
1007
1008
1009
1010
1011static noinline_for_stack
1012void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1013 struct btrfs_path *path,
1014 struct btrfs_extent_inline_ref *iref,
1015 u64 parent, u64 root_objectid,
1016 u64 owner, u64 offset, int refs_to_add,
1017 struct btrfs_delayed_extent_op *extent_op)
1018{
1019 struct extent_buffer *leaf;
1020 struct btrfs_extent_item *ei;
1021 unsigned long ptr;
1022 unsigned long end;
1023 unsigned long item_offset;
1024 u64 refs;
1025 int size;
1026 int type;
1027
1028 leaf = path->nodes[0];
1029 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1030 item_offset = (unsigned long)iref - (unsigned long)ei;
1031
1032 type = extent_ref_type(parent, owner);
1033 size = btrfs_extent_inline_ref_size(type);
1034
1035 btrfs_extend_item(path, size);
1036
1037 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1038 refs = btrfs_extent_refs(leaf, ei);
1039 refs += refs_to_add;
1040 btrfs_set_extent_refs(leaf, ei, refs);
1041 if (extent_op)
1042 __run_delayed_extent_op(extent_op, leaf, ei);
1043
1044 ptr = (unsigned long)ei + item_offset;
1045 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1046 if (ptr < end - size)
1047 memmove_extent_buffer(leaf, ptr + size, ptr,
1048 end - size - ptr);
1049
1050 iref = (struct btrfs_extent_inline_ref *)ptr;
1051 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1052 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1053 struct btrfs_extent_data_ref *dref;
1054 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1055 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1056 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1057 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1058 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1059 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1060 struct btrfs_shared_data_ref *sref;
1061 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1062 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1063 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1064 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1065 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1066 } else {
1067 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1068 }
1069 btrfs_mark_buffer_dirty(leaf);
1070}
1071
1072static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1073 struct btrfs_path *path,
1074 struct btrfs_extent_inline_ref **ref_ret,
1075 u64 bytenr, u64 num_bytes, u64 parent,
1076 u64 root_objectid, u64 owner, u64 offset)
1077{
1078 int ret;
1079
1080 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1081 num_bytes, parent, root_objectid,
1082 owner, offset, 0);
1083 if (ret != -ENOENT)
1084 return ret;
1085
1086 btrfs_release_path(path);
1087 *ref_ret = NULL;
1088
1089 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1090 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1091 root_objectid);
1092 } else {
1093 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1094 root_objectid, owner, offset);
1095 }
1096 return ret;
1097}
1098
1099
1100
1101
1102static noinline_for_stack
1103void update_inline_extent_backref(struct btrfs_path *path,
1104 struct btrfs_extent_inline_ref *iref,
1105 int refs_to_mod,
1106 struct btrfs_delayed_extent_op *extent_op,
1107 int *last_ref)
1108{
1109 struct extent_buffer *leaf = path->nodes[0];
1110 struct btrfs_extent_item *ei;
1111 struct btrfs_extent_data_ref *dref = NULL;
1112 struct btrfs_shared_data_ref *sref = NULL;
1113 unsigned long ptr;
1114 unsigned long end;
1115 u32 item_size;
1116 int size;
1117 int type;
1118 u64 refs;
1119
1120 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1121 refs = btrfs_extent_refs(leaf, ei);
1122 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1123 refs += refs_to_mod;
1124 btrfs_set_extent_refs(leaf, ei, refs);
1125 if (extent_op)
1126 __run_delayed_extent_op(extent_op, leaf, ei);
1127
1128
1129
1130
1131
1132 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1133 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1134
1135 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1136 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1137 refs = btrfs_extent_data_ref_count(leaf, dref);
1138 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1139 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1140 refs = btrfs_shared_data_ref_count(leaf, sref);
1141 } else {
1142 refs = 1;
1143 BUG_ON(refs_to_mod != -1);
1144 }
1145
1146 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1147 refs += refs_to_mod;
1148
1149 if (refs > 0) {
1150 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1151 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1152 else
1153 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1154 } else {
1155 *last_ref = 1;
1156 size = btrfs_extent_inline_ref_size(type);
1157 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1158 ptr = (unsigned long)iref;
1159 end = (unsigned long)ei + item_size;
1160 if (ptr + size < end)
1161 memmove_extent_buffer(leaf, ptr, ptr + size,
1162 end - ptr - size);
1163 item_size -= size;
1164 btrfs_truncate_item(path, item_size, 1);
1165 }
1166 btrfs_mark_buffer_dirty(leaf);
1167}
1168
1169static noinline_for_stack
1170int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1171 struct btrfs_path *path,
1172 u64 bytenr, u64 num_bytes, u64 parent,
1173 u64 root_objectid, u64 owner,
1174 u64 offset, int refs_to_add,
1175 struct btrfs_delayed_extent_op *extent_op)
1176{
1177 struct btrfs_extent_inline_ref *iref;
1178 int ret;
1179
1180 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1181 num_bytes, parent, root_objectid,
1182 owner, offset, 1);
1183 if (ret == 0) {
1184 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1185 update_inline_extent_backref(path, iref, refs_to_add,
1186 extent_op, NULL);
1187 } else if (ret == -ENOENT) {
1188 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1189 root_objectid, owner, offset,
1190 refs_to_add, extent_op);
1191 ret = 0;
1192 }
1193 return ret;
1194}
1195
1196static int insert_extent_backref(struct btrfs_trans_handle *trans,
1197 struct btrfs_path *path,
1198 u64 bytenr, u64 parent, u64 root_objectid,
1199 u64 owner, u64 offset, int refs_to_add)
1200{
1201 int ret;
1202 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1203 BUG_ON(refs_to_add != 1);
1204 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1205 root_objectid);
1206 } else {
1207 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1208 root_objectid, owner, offset,
1209 refs_to_add);
1210 }
1211 return ret;
1212}
1213
1214static int remove_extent_backref(struct btrfs_trans_handle *trans,
1215 struct btrfs_path *path,
1216 struct btrfs_extent_inline_ref *iref,
1217 int refs_to_drop, int is_data, int *last_ref)
1218{
1219 int ret = 0;
1220
1221 BUG_ON(!is_data && refs_to_drop != 1);
1222 if (iref) {
1223 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1224 last_ref);
1225 } else if (is_data) {
1226 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1227 last_ref);
1228 } else {
1229 *last_ref = 1;
1230 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1231 }
1232 return ret;
1233}
1234
1235static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1236 u64 *discarded_bytes)
1237{
1238 int j, ret = 0;
1239 u64 bytes_left, end;
1240 u64 aligned_start = ALIGN(start, 1 << 9);
1241
1242 if (WARN_ON(start != aligned_start)) {
1243 len -= aligned_start - start;
1244 len = round_down(len, 1 << 9);
1245 start = aligned_start;
1246 }
1247
1248 *discarded_bytes = 0;
1249
1250 if (!len)
1251 return 0;
1252
1253 end = start + len;
1254 bytes_left = len;
1255
1256
1257 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1258 u64 sb_start = btrfs_sb_offset(j);
1259 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1260 u64 size = sb_start - start;
1261
1262 if (!in_range(sb_start, start, bytes_left) &&
1263 !in_range(sb_end, start, bytes_left) &&
1264 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1265 continue;
1266
1267
1268
1269
1270
1271 if (sb_start <= start) {
1272 start += sb_end - start;
1273 if (start > end) {
1274 bytes_left = 0;
1275 break;
1276 }
1277 bytes_left = end - start;
1278 continue;
1279 }
1280
1281 if (size) {
1282 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1283 GFP_NOFS, 0);
1284 if (!ret)
1285 *discarded_bytes += size;
1286 else if (ret != -EOPNOTSUPP)
1287 return ret;
1288 }
1289
1290 start = sb_end;
1291 if (start > end) {
1292 bytes_left = 0;
1293 break;
1294 }
1295 bytes_left = end - start;
1296 }
1297
1298 if (bytes_left) {
1299 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1300 GFP_NOFS, 0);
1301 if (!ret)
1302 *discarded_bytes += bytes_left;
1303 }
1304 return ret;
1305}
1306
1307int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1308 u64 num_bytes, u64 *actual_bytes)
1309{
1310 int ret = 0;
1311 u64 discarded_bytes = 0;
1312 u64 end = bytenr + num_bytes;
1313 u64 cur = bytenr;
1314 struct btrfs_bio *bbio = NULL;
1315
1316
1317
1318
1319
1320
1321 btrfs_bio_counter_inc_blocked(fs_info);
1322 while (cur < end) {
1323 struct btrfs_bio_stripe *stripe;
1324 int i;
1325
1326 num_bytes = end - cur;
1327
1328 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
1329 &num_bytes, &bbio, 0);
1330
1331
1332
1333
1334
1335 if (ret < 0)
1336 goto out;
1337
1338 stripe = bbio->stripes;
1339 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1340 u64 bytes;
1341 struct request_queue *req_q;
1342
1343 if (!stripe->dev->bdev) {
1344 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1345 continue;
1346 }
1347 req_q = bdev_get_queue(stripe->dev->bdev);
1348 if (!blk_queue_discard(req_q))
1349 continue;
1350
1351 ret = btrfs_issue_discard(stripe->dev->bdev,
1352 stripe->physical,
1353 stripe->length,
1354 &bytes);
1355 if (!ret) {
1356 discarded_bytes += bytes;
1357 } else if (ret != -EOPNOTSUPP) {
1358
1359
1360
1361
1362
1363
1364
1365 btrfs_put_bbio(bbio);
1366 goto out;
1367 }
1368
1369
1370
1371
1372
1373
1374 ret = 0;
1375 }
1376 btrfs_put_bbio(bbio);
1377 cur += num_bytes;
1378 }
1379out:
1380 btrfs_bio_counter_dec(fs_info);
1381
1382 if (actual_bytes)
1383 *actual_bytes = discarded_bytes;
1384
1385
1386 if (ret == -EOPNOTSUPP)
1387 ret = 0;
1388 return ret;
1389}
1390
1391
1392int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1393 struct btrfs_ref *generic_ref)
1394{
1395 struct btrfs_fs_info *fs_info = trans->fs_info;
1396 int old_ref_mod, new_ref_mod;
1397 int ret;
1398
1399 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1400 generic_ref->action);
1401 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1402 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1403
1404 if (generic_ref->type == BTRFS_REF_METADATA)
1405 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
1406 NULL, &old_ref_mod, &new_ref_mod);
1407 else
1408 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
1409 &old_ref_mod, &new_ref_mod);
1410
1411 btrfs_ref_tree_mod(fs_info, generic_ref);
1412
1413 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
1414 sub_pinned_bytes(fs_info, generic_ref);
1415
1416 return ret;
1417}
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1454 struct btrfs_delayed_ref_node *node,
1455 u64 parent, u64 root_objectid,
1456 u64 owner, u64 offset, int refs_to_add,
1457 struct btrfs_delayed_extent_op *extent_op)
1458{
1459 struct btrfs_path *path;
1460 struct extent_buffer *leaf;
1461 struct btrfs_extent_item *item;
1462 struct btrfs_key key;
1463 u64 bytenr = node->bytenr;
1464 u64 num_bytes = node->num_bytes;
1465 u64 refs;
1466 int ret;
1467
1468 path = btrfs_alloc_path();
1469 if (!path)
1470 return -ENOMEM;
1471
1472 path->reada = READA_FORWARD;
1473 path->leave_spinning = 1;
1474
1475 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1476 parent, root_objectid, owner,
1477 offset, refs_to_add, extent_op);
1478 if ((ret < 0 && ret != -EAGAIN) || !ret)
1479 goto out;
1480
1481
1482
1483
1484
1485
1486 leaf = path->nodes[0];
1487 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1488 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1489 refs = btrfs_extent_refs(leaf, item);
1490 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1491 if (extent_op)
1492 __run_delayed_extent_op(extent_op, leaf, item);
1493
1494 btrfs_mark_buffer_dirty(leaf);
1495 btrfs_release_path(path);
1496
1497 path->reada = READA_FORWARD;
1498 path->leave_spinning = 1;
1499
1500 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
1501 owner, offset, refs_to_add);
1502 if (ret)
1503 btrfs_abort_transaction(trans, ret);
1504out:
1505 btrfs_free_path(path);
1506 return ret;
1507}
1508
1509static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1510 struct btrfs_delayed_ref_node *node,
1511 struct btrfs_delayed_extent_op *extent_op,
1512 int insert_reserved)
1513{
1514 int ret = 0;
1515 struct btrfs_delayed_data_ref *ref;
1516 struct btrfs_key ins;
1517 u64 parent = 0;
1518 u64 ref_root = 0;
1519 u64 flags = 0;
1520
1521 ins.objectid = node->bytenr;
1522 ins.offset = node->num_bytes;
1523 ins.type = BTRFS_EXTENT_ITEM_KEY;
1524
1525 ref = btrfs_delayed_node_to_data_ref(node);
1526 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1527
1528 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1529 parent = ref->parent;
1530 ref_root = ref->root;
1531
1532 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1533 if (extent_op)
1534 flags |= extent_op->flags_to_set;
1535 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1536 flags, ref->objectid,
1537 ref->offset, &ins,
1538 node->ref_mod);
1539 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1540 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1541 ref->objectid, ref->offset,
1542 node->ref_mod, extent_op);
1543 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1544 ret = __btrfs_free_extent(trans, node, parent,
1545 ref_root, ref->objectid,
1546 ref->offset, node->ref_mod,
1547 extent_op);
1548 } else {
1549 BUG();
1550 }
1551 return ret;
1552}
1553
1554static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1555 struct extent_buffer *leaf,
1556 struct btrfs_extent_item *ei)
1557{
1558 u64 flags = btrfs_extent_flags(leaf, ei);
1559 if (extent_op->update_flags) {
1560 flags |= extent_op->flags_to_set;
1561 btrfs_set_extent_flags(leaf, ei, flags);
1562 }
1563
1564 if (extent_op->update_key) {
1565 struct btrfs_tree_block_info *bi;
1566 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1567 bi = (struct btrfs_tree_block_info *)(ei + 1);
1568 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1569 }
1570}
1571
1572static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1573 struct btrfs_delayed_ref_head *head,
1574 struct btrfs_delayed_extent_op *extent_op)
1575{
1576 struct btrfs_fs_info *fs_info = trans->fs_info;
1577 struct btrfs_key key;
1578 struct btrfs_path *path;
1579 struct btrfs_extent_item *ei;
1580 struct extent_buffer *leaf;
1581 u32 item_size;
1582 int ret;
1583 int err = 0;
1584 int metadata = !extent_op->is_data;
1585
1586 if (trans->aborted)
1587 return 0;
1588
1589 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1590 metadata = 0;
1591
1592 path = btrfs_alloc_path();
1593 if (!path)
1594 return -ENOMEM;
1595
1596 key.objectid = head->bytenr;
1597
1598 if (metadata) {
1599 key.type = BTRFS_METADATA_ITEM_KEY;
1600 key.offset = extent_op->level;
1601 } else {
1602 key.type = BTRFS_EXTENT_ITEM_KEY;
1603 key.offset = head->num_bytes;
1604 }
1605
1606again:
1607 path->reada = READA_FORWARD;
1608 path->leave_spinning = 1;
1609 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1610 if (ret < 0) {
1611 err = ret;
1612 goto out;
1613 }
1614 if (ret > 0) {
1615 if (metadata) {
1616 if (path->slots[0] > 0) {
1617 path->slots[0]--;
1618 btrfs_item_key_to_cpu(path->nodes[0], &key,
1619 path->slots[0]);
1620 if (key.objectid == head->bytenr &&
1621 key.type == BTRFS_EXTENT_ITEM_KEY &&
1622 key.offset == head->num_bytes)
1623 ret = 0;
1624 }
1625 if (ret > 0) {
1626 btrfs_release_path(path);
1627 metadata = 0;
1628
1629 key.objectid = head->bytenr;
1630 key.offset = head->num_bytes;
1631 key.type = BTRFS_EXTENT_ITEM_KEY;
1632 goto again;
1633 }
1634 } else {
1635 err = -EIO;
1636 goto out;
1637 }
1638 }
1639
1640 leaf = path->nodes[0];
1641 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1642
1643 if (unlikely(item_size < sizeof(*ei))) {
1644 err = -EINVAL;
1645 btrfs_print_v0_err(fs_info);
1646 btrfs_abort_transaction(trans, err);
1647 goto out;
1648 }
1649
1650 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1651 __run_delayed_extent_op(extent_op, leaf, ei);
1652
1653 btrfs_mark_buffer_dirty(leaf);
1654out:
1655 btrfs_free_path(path);
1656 return err;
1657}
1658
1659static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1660 struct btrfs_delayed_ref_node *node,
1661 struct btrfs_delayed_extent_op *extent_op,
1662 int insert_reserved)
1663{
1664 int ret = 0;
1665 struct btrfs_delayed_tree_ref *ref;
1666 u64 parent = 0;
1667 u64 ref_root = 0;
1668
1669 ref = btrfs_delayed_node_to_tree_ref(node);
1670 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1671
1672 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1673 parent = ref->parent;
1674 ref_root = ref->root;
1675
1676 if (node->ref_mod != 1) {
1677 btrfs_err(trans->fs_info,
1678 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1679 node->bytenr, node->ref_mod, node->action, ref_root,
1680 parent);
1681 return -EIO;
1682 }
1683 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1684 BUG_ON(!extent_op || !extent_op->update_flags);
1685 ret = alloc_reserved_tree_block(trans, node, extent_op);
1686 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1687 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1688 ref->level, 0, 1, extent_op);
1689 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1690 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1691 ref->level, 0, 1, extent_op);
1692 } else {
1693 BUG();
1694 }
1695 return ret;
1696}
1697
1698
1699static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1700 struct btrfs_delayed_ref_node *node,
1701 struct btrfs_delayed_extent_op *extent_op,
1702 int insert_reserved)
1703{
1704 int ret = 0;
1705
1706 if (trans->aborted) {
1707 if (insert_reserved)
1708 btrfs_pin_extent(trans->fs_info, node->bytenr,
1709 node->num_bytes, 1);
1710 return 0;
1711 }
1712
1713 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1714 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1715 ret = run_delayed_tree_ref(trans, node, extent_op,
1716 insert_reserved);
1717 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1718 node->type == BTRFS_SHARED_DATA_REF_KEY)
1719 ret = run_delayed_data_ref(trans, node, extent_op,
1720 insert_reserved);
1721 else
1722 BUG();
1723 if (ret && insert_reserved)
1724 btrfs_pin_extent(trans->fs_info, node->bytenr,
1725 node->num_bytes, 1);
1726 return ret;
1727}
1728
1729static inline struct btrfs_delayed_ref_node *
1730select_delayed_ref(struct btrfs_delayed_ref_head *head)
1731{
1732 struct btrfs_delayed_ref_node *ref;
1733
1734 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1735 return NULL;
1736
1737
1738
1739
1740
1741
1742
1743 if (!list_empty(&head->ref_add_list))
1744 return list_first_entry(&head->ref_add_list,
1745 struct btrfs_delayed_ref_node, add_list);
1746
1747 ref = rb_entry(rb_first_cached(&head->ref_tree),
1748 struct btrfs_delayed_ref_node, ref_node);
1749 ASSERT(list_empty(&ref->add_list));
1750 return ref;
1751}
1752
1753static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1754 struct btrfs_delayed_ref_head *head)
1755{
1756 spin_lock(&delayed_refs->lock);
1757 head->processing = 0;
1758 delayed_refs->num_heads_ready++;
1759 spin_unlock(&delayed_refs->lock);
1760 btrfs_delayed_ref_unlock(head);
1761}
1762
1763static struct btrfs_delayed_extent_op *cleanup_extent_op(
1764 struct btrfs_delayed_ref_head *head)
1765{
1766 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1767
1768 if (!extent_op)
1769 return NULL;
1770
1771 if (head->must_insert_reserved) {
1772 head->extent_op = NULL;
1773 btrfs_free_delayed_extent_op(extent_op);
1774 return NULL;
1775 }
1776 return extent_op;
1777}
1778
1779static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1780 struct btrfs_delayed_ref_head *head)
1781{
1782 struct btrfs_delayed_extent_op *extent_op;
1783 int ret;
1784
1785 extent_op = cleanup_extent_op(head);
1786 if (!extent_op)
1787 return 0;
1788 head->extent_op = NULL;
1789 spin_unlock(&head->lock);
1790 ret = run_delayed_extent_op(trans, head, extent_op);
1791 btrfs_free_delayed_extent_op(extent_op);
1792 return ret ? ret : 1;
1793}
1794
1795void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1796 struct btrfs_delayed_ref_root *delayed_refs,
1797 struct btrfs_delayed_ref_head *head)
1798{
1799 int nr_items = 1;
1800
1801 if (head->total_ref_mod < 0) {
1802 struct btrfs_space_info *space_info;
1803 u64 flags;
1804
1805 if (head->is_data)
1806 flags = BTRFS_BLOCK_GROUP_DATA;
1807 else if (head->is_system)
1808 flags = BTRFS_BLOCK_GROUP_SYSTEM;
1809 else
1810 flags = BTRFS_BLOCK_GROUP_METADATA;
1811 space_info = btrfs_find_space_info(fs_info, flags);
1812 ASSERT(space_info);
1813 percpu_counter_add_batch(&space_info->total_bytes_pinned,
1814 -head->num_bytes,
1815 BTRFS_TOTAL_BYTES_PINNED_BATCH);
1816
1817
1818
1819
1820
1821
1822 if (head->is_data) {
1823 spin_lock(&delayed_refs->lock);
1824 delayed_refs->pending_csums -= head->num_bytes;
1825 spin_unlock(&delayed_refs->lock);
1826 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
1827 head->num_bytes);
1828 }
1829 }
1830
1831 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1832}
1833
1834static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1835 struct btrfs_delayed_ref_head *head)
1836{
1837
1838 struct btrfs_fs_info *fs_info = trans->fs_info;
1839 struct btrfs_delayed_ref_root *delayed_refs;
1840 int ret;
1841
1842 delayed_refs = &trans->transaction->delayed_refs;
1843
1844 ret = run_and_cleanup_extent_op(trans, head);
1845 if (ret < 0) {
1846 unselect_delayed_ref_head(delayed_refs, head);
1847 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1848 return ret;
1849 } else if (ret) {
1850 return ret;
1851 }
1852
1853
1854
1855
1856
1857 spin_unlock(&head->lock);
1858 spin_lock(&delayed_refs->lock);
1859 spin_lock(&head->lock);
1860 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1861 spin_unlock(&head->lock);
1862 spin_unlock(&delayed_refs->lock);
1863 return 1;
1864 }
1865 btrfs_delete_ref_head(delayed_refs, head);
1866 spin_unlock(&head->lock);
1867 spin_unlock(&delayed_refs->lock);
1868
1869 if (head->must_insert_reserved) {
1870 btrfs_pin_extent(fs_info, head->bytenr,
1871 head->num_bytes, 1);
1872 if (head->is_data) {
1873 ret = btrfs_del_csums(trans, fs_info->csum_root,
1874 head->bytenr, head->num_bytes);
1875 }
1876 }
1877
1878 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1879
1880 trace_run_delayed_ref_head(fs_info, head, 0);
1881 btrfs_delayed_ref_unlock(head);
1882 btrfs_put_delayed_ref_head(head);
1883 return 0;
1884}
1885
1886static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1887 struct btrfs_trans_handle *trans)
1888{
1889 struct btrfs_delayed_ref_root *delayed_refs =
1890 &trans->transaction->delayed_refs;
1891 struct btrfs_delayed_ref_head *head = NULL;
1892 int ret;
1893
1894 spin_lock(&delayed_refs->lock);
1895 head = btrfs_select_ref_head(delayed_refs);
1896 if (!head) {
1897 spin_unlock(&delayed_refs->lock);
1898 return head;
1899 }
1900
1901
1902
1903
1904
1905 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1906 spin_unlock(&delayed_refs->lock);
1907
1908
1909
1910
1911
1912
1913 if (ret == -EAGAIN)
1914 head = ERR_PTR(-EAGAIN);
1915
1916 return head;
1917}
1918
1919static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1920 struct btrfs_delayed_ref_head *locked_ref,
1921 unsigned long *run_refs)
1922{
1923 struct btrfs_fs_info *fs_info = trans->fs_info;
1924 struct btrfs_delayed_ref_root *delayed_refs;
1925 struct btrfs_delayed_extent_op *extent_op;
1926 struct btrfs_delayed_ref_node *ref;
1927 int must_insert_reserved = 0;
1928 int ret;
1929
1930 delayed_refs = &trans->transaction->delayed_refs;
1931
1932 lockdep_assert_held(&locked_ref->mutex);
1933 lockdep_assert_held(&locked_ref->lock);
1934
1935 while ((ref = select_delayed_ref(locked_ref))) {
1936 if (ref->seq &&
1937 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1938 spin_unlock(&locked_ref->lock);
1939 unselect_delayed_ref_head(delayed_refs, locked_ref);
1940 return -EAGAIN;
1941 }
1942
1943 (*run_refs)++;
1944 ref->in_tree = 0;
1945 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1946 RB_CLEAR_NODE(&ref->ref_node);
1947 if (!list_empty(&ref->add_list))
1948 list_del(&ref->add_list);
1949
1950
1951
1952
1953 switch (ref->action) {
1954 case BTRFS_ADD_DELAYED_REF:
1955 case BTRFS_ADD_DELAYED_EXTENT:
1956 locked_ref->ref_mod -= ref->ref_mod;
1957 break;
1958 case BTRFS_DROP_DELAYED_REF:
1959 locked_ref->ref_mod += ref->ref_mod;
1960 break;
1961 default:
1962 WARN_ON(1);
1963 }
1964 atomic_dec(&delayed_refs->num_entries);
1965
1966
1967
1968
1969
1970 must_insert_reserved = locked_ref->must_insert_reserved;
1971 locked_ref->must_insert_reserved = 0;
1972
1973 extent_op = locked_ref->extent_op;
1974 locked_ref->extent_op = NULL;
1975 spin_unlock(&locked_ref->lock);
1976
1977 ret = run_one_delayed_ref(trans, ref, extent_op,
1978 must_insert_reserved);
1979
1980 btrfs_free_delayed_extent_op(extent_op);
1981 if (ret) {
1982 unselect_delayed_ref_head(delayed_refs, locked_ref);
1983 btrfs_put_delayed_ref(ref);
1984 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1985 ret);
1986 return ret;
1987 }
1988
1989 btrfs_put_delayed_ref(ref);
1990 cond_resched();
1991
1992 spin_lock(&locked_ref->lock);
1993 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1994 }
1995
1996 return 0;
1997}
1998
1999
2000
2001
2002
2003static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2004 unsigned long nr)
2005{
2006 struct btrfs_fs_info *fs_info = trans->fs_info;
2007 struct btrfs_delayed_ref_root *delayed_refs;
2008 struct btrfs_delayed_ref_head *locked_ref = NULL;
2009 ktime_t start = ktime_get();
2010 int ret;
2011 unsigned long count = 0;
2012 unsigned long actual_count = 0;
2013
2014 delayed_refs = &trans->transaction->delayed_refs;
2015 do {
2016 if (!locked_ref) {
2017 locked_ref = btrfs_obtain_ref_head(trans);
2018 if (IS_ERR_OR_NULL(locked_ref)) {
2019 if (PTR_ERR(locked_ref) == -EAGAIN) {
2020 continue;
2021 } else {
2022 break;
2023 }
2024 }
2025 count++;
2026 }
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039 spin_lock(&locked_ref->lock);
2040 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2041
2042 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2043 &actual_count);
2044 if (ret < 0 && ret != -EAGAIN) {
2045
2046
2047
2048
2049 return ret;
2050 } else if (!ret) {
2051
2052
2053
2054
2055 ret = cleanup_ref_head(trans, locked_ref);
2056 if (ret > 0 ) {
2057
2058 ret = 0;
2059 continue;
2060 } else if (ret) {
2061 return ret;
2062 }
2063 }
2064
2065
2066
2067
2068
2069
2070 locked_ref = NULL;
2071 cond_resched();
2072 } while ((nr != -1 && count < nr) || locked_ref);
2073
2074
2075
2076
2077
2078
2079 if (actual_count > 0) {
2080 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2081 u64 avg;
2082
2083
2084
2085
2086
2087 spin_lock(&delayed_refs->lock);
2088 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2089 fs_info->avg_delayed_ref_runtime = avg >> 2;
2090 spin_unlock(&delayed_refs->lock);
2091 }
2092 return 0;
2093}
2094
2095#ifdef SCRAMBLE_DELAYED_REFS
2096
2097
2098
2099
2100
2101static u64 find_middle(struct rb_root *root)
2102{
2103 struct rb_node *n = root->rb_node;
2104 struct btrfs_delayed_ref_node *entry;
2105 int alt = 1;
2106 u64 middle;
2107 u64 first = 0, last = 0;
2108
2109 n = rb_first(root);
2110 if (n) {
2111 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2112 first = entry->bytenr;
2113 }
2114 n = rb_last(root);
2115 if (n) {
2116 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2117 last = entry->bytenr;
2118 }
2119 n = root->rb_node;
2120
2121 while (n) {
2122 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2123 WARN_ON(!entry->in_tree);
2124
2125 middle = entry->bytenr;
2126
2127 if (alt)
2128 n = n->rb_left;
2129 else
2130 n = n->rb_right;
2131
2132 alt = 1 - alt;
2133 }
2134 return middle;
2135}
2136#endif
2137
2138static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2139{
2140 u64 num_bytes;
2141
2142 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2143 sizeof(struct btrfs_extent_inline_ref));
2144 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2145 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2146
2147
2148
2149
2150
2151 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2152}
2153
2154
2155
2156
2157
2158u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2159{
2160 u64 csum_size;
2161 u64 num_csums_per_leaf;
2162 u64 num_csums;
2163
2164 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2165 num_csums_per_leaf = div64_u64(csum_size,
2166 (u64)btrfs_super_csum_size(fs_info->super_copy));
2167 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2168 num_csums += num_csums_per_leaf - 1;
2169 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2170 return num_csums;
2171}
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2184 unsigned long count)
2185{
2186 struct btrfs_fs_info *fs_info = trans->fs_info;
2187 struct rb_node *node;
2188 struct btrfs_delayed_ref_root *delayed_refs;
2189 struct btrfs_delayed_ref_head *head;
2190 int ret;
2191 int run_all = count == (unsigned long)-1;
2192
2193
2194 if (trans->aborted)
2195 return 0;
2196
2197 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2198 return 0;
2199
2200 delayed_refs = &trans->transaction->delayed_refs;
2201 if (count == 0)
2202 count = atomic_read(&delayed_refs->num_entries) * 2;
2203
2204again:
2205#ifdef SCRAMBLE_DELAYED_REFS
2206 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2207#endif
2208 ret = __btrfs_run_delayed_refs(trans, count);
2209 if (ret < 0) {
2210 btrfs_abort_transaction(trans, ret);
2211 return ret;
2212 }
2213
2214 if (run_all) {
2215 btrfs_create_pending_block_groups(trans);
2216
2217 spin_lock(&delayed_refs->lock);
2218 node = rb_first_cached(&delayed_refs->href_root);
2219 if (!node) {
2220 spin_unlock(&delayed_refs->lock);
2221 goto out;
2222 }
2223 head = rb_entry(node, struct btrfs_delayed_ref_head,
2224 href_node);
2225 refcount_inc(&head->refs);
2226 spin_unlock(&delayed_refs->lock);
2227
2228
2229 mutex_lock(&head->mutex);
2230 mutex_unlock(&head->mutex);
2231
2232 btrfs_put_delayed_ref_head(head);
2233 cond_resched();
2234 goto again;
2235 }
2236out:
2237 return 0;
2238}
2239
2240int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2241 u64 bytenr, u64 num_bytes, u64 flags,
2242 int level, int is_data)
2243{
2244 struct btrfs_delayed_extent_op *extent_op;
2245 int ret;
2246
2247 extent_op = btrfs_alloc_delayed_extent_op();
2248 if (!extent_op)
2249 return -ENOMEM;
2250
2251 extent_op->flags_to_set = flags;
2252 extent_op->update_flags = true;
2253 extent_op->update_key = false;
2254 extent_op->is_data = is_data ? true : false;
2255 extent_op->level = level;
2256
2257 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2258 if (ret)
2259 btrfs_free_delayed_extent_op(extent_op);
2260 return ret;
2261}
2262
2263static noinline int check_delayed_ref(struct btrfs_root *root,
2264 struct btrfs_path *path,
2265 u64 objectid, u64 offset, u64 bytenr)
2266{
2267 struct btrfs_delayed_ref_head *head;
2268 struct btrfs_delayed_ref_node *ref;
2269 struct btrfs_delayed_data_ref *data_ref;
2270 struct btrfs_delayed_ref_root *delayed_refs;
2271 struct btrfs_transaction *cur_trans;
2272 struct rb_node *node;
2273 int ret = 0;
2274
2275 spin_lock(&root->fs_info->trans_lock);
2276 cur_trans = root->fs_info->running_transaction;
2277 if (cur_trans)
2278 refcount_inc(&cur_trans->use_count);
2279 spin_unlock(&root->fs_info->trans_lock);
2280 if (!cur_trans)
2281 return 0;
2282
2283 delayed_refs = &cur_trans->delayed_refs;
2284 spin_lock(&delayed_refs->lock);
2285 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2286 if (!head) {
2287 spin_unlock(&delayed_refs->lock);
2288 btrfs_put_transaction(cur_trans);
2289 return 0;
2290 }
2291
2292 if (!mutex_trylock(&head->mutex)) {
2293 refcount_inc(&head->refs);
2294 spin_unlock(&delayed_refs->lock);
2295
2296 btrfs_release_path(path);
2297
2298
2299
2300
2301
2302 mutex_lock(&head->mutex);
2303 mutex_unlock(&head->mutex);
2304 btrfs_put_delayed_ref_head(head);
2305 btrfs_put_transaction(cur_trans);
2306 return -EAGAIN;
2307 }
2308 spin_unlock(&delayed_refs->lock);
2309
2310 spin_lock(&head->lock);
2311
2312
2313
2314
2315 for (node = rb_first_cached(&head->ref_tree); node;
2316 node = rb_next(node)) {
2317 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2318
2319 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2320 ret = 1;
2321 break;
2322 }
2323
2324 data_ref = btrfs_delayed_node_to_data_ref(ref);
2325
2326
2327
2328
2329
2330 if (data_ref->root != root->root_key.objectid ||
2331 data_ref->objectid != objectid ||
2332 data_ref->offset != offset) {
2333 ret = 1;
2334 break;
2335 }
2336 }
2337 spin_unlock(&head->lock);
2338 mutex_unlock(&head->mutex);
2339 btrfs_put_transaction(cur_trans);
2340 return ret;
2341}
2342
2343static noinline int check_committed_ref(struct btrfs_root *root,
2344 struct btrfs_path *path,
2345 u64 objectid, u64 offset, u64 bytenr)
2346{
2347 struct btrfs_fs_info *fs_info = root->fs_info;
2348 struct btrfs_root *extent_root = fs_info->extent_root;
2349 struct extent_buffer *leaf;
2350 struct btrfs_extent_data_ref *ref;
2351 struct btrfs_extent_inline_ref *iref;
2352 struct btrfs_extent_item *ei;
2353 struct btrfs_key key;
2354 u32 item_size;
2355 int type;
2356 int ret;
2357
2358 key.objectid = bytenr;
2359 key.offset = (u64)-1;
2360 key.type = BTRFS_EXTENT_ITEM_KEY;
2361
2362 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2363 if (ret < 0)
2364 goto out;
2365 BUG_ON(ret == 0);
2366
2367 ret = -ENOENT;
2368 if (path->slots[0] == 0)
2369 goto out;
2370
2371 path->slots[0]--;
2372 leaf = path->nodes[0];
2373 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2374
2375 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2376 goto out;
2377
2378 ret = 1;
2379 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2380 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2381
2382
2383 if (item_size != sizeof(*ei) +
2384 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2385 goto out;
2386
2387
2388 if (btrfs_extent_generation(leaf, ei) <=
2389 btrfs_root_last_snapshot(&root->root_item))
2390 goto out;
2391
2392 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2393
2394
2395 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2396 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2397 goto out;
2398
2399 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2400 if (btrfs_extent_refs(leaf, ei) !=
2401 btrfs_extent_data_ref_count(leaf, ref) ||
2402 btrfs_extent_data_ref_root(leaf, ref) !=
2403 root->root_key.objectid ||
2404 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2405 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2406 goto out;
2407
2408 ret = 0;
2409out:
2410 return ret;
2411}
2412
2413int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2414 u64 bytenr)
2415{
2416 struct btrfs_path *path;
2417 int ret;
2418
2419 path = btrfs_alloc_path();
2420 if (!path)
2421 return -ENOMEM;
2422
2423 do {
2424 ret = check_committed_ref(root, path, objectid,
2425 offset, bytenr);
2426 if (ret && ret != -ENOENT)
2427 goto out;
2428
2429 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2430 } while (ret == -EAGAIN);
2431
2432out:
2433 btrfs_free_path(path);
2434 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2435 WARN_ON(ret > 0);
2436 return ret;
2437}
2438
2439static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2440 struct btrfs_root *root,
2441 struct extent_buffer *buf,
2442 int full_backref, int inc)
2443{
2444 struct btrfs_fs_info *fs_info = root->fs_info;
2445 u64 bytenr;
2446 u64 num_bytes;
2447 u64 parent;
2448 u64 ref_root;
2449 u32 nritems;
2450 struct btrfs_key key;
2451 struct btrfs_file_extent_item *fi;
2452 struct btrfs_ref generic_ref = { 0 };
2453 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2454 int i;
2455 int action;
2456 int level;
2457 int ret = 0;
2458
2459 if (btrfs_is_testing(fs_info))
2460 return 0;
2461
2462 ref_root = btrfs_header_owner(buf);
2463 nritems = btrfs_header_nritems(buf);
2464 level = btrfs_header_level(buf);
2465
2466 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
2467 return 0;
2468
2469 if (full_backref)
2470 parent = buf->start;
2471 else
2472 parent = 0;
2473 if (inc)
2474 action = BTRFS_ADD_DELAYED_REF;
2475 else
2476 action = BTRFS_DROP_DELAYED_REF;
2477
2478 for (i = 0; i < nritems; i++) {
2479 if (level == 0) {
2480 btrfs_item_key_to_cpu(buf, &key, i);
2481 if (key.type != BTRFS_EXTENT_DATA_KEY)
2482 continue;
2483 fi = btrfs_item_ptr(buf, i,
2484 struct btrfs_file_extent_item);
2485 if (btrfs_file_extent_type(buf, fi) ==
2486 BTRFS_FILE_EXTENT_INLINE)
2487 continue;
2488 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2489 if (bytenr == 0)
2490 continue;
2491
2492 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2493 key.offset -= btrfs_file_extent_offset(buf, fi);
2494 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2495 num_bytes, parent);
2496 generic_ref.real_root = root->root_key.objectid;
2497 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2498 key.offset);
2499 generic_ref.skip_qgroup = for_reloc;
2500 if (inc)
2501 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2502 else
2503 ret = btrfs_free_extent(trans, &generic_ref);
2504 if (ret)
2505 goto fail;
2506 } else {
2507 bytenr = btrfs_node_blockptr(buf, i);
2508 num_bytes = fs_info->nodesize;
2509 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2510 num_bytes, parent);
2511 generic_ref.real_root = root->root_key.objectid;
2512 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2513 generic_ref.skip_qgroup = for_reloc;
2514 if (inc)
2515 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2516 else
2517 ret = btrfs_free_extent(trans, &generic_ref);
2518 if (ret)
2519 goto fail;
2520 }
2521 }
2522 return 0;
2523fail:
2524 return ret;
2525}
2526
2527int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2528 struct extent_buffer *buf, int full_backref)
2529{
2530 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2531}
2532
2533int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2534 struct extent_buffer *buf, int full_backref)
2535{
2536 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2537}
2538
2539int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
2540{
2541 struct btrfs_block_group *block_group;
2542 int readonly = 0;
2543
2544 block_group = btrfs_lookup_block_group(fs_info, bytenr);
2545 if (!block_group || block_group->ro)
2546 readonly = 1;
2547 if (block_group)
2548 btrfs_put_block_group(block_group);
2549 return readonly;
2550}
2551
2552static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2553{
2554 struct btrfs_fs_info *fs_info = root->fs_info;
2555 u64 flags;
2556 u64 ret;
2557
2558 if (data)
2559 flags = BTRFS_BLOCK_GROUP_DATA;
2560 else if (root == fs_info->chunk_root)
2561 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2562 else
2563 flags = BTRFS_BLOCK_GROUP_METADATA;
2564
2565 ret = btrfs_get_alloc_profile(fs_info, flags);
2566 return ret;
2567}
2568
2569static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2570{
2571 struct btrfs_block_group *cache;
2572 u64 bytenr;
2573
2574 spin_lock(&fs_info->block_group_cache_lock);
2575 bytenr = fs_info->first_logical_byte;
2576 spin_unlock(&fs_info->block_group_cache_lock);
2577
2578 if (bytenr < (u64)-1)
2579 return bytenr;
2580
2581 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2582 if (!cache)
2583 return 0;
2584
2585 bytenr = cache->start;
2586 btrfs_put_block_group(cache);
2587
2588 return bytenr;
2589}
2590
2591static int pin_down_extent(struct btrfs_block_group *cache,
2592 u64 bytenr, u64 num_bytes, int reserved)
2593{
2594 struct btrfs_fs_info *fs_info = cache->fs_info;
2595
2596 spin_lock(&cache->space_info->lock);
2597 spin_lock(&cache->lock);
2598 cache->pinned += num_bytes;
2599 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2600 num_bytes);
2601 if (reserved) {
2602 cache->reserved -= num_bytes;
2603 cache->space_info->bytes_reserved -= num_bytes;
2604 }
2605 spin_unlock(&cache->lock);
2606 spin_unlock(&cache->space_info->lock);
2607
2608 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
2609 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2610 set_extent_dirty(fs_info->pinned_extents, bytenr,
2611 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2612 return 0;
2613}
2614
2615int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2616 u64 bytenr, u64 num_bytes, int reserved)
2617{
2618 struct btrfs_block_group *cache;
2619
2620 ASSERT(fs_info->running_transaction);
2621
2622 cache = btrfs_lookup_block_group(fs_info, bytenr);
2623 BUG_ON(!cache);
2624
2625 pin_down_extent(cache, bytenr, num_bytes, reserved);
2626
2627 btrfs_put_block_group(cache);
2628 return 0;
2629}
2630
2631
2632
2633
2634int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
2635 u64 bytenr, u64 num_bytes)
2636{
2637 struct btrfs_block_group *cache;
2638 int ret;
2639
2640 cache = btrfs_lookup_block_group(fs_info, bytenr);
2641 if (!cache)
2642 return -EINVAL;
2643
2644
2645
2646
2647
2648
2649
2650 btrfs_cache_block_group(cache, 1);
2651
2652 pin_down_extent(cache, bytenr, num_bytes, 0);
2653
2654
2655 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2656 btrfs_put_block_group(cache);
2657 return ret;
2658}
2659
2660static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2661 u64 start, u64 num_bytes)
2662{
2663 int ret;
2664 struct btrfs_block_group *block_group;
2665 struct btrfs_caching_control *caching_ctl;
2666
2667 block_group = btrfs_lookup_block_group(fs_info, start);
2668 if (!block_group)
2669 return -EINVAL;
2670
2671 btrfs_cache_block_group(block_group, 0);
2672 caching_ctl = btrfs_get_caching_control(block_group);
2673
2674 if (!caching_ctl) {
2675
2676 BUG_ON(!btrfs_block_group_done(block_group));
2677 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2678 } else {
2679 mutex_lock(&caching_ctl->mutex);
2680
2681 if (start >= caching_ctl->progress) {
2682 ret = btrfs_add_excluded_extent(fs_info, start,
2683 num_bytes);
2684 } else if (start + num_bytes <= caching_ctl->progress) {
2685 ret = btrfs_remove_free_space(block_group,
2686 start, num_bytes);
2687 } else {
2688 num_bytes = caching_ctl->progress - start;
2689 ret = btrfs_remove_free_space(block_group,
2690 start, num_bytes);
2691 if (ret)
2692 goto out_lock;
2693
2694 num_bytes = (start + num_bytes) -
2695 caching_ctl->progress;
2696 start = caching_ctl->progress;
2697 ret = btrfs_add_excluded_extent(fs_info, start,
2698 num_bytes);
2699 }
2700out_lock:
2701 mutex_unlock(&caching_ctl->mutex);
2702 btrfs_put_caching_control(caching_ctl);
2703 }
2704 btrfs_put_block_group(block_group);
2705 return ret;
2706}
2707
2708int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2709{
2710 struct btrfs_fs_info *fs_info = eb->fs_info;
2711 struct btrfs_file_extent_item *item;
2712 struct btrfs_key key;
2713 int found_type;
2714 int i;
2715 int ret = 0;
2716
2717 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2718 return 0;
2719
2720 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2721 btrfs_item_key_to_cpu(eb, &key, i);
2722 if (key.type != BTRFS_EXTENT_DATA_KEY)
2723 continue;
2724 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2725 found_type = btrfs_file_extent_type(eb, item);
2726 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2727 continue;
2728 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2729 continue;
2730 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2731 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2732 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2733 if (ret)
2734 break;
2735 }
2736
2737 return ret;
2738}
2739
2740static void
2741btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
2742{
2743 atomic_inc(&bg->reservations);
2744}
2745
2746void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
2747{
2748 struct btrfs_caching_control *next;
2749 struct btrfs_caching_control *caching_ctl;
2750 struct btrfs_block_group *cache;
2751
2752 down_write(&fs_info->commit_root_sem);
2753
2754 list_for_each_entry_safe(caching_ctl, next,
2755 &fs_info->caching_block_groups, list) {
2756 cache = caching_ctl->block_group;
2757 if (btrfs_block_group_done(cache)) {
2758 cache->last_byte_to_unpin = (u64)-1;
2759 list_del_init(&caching_ctl->list);
2760 btrfs_put_caching_control(caching_ctl);
2761 } else {
2762 cache->last_byte_to_unpin = caching_ctl->progress;
2763 }
2764 }
2765
2766 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
2767 fs_info->pinned_extents = &fs_info->freed_extents[1];
2768 else
2769 fs_info->pinned_extents = &fs_info->freed_extents[0];
2770
2771 up_write(&fs_info->commit_root_sem);
2772
2773 btrfs_update_global_block_rsv(fs_info);
2774}
2775
2776
2777
2778
2779
2780static struct btrfs_free_cluster *
2781fetch_cluster_info(struct btrfs_fs_info *fs_info,
2782 struct btrfs_space_info *space_info, u64 *empty_cluster)
2783{
2784 struct btrfs_free_cluster *ret = NULL;
2785
2786 *empty_cluster = 0;
2787 if (btrfs_mixed_space_info(space_info))
2788 return ret;
2789
2790 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2791 ret = &fs_info->meta_alloc_cluster;
2792 if (btrfs_test_opt(fs_info, SSD))
2793 *empty_cluster = SZ_2M;
2794 else
2795 *empty_cluster = SZ_64K;
2796 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2797 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2798 *empty_cluster = SZ_2M;
2799 ret = &fs_info->data_alloc_cluster;
2800 }
2801
2802 return ret;
2803}
2804
2805static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2806 u64 start, u64 end,
2807 const bool return_free_space)
2808{
2809 struct btrfs_block_group *cache = NULL;
2810 struct btrfs_space_info *space_info;
2811 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2812 struct btrfs_free_cluster *cluster = NULL;
2813 u64 len;
2814 u64 total_unpinned = 0;
2815 u64 empty_cluster = 0;
2816 bool readonly;
2817
2818 while (start <= end) {
2819 readonly = false;
2820 if (!cache ||
2821 start >= cache->start + cache->length) {
2822 if (cache)
2823 btrfs_put_block_group(cache);
2824 total_unpinned = 0;
2825 cache = btrfs_lookup_block_group(fs_info, start);
2826 BUG_ON(!cache);
2827
2828 cluster = fetch_cluster_info(fs_info,
2829 cache->space_info,
2830 &empty_cluster);
2831 empty_cluster <<= 1;
2832 }
2833
2834 len = cache->start + cache->length - start;
2835 len = min(len, end + 1 - start);
2836
2837 if (start < cache->last_byte_to_unpin) {
2838 len = min(len, cache->last_byte_to_unpin - start);
2839 if (return_free_space)
2840 btrfs_add_free_space(cache, start, len);
2841 }
2842
2843 start += len;
2844 total_unpinned += len;
2845 space_info = cache->space_info;
2846
2847
2848
2849
2850
2851
2852
2853 if (cluster && cluster->fragmented &&
2854 total_unpinned > empty_cluster) {
2855 spin_lock(&cluster->lock);
2856 cluster->fragmented = 0;
2857 spin_unlock(&cluster->lock);
2858 }
2859
2860 spin_lock(&space_info->lock);
2861 spin_lock(&cache->lock);
2862 cache->pinned -= len;
2863 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2864 space_info->max_extent_size = 0;
2865 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2866 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2867 if (cache->ro) {
2868 space_info->bytes_readonly += len;
2869 readonly = true;
2870 }
2871 spin_unlock(&cache->lock);
2872 if (!readonly && return_free_space &&
2873 global_rsv->space_info == space_info) {
2874 u64 to_add = len;
2875
2876 spin_lock(&global_rsv->lock);
2877 if (!global_rsv->full) {
2878 to_add = min(len, global_rsv->size -
2879 global_rsv->reserved);
2880 global_rsv->reserved += to_add;
2881 btrfs_space_info_update_bytes_may_use(fs_info,
2882 space_info, to_add);
2883 if (global_rsv->reserved >= global_rsv->size)
2884 global_rsv->full = 1;
2885 len -= to_add;
2886 }
2887 spin_unlock(&global_rsv->lock);
2888
2889 if (len)
2890 btrfs_try_granting_tickets(fs_info,
2891 space_info);
2892 }
2893 spin_unlock(&space_info->lock);
2894 }
2895
2896 if (cache)
2897 btrfs_put_block_group(cache);
2898 return 0;
2899}
2900
2901int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2902{
2903 struct btrfs_fs_info *fs_info = trans->fs_info;
2904 struct btrfs_block_group *block_group, *tmp;
2905 struct list_head *deleted_bgs;
2906 struct extent_io_tree *unpin;
2907 u64 start;
2908 u64 end;
2909 int ret;
2910
2911 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
2912 unpin = &fs_info->freed_extents[1];
2913 else
2914 unpin = &fs_info->freed_extents[0];
2915
2916 while (!trans->aborted) {
2917 struct extent_state *cached_state = NULL;
2918
2919 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2920 ret = find_first_extent_bit(unpin, 0, &start, &end,
2921 EXTENT_DIRTY, &cached_state);
2922 if (ret) {
2923 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2924 break;
2925 }
2926
2927 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
2928 ret = btrfs_discard_extent(fs_info, start,
2929 end + 1 - start, NULL);
2930
2931 clear_extent_dirty(unpin, start, end, &cached_state);
2932 unpin_extent_range(fs_info, start, end, true);
2933 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2934 free_extent_state(cached_state);
2935 cond_resched();
2936 }
2937
2938 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2939 btrfs_discard_calc_delay(&fs_info->discard_ctl);
2940 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2941 }
2942
2943
2944
2945
2946
2947
2948 deleted_bgs = &trans->transaction->deleted_bgs;
2949 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2950 u64 trimmed = 0;
2951
2952 ret = -EROFS;
2953 if (!trans->aborted)
2954 ret = btrfs_discard_extent(fs_info,
2955 block_group->start,
2956 block_group->length,
2957 &trimmed);
2958
2959 list_del_init(&block_group->bg_list);
2960 btrfs_put_block_group_trimming(block_group);
2961 btrfs_put_block_group(block_group);
2962
2963 if (ret) {
2964 const char *errstr = btrfs_decode_error(ret);
2965 btrfs_warn(fs_info,
2966 "discard failed while removing blockgroup: errno=%d %s",
2967 ret, errstr);
2968 }
2969 }
2970
2971 return 0;
2972}
2973
2974static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2975 struct btrfs_delayed_ref_node *node, u64 parent,
2976 u64 root_objectid, u64 owner_objectid,
2977 u64 owner_offset, int refs_to_drop,
2978 struct btrfs_delayed_extent_op *extent_op)
2979{
2980 struct btrfs_fs_info *info = trans->fs_info;
2981 struct btrfs_key key;
2982 struct btrfs_path *path;
2983 struct btrfs_root *extent_root = info->extent_root;
2984 struct extent_buffer *leaf;
2985 struct btrfs_extent_item *ei;
2986 struct btrfs_extent_inline_ref *iref;
2987 int ret;
2988 int is_data;
2989 int extent_slot = 0;
2990 int found_extent = 0;
2991 int num_to_del = 1;
2992 u32 item_size;
2993 u64 refs;
2994 u64 bytenr = node->bytenr;
2995 u64 num_bytes = node->num_bytes;
2996 int last_ref = 0;
2997 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2998
2999 path = btrfs_alloc_path();
3000 if (!path)
3001 return -ENOMEM;
3002
3003 path->reada = READA_FORWARD;
3004 path->leave_spinning = 1;
3005
3006 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
3007 BUG_ON(!is_data && refs_to_drop != 1);
3008
3009 if (is_data)
3010 skinny_metadata = false;
3011
3012 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
3013 parent, root_objectid, owner_objectid,
3014 owner_offset);
3015 if (ret == 0) {
3016 extent_slot = path->slots[0];
3017 while (extent_slot >= 0) {
3018 btrfs_item_key_to_cpu(path->nodes[0], &key,
3019 extent_slot);
3020 if (key.objectid != bytenr)
3021 break;
3022 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3023 key.offset == num_bytes) {
3024 found_extent = 1;
3025 break;
3026 }
3027 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3028 key.offset == owner_objectid) {
3029 found_extent = 1;
3030 break;
3031 }
3032 if (path->slots[0] - extent_slot > 5)
3033 break;
3034 extent_slot--;
3035 }
3036
3037 if (!found_extent) {
3038 BUG_ON(iref);
3039 ret = remove_extent_backref(trans, path, NULL,
3040 refs_to_drop,
3041 is_data, &last_ref);
3042 if (ret) {
3043 btrfs_abort_transaction(trans, ret);
3044 goto out;
3045 }
3046 btrfs_release_path(path);
3047 path->leave_spinning = 1;
3048
3049 key.objectid = bytenr;
3050 key.type = BTRFS_EXTENT_ITEM_KEY;
3051 key.offset = num_bytes;
3052
3053 if (!is_data && skinny_metadata) {
3054 key.type = BTRFS_METADATA_ITEM_KEY;
3055 key.offset = owner_objectid;
3056 }
3057
3058 ret = btrfs_search_slot(trans, extent_root,
3059 &key, path, -1, 1);
3060 if (ret > 0 && skinny_metadata && path->slots[0]) {
3061
3062
3063
3064
3065 path->slots[0]--;
3066 btrfs_item_key_to_cpu(path->nodes[0], &key,
3067 path->slots[0]);
3068 if (key.objectid == bytenr &&
3069 key.type == BTRFS_EXTENT_ITEM_KEY &&
3070 key.offset == num_bytes)
3071 ret = 0;
3072 }
3073
3074 if (ret > 0 && skinny_metadata) {
3075 skinny_metadata = false;
3076 key.objectid = bytenr;
3077 key.type = BTRFS_EXTENT_ITEM_KEY;
3078 key.offset = num_bytes;
3079 btrfs_release_path(path);
3080 ret = btrfs_search_slot(trans, extent_root,
3081 &key, path, -1, 1);
3082 }
3083
3084 if (ret) {
3085 btrfs_err(info,
3086 "umm, got %d back from search, was looking for %llu",
3087 ret, bytenr);
3088 if (ret > 0)
3089 btrfs_print_leaf(path->nodes[0]);
3090 }
3091 if (ret < 0) {
3092 btrfs_abort_transaction(trans, ret);
3093 goto out;
3094 }
3095 extent_slot = path->slots[0];
3096 }
3097 } else if (WARN_ON(ret == -ENOENT)) {
3098 btrfs_print_leaf(path->nodes[0]);
3099 btrfs_err(info,
3100 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3101 bytenr, parent, root_objectid, owner_objectid,
3102 owner_offset);
3103 btrfs_abort_transaction(trans, ret);
3104 goto out;
3105 } else {
3106 btrfs_abort_transaction(trans, ret);
3107 goto out;
3108 }
3109
3110 leaf = path->nodes[0];
3111 item_size = btrfs_item_size_nr(leaf, extent_slot);
3112 if (unlikely(item_size < sizeof(*ei))) {
3113 ret = -EINVAL;
3114 btrfs_print_v0_err(info);
3115 btrfs_abort_transaction(trans, ret);
3116 goto out;
3117 }
3118 ei = btrfs_item_ptr(leaf, extent_slot,
3119 struct btrfs_extent_item);
3120 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3121 key.type == BTRFS_EXTENT_ITEM_KEY) {
3122 struct btrfs_tree_block_info *bi;
3123 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
3124 bi = (struct btrfs_tree_block_info *)(ei + 1);
3125 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3126 }
3127
3128 refs = btrfs_extent_refs(leaf, ei);
3129 if (refs < refs_to_drop) {
3130 btrfs_err(info,
3131 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
3132 refs_to_drop, refs, bytenr);
3133 ret = -EINVAL;
3134 btrfs_abort_transaction(trans, ret);
3135 goto out;
3136 }
3137 refs -= refs_to_drop;
3138
3139 if (refs > 0) {
3140 if (extent_op)
3141 __run_delayed_extent_op(extent_op, leaf, ei);
3142
3143
3144
3145
3146 if (iref) {
3147 BUG_ON(!found_extent);
3148 } else {
3149 btrfs_set_extent_refs(leaf, ei, refs);
3150 btrfs_mark_buffer_dirty(leaf);
3151 }
3152 if (found_extent) {
3153 ret = remove_extent_backref(trans, path, iref,
3154 refs_to_drop, is_data,
3155 &last_ref);
3156 if (ret) {
3157 btrfs_abort_transaction(trans, ret);
3158 goto out;
3159 }
3160 }
3161 } else {
3162 if (found_extent) {
3163 BUG_ON(is_data && refs_to_drop !=
3164 extent_data_ref_count(path, iref));
3165 if (iref) {
3166 BUG_ON(path->slots[0] != extent_slot);
3167 } else {
3168 BUG_ON(path->slots[0] != extent_slot + 1);
3169 path->slots[0] = extent_slot;
3170 num_to_del = 2;
3171 }
3172 }
3173
3174 last_ref = 1;
3175 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3176 num_to_del);
3177 if (ret) {
3178 btrfs_abort_transaction(trans, ret);
3179 goto out;
3180 }
3181 btrfs_release_path(path);
3182
3183 if (is_data) {
3184 ret = btrfs_del_csums(trans, info->csum_root, bytenr,
3185 num_bytes);
3186 if (ret) {
3187 btrfs_abort_transaction(trans, ret);
3188 goto out;
3189 }
3190 }
3191
3192 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3193 if (ret) {
3194 btrfs_abort_transaction(trans, ret);
3195 goto out;
3196 }
3197
3198 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3199 if (ret) {
3200 btrfs_abort_transaction(trans, ret);
3201 goto out;
3202 }
3203 }
3204 btrfs_release_path(path);
3205
3206out:
3207 btrfs_free_path(path);
3208 return ret;
3209}
3210
3211
3212
3213
3214
3215
3216
3217static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3218 u64 bytenr)
3219{
3220 struct btrfs_delayed_ref_head *head;
3221 struct btrfs_delayed_ref_root *delayed_refs;
3222 int ret = 0;
3223
3224 delayed_refs = &trans->transaction->delayed_refs;
3225 spin_lock(&delayed_refs->lock);
3226 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3227 if (!head)
3228 goto out_delayed_unlock;
3229
3230 spin_lock(&head->lock);
3231 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3232 goto out;
3233
3234 if (cleanup_extent_op(head) != NULL)
3235 goto out;
3236
3237
3238
3239
3240
3241 if (!mutex_trylock(&head->mutex))
3242 goto out;
3243
3244 btrfs_delete_ref_head(delayed_refs, head);
3245 head->processing = 0;
3246
3247 spin_unlock(&head->lock);
3248 spin_unlock(&delayed_refs->lock);
3249
3250 BUG_ON(head->extent_op);
3251 if (head->must_insert_reserved)
3252 ret = 1;
3253
3254 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3255 mutex_unlock(&head->mutex);
3256 btrfs_put_delayed_ref_head(head);
3257 return ret;
3258out:
3259 spin_unlock(&head->lock);
3260
3261out_delayed_unlock:
3262 spin_unlock(&delayed_refs->lock);
3263 return 0;
3264}
3265
3266void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3267 struct btrfs_root *root,
3268 struct extent_buffer *buf,
3269 u64 parent, int last_ref)
3270{
3271 struct btrfs_fs_info *fs_info = root->fs_info;
3272 struct btrfs_ref generic_ref = { 0 };
3273 int pin = 1;
3274 int ret;
3275
3276 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3277 buf->start, buf->len, parent);
3278 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3279 root->root_key.objectid);
3280
3281 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3282 int old_ref_mod, new_ref_mod;
3283
3284 btrfs_ref_tree_mod(fs_info, &generic_ref);
3285 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
3286 &old_ref_mod, &new_ref_mod);
3287 BUG_ON(ret);
3288 pin = old_ref_mod >= 0 && new_ref_mod < 0;
3289 }
3290
3291 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3292 struct btrfs_block_group *cache;
3293
3294 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3295 ret = check_ref_cleanup(trans, buf->start);
3296 if (!ret)
3297 goto out;
3298 }
3299
3300 pin = 0;
3301 cache = btrfs_lookup_block_group(fs_info, buf->start);
3302
3303 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3304 pin_down_extent(cache, buf->start, buf->len, 1);
3305 btrfs_put_block_group(cache);
3306 goto out;
3307 }
3308
3309 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3310
3311 btrfs_add_free_space(cache, buf->start, buf->len);
3312 btrfs_free_reserved_bytes(cache, buf->len, 0);
3313 btrfs_put_block_group(cache);
3314 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3315 }
3316out:
3317 if (pin)
3318 add_pinned_bytes(fs_info, &generic_ref);
3319
3320 if (last_ref) {
3321
3322
3323
3324
3325 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3326 }
3327}
3328
3329
3330int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3331{
3332 struct btrfs_fs_info *fs_info = trans->fs_info;
3333 int old_ref_mod, new_ref_mod;
3334 int ret;
3335
3336 if (btrfs_is_testing(fs_info))
3337 return 0;
3338
3339
3340
3341
3342
3343 if ((ref->type == BTRFS_REF_METADATA &&
3344 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3345 (ref->type == BTRFS_REF_DATA &&
3346 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3347
3348 btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
3349 old_ref_mod = new_ref_mod = 0;
3350 ret = 0;
3351 } else if (ref->type == BTRFS_REF_METADATA) {
3352 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
3353 &old_ref_mod, &new_ref_mod);
3354 } else {
3355 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
3356 &old_ref_mod, &new_ref_mod);
3357 }
3358
3359 if (!((ref->type == BTRFS_REF_METADATA &&
3360 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3361 (ref->type == BTRFS_REF_DATA &&
3362 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3363 btrfs_ref_tree_mod(fs_info, ref);
3364
3365 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
3366 add_pinned_bytes(fs_info, ref);
3367
3368 return ret;
3369}
3370
3371enum btrfs_loop_type {
3372 LOOP_CACHING_NOWAIT,
3373 LOOP_CACHING_WAIT,
3374 LOOP_ALLOC_CHUNK,
3375 LOOP_NO_EMPTY_SIZE,
3376};
3377
3378static inline void
3379btrfs_lock_block_group(struct btrfs_block_group *cache,
3380 int delalloc)
3381{
3382 if (delalloc)
3383 down_read(&cache->data_rwsem);
3384}
3385
3386static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3387 int delalloc)
3388{
3389 btrfs_get_block_group(cache);
3390 if (delalloc)
3391 down_read(&cache->data_rwsem);
3392}
3393
3394static struct btrfs_block_group *btrfs_lock_cluster(
3395 struct btrfs_block_group *block_group,
3396 struct btrfs_free_cluster *cluster,
3397 int delalloc)
3398{
3399 struct btrfs_block_group *used_bg = NULL;
3400
3401 spin_lock(&cluster->refill_lock);
3402 while (1) {
3403 used_bg = cluster->block_group;
3404 if (!used_bg)
3405 return NULL;
3406
3407 if (used_bg == block_group)
3408 return used_bg;
3409
3410 btrfs_get_block_group(used_bg);
3411
3412 if (!delalloc)
3413 return used_bg;
3414
3415 if (down_read_trylock(&used_bg->data_rwsem))
3416 return used_bg;
3417
3418 spin_unlock(&cluster->refill_lock);
3419
3420
3421 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3422
3423 spin_lock(&cluster->refill_lock);
3424 if (used_bg == cluster->block_group)
3425 return used_bg;
3426
3427 up_read(&used_bg->data_rwsem);
3428 btrfs_put_block_group(used_bg);
3429 }
3430}
3431
3432static inline void
3433btrfs_release_block_group(struct btrfs_block_group *cache,
3434 int delalloc)
3435{
3436 if (delalloc)
3437 up_read(&cache->data_rwsem);
3438 btrfs_put_block_group(cache);
3439}
3440
3441
3442
3443
3444
3445struct find_free_extent_ctl {
3446
3447 u64 num_bytes;
3448 u64 empty_size;
3449 u64 flags;
3450 int delalloc;
3451
3452
3453 u64 search_start;
3454
3455
3456 u64 empty_cluster;
3457
3458 bool have_caching_bg;
3459 bool orig_have_caching_bg;
3460
3461
3462 int index;
3463
3464
3465
3466
3467 int loop;
3468
3469
3470
3471
3472
3473 bool retry_clustered;
3474
3475
3476
3477
3478
3479 bool retry_unclustered;
3480
3481
3482 int cached;
3483
3484
3485 u64 max_extent_size;
3486
3487
3488 u64 total_free_space;
3489
3490
3491 u64 found_offset;
3492};
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503static int find_free_extent_clustered(struct btrfs_block_group *bg,
3504 struct btrfs_free_cluster *last_ptr,
3505 struct find_free_extent_ctl *ffe_ctl,
3506 struct btrfs_block_group **cluster_bg_ret)
3507{
3508 struct btrfs_block_group *cluster_bg;
3509 u64 aligned_cluster;
3510 u64 offset;
3511 int ret;
3512
3513 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3514 if (!cluster_bg)
3515 goto refill_cluster;
3516 if (cluster_bg != bg && (cluster_bg->ro ||
3517 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3518 goto release_cluster;
3519
3520 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3521 ffe_ctl->num_bytes, cluster_bg->start,
3522 &ffe_ctl->max_extent_size);
3523 if (offset) {
3524
3525 spin_unlock(&last_ptr->refill_lock);
3526 trace_btrfs_reserve_extent_cluster(cluster_bg,
3527 ffe_ctl->search_start, ffe_ctl->num_bytes);
3528 *cluster_bg_ret = cluster_bg;
3529 ffe_ctl->found_offset = offset;
3530 return 0;
3531 }
3532 WARN_ON(last_ptr->block_group != cluster_bg);
3533
3534release_cluster:
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3547 spin_unlock(&last_ptr->refill_lock);
3548 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3549 return -ENOENT;
3550 }
3551
3552
3553 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3554
3555 if (cluster_bg != bg)
3556 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3557
3558refill_cluster:
3559 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3560 spin_unlock(&last_ptr->refill_lock);
3561 return -ENOENT;
3562 }
3563
3564 aligned_cluster = max_t(u64,
3565 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3566 bg->full_stripe_len);
3567 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3568 ffe_ctl->num_bytes, aligned_cluster);
3569 if (ret == 0) {
3570
3571 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3572 ffe_ctl->num_bytes, ffe_ctl->search_start,
3573 &ffe_ctl->max_extent_size);
3574 if (offset) {
3575
3576 spin_unlock(&last_ptr->refill_lock);
3577 trace_btrfs_reserve_extent_cluster(bg,
3578 ffe_ctl->search_start,
3579 ffe_ctl->num_bytes);
3580 ffe_ctl->found_offset = offset;
3581 return 0;
3582 }
3583 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3584 !ffe_ctl->retry_clustered) {
3585 spin_unlock(&last_ptr->refill_lock);
3586
3587 ffe_ctl->retry_clustered = true;
3588 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3589 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3590 return -EAGAIN;
3591 }
3592
3593
3594
3595
3596
3597 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3598 spin_unlock(&last_ptr->refill_lock);
3599 return 1;
3600}
3601
3602
3603
3604
3605
3606
3607static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3608 struct btrfs_free_cluster *last_ptr,
3609 struct find_free_extent_ctl *ffe_ctl)
3610{
3611 u64 offset;
3612
3613
3614
3615
3616
3617
3618 if (unlikely(last_ptr)) {
3619 spin_lock(&last_ptr->lock);
3620 last_ptr->fragmented = 1;
3621 spin_unlock(&last_ptr->lock);
3622 }
3623 if (ffe_ctl->cached) {
3624 struct btrfs_free_space_ctl *free_space_ctl;
3625
3626 free_space_ctl = bg->free_space_ctl;
3627 spin_lock(&free_space_ctl->tree_lock);
3628 if (free_space_ctl->free_space <
3629 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3630 ffe_ctl->empty_size) {
3631 ffe_ctl->total_free_space = max_t(u64,
3632 ffe_ctl->total_free_space,
3633 free_space_ctl->free_space);
3634 spin_unlock(&free_space_ctl->tree_lock);
3635 return 1;
3636 }
3637 spin_unlock(&free_space_ctl->tree_lock);
3638 }
3639
3640 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3641 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3642 &ffe_ctl->max_extent_size);
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3654 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3655 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3656 ffe_ctl->empty_size);
3657 ffe_ctl->retry_unclustered = true;
3658 return -EAGAIN;
3659 } else if (!offset) {
3660 return 1;
3661 }
3662 ffe_ctl->found_offset = offset;
3663 return 0;
3664}
3665
3666
3667
3668
3669
3670
3671static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3672 struct btrfs_free_cluster *last_ptr,
3673 struct btrfs_key *ins,
3674 struct find_free_extent_ctl *ffe_ctl,
3675 int full_search, bool use_cluster)
3676{
3677 struct btrfs_root *root = fs_info->extent_root;
3678 int ret;
3679
3680 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3681 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3682 ffe_ctl->orig_have_caching_bg = true;
3683
3684 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3685 ffe_ctl->have_caching_bg)
3686 return 1;
3687
3688 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3689 return 1;
3690
3691 if (ins->objectid) {
3692 if (!use_cluster && last_ptr) {
3693 spin_lock(&last_ptr->lock);
3694 last_ptr->window_start = ins->objectid;
3695 spin_unlock(&last_ptr->lock);
3696 }
3697 return 0;
3698 }
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3709 ffe_ctl->index = 0;
3710 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3711
3712
3713
3714
3715
3716 if (ffe_ctl->orig_have_caching_bg || !full_search)
3717 ffe_ctl->loop = LOOP_CACHING_WAIT;
3718 else
3719 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3720 } else {
3721 ffe_ctl->loop++;
3722 }
3723
3724 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3725 struct btrfs_trans_handle *trans;
3726 int exist = 0;
3727
3728 trans = current->journal_info;
3729 if (trans)
3730 exist = 1;
3731 else
3732 trans = btrfs_join_transaction(root);
3733
3734 if (IS_ERR(trans)) {
3735 ret = PTR_ERR(trans);
3736 return ret;
3737 }
3738
3739 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3740 CHUNK_ALLOC_FORCE);
3741
3742
3743
3744
3745
3746
3747 if (ret == -ENOSPC)
3748 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3749
3750
3751 if (ret < 0 && ret != -ENOSPC)
3752 btrfs_abort_transaction(trans, ret);
3753 else
3754 ret = 0;
3755 if (!exist)
3756 btrfs_end_transaction(trans);
3757 if (ret)
3758 return ret;
3759 }
3760
3761 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
3762
3763
3764
3765
3766 if (ffe_ctl->empty_size == 0 &&
3767 ffe_ctl->empty_cluster == 0)
3768 return -ENOSPC;
3769 ffe_ctl->empty_size = 0;
3770 ffe_ctl->empty_cluster = 0;
3771 }
3772 return 1;
3773 }
3774 return -ENOSPC;
3775}
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
3803 u64 ram_bytes, u64 num_bytes, u64 empty_size,
3804 u64 hint_byte, struct btrfs_key *ins,
3805 u64 flags, int delalloc)
3806{
3807 int ret = 0;
3808 int cache_block_group_error = 0;
3809 struct btrfs_free_cluster *last_ptr = NULL;
3810 struct btrfs_block_group *block_group = NULL;
3811 struct find_free_extent_ctl ffe_ctl = {0};
3812 struct btrfs_space_info *space_info;
3813 bool use_cluster = true;
3814 bool full_search = false;
3815
3816 WARN_ON(num_bytes < fs_info->sectorsize);
3817
3818 ffe_ctl.num_bytes = num_bytes;
3819 ffe_ctl.empty_size = empty_size;
3820 ffe_ctl.flags = flags;
3821 ffe_ctl.search_start = 0;
3822 ffe_ctl.retry_clustered = false;
3823 ffe_ctl.retry_unclustered = false;
3824 ffe_ctl.delalloc = delalloc;
3825 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
3826 ffe_ctl.have_caching_bg = false;
3827 ffe_ctl.orig_have_caching_bg = false;
3828 ffe_ctl.found_offset = 0;
3829
3830 ins->type = BTRFS_EXTENT_ITEM_KEY;
3831 ins->objectid = 0;
3832 ins->offset = 0;
3833
3834 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
3835
3836 space_info = btrfs_find_space_info(fs_info, flags);
3837 if (!space_info) {
3838 btrfs_err(fs_info, "No space info for %llu", flags);
3839 return -ENOSPC;
3840 }
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852 if (unlikely(space_info->max_extent_size)) {
3853 spin_lock(&space_info->lock);
3854 if (space_info->max_extent_size &&
3855 num_bytes > space_info->max_extent_size) {
3856 ins->offset = space_info->max_extent_size;
3857 spin_unlock(&space_info->lock);
3858 return -ENOSPC;
3859 } else if (space_info->max_extent_size) {
3860 use_cluster = false;
3861 }
3862 spin_unlock(&space_info->lock);
3863 }
3864
3865 last_ptr = fetch_cluster_info(fs_info, space_info,
3866 &ffe_ctl.empty_cluster);
3867 if (last_ptr) {
3868 spin_lock(&last_ptr->lock);
3869 if (last_ptr->block_group)
3870 hint_byte = last_ptr->window_start;
3871 if (last_ptr->fragmented) {
3872
3873
3874
3875
3876
3877 hint_byte = last_ptr->window_start;
3878 use_cluster = false;
3879 }
3880 spin_unlock(&last_ptr->lock);
3881 }
3882
3883 ffe_ctl.search_start = max(ffe_ctl.search_start,
3884 first_logical_byte(fs_info, 0));
3885 ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
3886 if (ffe_ctl.search_start == hint_byte) {
3887 block_group = btrfs_lookup_block_group(fs_info,
3888 ffe_ctl.search_start);
3889
3890
3891
3892
3893
3894
3895
3896 if (block_group && block_group_bits(block_group, flags) &&
3897 block_group->cached != BTRFS_CACHE_NO) {
3898 down_read(&space_info->groups_sem);
3899 if (list_empty(&block_group->list) ||
3900 block_group->ro) {
3901
3902
3903
3904
3905
3906
3907 btrfs_put_block_group(block_group);
3908 up_read(&space_info->groups_sem);
3909 } else {
3910 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
3911 block_group->flags);
3912 btrfs_lock_block_group(block_group, delalloc);
3913 goto have_block_group;
3914 }
3915 } else if (block_group) {
3916 btrfs_put_block_group(block_group);
3917 }
3918 }
3919search:
3920 ffe_ctl.have_caching_bg = false;
3921 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
3922 ffe_ctl.index == 0)
3923 full_search = true;
3924 down_read(&space_info->groups_sem);
3925 list_for_each_entry(block_group,
3926 &space_info->block_groups[ffe_ctl.index], list) {
3927
3928 if (unlikely(block_group->ro))
3929 continue;
3930
3931 btrfs_grab_block_group(block_group, delalloc);
3932 ffe_ctl.search_start = block_group->start;
3933
3934
3935
3936
3937
3938
3939 if (!block_group_bits(block_group, flags)) {
3940 u64 extra = BTRFS_BLOCK_GROUP_DUP |
3941 BTRFS_BLOCK_GROUP_RAID1_MASK |
3942 BTRFS_BLOCK_GROUP_RAID56_MASK |
3943 BTRFS_BLOCK_GROUP_RAID10;
3944
3945
3946
3947
3948
3949
3950 if ((flags & extra) && !(block_group->flags & extra))
3951 goto loop;
3952
3953
3954
3955
3956
3957
3958 btrfs_release_block_group(block_group, delalloc);
3959 continue;
3960 }
3961
3962have_block_group:
3963 ffe_ctl.cached = btrfs_block_group_done(block_group);
3964 if (unlikely(!ffe_ctl.cached)) {
3965 ffe_ctl.have_caching_bg = true;
3966 ret = btrfs_cache_block_group(block_group, 0);
3967
3968
3969
3970
3971
3972
3973
3974
3975 if (ret < 0) {
3976 if (!cache_block_group_error)
3977 cache_block_group_error = ret;
3978 ret = 0;
3979 goto loop;
3980 }
3981 ret = 0;
3982 }
3983
3984 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
3985 goto loop;
3986
3987
3988
3989
3990
3991 if (last_ptr && use_cluster) {
3992 struct btrfs_block_group *cluster_bg = NULL;
3993
3994 ret = find_free_extent_clustered(block_group, last_ptr,
3995 &ffe_ctl, &cluster_bg);
3996
3997 if (ret == 0) {
3998 if (cluster_bg && cluster_bg != block_group) {
3999 btrfs_release_block_group(block_group,
4000 delalloc);
4001 block_group = cluster_bg;
4002 }
4003 goto checks;
4004 } else if (ret == -EAGAIN) {
4005 goto have_block_group;
4006 } else if (ret > 0) {
4007 goto loop;
4008 }
4009
4010 }
4011
4012 ret = find_free_extent_unclustered(block_group, last_ptr,
4013 &ffe_ctl);
4014 if (ret == -EAGAIN)
4015 goto have_block_group;
4016 else if (ret > 0)
4017 goto loop;
4018
4019checks:
4020 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
4021 fs_info->stripesize);
4022
4023
4024 if (ffe_ctl.search_start + num_bytes >
4025 block_group->start + block_group->length) {
4026 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4027 num_bytes);
4028 goto loop;
4029 }
4030
4031 if (ffe_ctl.found_offset < ffe_ctl.search_start)
4032 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4033 ffe_ctl.search_start - ffe_ctl.found_offset);
4034
4035 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
4036 num_bytes, delalloc);
4037 if (ret == -EAGAIN) {
4038 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4039 num_bytes);
4040 goto loop;
4041 }
4042 btrfs_inc_block_group_reservations(block_group);
4043
4044
4045 ins->objectid = ffe_ctl.search_start;
4046 ins->offset = num_bytes;
4047
4048 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4049 num_bytes);
4050 btrfs_release_block_group(block_group, delalloc);
4051 break;
4052loop:
4053 ffe_ctl.retry_clustered = false;
4054 ffe_ctl.retry_unclustered = false;
4055 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
4056 ffe_ctl.index);
4057 btrfs_release_block_group(block_group, delalloc);
4058 cond_resched();
4059 }
4060 up_read(&space_info->groups_sem);
4061
4062 ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
4063 full_search, use_cluster);
4064 if (ret > 0)
4065 goto search;
4066
4067 if (ret == -ENOSPC && !cache_block_group_error) {
4068
4069
4070
4071
4072 if (!ffe_ctl.max_extent_size)
4073 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4074 spin_lock(&space_info->lock);
4075 space_info->max_extent_size = ffe_ctl.max_extent_size;
4076 spin_unlock(&space_info->lock);
4077 ins->offset = ffe_ctl.max_extent_size;
4078 } else if (ret == -ENOSPC) {
4079 ret = cache_block_group_error;
4080 }
4081 return ret;
4082}
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4130 u64 num_bytes, u64 min_alloc_size,
4131 u64 empty_size, u64 hint_byte,
4132 struct btrfs_key *ins, int is_data, int delalloc)
4133{
4134 struct btrfs_fs_info *fs_info = root->fs_info;
4135 bool final_tried = num_bytes == min_alloc_size;
4136 u64 flags;
4137 int ret;
4138
4139 flags = get_alloc_profile_by_root(root, is_data);
4140again:
4141 WARN_ON(num_bytes < fs_info->sectorsize);
4142 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
4143 hint_byte, ins, flags, delalloc);
4144 if (!ret && !is_data) {
4145 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4146 } else if (ret == -ENOSPC) {
4147 if (!final_tried && ins->offset) {
4148 num_bytes = min(num_bytes >> 1, ins->offset);
4149 num_bytes = round_down(num_bytes,
4150 fs_info->sectorsize);
4151 num_bytes = max(num_bytes, min_alloc_size);
4152 ram_bytes = num_bytes;
4153 if (num_bytes == min_alloc_size)
4154 final_tried = true;
4155 goto again;
4156 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4157 struct btrfs_space_info *sinfo;
4158
4159 sinfo = btrfs_find_space_info(fs_info, flags);
4160 btrfs_err(fs_info,
4161 "allocation failed flags %llu, wanted %llu",
4162 flags, num_bytes);
4163 if (sinfo)
4164 btrfs_dump_space_info(fs_info, sinfo,
4165 num_bytes, 1);
4166 }
4167 }
4168
4169 return ret;
4170}
4171
4172int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4173 u64 start, u64 len, int delalloc)
4174{
4175 struct btrfs_block_group *cache;
4176
4177 cache = btrfs_lookup_block_group(fs_info, start);
4178 if (!cache) {
4179 btrfs_err(fs_info, "Unable to find block group for %llu",
4180 start);
4181 return -ENOSPC;
4182 }
4183
4184 btrfs_add_free_space(cache, start, len);
4185 btrfs_free_reserved_bytes(cache, len, delalloc);
4186 trace_btrfs_reserved_extent_free(fs_info, start, len);
4187
4188 btrfs_put_block_group(cache);
4189 return 0;
4190}
4191
4192int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
4193{
4194 struct btrfs_block_group *cache;
4195 int ret = 0;
4196
4197 cache = btrfs_lookup_block_group(fs_info, start);
4198 if (!cache) {
4199 btrfs_err(fs_info, "unable to find block group for %llu", start);
4200 return -ENOSPC;
4201 }
4202
4203 ret = pin_down_extent(cache, start, len, 1);
4204 btrfs_put_block_group(cache);
4205 return ret;
4206}
4207
4208static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4209 u64 parent, u64 root_objectid,
4210 u64 flags, u64 owner, u64 offset,
4211 struct btrfs_key *ins, int ref_mod)
4212{
4213 struct btrfs_fs_info *fs_info = trans->fs_info;
4214 int ret;
4215 struct btrfs_extent_item *extent_item;
4216 struct btrfs_extent_inline_ref *iref;
4217 struct btrfs_path *path;
4218 struct extent_buffer *leaf;
4219 int type;
4220 u32 size;
4221
4222 if (parent > 0)
4223 type = BTRFS_SHARED_DATA_REF_KEY;
4224 else
4225 type = BTRFS_EXTENT_DATA_REF_KEY;
4226
4227 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4228
4229 path = btrfs_alloc_path();
4230 if (!path)
4231 return -ENOMEM;
4232
4233 path->leave_spinning = 1;
4234 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4235 ins, size);
4236 if (ret) {
4237 btrfs_free_path(path);
4238 return ret;
4239 }
4240
4241 leaf = path->nodes[0];
4242 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4243 struct btrfs_extent_item);
4244 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4245 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4246 btrfs_set_extent_flags(leaf, extent_item,
4247 flags | BTRFS_EXTENT_FLAG_DATA);
4248
4249 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4250 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4251 if (parent > 0) {
4252 struct btrfs_shared_data_ref *ref;
4253 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4254 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4255 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4256 } else {
4257 struct btrfs_extent_data_ref *ref;
4258 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4259 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4260 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4261 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4262 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4263 }
4264
4265 btrfs_mark_buffer_dirty(path->nodes[0]);
4266 btrfs_free_path(path);
4267
4268 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4269 if (ret)
4270 return ret;
4271
4272 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4273 if (ret) {
4274 btrfs_err(fs_info, "update block group failed for %llu %llu",
4275 ins->objectid, ins->offset);
4276 BUG();
4277 }
4278 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4279 return ret;
4280}
4281
4282static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4283 struct btrfs_delayed_ref_node *node,
4284 struct btrfs_delayed_extent_op *extent_op)
4285{
4286 struct btrfs_fs_info *fs_info = trans->fs_info;
4287 int ret;
4288 struct btrfs_extent_item *extent_item;
4289 struct btrfs_key extent_key;
4290 struct btrfs_tree_block_info *block_info;
4291 struct btrfs_extent_inline_ref *iref;
4292 struct btrfs_path *path;
4293 struct extent_buffer *leaf;
4294 struct btrfs_delayed_tree_ref *ref;
4295 u32 size = sizeof(*extent_item) + sizeof(*iref);
4296 u64 num_bytes;
4297 u64 flags = extent_op->flags_to_set;
4298 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4299
4300 ref = btrfs_delayed_node_to_tree_ref(node);
4301
4302 extent_key.objectid = node->bytenr;
4303 if (skinny_metadata) {
4304 extent_key.offset = ref->level;
4305 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4306 num_bytes = fs_info->nodesize;
4307 } else {
4308 extent_key.offset = node->num_bytes;
4309 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4310 size += sizeof(*block_info);
4311 num_bytes = node->num_bytes;
4312 }
4313
4314 path = btrfs_alloc_path();
4315 if (!path)
4316 return -ENOMEM;
4317
4318 path->leave_spinning = 1;
4319 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4320 &extent_key, size);
4321 if (ret) {
4322 btrfs_free_path(path);
4323 return ret;
4324 }
4325
4326 leaf = path->nodes[0];
4327 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4328 struct btrfs_extent_item);
4329 btrfs_set_extent_refs(leaf, extent_item, 1);
4330 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4331 btrfs_set_extent_flags(leaf, extent_item,
4332 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4333
4334 if (skinny_metadata) {
4335 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4336 } else {
4337 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4338 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4339 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4340 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4341 }
4342
4343 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4344 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4345 btrfs_set_extent_inline_ref_type(leaf, iref,
4346 BTRFS_SHARED_BLOCK_REF_KEY);
4347 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4348 } else {
4349 btrfs_set_extent_inline_ref_type(leaf, iref,
4350 BTRFS_TREE_BLOCK_REF_KEY);
4351 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4352 }
4353
4354 btrfs_mark_buffer_dirty(leaf);
4355 btrfs_free_path(path);
4356
4357 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4358 num_bytes);
4359 if (ret)
4360 return ret;
4361
4362 ret = btrfs_update_block_group(trans, extent_key.objectid,
4363 fs_info->nodesize, 1);
4364 if (ret) {
4365 btrfs_err(fs_info, "update block group failed for %llu %llu",
4366 extent_key.objectid, extent_key.offset);
4367 BUG();
4368 }
4369
4370 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4371 fs_info->nodesize);
4372 return ret;
4373}
4374
4375int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4376 struct btrfs_root *root, u64 owner,
4377 u64 offset, u64 ram_bytes,
4378 struct btrfs_key *ins)
4379{
4380 struct btrfs_ref generic_ref = { 0 };
4381 int ret;
4382
4383 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4384
4385 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4386 ins->objectid, ins->offset, 0);
4387 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4388 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4389 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
4390 ram_bytes, NULL, NULL);
4391 return ret;
4392}
4393
4394
4395
4396
4397
4398
4399int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4400 u64 root_objectid, u64 owner, u64 offset,
4401 struct btrfs_key *ins)
4402{
4403 struct btrfs_fs_info *fs_info = trans->fs_info;
4404 int ret;
4405 struct btrfs_block_group *block_group;
4406 struct btrfs_space_info *space_info;
4407
4408
4409
4410
4411
4412 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4413 ret = __exclude_logged_extent(fs_info, ins->objectid,
4414 ins->offset);
4415 if (ret)
4416 return ret;
4417 }
4418
4419 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4420 if (!block_group)
4421 return -EINVAL;
4422
4423 space_info = block_group->space_info;
4424 spin_lock(&space_info->lock);
4425 spin_lock(&block_group->lock);
4426 space_info->bytes_reserved += ins->offset;
4427 block_group->reserved += ins->offset;
4428 spin_unlock(&block_group->lock);
4429 spin_unlock(&space_info->lock);
4430
4431 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4432 offset, ins, 1);
4433 if (ret)
4434 btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1);
4435 btrfs_put_block_group(block_group);
4436 return ret;
4437}
4438
4439static struct extent_buffer *
4440btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4441 u64 bytenr, int level, u64 owner)
4442{
4443 struct btrfs_fs_info *fs_info = root->fs_info;
4444 struct extent_buffer *buf;
4445
4446 buf = btrfs_find_create_tree_block(fs_info, bytenr);
4447 if (IS_ERR(buf))
4448 return buf;
4449
4450
4451
4452
4453
4454
4455 if (buf->lock_owner == current->pid) {
4456 btrfs_err_rl(fs_info,
4457"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4458 buf->start, btrfs_header_owner(buf), current->pid);
4459 free_extent_buffer(buf);
4460 return ERR_PTR(-EUCLEAN);
4461 }
4462
4463 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
4464 btrfs_tree_lock(buf);
4465 btrfs_clean_tree_block(buf);
4466 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4467
4468 btrfs_set_lock_blocking_write(buf);
4469 set_extent_buffer_uptodate(buf);
4470
4471 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4472 btrfs_set_header_level(buf, level);
4473 btrfs_set_header_bytenr(buf, buf->start);
4474 btrfs_set_header_generation(buf, trans->transid);
4475 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4476 btrfs_set_header_owner(buf, owner);
4477 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4478 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4479 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4480 buf->log_index = root->log_transid % 2;
4481
4482
4483
4484
4485 if (buf->log_index == 0)
4486 set_extent_dirty(&root->dirty_log_pages, buf->start,
4487 buf->start + buf->len - 1, GFP_NOFS);
4488 else
4489 set_extent_new(&root->dirty_log_pages, buf->start,
4490 buf->start + buf->len - 1);
4491 } else {
4492 buf->log_index = -1;
4493 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4494 buf->start + buf->len - 1, GFP_NOFS);
4495 }
4496 trans->dirty = true;
4497
4498 return buf;
4499}
4500
4501
4502
4503
4504
4505struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4506 struct btrfs_root *root,
4507 u64 parent, u64 root_objectid,
4508 const struct btrfs_disk_key *key,
4509 int level, u64 hint,
4510 u64 empty_size)
4511{
4512 struct btrfs_fs_info *fs_info = root->fs_info;
4513 struct btrfs_key ins;
4514 struct btrfs_block_rsv *block_rsv;
4515 struct extent_buffer *buf;
4516 struct btrfs_delayed_extent_op *extent_op;
4517 struct btrfs_ref generic_ref = { 0 };
4518 u64 flags = 0;
4519 int ret;
4520 u32 blocksize = fs_info->nodesize;
4521 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4522
4523#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4524 if (btrfs_is_testing(fs_info)) {
4525 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4526 level, root_objectid);
4527 if (!IS_ERR(buf))
4528 root->alloc_bytenr += blocksize;
4529 return buf;
4530 }
4531#endif
4532
4533 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4534 if (IS_ERR(block_rsv))
4535 return ERR_CAST(block_rsv);
4536
4537 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4538 empty_size, hint, &ins, 0, 0);
4539 if (ret)
4540 goto out_unuse;
4541
4542 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4543 root_objectid);
4544 if (IS_ERR(buf)) {
4545 ret = PTR_ERR(buf);
4546 goto out_free_reserved;
4547 }
4548
4549 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4550 if (parent == 0)
4551 parent = ins.objectid;
4552 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4553 } else
4554 BUG_ON(parent > 0);
4555
4556 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4557 extent_op = btrfs_alloc_delayed_extent_op();
4558 if (!extent_op) {
4559 ret = -ENOMEM;
4560 goto out_free_buf;
4561 }
4562 if (key)
4563 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4564 else
4565 memset(&extent_op->key, 0, sizeof(extent_op->key));
4566 extent_op->flags_to_set = flags;
4567 extent_op->update_key = skinny_metadata ? false : true;
4568 extent_op->update_flags = true;
4569 extent_op->is_data = false;
4570 extent_op->level = level;
4571
4572 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4573 ins.objectid, ins.offset, parent);
4574 generic_ref.real_root = root->root_key.objectid;
4575 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4576 btrfs_ref_tree_mod(fs_info, &generic_ref);
4577 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
4578 extent_op, NULL, NULL);
4579 if (ret)
4580 goto out_free_delayed;
4581 }
4582 return buf;
4583
4584out_free_delayed:
4585 btrfs_free_delayed_extent_op(extent_op);
4586out_free_buf:
4587 free_extent_buffer(buf);
4588out_free_reserved:
4589 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4590out_unuse:
4591 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4592 return ERR_PTR(ret);
4593}
4594
4595struct walk_control {
4596 u64 refs[BTRFS_MAX_LEVEL];
4597 u64 flags[BTRFS_MAX_LEVEL];
4598 struct btrfs_key update_progress;
4599 struct btrfs_key drop_progress;
4600 int drop_level;
4601 int stage;
4602 int level;
4603 int shared_level;
4604 int update_ref;
4605 int keep_locks;
4606 int reada_slot;
4607 int reada_count;
4608 int restarted;
4609};
4610
4611#define DROP_REFERENCE 1
4612#define UPDATE_BACKREF 2
4613
4614static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4615 struct btrfs_root *root,
4616 struct walk_control *wc,
4617 struct btrfs_path *path)
4618{
4619 struct btrfs_fs_info *fs_info = root->fs_info;
4620 u64 bytenr;
4621 u64 generation;
4622 u64 refs;
4623 u64 flags;
4624 u32 nritems;
4625 struct btrfs_key key;
4626 struct extent_buffer *eb;
4627 int ret;
4628 int slot;
4629 int nread = 0;
4630
4631 if (path->slots[wc->level] < wc->reada_slot) {
4632 wc->reada_count = wc->reada_count * 2 / 3;
4633 wc->reada_count = max(wc->reada_count, 2);
4634 } else {
4635 wc->reada_count = wc->reada_count * 3 / 2;
4636 wc->reada_count = min_t(int, wc->reada_count,
4637 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4638 }
4639
4640 eb = path->nodes[wc->level];
4641 nritems = btrfs_header_nritems(eb);
4642
4643 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4644 if (nread >= wc->reada_count)
4645 break;
4646
4647 cond_resched();
4648 bytenr = btrfs_node_blockptr(eb, slot);
4649 generation = btrfs_node_ptr_generation(eb, slot);
4650
4651 if (slot == path->slots[wc->level])
4652 goto reada;
4653
4654 if (wc->stage == UPDATE_BACKREF &&
4655 generation <= root->root_key.offset)
4656 continue;
4657
4658
4659 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4660 wc->level - 1, 1, &refs,
4661 &flags);
4662
4663 if (ret < 0)
4664 continue;
4665 BUG_ON(refs == 0);
4666
4667 if (wc->stage == DROP_REFERENCE) {
4668 if (refs == 1)
4669 goto reada;
4670
4671 if (wc->level == 1 &&
4672 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4673 continue;
4674 if (!wc->update_ref ||
4675 generation <= root->root_key.offset)
4676 continue;
4677 btrfs_node_key_to_cpu(eb, &key, slot);
4678 ret = btrfs_comp_cpu_keys(&key,
4679 &wc->update_progress);
4680 if (ret < 0)
4681 continue;
4682 } else {
4683 if (wc->level == 1 &&
4684 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4685 continue;
4686 }
4687reada:
4688 readahead_tree_block(fs_info, bytenr);
4689 nread++;
4690 }
4691 wc->reada_slot = slot;
4692}
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4703 struct btrfs_root *root,
4704 struct btrfs_path *path,
4705 struct walk_control *wc, int lookup_info)
4706{
4707 struct btrfs_fs_info *fs_info = root->fs_info;
4708 int level = wc->level;
4709 struct extent_buffer *eb = path->nodes[level];
4710 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4711 int ret;
4712
4713 if (wc->stage == UPDATE_BACKREF &&
4714 btrfs_header_owner(eb) != root->root_key.objectid)
4715 return 1;
4716
4717
4718
4719
4720
4721 if (lookup_info &&
4722 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4723 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4724 BUG_ON(!path->locks[level]);
4725 ret = btrfs_lookup_extent_info(trans, fs_info,
4726 eb->start, level, 1,
4727 &wc->refs[level],
4728 &wc->flags[level]);
4729 BUG_ON(ret == -ENOMEM);
4730 if (ret)
4731 return ret;
4732 BUG_ON(wc->refs[level] == 0);
4733 }
4734
4735 if (wc->stage == DROP_REFERENCE) {
4736 if (wc->refs[level] > 1)
4737 return 1;
4738
4739 if (path->locks[level] && !wc->keep_locks) {
4740 btrfs_tree_unlock_rw(eb, path->locks[level]);
4741 path->locks[level] = 0;
4742 }
4743 return 0;
4744 }
4745
4746
4747 if (!(wc->flags[level] & flag)) {
4748 BUG_ON(!path->locks[level]);
4749 ret = btrfs_inc_ref(trans, root, eb, 1);
4750 BUG_ON(ret);
4751 ret = btrfs_dec_ref(trans, root, eb, 0);
4752 BUG_ON(ret);
4753 ret = btrfs_set_disk_extent_flags(trans, eb->start,
4754 eb->len, flag,
4755 btrfs_header_level(eb), 0);
4756 BUG_ON(ret);
4757 wc->flags[level] |= flag;
4758 }
4759
4760
4761
4762
4763
4764 if (path->locks[level] && level > 0) {
4765 btrfs_tree_unlock_rw(eb, path->locks[level]);
4766 path->locks[level] = 0;
4767 }
4768 return 0;
4769}
4770
4771
4772
4773
4774
4775static int check_ref_exists(struct btrfs_trans_handle *trans,
4776 struct btrfs_root *root, u64 bytenr, u64 parent,
4777 int level)
4778{
4779 struct btrfs_path *path;
4780 struct btrfs_extent_inline_ref *iref;
4781 int ret;
4782
4783 path = btrfs_alloc_path();
4784 if (!path)
4785 return -ENOMEM;
4786
4787 ret = lookup_extent_backref(trans, path, &iref, bytenr,
4788 root->fs_info->nodesize, parent,
4789 root->root_key.objectid, level, 0);
4790 btrfs_free_path(path);
4791 if (ret == -ENOENT)
4792 return 0;
4793 if (ret < 0)
4794 return ret;
4795 return 1;
4796}
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4812 struct btrfs_root *root,
4813 struct btrfs_path *path,
4814 struct walk_control *wc, int *lookup_info)
4815{
4816 struct btrfs_fs_info *fs_info = root->fs_info;
4817 u64 bytenr;
4818 u64 generation;
4819 u64 parent;
4820 struct btrfs_key key;
4821 struct btrfs_key first_key;
4822 struct btrfs_ref ref = { 0 };
4823 struct extent_buffer *next;
4824 int level = wc->level;
4825 int reada = 0;
4826 int ret = 0;
4827 bool need_account = false;
4828
4829 generation = btrfs_node_ptr_generation(path->nodes[level],
4830 path->slots[level]);
4831
4832
4833
4834
4835
4836 if (wc->stage == UPDATE_BACKREF &&
4837 generation <= root->root_key.offset) {
4838 *lookup_info = 1;
4839 return 1;
4840 }
4841
4842 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4843 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
4844 path->slots[level]);
4845
4846 next = find_extent_buffer(fs_info, bytenr);
4847 if (!next) {
4848 next = btrfs_find_create_tree_block(fs_info, bytenr);
4849 if (IS_ERR(next))
4850 return PTR_ERR(next);
4851
4852 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
4853 level - 1);
4854 reada = 1;
4855 }
4856 btrfs_tree_lock(next);
4857 btrfs_set_lock_blocking_write(next);
4858
4859 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
4860 &wc->refs[level - 1],
4861 &wc->flags[level - 1]);
4862 if (ret < 0)
4863 goto out_unlock;
4864
4865 if (unlikely(wc->refs[level - 1] == 0)) {
4866 btrfs_err(fs_info, "Missing references.");
4867 ret = -EIO;
4868 goto out_unlock;
4869 }
4870 *lookup_info = 0;
4871
4872 if (wc->stage == DROP_REFERENCE) {
4873 if (wc->refs[level - 1] > 1) {
4874 need_account = true;
4875 if (level == 1 &&
4876 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4877 goto skip;
4878
4879 if (!wc->update_ref ||
4880 generation <= root->root_key.offset)
4881 goto skip;
4882
4883 btrfs_node_key_to_cpu(path->nodes[level], &key,
4884 path->slots[level]);
4885 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
4886 if (ret < 0)
4887 goto skip;
4888
4889 wc->stage = UPDATE_BACKREF;
4890 wc->shared_level = level - 1;
4891 }
4892 } else {
4893 if (level == 1 &&
4894 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4895 goto skip;
4896 }
4897
4898 if (!btrfs_buffer_uptodate(next, generation, 0)) {
4899 btrfs_tree_unlock(next);
4900 free_extent_buffer(next);
4901 next = NULL;
4902 *lookup_info = 1;
4903 }
4904
4905 if (!next) {
4906 if (reada && level == 1)
4907 reada_walk_down(trans, root, wc, path);
4908 next = read_tree_block(fs_info, bytenr, generation, level - 1,
4909 &first_key);
4910 if (IS_ERR(next)) {
4911 return PTR_ERR(next);
4912 } else if (!extent_buffer_uptodate(next)) {
4913 free_extent_buffer(next);
4914 return -EIO;
4915 }
4916 btrfs_tree_lock(next);
4917 btrfs_set_lock_blocking_write(next);
4918 }
4919
4920 level--;
4921 ASSERT(level == btrfs_header_level(next));
4922 if (level != btrfs_header_level(next)) {
4923 btrfs_err(root->fs_info, "mismatched level");
4924 ret = -EIO;
4925 goto out_unlock;
4926 }
4927 path->nodes[level] = next;
4928 path->slots[level] = 0;
4929 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
4930 wc->level = level;
4931 if (wc->level == 1)
4932 wc->reada_slot = 0;
4933 return 0;
4934skip:
4935 wc->refs[level - 1] = 0;
4936 wc->flags[level - 1] = 0;
4937 if (wc->stage == DROP_REFERENCE) {
4938 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4939 parent = path->nodes[level]->start;
4940 } else {
4941 ASSERT(root->root_key.objectid ==
4942 btrfs_header_owner(path->nodes[level]));
4943 if (root->root_key.objectid !=
4944 btrfs_header_owner(path->nodes[level])) {
4945 btrfs_err(root->fs_info,
4946 "mismatched block owner");
4947 ret = -EIO;
4948 goto out_unlock;
4949 }
4950 parent = 0;
4951 }
4952
4953
4954
4955
4956
4957
4958
4959 if (wc->restarted) {
4960 ret = check_ref_exists(trans, root, bytenr, parent,
4961 level - 1);
4962 if (ret < 0)
4963 goto out_unlock;
4964 if (ret == 0)
4965 goto no_delete;
4966 ret = 0;
4967 wc->restarted = 0;
4968 }
4969
4970
4971
4972
4973
4974
4975 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
4976 need_account) {
4977 ret = btrfs_qgroup_trace_subtree(trans, next,
4978 generation, level - 1);
4979 if (ret) {
4980 btrfs_err_rl(fs_info,
4981 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
4982 ret);
4983 }
4984 }
4985
4986
4987
4988
4989
4990
4991
4992 wc->drop_level = level;
4993 find_next_key(path, level, &wc->drop_progress);
4994
4995 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
4996 fs_info->nodesize, parent);
4997 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
4998 ret = btrfs_free_extent(trans, &ref);
4999 if (ret)
5000 goto out_unlock;
5001 }
5002no_delete:
5003 *lookup_info = 1;
5004 ret = 1;
5005
5006out_unlock:
5007 btrfs_tree_unlock(next);
5008 free_extent_buffer(next);
5009
5010 return ret;
5011}
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5026 struct btrfs_root *root,
5027 struct btrfs_path *path,
5028 struct walk_control *wc)
5029{
5030 struct btrfs_fs_info *fs_info = root->fs_info;
5031 int ret;
5032 int level = wc->level;
5033 struct extent_buffer *eb = path->nodes[level];
5034 u64 parent = 0;
5035
5036 if (wc->stage == UPDATE_BACKREF) {
5037 BUG_ON(wc->shared_level < level);
5038 if (level < wc->shared_level)
5039 goto out;
5040
5041 ret = find_next_key(path, level + 1, &wc->update_progress);
5042 if (ret > 0)
5043 wc->update_ref = 0;
5044
5045 wc->stage = DROP_REFERENCE;
5046 wc->shared_level = -1;
5047 path->slots[level] = 0;
5048
5049
5050
5051
5052
5053
5054 if (!path->locks[level]) {
5055 BUG_ON(level == 0);
5056 btrfs_tree_lock(eb);
5057 btrfs_set_lock_blocking_write(eb);
5058 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5059
5060 ret = btrfs_lookup_extent_info(trans, fs_info,
5061 eb->start, level, 1,
5062 &wc->refs[level],
5063 &wc->flags[level]);
5064 if (ret < 0) {
5065 btrfs_tree_unlock_rw(eb, path->locks[level]);
5066 path->locks[level] = 0;
5067 return ret;
5068 }
5069 BUG_ON(wc->refs[level] == 0);
5070 if (wc->refs[level] == 1) {
5071 btrfs_tree_unlock_rw(eb, path->locks[level]);
5072 path->locks[level] = 0;
5073 return 1;
5074 }
5075 }
5076 }
5077
5078
5079 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5080
5081 if (wc->refs[level] == 1) {
5082 if (level == 0) {
5083 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5084 ret = btrfs_dec_ref(trans, root, eb, 1);
5085 else
5086 ret = btrfs_dec_ref(trans, root, eb, 0);
5087 BUG_ON(ret);
5088 if (is_fstree(root->root_key.objectid)) {
5089 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5090 if (ret) {
5091 btrfs_err_rl(fs_info,
5092 "error %d accounting leaf items, quota is out of sync, rescan required",
5093 ret);
5094 }
5095 }
5096 }
5097
5098 if (!path->locks[level] &&
5099 btrfs_header_generation(eb) == trans->transid) {
5100 btrfs_tree_lock(eb);
5101 btrfs_set_lock_blocking_write(eb);
5102 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5103 }
5104 btrfs_clean_tree_block(eb);
5105 }
5106
5107 if (eb == root->node) {
5108 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5109 parent = eb->start;
5110 else if (root->root_key.objectid != btrfs_header_owner(eb))
5111 goto owner_mismatch;
5112 } else {
5113 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5114 parent = path->nodes[level + 1]->start;
5115 else if (root->root_key.objectid !=
5116 btrfs_header_owner(path->nodes[level + 1]))
5117 goto owner_mismatch;
5118 }
5119
5120 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5121out:
5122 wc->refs[level] = 0;
5123 wc->flags[level] = 0;
5124 return 0;
5125
5126owner_mismatch:
5127 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5128 btrfs_header_owner(eb), root->root_key.objectid);
5129 return -EUCLEAN;
5130}
5131
5132static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5133 struct btrfs_root *root,
5134 struct btrfs_path *path,
5135 struct walk_control *wc)
5136{
5137 int level = wc->level;
5138 int lookup_info = 1;
5139 int ret;
5140
5141 while (level >= 0) {
5142 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5143 if (ret > 0)
5144 break;
5145
5146 if (level == 0)
5147 break;
5148
5149 if (path->slots[level] >=
5150 btrfs_header_nritems(path->nodes[level]))
5151 break;
5152
5153 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5154 if (ret > 0) {
5155 path->slots[level]++;
5156 continue;
5157 } else if (ret < 0)
5158 return ret;
5159 level = wc->level;
5160 }
5161 return 0;
5162}
5163
5164static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5165 struct btrfs_root *root,
5166 struct btrfs_path *path,
5167 struct walk_control *wc, int max_level)
5168{
5169 int level = wc->level;
5170 int ret;
5171
5172 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5173 while (level < max_level && path->nodes[level]) {
5174 wc->level = level;
5175 if (path->slots[level] + 1 <
5176 btrfs_header_nritems(path->nodes[level])) {
5177 path->slots[level]++;
5178 return 0;
5179 } else {
5180 ret = walk_up_proc(trans, root, path, wc);
5181 if (ret > 0)
5182 return 0;
5183 if (ret < 0)
5184 return ret;
5185
5186 if (path->locks[level]) {
5187 btrfs_tree_unlock_rw(path->nodes[level],
5188 path->locks[level]);
5189 path->locks[level] = 0;
5190 }
5191 free_extent_buffer(path->nodes[level]);
5192 path->nodes[level] = NULL;
5193 level++;
5194 }
5195 }
5196 return 1;
5197}
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212int btrfs_drop_snapshot(struct btrfs_root *root,
5213 struct btrfs_block_rsv *block_rsv, int update_ref,
5214 int for_reloc)
5215{
5216 struct btrfs_fs_info *fs_info = root->fs_info;
5217 struct btrfs_path *path;
5218 struct btrfs_trans_handle *trans;
5219 struct btrfs_root *tree_root = fs_info->tree_root;
5220 struct btrfs_root_item *root_item = &root->root_item;
5221 struct walk_control *wc;
5222 struct btrfs_key key;
5223 int err = 0;
5224 int ret;
5225 int level;
5226 bool root_dropped = false;
5227
5228 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5229
5230 path = btrfs_alloc_path();
5231 if (!path) {
5232 err = -ENOMEM;
5233 goto out;
5234 }
5235
5236 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5237 if (!wc) {
5238 btrfs_free_path(path);
5239 err = -ENOMEM;
5240 goto out;
5241 }
5242
5243 trans = btrfs_start_transaction(tree_root, 0);
5244 if (IS_ERR(trans)) {
5245 err = PTR_ERR(trans);
5246 goto out_free;
5247 }
5248
5249 err = btrfs_run_delayed_items(trans);
5250 if (err)
5251 goto out_end_trans;
5252
5253 if (block_rsv)
5254 trans->block_rsv = block_rsv;
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264 set_bit(BTRFS_ROOT_DELETING, &root->state);
5265 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5266 level = btrfs_header_level(root->node);
5267 path->nodes[level] = btrfs_lock_root_node(root);
5268 btrfs_set_lock_blocking_write(path->nodes[level]);
5269 path->slots[level] = 0;
5270 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5271 memset(&wc->update_progress, 0,
5272 sizeof(wc->update_progress));
5273 } else {
5274 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5275 memcpy(&wc->update_progress, &key,
5276 sizeof(wc->update_progress));
5277
5278 level = root_item->drop_level;
5279 BUG_ON(level == 0);
5280 path->lowest_level = level;
5281 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5282 path->lowest_level = 0;
5283 if (ret < 0) {
5284 err = ret;
5285 goto out_end_trans;
5286 }
5287 WARN_ON(ret > 0);
5288
5289
5290
5291
5292
5293 btrfs_unlock_up_safe(path, 0);
5294
5295 level = btrfs_header_level(root->node);
5296 while (1) {
5297 btrfs_tree_lock(path->nodes[level]);
5298 btrfs_set_lock_blocking_write(path->nodes[level]);
5299 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5300
5301 ret = btrfs_lookup_extent_info(trans, fs_info,
5302 path->nodes[level]->start,
5303 level, 1, &wc->refs[level],
5304 &wc->flags[level]);
5305 if (ret < 0) {
5306 err = ret;
5307 goto out_end_trans;
5308 }
5309 BUG_ON(wc->refs[level] == 0);
5310
5311 if (level == root_item->drop_level)
5312 break;
5313
5314 btrfs_tree_unlock(path->nodes[level]);
5315 path->locks[level] = 0;
5316 WARN_ON(wc->refs[level] != 1);
5317 level--;
5318 }
5319 }
5320
5321 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5322 wc->level = level;
5323 wc->shared_level = -1;
5324 wc->stage = DROP_REFERENCE;
5325 wc->update_ref = update_ref;
5326 wc->keep_locks = 0;
5327 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5328
5329 while (1) {
5330
5331 ret = walk_down_tree(trans, root, path, wc);
5332 if (ret < 0) {
5333 err = ret;
5334 break;
5335 }
5336
5337 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5338 if (ret < 0) {
5339 err = ret;
5340 break;
5341 }
5342
5343 if (ret > 0) {
5344 BUG_ON(wc->stage != DROP_REFERENCE);
5345 break;
5346 }
5347
5348 if (wc->stage == DROP_REFERENCE) {
5349 wc->drop_level = wc->level;
5350 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5351 &wc->drop_progress,
5352 path->slots[wc->drop_level]);
5353 }
5354 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5355 &wc->drop_progress);
5356 root_item->drop_level = wc->drop_level;
5357
5358 BUG_ON(wc->level == 0);
5359 if (btrfs_should_end_transaction(trans) ||
5360 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5361 ret = btrfs_update_root(trans, tree_root,
5362 &root->root_key,
5363 root_item);
5364 if (ret) {
5365 btrfs_abort_transaction(trans, ret);
5366 err = ret;
5367 goto out_end_trans;
5368 }
5369
5370 btrfs_end_transaction_throttle(trans);
5371 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5372 btrfs_debug(fs_info,
5373 "drop snapshot early exit");
5374 err = -EAGAIN;
5375 goto out_free;
5376 }
5377
5378 trans = btrfs_start_transaction(tree_root, 0);
5379 if (IS_ERR(trans)) {
5380 err = PTR_ERR(trans);
5381 goto out_free;
5382 }
5383 if (block_rsv)
5384 trans->block_rsv = block_rsv;
5385 }
5386 }
5387 btrfs_release_path(path);
5388 if (err)
5389 goto out_end_trans;
5390
5391 ret = btrfs_del_root(trans, &root->root_key);
5392 if (ret) {
5393 btrfs_abort_transaction(trans, ret);
5394 err = ret;
5395 goto out_end_trans;
5396 }
5397
5398 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5399 ret = btrfs_find_root(tree_root, &root->root_key, path,
5400 NULL, NULL);
5401 if (ret < 0) {
5402 btrfs_abort_transaction(trans, ret);
5403 err = ret;
5404 goto out_end_trans;
5405 } else if (ret > 0) {
5406
5407
5408
5409
5410
5411 btrfs_del_orphan_item(trans, tree_root,
5412 root->root_key.objectid);
5413 }
5414 }
5415
5416 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
5417 btrfs_add_dropped_root(trans, root);
5418 } else {
5419 free_extent_buffer(root->node);
5420 free_extent_buffer(root->commit_root);
5421 btrfs_put_fs_root(root);
5422 }
5423 root_dropped = true;
5424out_end_trans:
5425 btrfs_end_transaction_throttle(trans);
5426out_free:
5427 kfree(wc);
5428 btrfs_free_path(path);
5429out:
5430
5431
5432
5433
5434
5435
5436
5437 if (!for_reloc && !root_dropped)
5438 btrfs_add_dead_root(root);
5439 if (err && err != -EAGAIN)
5440 btrfs_handle_fs_error(fs_info, err, NULL);
5441 return err;
5442}
5443
5444
5445
5446
5447
5448
5449
5450int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5451 struct btrfs_root *root,
5452 struct extent_buffer *node,
5453 struct extent_buffer *parent)
5454{
5455 struct btrfs_fs_info *fs_info = root->fs_info;
5456 struct btrfs_path *path;
5457 struct walk_control *wc;
5458 int level;
5459 int parent_level;
5460 int ret = 0;
5461 int wret;
5462
5463 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5464
5465 path = btrfs_alloc_path();
5466 if (!path)
5467 return -ENOMEM;
5468
5469 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5470 if (!wc) {
5471 btrfs_free_path(path);
5472 return -ENOMEM;
5473 }
5474
5475 btrfs_assert_tree_locked(parent);
5476 parent_level = btrfs_header_level(parent);
5477 atomic_inc(&parent->refs);
5478 path->nodes[parent_level] = parent;
5479 path->slots[parent_level] = btrfs_header_nritems(parent);
5480
5481 btrfs_assert_tree_locked(node);
5482 level = btrfs_header_level(node);
5483 path->nodes[level] = node;
5484 path->slots[level] = 0;
5485 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5486
5487 wc->refs[parent_level] = 1;
5488 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5489 wc->level = level;
5490 wc->shared_level = -1;
5491 wc->stage = DROP_REFERENCE;
5492 wc->update_ref = 0;
5493 wc->keep_locks = 1;
5494 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5495
5496 while (1) {
5497 wret = walk_down_tree(trans, root, path, wc);
5498 if (wret < 0) {
5499 ret = wret;
5500 break;
5501 }
5502
5503 wret = walk_up_tree(trans, root, path, wc, parent_level);
5504 if (wret < 0)
5505 ret = wret;
5506 if (wret != 0)
5507 break;
5508 }
5509
5510 kfree(wc);
5511 btrfs_free_path(path);
5512 return ret;
5513}
5514
5515
5516
5517
5518
5519u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5520{
5521 struct btrfs_block_group *block_group;
5522 u64 free_bytes = 0;
5523 int factor;
5524
5525
5526 if (list_empty(&sinfo->ro_bgs))
5527 return 0;
5528
5529 spin_lock(&sinfo->lock);
5530 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5531 spin_lock(&block_group->lock);
5532
5533 if (!block_group->ro) {
5534 spin_unlock(&block_group->lock);
5535 continue;
5536 }
5537
5538 factor = btrfs_bg_type_to_factor(block_group->flags);
5539 free_bytes += (block_group->length -
5540 block_group->used) * factor;
5541
5542 spin_unlock(&block_group->lock);
5543 }
5544 spin_unlock(&sinfo->lock);
5545
5546 return free_bytes;
5547}
5548
5549int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5550 u64 start, u64 end)
5551{
5552 return unpin_extent_range(fs_info, start, end, false);
5553}
5554
5555
5556
5557
5558
5559
5560
5561
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
5572
5573
5574
5575static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5576{
5577 u64 start = SZ_1M, len = 0, end = 0;
5578 int ret;
5579
5580 *trimmed = 0;
5581
5582
5583 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5584 return 0;
5585
5586
5587 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5588 return 0;
5589
5590
5591 if (device->total_bytes <= device->bytes_used)
5592 return 0;
5593
5594 ret = 0;
5595
5596 while (1) {
5597 struct btrfs_fs_info *fs_info = device->fs_info;
5598 u64 bytes;
5599
5600 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5601 if (ret)
5602 break;
5603
5604 find_first_clear_extent_bit(&device->alloc_state, start,
5605 &start, &end,
5606 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5607
5608
5609 start = max_t(u64, start, SZ_1M);
5610
5611
5612
5613
5614
5615
5616 end = min(end, device->total_bytes - 1);
5617
5618 len = end - start + 1;
5619
5620
5621 if (!len) {
5622 mutex_unlock(&fs_info->chunk_mutex);
5623 ret = 0;
5624 break;
5625 }
5626
5627 ret = btrfs_issue_discard(device->bdev, start, len,
5628 &bytes);
5629 if (!ret)
5630 set_extent_bits(&device->alloc_state, start,
5631 start + bytes - 1,
5632 CHUNK_TRIMMED);
5633 mutex_unlock(&fs_info->chunk_mutex);
5634
5635 if (ret)
5636 break;
5637
5638 start += len;
5639 *trimmed += bytes;
5640
5641 if (fatal_signal_pending(current)) {
5642 ret = -ERESTARTSYS;
5643 break;
5644 }
5645
5646 cond_resched();
5647 }
5648
5649 return ret;
5650}
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5662{
5663 struct btrfs_block_group *cache = NULL;
5664 struct btrfs_device *device;
5665 struct list_head *devices;
5666 u64 group_trimmed;
5667 u64 range_end = U64_MAX;
5668 u64 start;
5669 u64 end;
5670 u64 trimmed = 0;
5671 u64 bg_failed = 0;
5672 u64 dev_failed = 0;
5673 int bg_ret = 0;
5674 int dev_ret = 0;
5675 int ret = 0;
5676
5677
5678
5679
5680
5681 if (range->len != U64_MAX &&
5682 check_add_overflow(range->start, range->len, &range_end))
5683 return -EINVAL;
5684
5685 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5686 for (; cache; cache = btrfs_next_block_group(cache)) {
5687 if (cache->start >= range_end) {
5688 btrfs_put_block_group(cache);
5689 break;
5690 }
5691
5692 start = max(range->start, cache->start);
5693 end = min(range_end, cache->start + cache->length);
5694
5695 if (end - start >= range->minlen) {
5696 if (!btrfs_block_group_done(cache)) {
5697 ret = btrfs_cache_block_group(cache, 0);
5698 if (ret) {
5699 bg_failed++;
5700 bg_ret = ret;
5701 continue;
5702 }
5703 ret = btrfs_wait_block_group_cache_done(cache);
5704 if (ret) {
5705 bg_failed++;
5706 bg_ret = ret;
5707 continue;
5708 }
5709 }
5710 ret = btrfs_trim_block_group(cache,
5711 &group_trimmed,
5712 start,
5713 end,
5714 range->minlen);
5715
5716 trimmed += group_trimmed;
5717 if (ret) {
5718 bg_failed++;
5719 bg_ret = ret;
5720 continue;
5721 }
5722 }
5723 }
5724
5725 if (bg_failed)
5726 btrfs_warn(fs_info,
5727 "failed to trim %llu block group(s), last error %d",
5728 bg_failed, bg_ret);
5729 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5730 devices = &fs_info->fs_devices->devices;
5731 list_for_each_entry(device, devices, dev_list) {
5732 ret = btrfs_trim_free_extents(device, &group_trimmed);
5733 if (ret) {
5734 dev_failed++;
5735 dev_ret = ret;
5736 break;
5737 }
5738
5739 trimmed += group_trimmed;
5740 }
5741 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5742
5743 if (dev_failed)
5744 btrfs_warn(fs_info,
5745 "failed to trim %llu device(s), last error %d",
5746 dev_failed, dev_ret);
5747 range->len = trimmed;
5748 if (bg_ret)
5749 return bg_ret;
5750 return dev_ret;
5751}
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
5762{
5763 percpu_counter_dec(&root->subv_writers->counter);
5764 cond_wake_up(&root->subv_writers->wait);
5765}
5766
5767int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
5768{
5769 if (atomic_read(&root->will_be_snapshotted))
5770 return 0;
5771
5772 percpu_counter_inc(&root->subv_writers->counter);
5773
5774
5775
5776 smp_mb();
5777 if (atomic_read(&root->will_be_snapshotted)) {
5778 btrfs_end_write_no_snapshotting(root);
5779 return 0;
5780 }
5781 return 1;
5782}
5783
5784void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
5785{
5786 while (true) {
5787 int ret;
5788
5789 ret = btrfs_start_write_no_snapshotting(root);
5790 if (ret)
5791 break;
5792 wait_var_event(&root->will_be_snapshotted,
5793 !atomic_read(&root->will_be_snapshotted));
5794 }
5795}
5796