1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35
36#undef SCRAMBLE_DELAYED_REFS
37
38
39static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
40 struct btrfs_delayed_ref_node *node, u64 parent,
41 u64 root_objectid, u64 owner_objectid,
42 u64 owner_offset, int refs_to_drop,
43 struct btrfs_delayed_extent_op *extra_op);
44static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
45 struct extent_buffer *leaf,
46 struct btrfs_extent_item *ei);
47static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
48 u64 parent, u64 root_objectid,
49 u64 flags, u64 owner, u64 offset,
50 struct btrfs_key *ins, int ref_mod);
51static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
52 struct btrfs_delayed_ref_node *node,
53 struct btrfs_delayed_extent_op *extent_op);
54static int find_next_key(struct btrfs_path *path, int level,
55 struct btrfs_key *key);
56
57static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
58{
59 return (cache->flags & bits) == bits;
60}
61
62int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
63 u64 start, u64 num_bytes)
64{
65 u64 end = start + num_bytes - 1;
66 set_extent_bits(&fs_info->freed_extents[0],
67 start, end, EXTENT_UPTODATE);
68 set_extent_bits(&fs_info->freed_extents[1],
69 start, end, EXTENT_UPTODATE);
70 return 0;
71}
72
73void btrfs_free_excluded_extents(struct btrfs_block_group_cache *cache)
74{
75 struct btrfs_fs_info *fs_info = cache->fs_info;
76 u64 start, end;
77
78 start = cache->key.objectid;
79 end = start + cache->key.offset - 1;
80
81 clear_extent_bits(&fs_info->freed_extents[0],
82 start, end, EXTENT_UPTODATE);
83 clear_extent_bits(&fs_info->freed_extents[1],
84 start, end, EXTENT_UPTODATE);
85}
86
87static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
88{
89 if (ref->type == BTRFS_REF_METADATA) {
90 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
91 return BTRFS_BLOCK_GROUP_SYSTEM;
92 else
93 return BTRFS_BLOCK_GROUP_METADATA;
94 }
95 return BTRFS_BLOCK_GROUP_DATA;
96}
97
98static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
99 struct btrfs_ref *ref)
100{
101 struct btrfs_space_info *space_info;
102 u64 flags = generic_ref_to_space_flags(ref);
103
104 space_info = btrfs_find_space_info(fs_info, flags);
105 ASSERT(space_info);
106 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
107 BTRFS_TOTAL_BYTES_PINNED_BATCH);
108}
109
110static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
111 struct btrfs_ref *ref)
112{
113 struct btrfs_space_info *space_info;
114 u64 flags = generic_ref_to_space_flags(ref);
115
116 space_info = btrfs_find_space_info(fs_info, flags);
117 ASSERT(space_info);
118 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
119 BTRFS_TOTAL_BYTES_PINNED_BATCH);
120}
121
122
123int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
124{
125 int ret;
126 struct btrfs_key key;
127 struct btrfs_path *path;
128
129 path = btrfs_alloc_path();
130 if (!path)
131 return -ENOMEM;
132
133 key.objectid = start;
134 key.offset = len;
135 key.type = BTRFS_EXTENT_ITEM_KEY;
136 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
137 btrfs_free_path(path);
138 return ret;
139}
140
141
142
143
144
145
146
147
148
149
150int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
151 struct btrfs_fs_info *fs_info, u64 bytenr,
152 u64 offset, int metadata, u64 *refs, u64 *flags)
153{
154 struct btrfs_delayed_ref_head *head;
155 struct btrfs_delayed_ref_root *delayed_refs;
156 struct btrfs_path *path;
157 struct btrfs_extent_item *ei;
158 struct extent_buffer *leaf;
159 struct btrfs_key key;
160 u32 item_size;
161 u64 num_refs;
162 u64 extent_flags;
163 int ret;
164
165
166
167
168
169 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
170 offset = fs_info->nodesize;
171 metadata = 0;
172 }
173
174 path = btrfs_alloc_path();
175 if (!path)
176 return -ENOMEM;
177
178 if (!trans) {
179 path->skip_locking = 1;
180 path->search_commit_root = 1;
181 }
182
183search_again:
184 key.objectid = bytenr;
185 key.offset = offset;
186 if (metadata)
187 key.type = BTRFS_METADATA_ITEM_KEY;
188 else
189 key.type = BTRFS_EXTENT_ITEM_KEY;
190
191 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
192 if (ret < 0)
193 goto out_free;
194
195 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
196 if (path->slots[0]) {
197 path->slots[0]--;
198 btrfs_item_key_to_cpu(path->nodes[0], &key,
199 path->slots[0]);
200 if (key.objectid == bytenr &&
201 key.type == BTRFS_EXTENT_ITEM_KEY &&
202 key.offset == fs_info->nodesize)
203 ret = 0;
204 }
205 }
206
207 if (ret == 0) {
208 leaf = path->nodes[0];
209 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
210 if (item_size >= sizeof(*ei)) {
211 ei = btrfs_item_ptr(leaf, path->slots[0],
212 struct btrfs_extent_item);
213 num_refs = btrfs_extent_refs(leaf, ei);
214 extent_flags = btrfs_extent_flags(leaf, ei);
215 } else {
216 ret = -EINVAL;
217 btrfs_print_v0_err(fs_info);
218 if (trans)
219 btrfs_abort_transaction(trans, ret);
220 else
221 btrfs_handle_fs_error(fs_info, ret, NULL);
222
223 goto out_free;
224 }
225
226 BUG_ON(num_refs == 0);
227 } else {
228 num_refs = 0;
229 extent_flags = 0;
230 ret = 0;
231 }
232
233 if (!trans)
234 goto out;
235
236 delayed_refs = &trans->transaction->delayed_refs;
237 spin_lock(&delayed_refs->lock);
238 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
239 if (head) {
240 if (!mutex_trylock(&head->mutex)) {
241 refcount_inc(&head->refs);
242 spin_unlock(&delayed_refs->lock);
243
244 btrfs_release_path(path);
245
246
247
248
249
250 mutex_lock(&head->mutex);
251 mutex_unlock(&head->mutex);
252 btrfs_put_delayed_ref_head(head);
253 goto search_again;
254 }
255 spin_lock(&head->lock);
256 if (head->extent_op && head->extent_op->update_flags)
257 extent_flags |= head->extent_op->flags_to_set;
258 else
259 BUG_ON(num_refs == 0);
260
261 num_refs += head->ref_mod;
262 spin_unlock(&head->lock);
263 mutex_unlock(&head->mutex);
264 }
265 spin_unlock(&delayed_refs->lock);
266out:
267 WARN_ON(num_refs == 0);
268 if (refs)
269 *refs = num_refs;
270 if (flags)
271 *flags = extent_flags;
272out_free:
273 btrfs_free_path(path);
274 return ret;
275}
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
389 struct btrfs_extent_inline_ref *iref,
390 enum btrfs_inline_ref_type is_data)
391{
392 int type = btrfs_extent_inline_ref_type(eb, iref);
393 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
394
395 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
396 type == BTRFS_SHARED_BLOCK_REF_KEY ||
397 type == BTRFS_SHARED_DATA_REF_KEY ||
398 type == BTRFS_EXTENT_DATA_REF_KEY) {
399 if (is_data == BTRFS_REF_TYPE_BLOCK) {
400 if (type == BTRFS_TREE_BLOCK_REF_KEY)
401 return type;
402 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
403 ASSERT(eb->fs_info);
404
405
406
407
408
409 if (offset &&
410 IS_ALIGNED(offset, eb->fs_info->nodesize))
411 return type;
412 }
413 } else if (is_data == BTRFS_REF_TYPE_DATA) {
414 if (type == BTRFS_EXTENT_DATA_REF_KEY)
415 return type;
416 if (type == BTRFS_SHARED_DATA_REF_KEY) {
417 ASSERT(eb->fs_info);
418
419
420
421
422
423 if (offset &&
424 IS_ALIGNED(offset, eb->fs_info->nodesize))
425 return type;
426 }
427 } else {
428 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
429 return type;
430 }
431 }
432
433 btrfs_print_leaf((struct extent_buffer *)eb);
434 btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
435 eb->start, type);
436 WARN_ON(1);
437
438 return BTRFS_REF_TYPE_INVALID;
439}
440
441u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
442{
443 u32 high_crc = ~(u32)0;
444 u32 low_crc = ~(u32)0;
445 __le64 lenum;
446
447 lenum = cpu_to_le64(root_objectid);
448 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
449 lenum = cpu_to_le64(owner);
450 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
451 lenum = cpu_to_le64(offset);
452 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
453
454 return ((u64)high_crc << 31) ^ (u64)low_crc;
455}
456
457static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
458 struct btrfs_extent_data_ref *ref)
459{
460 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
461 btrfs_extent_data_ref_objectid(leaf, ref),
462 btrfs_extent_data_ref_offset(leaf, ref));
463}
464
465static int match_extent_data_ref(struct extent_buffer *leaf,
466 struct btrfs_extent_data_ref *ref,
467 u64 root_objectid, u64 owner, u64 offset)
468{
469 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
470 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
471 btrfs_extent_data_ref_offset(leaf, ref) != offset)
472 return 0;
473 return 1;
474}
475
476static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
477 struct btrfs_path *path,
478 u64 bytenr, u64 parent,
479 u64 root_objectid,
480 u64 owner, u64 offset)
481{
482 struct btrfs_root *root = trans->fs_info->extent_root;
483 struct btrfs_key key;
484 struct btrfs_extent_data_ref *ref;
485 struct extent_buffer *leaf;
486 u32 nritems;
487 int ret;
488 int recow;
489 int err = -ENOENT;
490
491 key.objectid = bytenr;
492 if (parent) {
493 key.type = BTRFS_SHARED_DATA_REF_KEY;
494 key.offset = parent;
495 } else {
496 key.type = BTRFS_EXTENT_DATA_REF_KEY;
497 key.offset = hash_extent_data_ref(root_objectid,
498 owner, offset);
499 }
500again:
501 recow = 0;
502 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
503 if (ret < 0) {
504 err = ret;
505 goto fail;
506 }
507
508 if (parent) {
509 if (!ret)
510 return 0;
511 goto fail;
512 }
513
514 leaf = path->nodes[0];
515 nritems = btrfs_header_nritems(leaf);
516 while (1) {
517 if (path->slots[0] >= nritems) {
518 ret = btrfs_next_leaf(root, path);
519 if (ret < 0)
520 err = ret;
521 if (ret)
522 goto fail;
523
524 leaf = path->nodes[0];
525 nritems = btrfs_header_nritems(leaf);
526 recow = 1;
527 }
528
529 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
530 if (key.objectid != bytenr ||
531 key.type != BTRFS_EXTENT_DATA_REF_KEY)
532 goto fail;
533
534 ref = btrfs_item_ptr(leaf, path->slots[0],
535 struct btrfs_extent_data_ref);
536
537 if (match_extent_data_ref(leaf, ref, root_objectid,
538 owner, offset)) {
539 if (recow) {
540 btrfs_release_path(path);
541 goto again;
542 }
543 err = 0;
544 break;
545 }
546 path->slots[0]++;
547 }
548fail:
549 return err;
550}
551
552static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
553 struct btrfs_path *path,
554 u64 bytenr, u64 parent,
555 u64 root_objectid, u64 owner,
556 u64 offset, int refs_to_add)
557{
558 struct btrfs_root *root = trans->fs_info->extent_root;
559 struct btrfs_key key;
560 struct extent_buffer *leaf;
561 u32 size;
562 u32 num_refs;
563 int ret;
564
565 key.objectid = bytenr;
566 if (parent) {
567 key.type = BTRFS_SHARED_DATA_REF_KEY;
568 key.offset = parent;
569 size = sizeof(struct btrfs_shared_data_ref);
570 } else {
571 key.type = BTRFS_EXTENT_DATA_REF_KEY;
572 key.offset = hash_extent_data_ref(root_objectid,
573 owner, offset);
574 size = sizeof(struct btrfs_extent_data_ref);
575 }
576
577 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
578 if (ret && ret != -EEXIST)
579 goto fail;
580
581 leaf = path->nodes[0];
582 if (parent) {
583 struct btrfs_shared_data_ref *ref;
584 ref = btrfs_item_ptr(leaf, path->slots[0],
585 struct btrfs_shared_data_ref);
586 if (ret == 0) {
587 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
588 } else {
589 num_refs = btrfs_shared_data_ref_count(leaf, ref);
590 num_refs += refs_to_add;
591 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
592 }
593 } else {
594 struct btrfs_extent_data_ref *ref;
595 while (ret == -EEXIST) {
596 ref = btrfs_item_ptr(leaf, path->slots[0],
597 struct btrfs_extent_data_ref);
598 if (match_extent_data_ref(leaf, ref, root_objectid,
599 owner, offset))
600 break;
601 btrfs_release_path(path);
602 key.offset++;
603 ret = btrfs_insert_empty_item(trans, root, path, &key,
604 size);
605 if (ret && ret != -EEXIST)
606 goto fail;
607
608 leaf = path->nodes[0];
609 }
610 ref = btrfs_item_ptr(leaf, path->slots[0],
611 struct btrfs_extent_data_ref);
612 if (ret == 0) {
613 btrfs_set_extent_data_ref_root(leaf, ref,
614 root_objectid);
615 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
616 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
617 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
618 } else {
619 num_refs = btrfs_extent_data_ref_count(leaf, ref);
620 num_refs += refs_to_add;
621 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
622 }
623 }
624 btrfs_mark_buffer_dirty(leaf);
625 ret = 0;
626fail:
627 btrfs_release_path(path);
628 return ret;
629}
630
631static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
632 struct btrfs_path *path,
633 int refs_to_drop, int *last_ref)
634{
635 struct btrfs_key key;
636 struct btrfs_extent_data_ref *ref1 = NULL;
637 struct btrfs_shared_data_ref *ref2 = NULL;
638 struct extent_buffer *leaf;
639 u32 num_refs = 0;
640 int ret = 0;
641
642 leaf = path->nodes[0];
643 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
644
645 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
646 ref1 = btrfs_item_ptr(leaf, path->slots[0],
647 struct btrfs_extent_data_ref);
648 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
649 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
650 ref2 = btrfs_item_ptr(leaf, path->slots[0],
651 struct btrfs_shared_data_ref);
652 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
653 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
654 btrfs_print_v0_err(trans->fs_info);
655 btrfs_abort_transaction(trans, -EINVAL);
656 return -EINVAL;
657 } else {
658 BUG();
659 }
660
661 BUG_ON(num_refs < refs_to_drop);
662 num_refs -= refs_to_drop;
663
664 if (num_refs == 0) {
665 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
666 *last_ref = 1;
667 } else {
668 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
669 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
670 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
671 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
672 btrfs_mark_buffer_dirty(leaf);
673 }
674 return ret;
675}
676
677static noinline u32 extent_data_ref_count(struct btrfs_path *path,
678 struct btrfs_extent_inline_ref *iref)
679{
680 struct btrfs_key key;
681 struct extent_buffer *leaf;
682 struct btrfs_extent_data_ref *ref1;
683 struct btrfs_shared_data_ref *ref2;
684 u32 num_refs = 0;
685 int type;
686
687 leaf = path->nodes[0];
688 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
689
690 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
691 if (iref) {
692
693
694
695
696 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
697 ASSERT(type != BTRFS_REF_TYPE_INVALID);
698 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
699 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
700 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
701 } else {
702 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
703 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
704 }
705 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
706 ref1 = btrfs_item_ptr(leaf, path->slots[0],
707 struct btrfs_extent_data_ref);
708 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
709 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
710 ref2 = btrfs_item_ptr(leaf, path->slots[0],
711 struct btrfs_shared_data_ref);
712 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
713 } else {
714 WARN_ON(1);
715 }
716 return num_refs;
717}
718
719static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
720 struct btrfs_path *path,
721 u64 bytenr, u64 parent,
722 u64 root_objectid)
723{
724 struct btrfs_root *root = trans->fs_info->extent_root;
725 struct btrfs_key key;
726 int ret;
727
728 key.objectid = bytenr;
729 if (parent) {
730 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
731 key.offset = parent;
732 } else {
733 key.type = BTRFS_TREE_BLOCK_REF_KEY;
734 key.offset = root_objectid;
735 }
736
737 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
738 if (ret > 0)
739 ret = -ENOENT;
740 return ret;
741}
742
743static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
744 struct btrfs_path *path,
745 u64 bytenr, u64 parent,
746 u64 root_objectid)
747{
748 struct btrfs_key key;
749 int ret;
750
751 key.objectid = bytenr;
752 if (parent) {
753 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
754 key.offset = parent;
755 } else {
756 key.type = BTRFS_TREE_BLOCK_REF_KEY;
757 key.offset = root_objectid;
758 }
759
760 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
761 path, &key, 0);
762 btrfs_release_path(path);
763 return ret;
764}
765
766static inline int extent_ref_type(u64 parent, u64 owner)
767{
768 int type;
769 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
770 if (parent > 0)
771 type = BTRFS_SHARED_BLOCK_REF_KEY;
772 else
773 type = BTRFS_TREE_BLOCK_REF_KEY;
774 } else {
775 if (parent > 0)
776 type = BTRFS_SHARED_DATA_REF_KEY;
777 else
778 type = BTRFS_EXTENT_DATA_REF_KEY;
779 }
780 return type;
781}
782
783static int find_next_key(struct btrfs_path *path, int level,
784 struct btrfs_key *key)
785
786{
787 for (; level < BTRFS_MAX_LEVEL; level++) {
788 if (!path->nodes[level])
789 break;
790 if (path->slots[level] + 1 >=
791 btrfs_header_nritems(path->nodes[level]))
792 continue;
793 if (level == 0)
794 btrfs_item_key_to_cpu(path->nodes[level], key,
795 path->slots[level] + 1);
796 else
797 btrfs_node_key_to_cpu(path->nodes[level], key,
798 path->slots[level] + 1);
799 return 0;
800 }
801 return 1;
802}
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817static noinline_for_stack
818int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
819 struct btrfs_path *path,
820 struct btrfs_extent_inline_ref **ref_ret,
821 u64 bytenr, u64 num_bytes,
822 u64 parent, u64 root_objectid,
823 u64 owner, u64 offset, int insert)
824{
825 struct btrfs_fs_info *fs_info = trans->fs_info;
826 struct btrfs_root *root = fs_info->extent_root;
827 struct btrfs_key key;
828 struct extent_buffer *leaf;
829 struct btrfs_extent_item *ei;
830 struct btrfs_extent_inline_ref *iref;
831 u64 flags;
832 u64 item_size;
833 unsigned long ptr;
834 unsigned long end;
835 int extra_size;
836 int type;
837 int want;
838 int ret;
839 int err = 0;
840 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
841 int needed;
842
843 key.objectid = bytenr;
844 key.type = BTRFS_EXTENT_ITEM_KEY;
845 key.offset = num_bytes;
846
847 want = extent_ref_type(parent, owner);
848 if (insert) {
849 extra_size = btrfs_extent_inline_ref_size(want);
850 path->keep_locks = 1;
851 } else
852 extra_size = -1;
853
854
855
856
857
858 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
859 key.type = BTRFS_METADATA_ITEM_KEY;
860 key.offset = owner;
861 }
862
863again:
864 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
865 if (ret < 0) {
866 err = ret;
867 goto out;
868 }
869
870
871
872
873
874 if (ret > 0 && skinny_metadata) {
875 skinny_metadata = false;
876 if (path->slots[0]) {
877 path->slots[0]--;
878 btrfs_item_key_to_cpu(path->nodes[0], &key,
879 path->slots[0]);
880 if (key.objectid == bytenr &&
881 key.type == BTRFS_EXTENT_ITEM_KEY &&
882 key.offset == num_bytes)
883 ret = 0;
884 }
885 if (ret) {
886 key.objectid = bytenr;
887 key.type = BTRFS_EXTENT_ITEM_KEY;
888 key.offset = num_bytes;
889 btrfs_release_path(path);
890 goto again;
891 }
892 }
893
894 if (ret && !insert) {
895 err = -ENOENT;
896 goto out;
897 } else if (WARN_ON(ret)) {
898 err = -EIO;
899 goto out;
900 }
901
902 leaf = path->nodes[0];
903 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
904 if (unlikely(item_size < sizeof(*ei))) {
905 err = -EINVAL;
906 btrfs_print_v0_err(fs_info);
907 btrfs_abort_transaction(trans, err);
908 goto out;
909 }
910
911 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
912 flags = btrfs_extent_flags(leaf, ei);
913
914 ptr = (unsigned long)(ei + 1);
915 end = (unsigned long)ei + item_size;
916
917 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
918 ptr += sizeof(struct btrfs_tree_block_info);
919 BUG_ON(ptr > end);
920 }
921
922 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
923 needed = BTRFS_REF_TYPE_DATA;
924 else
925 needed = BTRFS_REF_TYPE_BLOCK;
926
927 err = -ENOENT;
928 while (1) {
929 if (ptr >= end) {
930 WARN_ON(ptr > end);
931 break;
932 }
933 iref = (struct btrfs_extent_inline_ref *)ptr;
934 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
935 if (type == BTRFS_REF_TYPE_INVALID) {
936 err = -EUCLEAN;
937 goto out;
938 }
939
940 if (want < type)
941 break;
942 if (want > type) {
943 ptr += btrfs_extent_inline_ref_size(type);
944 continue;
945 }
946
947 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
948 struct btrfs_extent_data_ref *dref;
949 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
950 if (match_extent_data_ref(leaf, dref, root_objectid,
951 owner, offset)) {
952 err = 0;
953 break;
954 }
955 if (hash_extent_data_ref_item(leaf, dref) <
956 hash_extent_data_ref(root_objectid, owner, offset))
957 break;
958 } else {
959 u64 ref_offset;
960 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
961 if (parent > 0) {
962 if (parent == ref_offset) {
963 err = 0;
964 break;
965 }
966 if (ref_offset < parent)
967 break;
968 } else {
969 if (root_objectid == ref_offset) {
970 err = 0;
971 break;
972 }
973 if (ref_offset < root_objectid)
974 break;
975 }
976 }
977 ptr += btrfs_extent_inline_ref_size(type);
978 }
979 if (err == -ENOENT && insert) {
980 if (item_size + extra_size >=
981 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
982 err = -EAGAIN;
983 goto out;
984 }
985
986
987
988
989
990
991 if (find_next_key(path, 0, &key) == 0 &&
992 key.objectid == bytenr &&
993 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
994 err = -EAGAIN;
995 goto out;
996 }
997 }
998 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
999out:
1000 if (insert) {
1001 path->keep_locks = 0;
1002 btrfs_unlock_up_safe(path, 1);
1003 }
1004 return err;
1005}
1006
1007
1008
1009
1010static noinline_for_stack
1011void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1012 struct btrfs_path *path,
1013 struct btrfs_extent_inline_ref *iref,
1014 u64 parent, u64 root_objectid,
1015 u64 owner, u64 offset, int refs_to_add,
1016 struct btrfs_delayed_extent_op *extent_op)
1017{
1018 struct extent_buffer *leaf;
1019 struct btrfs_extent_item *ei;
1020 unsigned long ptr;
1021 unsigned long end;
1022 unsigned long item_offset;
1023 u64 refs;
1024 int size;
1025 int type;
1026
1027 leaf = path->nodes[0];
1028 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1029 item_offset = (unsigned long)iref - (unsigned long)ei;
1030
1031 type = extent_ref_type(parent, owner);
1032 size = btrfs_extent_inline_ref_size(type);
1033
1034 btrfs_extend_item(path, size);
1035
1036 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1037 refs = btrfs_extent_refs(leaf, ei);
1038 refs += refs_to_add;
1039 btrfs_set_extent_refs(leaf, ei, refs);
1040 if (extent_op)
1041 __run_delayed_extent_op(extent_op, leaf, ei);
1042
1043 ptr = (unsigned long)ei + item_offset;
1044 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1045 if (ptr < end - size)
1046 memmove_extent_buffer(leaf, ptr + size, ptr,
1047 end - size - ptr);
1048
1049 iref = (struct btrfs_extent_inline_ref *)ptr;
1050 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1051 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1052 struct btrfs_extent_data_ref *dref;
1053 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1054 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1055 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1056 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1057 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1058 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1059 struct btrfs_shared_data_ref *sref;
1060 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1061 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1062 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1063 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1064 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1065 } else {
1066 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1067 }
1068 btrfs_mark_buffer_dirty(leaf);
1069}
1070
1071static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1072 struct btrfs_path *path,
1073 struct btrfs_extent_inline_ref **ref_ret,
1074 u64 bytenr, u64 num_bytes, u64 parent,
1075 u64 root_objectid, u64 owner, u64 offset)
1076{
1077 int ret;
1078
1079 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1080 num_bytes, parent, root_objectid,
1081 owner, offset, 0);
1082 if (ret != -ENOENT)
1083 return ret;
1084
1085 btrfs_release_path(path);
1086 *ref_ret = NULL;
1087
1088 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1089 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1090 root_objectid);
1091 } else {
1092 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1093 root_objectid, owner, offset);
1094 }
1095 return ret;
1096}
1097
1098
1099
1100
1101static noinline_for_stack
1102void update_inline_extent_backref(struct btrfs_path *path,
1103 struct btrfs_extent_inline_ref *iref,
1104 int refs_to_mod,
1105 struct btrfs_delayed_extent_op *extent_op,
1106 int *last_ref)
1107{
1108 struct extent_buffer *leaf = path->nodes[0];
1109 struct btrfs_extent_item *ei;
1110 struct btrfs_extent_data_ref *dref = NULL;
1111 struct btrfs_shared_data_ref *sref = NULL;
1112 unsigned long ptr;
1113 unsigned long end;
1114 u32 item_size;
1115 int size;
1116 int type;
1117 u64 refs;
1118
1119 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1120 refs = btrfs_extent_refs(leaf, ei);
1121 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1122 refs += refs_to_mod;
1123 btrfs_set_extent_refs(leaf, ei, refs);
1124 if (extent_op)
1125 __run_delayed_extent_op(extent_op, leaf, ei);
1126
1127
1128
1129
1130
1131 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1132 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1133
1134 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1135 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1136 refs = btrfs_extent_data_ref_count(leaf, dref);
1137 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1138 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1139 refs = btrfs_shared_data_ref_count(leaf, sref);
1140 } else {
1141 refs = 1;
1142 BUG_ON(refs_to_mod != -1);
1143 }
1144
1145 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1146 refs += refs_to_mod;
1147
1148 if (refs > 0) {
1149 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1150 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1151 else
1152 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1153 } else {
1154 *last_ref = 1;
1155 size = btrfs_extent_inline_ref_size(type);
1156 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1157 ptr = (unsigned long)iref;
1158 end = (unsigned long)ei + item_size;
1159 if (ptr + size < end)
1160 memmove_extent_buffer(leaf, ptr, ptr + size,
1161 end - ptr - size);
1162 item_size -= size;
1163 btrfs_truncate_item(path, item_size, 1);
1164 }
1165 btrfs_mark_buffer_dirty(leaf);
1166}
1167
1168static noinline_for_stack
1169int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1170 struct btrfs_path *path,
1171 u64 bytenr, u64 num_bytes, u64 parent,
1172 u64 root_objectid, u64 owner,
1173 u64 offset, int refs_to_add,
1174 struct btrfs_delayed_extent_op *extent_op)
1175{
1176 struct btrfs_extent_inline_ref *iref;
1177 int ret;
1178
1179 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1180 num_bytes, parent, root_objectid,
1181 owner, offset, 1);
1182 if (ret == 0) {
1183 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1184 update_inline_extent_backref(path, iref, refs_to_add,
1185 extent_op, NULL);
1186 } else if (ret == -ENOENT) {
1187 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1188 root_objectid, owner, offset,
1189 refs_to_add, extent_op);
1190 ret = 0;
1191 }
1192 return ret;
1193}
1194
1195static int insert_extent_backref(struct btrfs_trans_handle *trans,
1196 struct btrfs_path *path,
1197 u64 bytenr, u64 parent, u64 root_objectid,
1198 u64 owner, u64 offset, int refs_to_add)
1199{
1200 int ret;
1201 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1202 BUG_ON(refs_to_add != 1);
1203 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1204 root_objectid);
1205 } else {
1206 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1207 root_objectid, owner, offset,
1208 refs_to_add);
1209 }
1210 return ret;
1211}
1212
1213static int remove_extent_backref(struct btrfs_trans_handle *trans,
1214 struct btrfs_path *path,
1215 struct btrfs_extent_inline_ref *iref,
1216 int refs_to_drop, int is_data, int *last_ref)
1217{
1218 int ret = 0;
1219
1220 BUG_ON(!is_data && refs_to_drop != 1);
1221 if (iref) {
1222 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1223 last_ref);
1224 } else if (is_data) {
1225 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1226 last_ref);
1227 } else {
1228 *last_ref = 1;
1229 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1230 }
1231 return ret;
1232}
1233
1234static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1235 u64 *discarded_bytes)
1236{
1237 int j, ret = 0;
1238 u64 bytes_left, end;
1239 u64 aligned_start = ALIGN(start, 1 << 9);
1240
1241 if (WARN_ON(start != aligned_start)) {
1242 len -= aligned_start - start;
1243 len = round_down(len, 1 << 9);
1244 start = aligned_start;
1245 }
1246
1247 *discarded_bytes = 0;
1248
1249 if (!len)
1250 return 0;
1251
1252 end = start + len;
1253 bytes_left = len;
1254
1255
1256 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1257 u64 sb_start = btrfs_sb_offset(j);
1258 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1259 u64 size = sb_start - start;
1260
1261 if (!in_range(sb_start, start, bytes_left) &&
1262 !in_range(sb_end, start, bytes_left) &&
1263 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1264 continue;
1265
1266
1267
1268
1269
1270 if (sb_start <= start) {
1271 start += sb_end - start;
1272 if (start > end) {
1273 bytes_left = 0;
1274 break;
1275 }
1276 bytes_left = end - start;
1277 continue;
1278 }
1279
1280 if (size) {
1281 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1282 GFP_NOFS, 0);
1283 if (!ret)
1284 *discarded_bytes += size;
1285 else if (ret != -EOPNOTSUPP)
1286 return ret;
1287 }
1288
1289 start = sb_end;
1290 if (start > end) {
1291 bytes_left = 0;
1292 break;
1293 }
1294 bytes_left = end - start;
1295 }
1296
1297 if (bytes_left) {
1298 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1299 GFP_NOFS, 0);
1300 if (!ret)
1301 *discarded_bytes += bytes_left;
1302 }
1303 return ret;
1304}
1305
1306int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1307 u64 num_bytes, u64 *actual_bytes)
1308{
1309 int ret;
1310 u64 discarded_bytes = 0;
1311 struct btrfs_bio *bbio = NULL;
1312
1313
1314
1315
1316
1317
1318 btrfs_bio_counter_inc_blocked(fs_info);
1319
1320 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
1321 &bbio, 0);
1322
1323 if (!ret) {
1324 struct btrfs_bio_stripe *stripe = bbio->stripes;
1325 int i;
1326
1327
1328 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1329 u64 bytes;
1330 struct request_queue *req_q;
1331
1332 if (!stripe->dev->bdev) {
1333 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1334 continue;
1335 }
1336 req_q = bdev_get_queue(stripe->dev->bdev);
1337 if (!blk_queue_discard(req_q))
1338 continue;
1339
1340 ret = btrfs_issue_discard(stripe->dev->bdev,
1341 stripe->physical,
1342 stripe->length,
1343 &bytes);
1344 if (!ret)
1345 discarded_bytes += bytes;
1346 else if (ret != -EOPNOTSUPP)
1347 break;
1348
1349
1350
1351
1352
1353
1354 ret = 0;
1355 }
1356 btrfs_put_bbio(bbio);
1357 }
1358 btrfs_bio_counter_dec(fs_info);
1359
1360 if (actual_bytes)
1361 *actual_bytes = discarded_bytes;
1362
1363
1364 if (ret == -EOPNOTSUPP)
1365 ret = 0;
1366 return ret;
1367}
1368
1369
1370int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1371 struct btrfs_ref *generic_ref)
1372{
1373 struct btrfs_fs_info *fs_info = trans->fs_info;
1374 int old_ref_mod, new_ref_mod;
1375 int ret;
1376
1377 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1378 generic_ref->action);
1379 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1380 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1381
1382 if (generic_ref->type == BTRFS_REF_METADATA)
1383 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
1384 NULL, &old_ref_mod, &new_ref_mod);
1385 else
1386 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
1387 &old_ref_mod, &new_ref_mod);
1388
1389 btrfs_ref_tree_mod(fs_info, generic_ref);
1390
1391 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
1392 sub_pinned_bytes(fs_info, generic_ref);
1393
1394 return ret;
1395}
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1432 struct btrfs_delayed_ref_node *node,
1433 u64 parent, u64 root_objectid,
1434 u64 owner, u64 offset, int refs_to_add,
1435 struct btrfs_delayed_extent_op *extent_op)
1436{
1437 struct btrfs_path *path;
1438 struct extent_buffer *leaf;
1439 struct btrfs_extent_item *item;
1440 struct btrfs_key key;
1441 u64 bytenr = node->bytenr;
1442 u64 num_bytes = node->num_bytes;
1443 u64 refs;
1444 int ret;
1445
1446 path = btrfs_alloc_path();
1447 if (!path)
1448 return -ENOMEM;
1449
1450 path->reada = READA_FORWARD;
1451 path->leave_spinning = 1;
1452
1453 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1454 parent, root_objectid, owner,
1455 offset, refs_to_add, extent_op);
1456 if ((ret < 0 && ret != -EAGAIN) || !ret)
1457 goto out;
1458
1459
1460
1461
1462
1463
1464 leaf = path->nodes[0];
1465 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1466 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1467 refs = btrfs_extent_refs(leaf, item);
1468 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1469 if (extent_op)
1470 __run_delayed_extent_op(extent_op, leaf, item);
1471
1472 btrfs_mark_buffer_dirty(leaf);
1473 btrfs_release_path(path);
1474
1475 path->reada = READA_FORWARD;
1476 path->leave_spinning = 1;
1477
1478 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
1479 owner, offset, refs_to_add);
1480 if (ret)
1481 btrfs_abort_transaction(trans, ret);
1482out:
1483 btrfs_free_path(path);
1484 return ret;
1485}
1486
1487static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1488 struct btrfs_delayed_ref_node *node,
1489 struct btrfs_delayed_extent_op *extent_op,
1490 int insert_reserved)
1491{
1492 int ret = 0;
1493 struct btrfs_delayed_data_ref *ref;
1494 struct btrfs_key ins;
1495 u64 parent = 0;
1496 u64 ref_root = 0;
1497 u64 flags = 0;
1498
1499 ins.objectid = node->bytenr;
1500 ins.offset = node->num_bytes;
1501 ins.type = BTRFS_EXTENT_ITEM_KEY;
1502
1503 ref = btrfs_delayed_node_to_data_ref(node);
1504 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1505
1506 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1507 parent = ref->parent;
1508 ref_root = ref->root;
1509
1510 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1511 if (extent_op)
1512 flags |= extent_op->flags_to_set;
1513 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1514 flags, ref->objectid,
1515 ref->offset, &ins,
1516 node->ref_mod);
1517 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1518 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1519 ref->objectid, ref->offset,
1520 node->ref_mod, extent_op);
1521 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1522 ret = __btrfs_free_extent(trans, node, parent,
1523 ref_root, ref->objectid,
1524 ref->offset, node->ref_mod,
1525 extent_op);
1526 } else {
1527 BUG();
1528 }
1529 return ret;
1530}
1531
1532static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1533 struct extent_buffer *leaf,
1534 struct btrfs_extent_item *ei)
1535{
1536 u64 flags = btrfs_extent_flags(leaf, ei);
1537 if (extent_op->update_flags) {
1538 flags |= extent_op->flags_to_set;
1539 btrfs_set_extent_flags(leaf, ei, flags);
1540 }
1541
1542 if (extent_op->update_key) {
1543 struct btrfs_tree_block_info *bi;
1544 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1545 bi = (struct btrfs_tree_block_info *)(ei + 1);
1546 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1547 }
1548}
1549
1550static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1551 struct btrfs_delayed_ref_head *head,
1552 struct btrfs_delayed_extent_op *extent_op)
1553{
1554 struct btrfs_fs_info *fs_info = trans->fs_info;
1555 struct btrfs_key key;
1556 struct btrfs_path *path;
1557 struct btrfs_extent_item *ei;
1558 struct extent_buffer *leaf;
1559 u32 item_size;
1560 int ret;
1561 int err = 0;
1562 int metadata = !extent_op->is_data;
1563
1564 if (trans->aborted)
1565 return 0;
1566
1567 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1568 metadata = 0;
1569
1570 path = btrfs_alloc_path();
1571 if (!path)
1572 return -ENOMEM;
1573
1574 key.objectid = head->bytenr;
1575
1576 if (metadata) {
1577 key.type = BTRFS_METADATA_ITEM_KEY;
1578 key.offset = extent_op->level;
1579 } else {
1580 key.type = BTRFS_EXTENT_ITEM_KEY;
1581 key.offset = head->num_bytes;
1582 }
1583
1584again:
1585 path->reada = READA_FORWARD;
1586 path->leave_spinning = 1;
1587 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1588 if (ret < 0) {
1589 err = ret;
1590 goto out;
1591 }
1592 if (ret > 0) {
1593 if (metadata) {
1594 if (path->slots[0] > 0) {
1595 path->slots[0]--;
1596 btrfs_item_key_to_cpu(path->nodes[0], &key,
1597 path->slots[0]);
1598 if (key.objectid == head->bytenr &&
1599 key.type == BTRFS_EXTENT_ITEM_KEY &&
1600 key.offset == head->num_bytes)
1601 ret = 0;
1602 }
1603 if (ret > 0) {
1604 btrfs_release_path(path);
1605 metadata = 0;
1606
1607 key.objectid = head->bytenr;
1608 key.offset = head->num_bytes;
1609 key.type = BTRFS_EXTENT_ITEM_KEY;
1610 goto again;
1611 }
1612 } else {
1613 err = -EIO;
1614 goto out;
1615 }
1616 }
1617
1618 leaf = path->nodes[0];
1619 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1620
1621 if (unlikely(item_size < sizeof(*ei))) {
1622 err = -EINVAL;
1623 btrfs_print_v0_err(fs_info);
1624 btrfs_abort_transaction(trans, err);
1625 goto out;
1626 }
1627
1628 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1629 __run_delayed_extent_op(extent_op, leaf, ei);
1630
1631 btrfs_mark_buffer_dirty(leaf);
1632out:
1633 btrfs_free_path(path);
1634 return err;
1635}
1636
1637static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1638 struct btrfs_delayed_ref_node *node,
1639 struct btrfs_delayed_extent_op *extent_op,
1640 int insert_reserved)
1641{
1642 int ret = 0;
1643 struct btrfs_delayed_tree_ref *ref;
1644 u64 parent = 0;
1645 u64 ref_root = 0;
1646
1647 ref = btrfs_delayed_node_to_tree_ref(node);
1648 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1649
1650 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1651 parent = ref->parent;
1652 ref_root = ref->root;
1653
1654 if (node->ref_mod != 1) {
1655 btrfs_err(trans->fs_info,
1656 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1657 node->bytenr, node->ref_mod, node->action, ref_root,
1658 parent);
1659 return -EIO;
1660 }
1661 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1662 BUG_ON(!extent_op || !extent_op->update_flags);
1663 ret = alloc_reserved_tree_block(trans, node, extent_op);
1664 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1665 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1666 ref->level, 0, 1, extent_op);
1667 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1668 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1669 ref->level, 0, 1, extent_op);
1670 } else {
1671 BUG();
1672 }
1673 return ret;
1674}
1675
1676
1677static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1678 struct btrfs_delayed_ref_node *node,
1679 struct btrfs_delayed_extent_op *extent_op,
1680 int insert_reserved)
1681{
1682 int ret = 0;
1683
1684 if (trans->aborted) {
1685 if (insert_reserved)
1686 btrfs_pin_extent(trans->fs_info, node->bytenr,
1687 node->num_bytes, 1);
1688 return 0;
1689 }
1690
1691 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1692 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1693 ret = run_delayed_tree_ref(trans, node, extent_op,
1694 insert_reserved);
1695 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1696 node->type == BTRFS_SHARED_DATA_REF_KEY)
1697 ret = run_delayed_data_ref(trans, node, extent_op,
1698 insert_reserved);
1699 else
1700 BUG();
1701 if (ret && insert_reserved)
1702 btrfs_pin_extent(trans->fs_info, node->bytenr,
1703 node->num_bytes, 1);
1704 return ret;
1705}
1706
1707static inline struct btrfs_delayed_ref_node *
1708select_delayed_ref(struct btrfs_delayed_ref_head *head)
1709{
1710 struct btrfs_delayed_ref_node *ref;
1711
1712 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1713 return NULL;
1714
1715
1716
1717
1718
1719
1720
1721 if (!list_empty(&head->ref_add_list))
1722 return list_first_entry(&head->ref_add_list,
1723 struct btrfs_delayed_ref_node, add_list);
1724
1725 ref = rb_entry(rb_first_cached(&head->ref_tree),
1726 struct btrfs_delayed_ref_node, ref_node);
1727 ASSERT(list_empty(&ref->add_list));
1728 return ref;
1729}
1730
1731static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1732 struct btrfs_delayed_ref_head *head)
1733{
1734 spin_lock(&delayed_refs->lock);
1735 head->processing = 0;
1736 delayed_refs->num_heads_ready++;
1737 spin_unlock(&delayed_refs->lock);
1738 btrfs_delayed_ref_unlock(head);
1739}
1740
1741static struct btrfs_delayed_extent_op *cleanup_extent_op(
1742 struct btrfs_delayed_ref_head *head)
1743{
1744 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1745
1746 if (!extent_op)
1747 return NULL;
1748
1749 if (head->must_insert_reserved) {
1750 head->extent_op = NULL;
1751 btrfs_free_delayed_extent_op(extent_op);
1752 return NULL;
1753 }
1754 return extent_op;
1755}
1756
1757static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1758 struct btrfs_delayed_ref_head *head)
1759{
1760 struct btrfs_delayed_extent_op *extent_op;
1761 int ret;
1762
1763 extent_op = cleanup_extent_op(head);
1764 if (!extent_op)
1765 return 0;
1766 head->extent_op = NULL;
1767 spin_unlock(&head->lock);
1768 ret = run_delayed_extent_op(trans, head, extent_op);
1769 btrfs_free_delayed_extent_op(extent_op);
1770 return ret ? ret : 1;
1771}
1772
1773void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1774 struct btrfs_delayed_ref_root *delayed_refs,
1775 struct btrfs_delayed_ref_head *head)
1776{
1777 int nr_items = 1;
1778
1779 if (head->total_ref_mod < 0) {
1780 struct btrfs_space_info *space_info;
1781 u64 flags;
1782
1783 if (head->is_data)
1784 flags = BTRFS_BLOCK_GROUP_DATA;
1785 else if (head->is_system)
1786 flags = BTRFS_BLOCK_GROUP_SYSTEM;
1787 else
1788 flags = BTRFS_BLOCK_GROUP_METADATA;
1789 space_info = btrfs_find_space_info(fs_info, flags);
1790 ASSERT(space_info);
1791 percpu_counter_add_batch(&space_info->total_bytes_pinned,
1792 -head->num_bytes,
1793 BTRFS_TOTAL_BYTES_PINNED_BATCH);
1794
1795
1796
1797
1798
1799
1800 if (head->is_data) {
1801 spin_lock(&delayed_refs->lock);
1802 delayed_refs->pending_csums -= head->num_bytes;
1803 spin_unlock(&delayed_refs->lock);
1804 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
1805 head->num_bytes);
1806 }
1807 }
1808
1809 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1810}
1811
1812static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1813 struct btrfs_delayed_ref_head *head)
1814{
1815
1816 struct btrfs_fs_info *fs_info = trans->fs_info;
1817 struct btrfs_delayed_ref_root *delayed_refs;
1818 int ret;
1819
1820 delayed_refs = &trans->transaction->delayed_refs;
1821
1822 ret = run_and_cleanup_extent_op(trans, head);
1823 if (ret < 0) {
1824 unselect_delayed_ref_head(delayed_refs, head);
1825 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1826 return ret;
1827 } else if (ret) {
1828 return ret;
1829 }
1830
1831
1832
1833
1834
1835 spin_unlock(&head->lock);
1836 spin_lock(&delayed_refs->lock);
1837 spin_lock(&head->lock);
1838 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1839 spin_unlock(&head->lock);
1840 spin_unlock(&delayed_refs->lock);
1841 return 1;
1842 }
1843 btrfs_delete_ref_head(delayed_refs, head);
1844 spin_unlock(&head->lock);
1845 spin_unlock(&delayed_refs->lock);
1846
1847 if (head->must_insert_reserved) {
1848 btrfs_pin_extent(fs_info, head->bytenr,
1849 head->num_bytes, 1);
1850 if (head->is_data) {
1851 ret = btrfs_del_csums(trans, fs_info, head->bytenr,
1852 head->num_bytes);
1853 }
1854 }
1855
1856 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1857
1858 trace_run_delayed_ref_head(fs_info, head, 0);
1859 btrfs_delayed_ref_unlock(head);
1860 btrfs_put_delayed_ref_head(head);
1861 return 0;
1862}
1863
1864static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1865 struct btrfs_trans_handle *trans)
1866{
1867 struct btrfs_delayed_ref_root *delayed_refs =
1868 &trans->transaction->delayed_refs;
1869 struct btrfs_delayed_ref_head *head = NULL;
1870 int ret;
1871
1872 spin_lock(&delayed_refs->lock);
1873 head = btrfs_select_ref_head(delayed_refs);
1874 if (!head) {
1875 spin_unlock(&delayed_refs->lock);
1876 return head;
1877 }
1878
1879
1880
1881
1882
1883 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1884 spin_unlock(&delayed_refs->lock);
1885
1886
1887
1888
1889
1890
1891 if (ret == -EAGAIN)
1892 head = ERR_PTR(-EAGAIN);
1893
1894 return head;
1895}
1896
1897static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1898 struct btrfs_delayed_ref_head *locked_ref,
1899 unsigned long *run_refs)
1900{
1901 struct btrfs_fs_info *fs_info = trans->fs_info;
1902 struct btrfs_delayed_ref_root *delayed_refs;
1903 struct btrfs_delayed_extent_op *extent_op;
1904 struct btrfs_delayed_ref_node *ref;
1905 int must_insert_reserved = 0;
1906 int ret;
1907
1908 delayed_refs = &trans->transaction->delayed_refs;
1909
1910 lockdep_assert_held(&locked_ref->mutex);
1911 lockdep_assert_held(&locked_ref->lock);
1912
1913 while ((ref = select_delayed_ref(locked_ref))) {
1914 if (ref->seq &&
1915 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1916 spin_unlock(&locked_ref->lock);
1917 unselect_delayed_ref_head(delayed_refs, locked_ref);
1918 return -EAGAIN;
1919 }
1920
1921 (*run_refs)++;
1922 ref->in_tree = 0;
1923 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1924 RB_CLEAR_NODE(&ref->ref_node);
1925 if (!list_empty(&ref->add_list))
1926 list_del(&ref->add_list);
1927
1928
1929
1930
1931 switch (ref->action) {
1932 case BTRFS_ADD_DELAYED_REF:
1933 case BTRFS_ADD_DELAYED_EXTENT:
1934 locked_ref->ref_mod -= ref->ref_mod;
1935 break;
1936 case BTRFS_DROP_DELAYED_REF:
1937 locked_ref->ref_mod += ref->ref_mod;
1938 break;
1939 default:
1940 WARN_ON(1);
1941 }
1942 atomic_dec(&delayed_refs->num_entries);
1943
1944
1945
1946
1947
1948 must_insert_reserved = locked_ref->must_insert_reserved;
1949 locked_ref->must_insert_reserved = 0;
1950
1951 extent_op = locked_ref->extent_op;
1952 locked_ref->extent_op = NULL;
1953 spin_unlock(&locked_ref->lock);
1954
1955 ret = run_one_delayed_ref(trans, ref, extent_op,
1956 must_insert_reserved);
1957
1958 btrfs_free_delayed_extent_op(extent_op);
1959 if (ret) {
1960 unselect_delayed_ref_head(delayed_refs, locked_ref);
1961 btrfs_put_delayed_ref(ref);
1962 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1963 ret);
1964 return ret;
1965 }
1966
1967 btrfs_put_delayed_ref(ref);
1968 cond_resched();
1969
1970 spin_lock(&locked_ref->lock);
1971 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1972 }
1973
1974 return 0;
1975}
1976
1977
1978
1979
1980
1981static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1982 unsigned long nr)
1983{
1984 struct btrfs_fs_info *fs_info = trans->fs_info;
1985 struct btrfs_delayed_ref_root *delayed_refs;
1986 struct btrfs_delayed_ref_head *locked_ref = NULL;
1987 ktime_t start = ktime_get();
1988 int ret;
1989 unsigned long count = 0;
1990 unsigned long actual_count = 0;
1991
1992 delayed_refs = &trans->transaction->delayed_refs;
1993 do {
1994 if (!locked_ref) {
1995 locked_ref = btrfs_obtain_ref_head(trans);
1996 if (IS_ERR_OR_NULL(locked_ref)) {
1997 if (PTR_ERR(locked_ref) == -EAGAIN) {
1998 continue;
1999 } else {
2000 break;
2001 }
2002 }
2003 count++;
2004 }
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017 spin_lock(&locked_ref->lock);
2018 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2019
2020 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2021 &actual_count);
2022 if (ret < 0 && ret != -EAGAIN) {
2023
2024
2025
2026
2027 return ret;
2028 } else if (!ret) {
2029
2030
2031
2032
2033 ret = cleanup_ref_head(trans, locked_ref);
2034 if (ret > 0 ) {
2035
2036 ret = 0;
2037 continue;
2038 } else if (ret) {
2039 return ret;
2040 }
2041 }
2042
2043
2044
2045
2046
2047
2048 locked_ref = NULL;
2049 cond_resched();
2050 } while ((nr != -1 && count < nr) || locked_ref);
2051
2052
2053
2054
2055
2056
2057 if (actual_count > 0) {
2058 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2059 u64 avg;
2060
2061
2062
2063
2064
2065 spin_lock(&delayed_refs->lock);
2066 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2067 fs_info->avg_delayed_ref_runtime = avg >> 2;
2068 spin_unlock(&delayed_refs->lock);
2069 }
2070 return 0;
2071}
2072
2073#ifdef SCRAMBLE_DELAYED_REFS
2074
2075
2076
2077
2078
2079static u64 find_middle(struct rb_root *root)
2080{
2081 struct rb_node *n = root->rb_node;
2082 struct btrfs_delayed_ref_node *entry;
2083 int alt = 1;
2084 u64 middle;
2085 u64 first = 0, last = 0;
2086
2087 n = rb_first(root);
2088 if (n) {
2089 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2090 first = entry->bytenr;
2091 }
2092 n = rb_last(root);
2093 if (n) {
2094 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2095 last = entry->bytenr;
2096 }
2097 n = root->rb_node;
2098
2099 while (n) {
2100 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2101 WARN_ON(!entry->in_tree);
2102
2103 middle = entry->bytenr;
2104
2105 if (alt)
2106 n = n->rb_left;
2107 else
2108 n = n->rb_right;
2109
2110 alt = 1 - alt;
2111 }
2112 return middle;
2113}
2114#endif
2115
2116static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2117{
2118 u64 num_bytes;
2119
2120 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2121 sizeof(struct btrfs_extent_inline_ref));
2122 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2123 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2124
2125
2126
2127
2128
2129 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2130}
2131
2132
2133
2134
2135
2136u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2137{
2138 u64 csum_size;
2139 u64 num_csums_per_leaf;
2140 u64 num_csums;
2141
2142 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2143 num_csums_per_leaf = div64_u64(csum_size,
2144 (u64)btrfs_super_csum_size(fs_info->super_copy));
2145 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2146 num_csums += num_csums_per_leaf - 1;
2147 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2148 return num_csums;
2149}
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2162 unsigned long count)
2163{
2164 struct btrfs_fs_info *fs_info = trans->fs_info;
2165 struct rb_node *node;
2166 struct btrfs_delayed_ref_root *delayed_refs;
2167 struct btrfs_delayed_ref_head *head;
2168 int ret;
2169 int run_all = count == (unsigned long)-1;
2170
2171
2172 if (trans->aborted)
2173 return 0;
2174
2175 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2176 return 0;
2177
2178 delayed_refs = &trans->transaction->delayed_refs;
2179 if (count == 0)
2180 count = atomic_read(&delayed_refs->num_entries) * 2;
2181
2182again:
2183#ifdef SCRAMBLE_DELAYED_REFS
2184 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2185#endif
2186 ret = __btrfs_run_delayed_refs(trans, count);
2187 if (ret < 0) {
2188 btrfs_abort_transaction(trans, ret);
2189 return ret;
2190 }
2191
2192 if (run_all) {
2193 btrfs_create_pending_block_groups(trans);
2194
2195 spin_lock(&delayed_refs->lock);
2196 node = rb_first_cached(&delayed_refs->href_root);
2197 if (!node) {
2198 spin_unlock(&delayed_refs->lock);
2199 goto out;
2200 }
2201 head = rb_entry(node, struct btrfs_delayed_ref_head,
2202 href_node);
2203 refcount_inc(&head->refs);
2204 spin_unlock(&delayed_refs->lock);
2205
2206
2207 mutex_lock(&head->mutex);
2208 mutex_unlock(&head->mutex);
2209
2210 btrfs_put_delayed_ref_head(head);
2211 cond_resched();
2212 goto again;
2213 }
2214out:
2215 return 0;
2216}
2217
2218int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2219 u64 bytenr, u64 num_bytes, u64 flags,
2220 int level, int is_data)
2221{
2222 struct btrfs_delayed_extent_op *extent_op;
2223 int ret;
2224
2225 extent_op = btrfs_alloc_delayed_extent_op();
2226 if (!extent_op)
2227 return -ENOMEM;
2228
2229 extent_op->flags_to_set = flags;
2230 extent_op->update_flags = true;
2231 extent_op->update_key = false;
2232 extent_op->is_data = is_data ? true : false;
2233 extent_op->level = level;
2234
2235 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2236 if (ret)
2237 btrfs_free_delayed_extent_op(extent_op);
2238 return ret;
2239}
2240
2241static noinline int check_delayed_ref(struct btrfs_root *root,
2242 struct btrfs_path *path,
2243 u64 objectid, u64 offset, u64 bytenr)
2244{
2245 struct btrfs_delayed_ref_head *head;
2246 struct btrfs_delayed_ref_node *ref;
2247 struct btrfs_delayed_data_ref *data_ref;
2248 struct btrfs_delayed_ref_root *delayed_refs;
2249 struct btrfs_transaction *cur_trans;
2250 struct rb_node *node;
2251 int ret = 0;
2252
2253 spin_lock(&root->fs_info->trans_lock);
2254 cur_trans = root->fs_info->running_transaction;
2255 if (cur_trans)
2256 refcount_inc(&cur_trans->use_count);
2257 spin_unlock(&root->fs_info->trans_lock);
2258 if (!cur_trans)
2259 return 0;
2260
2261 delayed_refs = &cur_trans->delayed_refs;
2262 spin_lock(&delayed_refs->lock);
2263 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2264 if (!head) {
2265 spin_unlock(&delayed_refs->lock);
2266 btrfs_put_transaction(cur_trans);
2267 return 0;
2268 }
2269
2270 if (!mutex_trylock(&head->mutex)) {
2271 refcount_inc(&head->refs);
2272 spin_unlock(&delayed_refs->lock);
2273
2274 btrfs_release_path(path);
2275
2276
2277
2278
2279
2280 mutex_lock(&head->mutex);
2281 mutex_unlock(&head->mutex);
2282 btrfs_put_delayed_ref_head(head);
2283 btrfs_put_transaction(cur_trans);
2284 return -EAGAIN;
2285 }
2286 spin_unlock(&delayed_refs->lock);
2287
2288 spin_lock(&head->lock);
2289
2290
2291
2292
2293 for (node = rb_first_cached(&head->ref_tree); node;
2294 node = rb_next(node)) {
2295 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2296
2297 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2298 ret = 1;
2299 break;
2300 }
2301
2302 data_ref = btrfs_delayed_node_to_data_ref(ref);
2303
2304
2305
2306
2307
2308 if (data_ref->root != root->root_key.objectid ||
2309 data_ref->objectid != objectid ||
2310 data_ref->offset != offset) {
2311 ret = 1;
2312 break;
2313 }
2314 }
2315 spin_unlock(&head->lock);
2316 mutex_unlock(&head->mutex);
2317 btrfs_put_transaction(cur_trans);
2318 return ret;
2319}
2320
2321static noinline int check_committed_ref(struct btrfs_root *root,
2322 struct btrfs_path *path,
2323 u64 objectid, u64 offset, u64 bytenr)
2324{
2325 struct btrfs_fs_info *fs_info = root->fs_info;
2326 struct btrfs_root *extent_root = fs_info->extent_root;
2327 struct extent_buffer *leaf;
2328 struct btrfs_extent_data_ref *ref;
2329 struct btrfs_extent_inline_ref *iref;
2330 struct btrfs_extent_item *ei;
2331 struct btrfs_key key;
2332 u32 item_size;
2333 int type;
2334 int ret;
2335
2336 key.objectid = bytenr;
2337 key.offset = (u64)-1;
2338 key.type = BTRFS_EXTENT_ITEM_KEY;
2339
2340 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2341 if (ret < 0)
2342 goto out;
2343 BUG_ON(ret == 0);
2344
2345 ret = -ENOENT;
2346 if (path->slots[0] == 0)
2347 goto out;
2348
2349 path->slots[0]--;
2350 leaf = path->nodes[0];
2351 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2352
2353 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2354 goto out;
2355
2356 ret = 1;
2357 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2358 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2359
2360
2361 if (item_size != sizeof(*ei) +
2362 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2363 goto out;
2364
2365
2366 if (btrfs_extent_generation(leaf, ei) <=
2367 btrfs_root_last_snapshot(&root->root_item))
2368 goto out;
2369
2370 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2371
2372
2373 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2374 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2375 goto out;
2376
2377 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2378 if (btrfs_extent_refs(leaf, ei) !=
2379 btrfs_extent_data_ref_count(leaf, ref) ||
2380 btrfs_extent_data_ref_root(leaf, ref) !=
2381 root->root_key.objectid ||
2382 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2383 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2384 goto out;
2385
2386 ret = 0;
2387out:
2388 return ret;
2389}
2390
2391int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2392 u64 bytenr)
2393{
2394 struct btrfs_path *path;
2395 int ret;
2396
2397 path = btrfs_alloc_path();
2398 if (!path)
2399 return -ENOMEM;
2400
2401 do {
2402 ret = check_committed_ref(root, path, objectid,
2403 offset, bytenr);
2404 if (ret && ret != -ENOENT)
2405 goto out;
2406
2407 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2408 } while (ret == -EAGAIN);
2409
2410out:
2411 btrfs_free_path(path);
2412 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2413 WARN_ON(ret > 0);
2414 return ret;
2415}
2416
2417static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2418 struct btrfs_root *root,
2419 struct extent_buffer *buf,
2420 int full_backref, int inc)
2421{
2422 struct btrfs_fs_info *fs_info = root->fs_info;
2423 u64 bytenr;
2424 u64 num_bytes;
2425 u64 parent;
2426 u64 ref_root;
2427 u32 nritems;
2428 struct btrfs_key key;
2429 struct btrfs_file_extent_item *fi;
2430 struct btrfs_ref generic_ref = { 0 };
2431 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2432 int i;
2433 int action;
2434 int level;
2435 int ret = 0;
2436
2437 if (btrfs_is_testing(fs_info))
2438 return 0;
2439
2440 ref_root = btrfs_header_owner(buf);
2441 nritems = btrfs_header_nritems(buf);
2442 level = btrfs_header_level(buf);
2443
2444 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
2445 return 0;
2446
2447 if (full_backref)
2448 parent = buf->start;
2449 else
2450 parent = 0;
2451 if (inc)
2452 action = BTRFS_ADD_DELAYED_REF;
2453 else
2454 action = BTRFS_DROP_DELAYED_REF;
2455
2456 for (i = 0; i < nritems; i++) {
2457 if (level == 0) {
2458 btrfs_item_key_to_cpu(buf, &key, i);
2459 if (key.type != BTRFS_EXTENT_DATA_KEY)
2460 continue;
2461 fi = btrfs_item_ptr(buf, i,
2462 struct btrfs_file_extent_item);
2463 if (btrfs_file_extent_type(buf, fi) ==
2464 BTRFS_FILE_EXTENT_INLINE)
2465 continue;
2466 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2467 if (bytenr == 0)
2468 continue;
2469
2470 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2471 key.offset -= btrfs_file_extent_offset(buf, fi);
2472 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2473 num_bytes, parent);
2474 generic_ref.real_root = root->root_key.objectid;
2475 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2476 key.offset);
2477 generic_ref.skip_qgroup = for_reloc;
2478 if (inc)
2479 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2480 else
2481 ret = btrfs_free_extent(trans, &generic_ref);
2482 if (ret)
2483 goto fail;
2484 } else {
2485 bytenr = btrfs_node_blockptr(buf, i);
2486 num_bytes = fs_info->nodesize;
2487 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2488 num_bytes, parent);
2489 generic_ref.real_root = root->root_key.objectid;
2490 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2491 generic_ref.skip_qgroup = for_reloc;
2492 if (inc)
2493 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2494 else
2495 ret = btrfs_free_extent(trans, &generic_ref);
2496 if (ret)
2497 goto fail;
2498 }
2499 }
2500 return 0;
2501fail:
2502 return ret;
2503}
2504
2505int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2506 struct extent_buffer *buf, int full_backref)
2507{
2508 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2509}
2510
2511int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2512 struct extent_buffer *buf, int full_backref)
2513{
2514 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2515}
2516
2517int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
2518{
2519 struct btrfs_block_group_cache *block_group;
2520 int readonly = 0;
2521
2522 block_group = btrfs_lookup_block_group(fs_info, bytenr);
2523 if (!block_group || block_group->ro)
2524 readonly = 1;
2525 if (block_group)
2526 btrfs_put_block_group(block_group);
2527 return readonly;
2528}
2529
2530static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2531{
2532 struct btrfs_fs_info *fs_info = root->fs_info;
2533 u64 flags;
2534 u64 ret;
2535
2536 if (data)
2537 flags = BTRFS_BLOCK_GROUP_DATA;
2538 else if (root == fs_info->chunk_root)
2539 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2540 else
2541 flags = BTRFS_BLOCK_GROUP_METADATA;
2542
2543 ret = btrfs_get_alloc_profile(fs_info, flags);
2544 return ret;
2545}
2546
2547static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2548{
2549 struct btrfs_block_group_cache *cache;
2550 u64 bytenr;
2551
2552 spin_lock(&fs_info->block_group_cache_lock);
2553 bytenr = fs_info->first_logical_byte;
2554 spin_unlock(&fs_info->block_group_cache_lock);
2555
2556 if (bytenr < (u64)-1)
2557 return bytenr;
2558
2559 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2560 if (!cache)
2561 return 0;
2562
2563 bytenr = cache->key.objectid;
2564 btrfs_put_block_group(cache);
2565
2566 return bytenr;
2567}
2568
2569static int pin_down_extent(struct btrfs_block_group_cache *cache,
2570 u64 bytenr, u64 num_bytes, int reserved)
2571{
2572 struct btrfs_fs_info *fs_info = cache->fs_info;
2573
2574 spin_lock(&cache->space_info->lock);
2575 spin_lock(&cache->lock);
2576 cache->pinned += num_bytes;
2577 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2578 num_bytes);
2579 if (reserved) {
2580 cache->reserved -= num_bytes;
2581 cache->space_info->bytes_reserved -= num_bytes;
2582 }
2583 spin_unlock(&cache->lock);
2584 spin_unlock(&cache->space_info->lock);
2585
2586 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
2587 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2588 set_extent_dirty(fs_info->pinned_extents, bytenr,
2589 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2590 return 0;
2591}
2592
2593
2594
2595
2596int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2597 u64 bytenr, u64 num_bytes, int reserved)
2598{
2599 struct btrfs_block_group_cache *cache;
2600
2601 cache = btrfs_lookup_block_group(fs_info, bytenr);
2602 BUG_ON(!cache);
2603
2604 pin_down_extent(cache, bytenr, num_bytes, reserved);
2605
2606 btrfs_put_block_group(cache);
2607 return 0;
2608}
2609
2610
2611
2612
2613int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
2614 u64 bytenr, u64 num_bytes)
2615{
2616 struct btrfs_block_group_cache *cache;
2617 int ret;
2618
2619 cache = btrfs_lookup_block_group(fs_info, bytenr);
2620 if (!cache)
2621 return -EINVAL;
2622
2623
2624
2625
2626
2627
2628
2629 btrfs_cache_block_group(cache, 1);
2630
2631 pin_down_extent(cache, bytenr, num_bytes, 0);
2632
2633
2634 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2635 btrfs_put_block_group(cache);
2636 return ret;
2637}
2638
2639static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2640 u64 start, u64 num_bytes)
2641{
2642 int ret;
2643 struct btrfs_block_group_cache *block_group;
2644 struct btrfs_caching_control *caching_ctl;
2645
2646 block_group = btrfs_lookup_block_group(fs_info, start);
2647 if (!block_group)
2648 return -EINVAL;
2649
2650 btrfs_cache_block_group(block_group, 0);
2651 caching_ctl = btrfs_get_caching_control(block_group);
2652
2653 if (!caching_ctl) {
2654
2655 BUG_ON(!btrfs_block_group_cache_done(block_group));
2656 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2657 } else {
2658 mutex_lock(&caching_ctl->mutex);
2659
2660 if (start >= caching_ctl->progress) {
2661 ret = btrfs_add_excluded_extent(fs_info, start,
2662 num_bytes);
2663 } else if (start + num_bytes <= caching_ctl->progress) {
2664 ret = btrfs_remove_free_space(block_group,
2665 start, num_bytes);
2666 } else {
2667 num_bytes = caching_ctl->progress - start;
2668 ret = btrfs_remove_free_space(block_group,
2669 start, num_bytes);
2670 if (ret)
2671 goto out_lock;
2672
2673 num_bytes = (start + num_bytes) -
2674 caching_ctl->progress;
2675 start = caching_ctl->progress;
2676 ret = btrfs_add_excluded_extent(fs_info, start,
2677 num_bytes);
2678 }
2679out_lock:
2680 mutex_unlock(&caching_ctl->mutex);
2681 btrfs_put_caching_control(caching_ctl);
2682 }
2683 btrfs_put_block_group(block_group);
2684 return ret;
2685}
2686
2687int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2688{
2689 struct btrfs_fs_info *fs_info = eb->fs_info;
2690 struct btrfs_file_extent_item *item;
2691 struct btrfs_key key;
2692 int found_type;
2693 int i;
2694 int ret = 0;
2695
2696 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2697 return 0;
2698
2699 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2700 btrfs_item_key_to_cpu(eb, &key, i);
2701 if (key.type != BTRFS_EXTENT_DATA_KEY)
2702 continue;
2703 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2704 found_type = btrfs_file_extent_type(eb, item);
2705 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2706 continue;
2707 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2708 continue;
2709 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2710 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2711 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2712 if (ret)
2713 break;
2714 }
2715
2716 return ret;
2717}
2718
2719static void
2720btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
2721{
2722 atomic_inc(&bg->reservations);
2723}
2724
2725void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
2726{
2727 struct btrfs_caching_control *next;
2728 struct btrfs_caching_control *caching_ctl;
2729 struct btrfs_block_group_cache *cache;
2730
2731 down_write(&fs_info->commit_root_sem);
2732
2733 list_for_each_entry_safe(caching_ctl, next,
2734 &fs_info->caching_block_groups, list) {
2735 cache = caching_ctl->block_group;
2736 if (btrfs_block_group_cache_done(cache)) {
2737 cache->last_byte_to_unpin = (u64)-1;
2738 list_del_init(&caching_ctl->list);
2739 btrfs_put_caching_control(caching_ctl);
2740 } else {
2741 cache->last_byte_to_unpin = caching_ctl->progress;
2742 }
2743 }
2744
2745 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
2746 fs_info->pinned_extents = &fs_info->freed_extents[1];
2747 else
2748 fs_info->pinned_extents = &fs_info->freed_extents[0];
2749
2750 up_write(&fs_info->commit_root_sem);
2751
2752 btrfs_update_global_block_rsv(fs_info);
2753}
2754
2755
2756
2757
2758
2759static struct btrfs_free_cluster *
2760fetch_cluster_info(struct btrfs_fs_info *fs_info,
2761 struct btrfs_space_info *space_info, u64 *empty_cluster)
2762{
2763 struct btrfs_free_cluster *ret = NULL;
2764
2765 *empty_cluster = 0;
2766 if (btrfs_mixed_space_info(space_info))
2767 return ret;
2768
2769 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2770 ret = &fs_info->meta_alloc_cluster;
2771 if (btrfs_test_opt(fs_info, SSD))
2772 *empty_cluster = SZ_2M;
2773 else
2774 *empty_cluster = SZ_64K;
2775 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2776 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2777 *empty_cluster = SZ_2M;
2778 ret = &fs_info->data_alloc_cluster;
2779 }
2780
2781 return ret;
2782}
2783
2784static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2785 u64 start, u64 end,
2786 const bool return_free_space)
2787{
2788 struct btrfs_block_group_cache *cache = NULL;
2789 struct btrfs_space_info *space_info;
2790 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2791 struct btrfs_free_cluster *cluster = NULL;
2792 u64 len;
2793 u64 total_unpinned = 0;
2794 u64 empty_cluster = 0;
2795 bool readonly;
2796
2797 while (start <= end) {
2798 readonly = false;
2799 if (!cache ||
2800 start >= cache->key.objectid + cache->key.offset) {
2801 if (cache)
2802 btrfs_put_block_group(cache);
2803 total_unpinned = 0;
2804 cache = btrfs_lookup_block_group(fs_info, start);
2805 BUG_ON(!cache);
2806
2807 cluster = fetch_cluster_info(fs_info,
2808 cache->space_info,
2809 &empty_cluster);
2810 empty_cluster <<= 1;
2811 }
2812
2813 len = cache->key.objectid + cache->key.offset - start;
2814 len = min(len, end + 1 - start);
2815
2816 if (start < cache->last_byte_to_unpin) {
2817 len = min(len, cache->last_byte_to_unpin - start);
2818 if (return_free_space)
2819 btrfs_add_free_space(cache, start, len);
2820 }
2821
2822 start += len;
2823 total_unpinned += len;
2824 space_info = cache->space_info;
2825
2826
2827
2828
2829
2830
2831
2832 if (cluster && cluster->fragmented &&
2833 total_unpinned > empty_cluster) {
2834 spin_lock(&cluster->lock);
2835 cluster->fragmented = 0;
2836 spin_unlock(&cluster->lock);
2837 }
2838
2839 spin_lock(&space_info->lock);
2840 spin_lock(&cache->lock);
2841 cache->pinned -= len;
2842 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2843 space_info->max_extent_size = 0;
2844 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2845 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2846 if (cache->ro) {
2847 space_info->bytes_readonly += len;
2848 readonly = true;
2849 }
2850 spin_unlock(&cache->lock);
2851 if (!readonly && return_free_space &&
2852 global_rsv->space_info == space_info) {
2853 u64 to_add = len;
2854
2855 spin_lock(&global_rsv->lock);
2856 if (!global_rsv->full) {
2857 to_add = min(len, global_rsv->size -
2858 global_rsv->reserved);
2859 global_rsv->reserved += to_add;
2860 btrfs_space_info_update_bytes_may_use(fs_info,
2861 space_info, to_add);
2862 if (global_rsv->reserved >= global_rsv->size)
2863 global_rsv->full = 1;
2864 len -= to_add;
2865 }
2866 spin_unlock(&global_rsv->lock);
2867
2868 if (len)
2869 btrfs_try_granting_tickets(fs_info,
2870 space_info);
2871 }
2872 spin_unlock(&space_info->lock);
2873 }
2874
2875 if (cache)
2876 btrfs_put_block_group(cache);
2877 return 0;
2878}
2879
2880int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2881{
2882 struct btrfs_fs_info *fs_info = trans->fs_info;
2883 struct btrfs_block_group_cache *block_group, *tmp;
2884 struct list_head *deleted_bgs;
2885 struct extent_io_tree *unpin;
2886 u64 start;
2887 u64 end;
2888 int ret;
2889
2890 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
2891 unpin = &fs_info->freed_extents[1];
2892 else
2893 unpin = &fs_info->freed_extents[0];
2894
2895 while (!trans->aborted) {
2896 struct extent_state *cached_state = NULL;
2897
2898 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2899 ret = find_first_extent_bit(unpin, 0, &start, &end,
2900 EXTENT_DIRTY, &cached_state);
2901 if (ret) {
2902 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2903 break;
2904 }
2905
2906 if (btrfs_test_opt(fs_info, DISCARD))
2907 ret = btrfs_discard_extent(fs_info, start,
2908 end + 1 - start, NULL);
2909
2910 clear_extent_dirty(unpin, start, end, &cached_state);
2911 unpin_extent_range(fs_info, start, end, true);
2912 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2913 free_extent_state(cached_state);
2914 cond_resched();
2915 }
2916
2917
2918
2919
2920
2921
2922 deleted_bgs = &trans->transaction->deleted_bgs;
2923 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2924 u64 trimmed = 0;
2925
2926 ret = -EROFS;
2927 if (!trans->aborted)
2928 ret = btrfs_discard_extent(fs_info,
2929 block_group->key.objectid,
2930 block_group->key.offset,
2931 &trimmed);
2932
2933 list_del_init(&block_group->bg_list);
2934 btrfs_put_block_group_trimming(block_group);
2935 btrfs_put_block_group(block_group);
2936
2937 if (ret) {
2938 const char *errstr = btrfs_decode_error(ret);
2939 btrfs_warn(fs_info,
2940 "discard failed while removing blockgroup: errno=%d %s",
2941 ret, errstr);
2942 }
2943 }
2944
2945 return 0;
2946}
2947
2948static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2949 struct btrfs_delayed_ref_node *node, u64 parent,
2950 u64 root_objectid, u64 owner_objectid,
2951 u64 owner_offset, int refs_to_drop,
2952 struct btrfs_delayed_extent_op *extent_op)
2953{
2954 struct btrfs_fs_info *info = trans->fs_info;
2955 struct btrfs_key key;
2956 struct btrfs_path *path;
2957 struct btrfs_root *extent_root = info->extent_root;
2958 struct extent_buffer *leaf;
2959 struct btrfs_extent_item *ei;
2960 struct btrfs_extent_inline_ref *iref;
2961 int ret;
2962 int is_data;
2963 int extent_slot = 0;
2964 int found_extent = 0;
2965 int num_to_del = 1;
2966 u32 item_size;
2967 u64 refs;
2968 u64 bytenr = node->bytenr;
2969 u64 num_bytes = node->num_bytes;
2970 int last_ref = 0;
2971 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2972
2973 path = btrfs_alloc_path();
2974 if (!path)
2975 return -ENOMEM;
2976
2977 path->reada = READA_FORWARD;
2978 path->leave_spinning = 1;
2979
2980 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2981 BUG_ON(!is_data && refs_to_drop != 1);
2982
2983 if (is_data)
2984 skinny_metadata = false;
2985
2986 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2987 parent, root_objectid, owner_objectid,
2988 owner_offset);
2989 if (ret == 0) {
2990 extent_slot = path->slots[0];
2991 while (extent_slot >= 0) {
2992 btrfs_item_key_to_cpu(path->nodes[0], &key,
2993 extent_slot);
2994 if (key.objectid != bytenr)
2995 break;
2996 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2997 key.offset == num_bytes) {
2998 found_extent = 1;
2999 break;
3000 }
3001 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3002 key.offset == owner_objectid) {
3003 found_extent = 1;
3004 break;
3005 }
3006 if (path->slots[0] - extent_slot > 5)
3007 break;
3008 extent_slot--;
3009 }
3010
3011 if (!found_extent) {
3012 BUG_ON(iref);
3013 ret = remove_extent_backref(trans, path, NULL,
3014 refs_to_drop,
3015 is_data, &last_ref);
3016 if (ret) {
3017 btrfs_abort_transaction(trans, ret);
3018 goto out;
3019 }
3020 btrfs_release_path(path);
3021 path->leave_spinning = 1;
3022
3023 key.objectid = bytenr;
3024 key.type = BTRFS_EXTENT_ITEM_KEY;
3025 key.offset = num_bytes;
3026
3027 if (!is_data && skinny_metadata) {
3028 key.type = BTRFS_METADATA_ITEM_KEY;
3029 key.offset = owner_objectid;
3030 }
3031
3032 ret = btrfs_search_slot(trans, extent_root,
3033 &key, path, -1, 1);
3034 if (ret > 0 && skinny_metadata && path->slots[0]) {
3035
3036
3037
3038
3039 path->slots[0]--;
3040 btrfs_item_key_to_cpu(path->nodes[0], &key,
3041 path->slots[0]);
3042 if (key.objectid == bytenr &&
3043 key.type == BTRFS_EXTENT_ITEM_KEY &&
3044 key.offset == num_bytes)
3045 ret = 0;
3046 }
3047
3048 if (ret > 0 && skinny_metadata) {
3049 skinny_metadata = false;
3050 key.objectid = bytenr;
3051 key.type = BTRFS_EXTENT_ITEM_KEY;
3052 key.offset = num_bytes;
3053 btrfs_release_path(path);
3054 ret = btrfs_search_slot(trans, extent_root,
3055 &key, path, -1, 1);
3056 }
3057
3058 if (ret) {
3059 btrfs_err(info,
3060 "umm, got %d back from search, was looking for %llu",
3061 ret, bytenr);
3062 if (ret > 0)
3063 btrfs_print_leaf(path->nodes[0]);
3064 }
3065 if (ret < 0) {
3066 btrfs_abort_transaction(trans, ret);
3067 goto out;
3068 }
3069 extent_slot = path->slots[0];
3070 }
3071 } else if (WARN_ON(ret == -ENOENT)) {
3072 btrfs_print_leaf(path->nodes[0]);
3073 btrfs_err(info,
3074 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3075 bytenr, parent, root_objectid, owner_objectid,
3076 owner_offset);
3077 btrfs_abort_transaction(trans, ret);
3078 goto out;
3079 } else {
3080 btrfs_abort_transaction(trans, ret);
3081 goto out;
3082 }
3083
3084 leaf = path->nodes[0];
3085 item_size = btrfs_item_size_nr(leaf, extent_slot);
3086 if (unlikely(item_size < sizeof(*ei))) {
3087 ret = -EINVAL;
3088 btrfs_print_v0_err(info);
3089 btrfs_abort_transaction(trans, ret);
3090 goto out;
3091 }
3092 ei = btrfs_item_ptr(leaf, extent_slot,
3093 struct btrfs_extent_item);
3094 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3095 key.type == BTRFS_EXTENT_ITEM_KEY) {
3096 struct btrfs_tree_block_info *bi;
3097 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
3098 bi = (struct btrfs_tree_block_info *)(ei + 1);
3099 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3100 }
3101
3102 refs = btrfs_extent_refs(leaf, ei);
3103 if (refs < refs_to_drop) {
3104 btrfs_err(info,
3105 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
3106 refs_to_drop, refs, bytenr);
3107 ret = -EINVAL;
3108 btrfs_abort_transaction(trans, ret);
3109 goto out;
3110 }
3111 refs -= refs_to_drop;
3112
3113 if (refs > 0) {
3114 if (extent_op)
3115 __run_delayed_extent_op(extent_op, leaf, ei);
3116
3117
3118
3119
3120 if (iref) {
3121 BUG_ON(!found_extent);
3122 } else {
3123 btrfs_set_extent_refs(leaf, ei, refs);
3124 btrfs_mark_buffer_dirty(leaf);
3125 }
3126 if (found_extent) {
3127 ret = remove_extent_backref(trans, path, iref,
3128 refs_to_drop, is_data,
3129 &last_ref);
3130 if (ret) {
3131 btrfs_abort_transaction(trans, ret);
3132 goto out;
3133 }
3134 }
3135 } else {
3136 if (found_extent) {
3137 BUG_ON(is_data && refs_to_drop !=
3138 extent_data_ref_count(path, iref));
3139 if (iref) {
3140 BUG_ON(path->slots[0] != extent_slot);
3141 } else {
3142 BUG_ON(path->slots[0] != extent_slot + 1);
3143 path->slots[0] = extent_slot;
3144 num_to_del = 2;
3145 }
3146 }
3147
3148 last_ref = 1;
3149 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3150 num_to_del);
3151 if (ret) {
3152 btrfs_abort_transaction(trans, ret);
3153 goto out;
3154 }
3155 btrfs_release_path(path);
3156
3157 if (is_data) {
3158 ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
3159 if (ret) {
3160 btrfs_abort_transaction(trans, ret);
3161 goto out;
3162 }
3163 }
3164
3165 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3166 if (ret) {
3167 btrfs_abort_transaction(trans, ret);
3168 goto out;
3169 }
3170
3171 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3172 if (ret) {
3173 btrfs_abort_transaction(trans, ret);
3174 goto out;
3175 }
3176 }
3177 btrfs_release_path(path);
3178
3179out:
3180 btrfs_free_path(path);
3181 return ret;
3182}
3183
3184
3185
3186
3187
3188
3189
3190static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3191 u64 bytenr)
3192{
3193 struct btrfs_delayed_ref_head *head;
3194 struct btrfs_delayed_ref_root *delayed_refs;
3195 int ret = 0;
3196
3197 delayed_refs = &trans->transaction->delayed_refs;
3198 spin_lock(&delayed_refs->lock);
3199 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3200 if (!head)
3201 goto out_delayed_unlock;
3202
3203 spin_lock(&head->lock);
3204 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3205 goto out;
3206
3207 if (cleanup_extent_op(head) != NULL)
3208 goto out;
3209
3210
3211
3212
3213
3214 if (!mutex_trylock(&head->mutex))
3215 goto out;
3216
3217 btrfs_delete_ref_head(delayed_refs, head);
3218 head->processing = 0;
3219
3220 spin_unlock(&head->lock);
3221 spin_unlock(&delayed_refs->lock);
3222
3223 BUG_ON(head->extent_op);
3224 if (head->must_insert_reserved)
3225 ret = 1;
3226
3227 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3228 mutex_unlock(&head->mutex);
3229 btrfs_put_delayed_ref_head(head);
3230 return ret;
3231out:
3232 spin_unlock(&head->lock);
3233
3234out_delayed_unlock:
3235 spin_unlock(&delayed_refs->lock);
3236 return 0;
3237}
3238
3239void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3240 struct btrfs_root *root,
3241 struct extent_buffer *buf,
3242 u64 parent, int last_ref)
3243{
3244 struct btrfs_fs_info *fs_info = root->fs_info;
3245 struct btrfs_ref generic_ref = { 0 };
3246 int pin = 1;
3247 int ret;
3248
3249 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3250 buf->start, buf->len, parent);
3251 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3252 root->root_key.objectid);
3253
3254 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3255 int old_ref_mod, new_ref_mod;
3256
3257 btrfs_ref_tree_mod(fs_info, &generic_ref);
3258 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
3259 &old_ref_mod, &new_ref_mod);
3260 BUG_ON(ret);
3261 pin = old_ref_mod >= 0 && new_ref_mod < 0;
3262 }
3263
3264 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3265 struct btrfs_block_group_cache *cache;
3266
3267 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3268 ret = check_ref_cleanup(trans, buf->start);
3269 if (!ret)
3270 goto out;
3271 }
3272
3273 pin = 0;
3274 cache = btrfs_lookup_block_group(fs_info, buf->start);
3275
3276 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3277 pin_down_extent(cache, buf->start, buf->len, 1);
3278 btrfs_put_block_group(cache);
3279 goto out;
3280 }
3281
3282 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3283
3284 btrfs_add_free_space(cache, buf->start, buf->len);
3285 btrfs_free_reserved_bytes(cache, buf->len, 0);
3286 btrfs_put_block_group(cache);
3287 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3288 }
3289out:
3290 if (pin)
3291 add_pinned_bytes(fs_info, &generic_ref);
3292
3293 if (last_ref) {
3294
3295
3296
3297
3298 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3299 }
3300}
3301
3302
3303int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3304{
3305 struct btrfs_fs_info *fs_info = trans->fs_info;
3306 int old_ref_mod, new_ref_mod;
3307 int ret;
3308
3309 if (btrfs_is_testing(fs_info))
3310 return 0;
3311
3312
3313
3314
3315
3316 if ((ref->type == BTRFS_REF_METADATA &&
3317 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3318 (ref->type == BTRFS_REF_DATA &&
3319 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3320
3321 btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
3322 old_ref_mod = new_ref_mod = 0;
3323 ret = 0;
3324 } else if (ref->type == BTRFS_REF_METADATA) {
3325 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
3326 &old_ref_mod, &new_ref_mod);
3327 } else {
3328 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
3329 &old_ref_mod, &new_ref_mod);
3330 }
3331
3332 if (!((ref->type == BTRFS_REF_METADATA &&
3333 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3334 (ref->type == BTRFS_REF_DATA &&
3335 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3336 btrfs_ref_tree_mod(fs_info, ref);
3337
3338 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
3339 add_pinned_bytes(fs_info, ref);
3340
3341 return ret;
3342}
3343
3344enum btrfs_loop_type {
3345 LOOP_CACHING_NOWAIT,
3346 LOOP_CACHING_WAIT,
3347 LOOP_ALLOC_CHUNK,
3348 LOOP_NO_EMPTY_SIZE,
3349};
3350
3351static inline void
3352btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
3353 int delalloc)
3354{
3355 if (delalloc)
3356 down_read(&cache->data_rwsem);
3357}
3358
3359static inline void
3360btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
3361 int delalloc)
3362{
3363 btrfs_get_block_group(cache);
3364 if (delalloc)
3365 down_read(&cache->data_rwsem);
3366}
3367
3368static struct btrfs_block_group_cache *
3369btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
3370 struct btrfs_free_cluster *cluster,
3371 int delalloc)
3372{
3373 struct btrfs_block_group_cache *used_bg = NULL;
3374
3375 spin_lock(&cluster->refill_lock);
3376 while (1) {
3377 used_bg = cluster->block_group;
3378 if (!used_bg)
3379 return NULL;
3380
3381 if (used_bg == block_group)
3382 return used_bg;
3383
3384 btrfs_get_block_group(used_bg);
3385
3386 if (!delalloc)
3387 return used_bg;
3388
3389 if (down_read_trylock(&used_bg->data_rwsem))
3390 return used_bg;
3391
3392 spin_unlock(&cluster->refill_lock);
3393
3394
3395 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3396
3397 spin_lock(&cluster->refill_lock);
3398 if (used_bg == cluster->block_group)
3399 return used_bg;
3400
3401 up_read(&used_bg->data_rwsem);
3402 btrfs_put_block_group(used_bg);
3403 }
3404}
3405
3406static inline void
3407btrfs_release_block_group(struct btrfs_block_group_cache *cache,
3408 int delalloc)
3409{
3410 if (delalloc)
3411 up_read(&cache->data_rwsem);
3412 btrfs_put_block_group(cache);
3413}
3414
3415
3416
3417
3418
3419struct find_free_extent_ctl {
3420
3421 u64 ram_bytes;
3422 u64 num_bytes;
3423 u64 empty_size;
3424 u64 flags;
3425 int delalloc;
3426
3427
3428 u64 search_start;
3429
3430
3431 u64 empty_cluster;
3432
3433 bool have_caching_bg;
3434 bool orig_have_caching_bg;
3435
3436
3437 int index;
3438
3439
3440
3441
3442 int loop;
3443
3444
3445
3446
3447
3448 bool retry_clustered;
3449
3450
3451
3452
3453
3454 bool retry_unclustered;
3455
3456
3457 int cached;
3458
3459
3460 u64 max_extent_size;
3461
3462
3463 u64 total_free_space;
3464
3465
3466 u64 found_offset;
3467};
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
3479 struct btrfs_free_cluster *last_ptr,
3480 struct find_free_extent_ctl *ffe_ctl,
3481 struct btrfs_block_group_cache **cluster_bg_ret)
3482{
3483 struct btrfs_block_group_cache *cluster_bg;
3484 u64 aligned_cluster;
3485 u64 offset;
3486 int ret;
3487
3488 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3489 if (!cluster_bg)
3490 goto refill_cluster;
3491 if (cluster_bg != bg && (cluster_bg->ro ||
3492 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3493 goto release_cluster;
3494
3495 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3496 ffe_ctl->num_bytes, cluster_bg->key.objectid,
3497 &ffe_ctl->max_extent_size);
3498 if (offset) {
3499
3500 spin_unlock(&last_ptr->refill_lock);
3501 trace_btrfs_reserve_extent_cluster(cluster_bg,
3502 ffe_ctl->search_start, ffe_ctl->num_bytes);
3503 *cluster_bg_ret = cluster_bg;
3504 ffe_ctl->found_offset = offset;
3505 return 0;
3506 }
3507 WARN_ON(last_ptr->block_group != cluster_bg);
3508
3509release_cluster:
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3522 spin_unlock(&last_ptr->refill_lock);
3523 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3524 return -ENOENT;
3525 }
3526
3527
3528 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3529
3530 if (cluster_bg != bg)
3531 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3532
3533refill_cluster:
3534 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3535 spin_unlock(&last_ptr->refill_lock);
3536 return -ENOENT;
3537 }
3538
3539 aligned_cluster = max_t(u64,
3540 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3541 bg->full_stripe_len);
3542 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3543 ffe_ctl->num_bytes, aligned_cluster);
3544 if (ret == 0) {
3545
3546 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3547 ffe_ctl->num_bytes, ffe_ctl->search_start,
3548 &ffe_ctl->max_extent_size);
3549 if (offset) {
3550
3551 spin_unlock(&last_ptr->refill_lock);
3552 trace_btrfs_reserve_extent_cluster(bg,
3553 ffe_ctl->search_start,
3554 ffe_ctl->num_bytes);
3555 ffe_ctl->found_offset = offset;
3556 return 0;
3557 }
3558 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3559 !ffe_ctl->retry_clustered) {
3560 spin_unlock(&last_ptr->refill_lock);
3561
3562 ffe_ctl->retry_clustered = true;
3563 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3564 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3565 return -EAGAIN;
3566 }
3567
3568
3569
3570
3571
3572 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3573 spin_unlock(&last_ptr->refill_lock);
3574 return 1;
3575}
3576
3577
3578
3579
3580
3581
3582static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
3583 struct btrfs_free_cluster *last_ptr,
3584 struct find_free_extent_ctl *ffe_ctl)
3585{
3586 u64 offset;
3587
3588
3589
3590
3591
3592
3593 if (unlikely(last_ptr)) {
3594 spin_lock(&last_ptr->lock);
3595 last_ptr->fragmented = 1;
3596 spin_unlock(&last_ptr->lock);
3597 }
3598 if (ffe_ctl->cached) {
3599 struct btrfs_free_space_ctl *free_space_ctl;
3600
3601 free_space_ctl = bg->free_space_ctl;
3602 spin_lock(&free_space_ctl->tree_lock);
3603 if (free_space_ctl->free_space <
3604 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3605 ffe_ctl->empty_size) {
3606 ffe_ctl->total_free_space = max_t(u64,
3607 ffe_ctl->total_free_space,
3608 free_space_ctl->free_space);
3609 spin_unlock(&free_space_ctl->tree_lock);
3610 return 1;
3611 }
3612 spin_unlock(&free_space_ctl->tree_lock);
3613 }
3614
3615 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3616 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3617 &ffe_ctl->max_extent_size);
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3629 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3630 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3631 ffe_ctl->empty_size);
3632 ffe_ctl->retry_unclustered = true;
3633 return -EAGAIN;
3634 } else if (!offset) {
3635 return 1;
3636 }
3637 ffe_ctl->found_offset = offset;
3638 return 0;
3639}
3640
3641
3642
3643
3644
3645
3646static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3647 struct btrfs_free_cluster *last_ptr,
3648 struct btrfs_key *ins,
3649 struct find_free_extent_ctl *ffe_ctl,
3650 int full_search, bool use_cluster)
3651{
3652 struct btrfs_root *root = fs_info->extent_root;
3653 int ret;
3654
3655 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3656 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3657 ffe_ctl->orig_have_caching_bg = true;
3658
3659 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3660 ffe_ctl->have_caching_bg)
3661 return 1;
3662
3663 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3664 return 1;
3665
3666 if (ins->objectid) {
3667 if (!use_cluster && last_ptr) {
3668 spin_lock(&last_ptr->lock);
3669 last_ptr->window_start = ins->objectid;
3670 spin_unlock(&last_ptr->lock);
3671 }
3672 return 0;
3673 }
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3684 ffe_ctl->index = 0;
3685 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3686
3687
3688
3689
3690
3691 if (ffe_ctl->orig_have_caching_bg || !full_search)
3692 ffe_ctl->loop = LOOP_CACHING_WAIT;
3693 else
3694 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3695 } else {
3696 ffe_ctl->loop++;
3697 }
3698
3699 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3700 struct btrfs_trans_handle *trans;
3701 int exist = 0;
3702
3703 trans = current->journal_info;
3704 if (trans)
3705 exist = 1;
3706 else
3707 trans = btrfs_join_transaction(root);
3708
3709 if (IS_ERR(trans)) {
3710 ret = PTR_ERR(trans);
3711 return ret;
3712 }
3713
3714 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3715 CHUNK_ALLOC_FORCE);
3716
3717
3718
3719
3720
3721
3722 if (ret == -ENOSPC)
3723 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3724
3725
3726 if (ret < 0 && ret != -ENOSPC)
3727 btrfs_abort_transaction(trans, ret);
3728 else
3729 ret = 0;
3730 if (!exist)
3731 btrfs_end_transaction(trans);
3732 if (ret)
3733 return ret;
3734 }
3735
3736 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
3737
3738
3739
3740
3741 if (ffe_ctl->empty_size == 0 &&
3742 ffe_ctl->empty_cluster == 0)
3743 return -ENOSPC;
3744 ffe_ctl->empty_size = 0;
3745 ffe_ctl->empty_cluster = 0;
3746 }
3747 return 1;
3748 }
3749 return -ENOSPC;
3750}
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
3778 u64 ram_bytes, u64 num_bytes, u64 empty_size,
3779 u64 hint_byte, struct btrfs_key *ins,
3780 u64 flags, int delalloc)
3781{
3782 int ret = 0;
3783 struct btrfs_free_cluster *last_ptr = NULL;
3784 struct btrfs_block_group_cache *block_group = NULL;
3785 struct find_free_extent_ctl ffe_ctl = {0};
3786 struct btrfs_space_info *space_info;
3787 bool use_cluster = true;
3788 bool full_search = false;
3789
3790 WARN_ON(num_bytes < fs_info->sectorsize);
3791
3792 ffe_ctl.ram_bytes = ram_bytes;
3793 ffe_ctl.num_bytes = num_bytes;
3794 ffe_ctl.empty_size = empty_size;
3795 ffe_ctl.flags = flags;
3796 ffe_ctl.search_start = 0;
3797 ffe_ctl.retry_clustered = false;
3798 ffe_ctl.retry_unclustered = false;
3799 ffe_ctl.delalloc = delalloc;
3800 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
3801 ffe_ctl.have_caching_bg = false;
3802 ffe_ctl.orig_have_caching_bg = false;
3803 ffe_ctl.found_offset = 0;
3804
3805 ins->type = BTRFS_EXTENT_ITEM_KEY;
3806 ins->objectid = 0;
3807 ins->offset = 0;
3808
3809 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
3810
3811 space_info = btrfs_find_space_info(fs_info, flags);
3812 if (!space_info) {
3813 btrfs_err(fs_info, "No space info for %llu", flags);
3814 return -ENOSPC;
3815 }
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827 if (unlikely(space_info->max_extent_size)) {
3828 spin_lock(&space_info->lock);
3829 if (space_info->max_extent_size &&
3830 num_bytes > space_info->max_extent_size) {
3831 ins->offset = space_info->max_extent_size;
3832 spin_unlock(&space_info->lock);
3833 return -ENOSPC;
3834 } else if (space_info->max_extent_size) {
3835 use_cluster = false;
3836 }
3837 spin_unlock(&space_info->lock);
3838 }
3839
3840 last_ptr = fetch_cluster_info(fs_info, space_info,
3841 &ffe_ctl.empty_cluster);
3842 if (last_ptr) {
3843 spin_lock(&last_ptr->lock);
3844 if (last_ptr->block_group)
3845 hint_byte = last_ptr->window_start;
3846 if (last_ptr->fragmented) {
3847
3848
3849
3850
3851
3852 hint_byte = last_ptr->window_start;
3853 use_cluster = false;
3854 }
3855 spin_unlock(&last_ptr->lock);
3856 }
3857
3858 ffe_ctl.search_start = max(ffe_ctl.search_start,
3859 first_logical_byte(fs_info, 0));
3860 ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
3861 if (ffe_ctl.search_start == hint_byte) {
3862 block_group = btrfs_lookup_block_group(fs_info,
3863 ffe_ctl.search_start);
3864
3865
3866
3867
3868
3869
3870
3871 if (block_group && block_group_bits(block_group, flags) &&
3872 block_group->cached != BTRFS_CACHE_NO) {
3873 down_read(&space_info->groups_sem);
3874 if (list_empty(&block_group->list) ||
3875 block_group->ro) {
3876
3877
3878
3879
3880
3881
3882 btrfs_put_block_group(block_group);
3883 up_read(&space_info->groups_sem);
3884 } else {
3885 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
3886 block_group->flags);
3887 btrfs_lock_block_group(block_group, delalloc);
3888 goto have_block_group;
3889 }
3890 } else if (block_group) {
3891 btrfs_put_block_group(block_group);
3892 }
3893 }
3894search:
3895 ffe_ctl.have_caching_bg = false;
3896 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
3897 ffe_ctl.index == 0)
3898 full_search = true;
3899 down_read(&space_info->groups_sem);
3900 list_for_each_entry(block_group,
3901 &space_info->block_groups[ffe_ctl.index], list) {
3902
3903 if (unlikely(block_group->ro))
3904 continue;
3905
3906 btrfs_grab_block_group(block_group, delalloc);
3907 ffe_ctl.search_start = block_group->key.objectid;
3908
3909
3910
3911
3912
3913
3914 if (!block_group_bits(block_group, flags)) {
3915 u64 extra = BTRFS_BLOCK_GROUP_DUP |
3916 BTRFS_BLOCK_GROUP_RAID1_MASK |
3917 BTRFS_BLOCK_GROUP_RAID56_MASK |
3918 BTRFS_BLOCK_GROUP_RAID10;
3919
3920
3921
3922
3923
3924
3925 if ((flags & extra) && !(block_group->flags & extra))
3926 goto loop;
3927
3928
3929
3930
3931
3932
3933 btrfs_release_block_group(block_group, delalloc);
3934 continue;
3935 }
3936
3937have_block_group:
3938 ffe_ctl.cached = btrfs_block_group_cache_done(block_group);
3939 if (unlikely(!ffe_ctl.cached)) {
3940 ffe_ctl.have_caching_bg = true;
3941 ret = btrfs_cache_block_group(block_group, 0);
3942 BUG_ON(ret < 0);
3943 ret = 0;
3944 }
3945
3946 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
3947 goto loop;
3948
3949
3950
3951
3952
3953 if (last_ptr && use_cluster) {
3954 struct btrfs_block_group_cache *cluster_bg = NULL;
3955
3956 ret = find_free_extent_clustered(block_group, last_ptr,
3957 &ffe_ctl, &cluster_bg);
3958
3959 if (ret == 0) {
3960 if (cluster_bg && cluster_bg != block_group) {
3961 btrfs_release_block_group(block_group,
3962 delalloc);
3963 block_group = cluster_bg;
3964 }
3965 goto checks;
3966 } else if (ret == -EAGAIN) {
3967 goto have_block_group;
3968 } else if (ret > 0) {
3969 goto loop;
3970 }
3971
3972 }
3973
3974 ret = find_free_extent_unclustered(block_group, last_ptr,
3975 &ffe_ctl);
3976 if (ret == -EAGAIN)
3977 goto have_block_group;
3978 else if (ret > 0)
3979 goto loop;
3980
3981checks:
3982 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
3983 fs_info->stripesize);
3984
3985
3986 if (ffe_ctl.search_start + num_bytes >
3987 block_group->key.objectid + block_group->key.offset) {
3988 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
3989 num_bytes);
3990 goto loop;
3991 }
3992
3993 if (ffe_ctl.found_offset < ffe_ctl.search_start)
3994 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
3995 ffe_ctl.search_start - ffe_ctl.found_offset);
3996
3997 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
3998 num_bytes, delalloc);
3999 if (ret == -EAGAIN) {
4000 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4001 num_bytes);
4002 goto loop;
4003 }
4004 btrfs_inc_block_group_reservations(block_group);
4005
4006
4007 ins->objectid = ffe_ctl.search_start;
4008 ins->offset = num_bytes;
4009
4010 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4011 num_bytes);
4012 btrfs_release_block_group(block_group, delalloc);
4013 break;
4014loop:
4015 ffe_ctl.retry_clustered = false;
4016 ffe_ctl.retry_unclustered = false;
4017 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
4018 ffe_ctl.index);
4019 btrfs_release_block_group(block_group, delalloc);
4020 cond_resched();
4021 }
4022 up_read(&space_info->groups_sem);
4023
4024 ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
4025 full_search, use_cluster);
4026 if (ret > 0)
4027 goto search;
4028
4029 if (ret == -ENOSPC) {
4030
4031
4032
4033
4034 if (!ffe_ctl.max_extent_size)
4035 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4036 spin_lock(&space_info->lock);
4037 space_info->max_extent_size = ffe_ctl.max_extent_size;
4038 spin_unlock(&space_info->lock);
4039 ins->offset = ffe_ctl.max_extent_size;
4040 }
4041 return ret;
4042}
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4090 u64 num_bytes, u64 min_alloc_size,
4091 u64 empty_size, u64 hint_byte,
4092 struct btrfs_key *ins, int is_data, int delalloc)
4093{
4094 struct btrfs_fs_info *fs_info = root->fs_info;
4095 bool final_tried = num_bytes == min_alloc_size;
4096 u64 flags;
4097 int ret;
4098
4099 flags = get_alloc_profile_by_root(root, is_data);
4100again:
4101 WARN_ON(num_bytes < fs_info->sectorsize);
4102 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
4103 hint_byte, ins, flags, delalloc);
4104 if (!ret && !is_data) {
4105 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4106 } else if (ret == -ENOSPC) {
4107 if (!final_tried && ins->offset) {
4108 num_bytes = min(num_bytes >> 1, ins->offset);
4109 num_bytes = round_down(num_bytes,
4110 fs_info->sectorsize);
4111 num_bytes = max(num_bytes, min_alloc_size);
4112 ram_bytes = num_bytes;
4113 if (num_bytes == min_alloc_size)
4114 final_tried = true;
4115 goto again;
4116 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4117 struct btrfs_space_info *sinfo;
4118
4119 sinfo = btrfs_find_space_info(fs_info, flags);
4120 btrfs_err(fs_info,
4121 "allocation failed flags %llu, wanted %llu",
4122 flags, num_bytes);
4123 if (sinfo)
4124 btrfs_dump_space_info(fs_info, sinfo,
4125 num_bytes, 1);
4126 }
4127 }
4128
4129 return ret;
4130}
4131
4132static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4133 u64 start, u64 len,
4134 int pin, int delalloc)
4135{
4136 struct btrfs_block_group_cache *cache;
4137 int ret = 0;
4138
4139 cache = btrfs_lookup_block_group(fs_info, start);
4140 if (!cache) {
4141 btrfs_err(fs_info, "Unable to find block group for %llu",
4142 start);
4143 return -ENOSPC;
4144 }
4145
4146 if (pin)
4147 pin_down_extent(cache, start, len, 1);
4148 else {
4149 if (btrfs_test_opt(fs_info, DISCARD))
4150 ret = btrfs_discard_extent(fs_info, start, len, NULL);
4151 btrfs_add_free_space(cache, start, len);
4152 btrfs_free_reserved_bytes(cache, len, delalloc);
4153 trace_btrfs_reserved_extent_free(fs_info, start, len);
4154 }
4155
4156 btrfs_put_block_group(cache);
4157 return ret;
4158}
4159
4160int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4161 u64 start, u64 len, int delalloc)
4162{
4163 return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
4164}
4165
4166int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
4167 u64 start, u64 len)
4168{
4169 return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
4170}
4171
4172static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4173 u64 parent, u64 root_objectid,
4174 u64 flags, u64 owner, u64 offset,
4175 struct btrfs_key *ins, int ref_mod)
4176{
4177 struct btrfs_fs_info *fs_info = trans->fs_info;
4178 int ret;
4179 struct btrfs_extent_item *extent_item;
4180 struct btrfs_extent_inline_ref *iref;
4181 struct btrfs_path *path;
4182 struct extent_buffer *leaf;
4183 int type;
4184 u32 size;
4185
4186 if (parent > 0)
4187 type = BTRFS_SHARED_DATA_REF_KEY;
4188 else
4189 type = BTRFS_EXTENT_DATA_REF_KEY;
4190
4191 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4192
4193 path = btrfs_alloc_path();
4194 if (!path)
4195 return -ENOMEM;
4196
4197 path->leave_spinning = 1;
4198 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4199 ins, size);
4200 if (ret) {
4201 btrfs_free_path(path);
4202 return ret;
4203 }
4204
4205 leaf = path->nodes[0];
4206 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4207 struct btrfs_extent_item);
4208 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4209 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4210 btrfs_set_extent_flags(leaf, extent_item,
4211 flags | BTRFS_EXTENT_FLAG_DATA);
4212
4213 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4214 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4215 if (parent > 0) {
4216 struct btrfs_shared_data_ref *ref;
4217 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4218 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4219 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4220 } else {
4221 struct btrfs_extent_data_ref *ref;
4222 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4223 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4224 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4225 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4226 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4227 }
4228
4229 btrfs_mark_buffer_dirty(path->nodes[0]);
4230 btrfs_free_path(path);
4231
4232 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4233 if (ret)
4234 return ret;
4235
4236 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4237 if (ret) {
4238 btrfs_err(fs_info, "update block group failed for %llu %llu",
4239 ins->objectid, ins->offset);
4240 BUG();
4241 }
4242 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4243 return ret;
4244}
4245
4246static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4247 struct btrfs_delayed_ref_node *node,
4248 struct btrfs_delayed_extent_op *extent_op)
4249{
4250 struct btrfs_fs_info *fs_info = trans->fs_info;
4251 int ret;
4252 struct btrfs_extent_item *extent_item;
4253 struct btrfs_key extent_key;
4254 struct btrfs_tree_block_info *block_info;
4255 struct btrfs_extent_inline_ref *iref;
4256 struct btrfs_path *path;
4257 struct extent_buffer *leaf;
4258 struct btrfs_delayed_tree_ref *ref;
4259 u32 size = sizeof(*extent_item) + sizeof(*iref);
4260 u64 num_bytes;
4261 u64 flags = extent_op->flags_to_set;
4262 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4263
4264 ref = btrfs_delayed_node_to_tree_ref(node);
4265
4266 extent_key.objectid = node->bytenr;
4267 if (skinny_metadata) {
4268 extent_key.offset = ref->level;
4269 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4270 num_bytes = fs_info->nodesize;
4271 } else {
4272 extent_key.offset = node->num_bytes;
4273 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4274 size += sizeof(*block_info);
4275 num_bytes = node->num_bytes;
4276 }
4277
4278 path = btrfs_alloc_path();
4279 if (!path)
4280 return -ENOMEM;
4281
4282 path->leave_spinning = 1;
4283 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4284 &extent_key, size);
4285 if (ret) {
4286 btrfs_free_path(path);
4287 return ret;
4288 }
4289
4290 leaf = path->nodes[0];
4291 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4292 struct btrfs_extent_item);
4293 btrfs_set_extent_refs(leaf, extent_item, 1);
4294 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4295 btrfs_set_extent_flags(leaf, extent_item,
4296 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4297
4298 if (skinny_metadata) {
4299 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4300 } else {
4301 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4302 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4303 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4304 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4305 }
4306
4307 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4308 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4309 btrfs_set_extent_inline_ref_type(leaf, iref,
4310 BTRFS_SHARED_BLOCK_REF_KEY);
4311 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4312 } else {
4313 btrfs_set_extent_inline_ref_type(leaf, iref,
4314 BTRFS_TREE_BLOCK_REF_KEY);
4315 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4316 }
4317
4318 btrfs_mark_buffer_dirty(leaf);
4319 btrfs_free_path(path);
4320
4321 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4322 num_bytes);
4323 if (ret)
4324 return ret;
4325
4326 ret = btrfs_update_block_group(trans, extent_key.objectid,
4327 fs_info->nodesize, 1);
4328 if (ret) {
4329 btrfs_err(fs_info, "update block group failed for %llu %llu",
4330 extent_key.objectid, extent_key.offset);
4331 BUG();
4332 }
4333
4334 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4335 fs_info->nodesize);
4336 return ret;
4337}
4338
4339int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4340 struct btrfs_root *root, u64 owner,
4341 u64 offset, u64 ram_bytes,
4342 struct btrfs_key *ins)
4343{
4344 struct btrfs_ref generic_ref = { 0 };
4345 int ret;
4346
4347 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4348
4349 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4350 ins->objectid, ins->offset, 0);
4351 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4352 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4353 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
4354 ram_bytes, NULL, NULL);
4355 return ret;
4356}
4357
4358
4359
4360
4361
4362
4363int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4364 u64 root_objectid, u64 owner, u64 offset,
4365 struct btrfs_key *ins)
4366{
4367 struct btrfs_fs_info *fs_info = trans->fs_info;
4368 int ret;
4369 struct btrfs_block_group_cache *block_group;
4370 struct btrfs_space_info *space_info;
4371
4372
4373
4374
4375
4376 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4377 ret = __exclude_logged_extent(fs_info, ins->objectid,
4378 ins->offset);
4379 if (ret)
4380 return ret;
4381 }
4382
4383 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4384 if (!block_group)
4385 return -EINVAL;
4386
4387 space_info = block_group->space_info;
4388 spin_lock(&space_info->lock);
4389 spin_lock(&block_group->lock);
4390 space_info->bytes_reserved += ins->offset;
4391 block_group->reserved += ins->offset;
4392 spin_unlock(&block_group->lock);
4393 spin_unlock(&space_info->lock);
4394
4395 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4396 offset, ins, 1);
4397 btrfs_put_block_group(block_group);
4398 return ret;
4399}
4400
4401static struct extent_buffer *
4402btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4403 u64 bytenr, int level, u64 owner)
4404{
4405 struct btrfs_fs_info *fs_info = root->fs_info;
4406 struct extent_buffer *buf;
4407
4408 buf = btrfs_find_create_tree_block(fs_info, bytenr);
4409 if (IS_ERR(buf))
4410 return buf;
4411
4412
4413
4414
4415
4416
4417 if (buf->lock_owner == current->pid) {
4418 btrfs_err_rl(fs_info,
4419"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4420 buf->start, btrfs_header_owner(buf), current->pid);
4421 free_extent_buffer(buf);
4422 return ERR_PTR(-EUCLEAN);
4423 }
4424
4425 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
4426 btrfs_tree_lock(buf);
4427 btrfs_clean_tree_block(buf);
4428 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4429
4430 btrfs_set_lock_blocking_write(buf);
4431 set_extent_buffer_uptodate(buf);
4432
4433 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4434 btrfs_set_header_level(buf, level);
4435 btrfs_set_header_bytenr(buf, buf->start);
4436 btrfs_set_header_generation(buf, trans->transid);
4437 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4438 btrfs_set_header_owner(buf, owner);
4439 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4440 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4441 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4442 buf->log_index = root->log_transid % 2;
4443
4444
4445
4446
4447 if (buf->log_index == 0)
4448 set_extent_dirty(&root->dirty_log_pages, buf->start,
4449 buf->start + buf->len - 1, GFP_NOFS);
4450 else
4451 set_extent_new(&root->dirty_log_pages, buf->start,
4452 buf->start + buf->len - 1);
4453 } else {
4454 buf->log_index = -1;
4455 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4456 buf->start + buf->len - 1, GFP_NOFS);
4457 }
4458 trans->dirty = true;
4459
4460 return buf;
4461}
4462
4463
4464
4465
4466
4467struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4468 struct btrfs_root *root,
4469 u64 parent, u64 root_objectid,
4470 const struct btrfs_disk_key *key,
4471 int level, u64 hint,
4472 u64 empty_size)
4473{
4474 struct btrfs_fs_info *fs_info = root->fs_info;
4475 struct btrfs_key ins;
4476 struct btrfs_block_rsv *block_rsv;
4477 struct extent_buffer *buf;
4478 struct btrfs_delayed_extent_op *extent_op;
4479 struct btrfs_ref generic_ref = { 0 };
4480 u64 flags = 0;
4481 int ret;
4482 u32 blocksize = fs_info->nodesize;
4483 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4484
4485#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4486 if (btrfs_is_testing(fs_info)) {
4487 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4488 level, root_objectid);
4489 if (!IS_ERR(buf))
4490 root->alloc_bytenr += blocksize;
4491 return buf;
4492 }
4493#endif
4494
4495 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4496 if (IS_ERR(block_rsv))
4497 return ERR_CAST(block_rsv);
4498
4499 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4500 empty_size, hint, &ins, 0, 0);
4501 if (ret)
4502 goto out_unuse;
4503
4504 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4505 root_objectid);
4506 if (IS_ERR(buf)) {
4507 ret = PTR_ERR(buf);
4508 goto out_free_reserved;
4509 }
4510
4511 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4512 if (parent == 0)
4513 parent = ins.objectid;
4514 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4515 } else
4516 BUG_ON(parent > 0);
4517
4518 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4519 extent_op = btrfs_alloc_delayed_extent_op();
4520 if (!extent_op) {
4521 ret = -ENOMEM;
4522 goto out_free_buf;
4523 }
4524 if (key)
4525 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4526 else
4527 memset(&extent_op->key, 0, sizeof(extent_op->key));
4528 extent_op->flags_to_set = flags;
4529 extent_op->update_key = skinny_metadata ? false : true;
4530 extent_op->update_flags = true;
4531 extent_op->is_data = false;
4532 extent_op->level = level;
4533
4534 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4535 ins.objectid, ins.offset, parent);
4536 generic_ref.real_root = root->root_key.objectid;
4537 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4538 btrfs_ref_tree_mod(fs_info, &generic_ref);
4539 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
4540 extent_op, NULL, NULL);
4541 if (ret)
4542 goto out_free_delayed;
4543 }
4544 return buf;
4545
4546out_free_delayed:
4547 btrfs_free_delayed_extent_op(extent_op);
4548out_free_buf:
4549 free_extent_buffer(buf);
4550out_free_reserved:
4551 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4552out_unuse:
4553 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4554 return ERR_PTR(ret);
4555}
4556
4557struct walk_control {
4558 u64 refs[BTRFS_MAX_LEVEL];
4559 u64 flags[BTRFS_MAX_LEVEL];
4560 struct btrfs_key update_progress;
4561 struct btrfs_key drop_progress;
4562 int drop_level;
4563 int stage;
4564 int level;
4565 int shared_level;
4566 int update_ref;
4567 int keep_locks;
4568 int reada_slot;
4569 int reada_count;
4570 int restarted;
4571};
4572
4573#define DROP_REFERENCE 1
4574#define UPDATE_BACKREF 2
4575
4576static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4577 struct btrfs_root *root,
4578 struct walk_control *wc,
4579 struct btrfs_path *path)
4580{
4581 struct btrfs_fs_info *fs_info = root->fs_info;
4582 u64 bytenr;
4583 u64 generation;
4584 u64 refs;
4585 u64 flags;
4586 u32 nritems;
4587 struct btrfs_key key;
4588 struct extent_buffer *eb;
4589 int ret;
4590 int slot;
4591 int nread = 0;
4592
4593 if (path->slots[wc->level] < wc->reada_slot) {
4594 wc->reada_count = wc->reada_count * 2 / 3;
4595 wc->reada_count = max(wc->reada_count, 2);
4596 } else {
4597 wc->reada_count = wc->reada_count * 3 / 2;
4598 wc->reada_count = min_t(int, wc->reada_count,
4599 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4600 }
4601
4602 eb = path->nodes[wc->level];
4603 nritems = btrfs_header_nritems(eb);
4604
4605 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4606 if (nread >= wc->reada_count)
4607 break;
4608
4609 cond_resched();
4610 bytenr = btrfs_node_blockptr(eb, slot);
4611 generation = btrfs_node_ptr_generation(eb, slot);
4612
4613 if (slot == path->slots[wc->level])
4614 goto reada;
4615
4616 if (wc->stage == UPDATE_BACKREF &&
4617 generation <= root->root_key.offset)
4618 continue;
4619
4620
4621 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4622 wc->level - 1, 1, &refs,
4623 &flags);
4624
4625 if (ret < 0)
4626 continue;
4627 BUG_ON(refs == 0);
4628
4629 if (wc->stage == DROP_REFERENCE) {
4630 if (refs == 1)
4631 goto reada;
4632
4633 if (wc->level == 1 &&
4634 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4635 continue;
4636 if (!wc->update_ref ||
4637 generation <= root->root_key.offset)
4638 continue;
4639 btrfs_node_key_to_cpu(eb, &key, slot);
4640 ret = btrfs_comp_cpu_keys(&key,
4641 &wc->update_progress);
4642 if (ret < 0)
4643 continue;
4644 } else {
4645 if (wc->level == 1 &&
4646 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4647 continue;
4648 }
4649reada:
4650 readahead_tree_block(fs_info, bytenr);
4651 nread++;
4652 }
4653 wc->reada_slot = slot;
4654}
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4665 struct btrfs_root *root,
4666 struct btrfs_path *path,
4667 struct walk_control *wc, int lookup_info)
4668{
4669 struct btrfs_fs_info *fs_info = root->fs_info;
4670 int level = wc->level;
4671 struct extent_buffer *eb = path->nodes[level];
4672 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4673 int ret;
4674
4675 if (wc->stage == UPDATE_BACKREF &&
4676 btrfs_header_owner(eb) != root->root_key.objectid)
4677 return 1;
4678
4679
4680
4681
4682
4683 if (lookup_info &&
4684 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4685 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4686 BUG_ON(!path->locks[level]);
4687 ret = btrfs_lookup_extent_info(trans, fs_info,
4688 eb->start, level, 1,
4689 &wc->refs[level],
4690 &wc->flags[level]);
4691 BUG_ON(ret == -ENOMEM);
4692 if (ret)
4693 return ret;
4694 BUG_ON(wc->refs[level] == 0);
4695 }
4696
4697 if (wc->stage == DROP_REFERENCE) {
4698 if (wc->refs[level] > 1)
4699 return 1;
4700
4701 if (path->locks[level] && !wc->keep_locks) {
4702 btrfs_tree_unlock_rw(eb, path->locks[level]);
4703 path->locks[level] = 0;
4704 }
4705 return 0;
4706 }
4707
4708
4709 if (!(wc->flags[level] & flag)) {
4710 BUG_ON(!path->locks[level]);
4711 ret = btrfs_inc_ref(trans, root, eb, 1);
4712 BUG_ON(ret);
4713 ret = btrfs_dec_ref(trans, root, eb, 0);
4714 BUG_ON(ret);
4715 ret = btrfs_set_disk_extent_flags(trans, eb->start,
4716 eb->len, flag,
4717 btrfs_header_level(eb), 0);
4718 BUG_ON(ret);
4719 wc->flags[level] |= flag;
4720 }
4721
4722
4723
4724
4725
4726 if (path->locks[level] && level > 0) {
4727 btrfs_tree_unlock_rw(eb, path->locks[level]);
4728 path->locks[level] = 0;
4729 }
4730 return 0;
4731}
4732
4733
4734
4735
4736
4737static int check_ref_exists(struct btrfs_trans_handle *trans,
4738 struct btrfs_root *root, u64 bytenr, u64 parent,
4739 int level)
4740{
4741 struct btrfs_path *path;
4742 struct btrfs_extent_inline_ref *iref;
4743 int ret;
4744
4745 path = btrfs_alloc_path();
4746 if (!path)
4747 return -ENOMEM;
4748
4749 ret = lookup_extent_backref(trans, path, &iref, bytenr,
4750 root->fs_info->nodesize, parent,
4751 root->root_key.objectid, level, 0);
4752 btrfs_free_path(path);
4753 if (ret == -ENOENT)
4754 return 0;
4755 if (ret < 0)
4756 return ret;
4757 return 1;
4758}
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4774 struct btrfs_root *root,
4775 struct btrfs_path *path,
4776 struct walk_control *wc, int *lookup_info)
4777{
4778 struct btrfs_fs_info *fs_info = root->fs_info;
4779 u64 bytenr;
4780 u64 generation;
4781 u64 parent;
4782 struct btrfs_key key;
4783 struct btrfs_key first_key;
4784 struct btrfs_ref ref = { 0 };
4785 struct extent_buffer *next;
4786 int level = wc->level;
4787 int reada = 0;
4788 int ret = 0;
4789 bool need_account = false;
4790
4791 generation = btrfs_node_ptr_generation(path->nodes[level],
4792 path->slots[level]);
4793
4794
4795
4796
4797
4798 if (wc->stage == UPDATE_BACKREF &&
4799 generation <= root->root_key.offset) {
4800 *lookup_info = 1;
4801 return 1;
4802 }
4803
4804 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4805 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
4806 path->slots[level]);
4807
4808 next = find_extent_buffer(fs_info, bytenr);
4809 if (!next) {
4810 next = btrfs_find_create_tree_block(fs_info, bytenr);
4811 if (IS_ERR(next))
4812 return PTR_ERR(next);
4813
4814 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
4815 level - 1);
4816 reada = 1;
4817 }
4818 btrfs_tree_lock(next);
4819 btrfs_set_lock_blocking_write(next);
4820
4821 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
4822 &wc->refs[level - 1],
4823 &wc->flags[level - 1]);
4824 if (ret < 0)
4825 goto out_unlock;
4826
4827 if (unlikely(wc->refs[level - 1] == 0)) {
4828 btrfs_err(fs_info, "Missing references.");
4829 ret = -EIO;
4830 goto out_unlock;
4831 }
4832 *lookup_info = 0;
4833
4834 if (wc->stage == DROP_REFERENCE) {
4835 if (wc->refs[level - 1] > 1) {
4836 need_account = true;
4837 if (level == 1 &&
4838 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4839 goto skip;
4840
4841 if (!wc->update_ref ||
4842 generation <= root->root_key.offset)
4843 goto skip;
4844
4845 btrfs_node_key_to_cpu(path->nodes[level], &key,
4846 path->slots[level]);
4847 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
4848 if (ret < 0)
4849 goto skip;
4850
4851 wc->stage = UPDATE_BACKREF;
4852 wc->shared_level = level - 1;
4853 }
4854 } else {
4855 if (level == 1 &&
4856 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4857 goto skip;
4858 }
4859
4860 if (!btrfs_buffer_uptodate(next, generation, 0)) {
4861 btrfs_tree_unlock(next);
4862 free_extent_buffer(next);
4863 next = NULL;
4864 *lookup_info = 1;
4865 }
4866
4867 if (!next) {
4868 if (reada && level == 1)
4869 reada_walk_down(trans, root, wc, path);
4870 next = read_tree_block(fs_info, bytenr, generation, level - 1,
4871 &first_key);
4872 if (IS_ERR(next)) {
4873 return PTR_ERR(next);
4874 } else if (!extent_buffer_uptodate(next)) {
4875 free_extent_buffer(next);
4876 return -EIO;
4877 }
4878 btrfs_tree_lock(next);
4879 btrfs_set_lock_blocking_write(next);
4880 }
4881
4882 level--;
4883 ASSERT(level == btrfs_header_level(next));
4884 if (level != btrfs_header_level(next)) {
4885 btrfs_err(root->fs_info, "mismatched level");
4886 ret = -EIO;
4887 goto out_unlock;
4888 }
4889 path->nodes[level] = next;
4890 path->slots[level] = 0;
4891 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
4892 wc->level = level;
4893 if (wc->level == 1)
4894 wc->reada_slot = 0;
4895 return 0;
4896skip:
4897 wc->refs[level - 1] = 0;
4898 wc->flags[level - 1] = 0;
4899 if (wc->stage == DROP_REFERENCE) {
4900 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
4901 parent = path->nodes[level]->start;
4902 } else {
4903 ASSERT(root->root_key.objectid ==
4904 btrfs_header_owner(path->nodes[level]));
4905 if (root->root_key.objectid !=
4906 btrfs_header_owner(path->nodes[level])) {
4907 btrfs_err(root->fs_info,
4908 "mismatched block owner");
4909 ret = -EIO;
4910 goto out_unlock;
4911 }
4912 parent = 0;
4913 }
4914
4915
4916
4917
4918
4919
4920
4921 if (wc->restarted) {
4922 ret = check_ref_exists(trans, root, bytenr, parent,
4923 level - 1);
4924 if (ret < 0)
4925 goto out_unlock;
4926 if (ret == 0)
4927 goto no_delete;
4928 ret = 0;
4929 wc->restarted = 0;
4930 }
4931
4932
4933
4934
4935
4936
4937 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
4938 need_account) {
4939 ret = btrfs_qgroup_trace_subtree(trans, next,
4940 generation, level - 1);
4941 if (ret) {
4942 btrfs_err_rl(fs_info,
4943 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
4944 ret);
4945 }
4946 }
4947
4948
4949
4950
4951
4952
4953
4954 wc->drop_level = level;
4955 find_next_key(path, level, &wc->drop_progress);
4956
4957 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
4958 fs_info->nodesize, parent);
4959 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
4960 ret = btrfs_free_extent(trans, &ref);
4961 if (ret)
4962 goto out_unlock;
4963 }
4964no_delete:
4965 *lookup_info = 1;
4966 ret = 1;
4967
4968out_unlock:
4969 btrfs_tree_unlock(next);
4970 free_extent_buffer(next);
4971
4972 return ret;
4973}
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4988 struct btrfs_root *root,
4989 struct btrfs_path *path,
4990 struct walk_control *wc)
4991{
4992 struct btrfs_fs_info *fs_info = root->fs_info;
4993 int ret;
4994 int level = wc->level;
4995 struct extent_buffer *eb = path->nodes[level];
4996 u64 parent = 0;
4997
4998 if (wc->stage == UPDATE_BACKREF) {
4999 BUG_ON(wc->shared_level < level);
5000 if (level < wc->shared_level)
5001 goto out;
5002
5003 ret = find_next_key(path, level + 1, &wc->update_progress);
5004 if (ret > 0)
5005 wc->update_ref = 0;
5006
5007 wc->stage = DROP_REFERENCE;
5008 wc->shared_level = -1;
5009 path->slots[level] = 0;
5010
5011
5012
5013
5014
5015
5016 if (!path->locks[level]) {
5017 BUG_ON(level == 0);
5018 btrfs_tree_lock(eb);
5019 btrfs_set_lock_blocking_write(eb);
5020 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5021
5022 ret = btrfs_lookup_extent_info(trans, fs_info,
5023 eb->start, level, 1,
5024 &wc->refs[level],
5025 &wc->flags[level]);
5026 if (ret < 0) {
5027 btrfs_tree_unlock_rw(eb, path->locks[level]);
5028 path->locks[level] = 0;
5029 return ret;
5030 }
5031 BUG_ON(wc->refs[level] == 0);
5032 if (wc->refs[level] == 1) {
5033 btrfs_tree_unlock_rw(eb, path->locks[level]);
5034 path->locks[level] = 0;
5035 return 1;
5036 }
5037 }
5038 }
5039
5040
5041 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5042
5043 if (wc->refs[level] == 1) {
5044 if (level == 0) {
5045 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5046 ret = btrfs_dec_ref(trans, root, eb, 1);
5047 else
5048 ret = btrfs_dec_ref(trans, root, eb, 0);
5049 BUG_ON(ret);
5050 if (is_fstree(root->root_key.objectid)) {
5051 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5052 if (ret) {
5053 btrfs_err_rl(fs_info,
5054 "error %d accounting leaf items, quota is out of sync, rescan required",
5055 ret);
5056 }
5057 }
5058 }
5059
5060 if (!path->locks[level] &&
5061 btrfs_header_generation(eb) == trans->transid) {
5062 btrfs_tree_lock(eb);
5063 btrfs_set_lock_blocking_write(eb);
5064 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5065 }
5066 btrfs_clean_tree_block(eb);
5067 }
5068
5069 if (eb == root->node) {
5070 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5071 parent = eb->start;
5072 else if (root->root_key.objectid != btrfs_header_owner(eb))
5073 goto owner_mismatch;
5074 } else {
5075 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5076 parent = path->nodes[level + 1]->start;
5077 else if (root->root_key.objectid !=
5078 btrfs_header_owner(path->nodes[level + 1]))
5079 goto owner_mismatch;
5080 }
5081
5082 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5083out:
5084 wc->refs[level] = 0;
5085 wc->flags[level] = 0;
5086 return 0;
5087
5088owner_mismatch:
5089 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5090 btrfs_header_owner(eb), root->root_key.objectid);
5091 return -EUCLEAN;
5092}
5093
5094static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5095 struct btrfs_root *root,
5096 struct btrfs_path *path,
5097 struct walk_control *wc)
5098{
5099 int level = wc->level;
5100 int lookup_info = 1;
5101 int ret;
5102
5103 while (level >= 0) {
5104 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5105 if (ret > 0)
5106 break;
5107
5108 if (level == 0)
5109 break;
5110
5111 if (path->slots[level] >=
5112 btrfs_header_nritems(path->nodes[level]))
5113 break;
5114
5115 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5116 if (ret > 0) {
5117 path->slots[level]++;
5118 continue;
5119 } else if (ret < 0)
5120 return ret;
5121 level = wc->level;
5122 }
5123 return 0;
5124}
5125
5126static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5127 struct btrfs_root *root,
5128 struct btrfs_path *path,
5129 struct walk_control *wc, int max_level)
5130{
5131 int level = wc->level;
5132 int ret;
5133
5134 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5135 while (level < max_level && path->nodes[level]) {
5136 wc->level = level;
5137 if (path->slots[level] + 1 <
5138 btrfs_header_nritems(path->nodes[level])) {
5139 path->slots[level]++;
5140 return 0;
5141 } else {
5142 ret = walk_up_proc(trans, root, path, wc);
5143 if (ret > 0)
5144 return 0;
5145 if (ret < 0)
5146 return ret;
5147
5148 if (path->locks[level]) {
5149 btrfs_tree_unlock_rw(path->nodes[level],
5150 path->locks[level]);
5151 path->locks[level] = 0;
5152 }
5153 free_extent_buffer(path->nodes[level]);
5154 path->nodes[level] = NULL;
5155 level++;
5156 }
5157 }
5158 return 1;
5159}
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174int btrfs_drop_snapshot(struct btrfs_root *root,
5175 struct btrfs_block_rsv *block_rsv, int update_ref,
5176 int for_reloc)
5177{
5178 struct btrfs_fs_info *fs_info = root->fs_info;
5179 struct btrfs_path *path;
5180 struct btrfs_trans_handle *trans;
5181 struct btrfs_root *tree_root = fs_info->tree_root;
5182 struct btrfs_root_item *root_item = &root->root_item;
5183 struct walk_control *wc;
5184 struct btrfs_key key;
5185 int err = 0;
5186 int ret;
5187 int level;
5188 bool root_dropped = false;
5189
5190 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5191
5192 path = btrfs_alloc_path();
5193 if (!path) {
5194 err = -ENOMEM;
5195 goto out;
5196 }
5197
5198 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5199 if (!wc) {
5200 btrfs_free_path(path);
5201 err = -ENOMEM;
5202 goto out;
5203 }
5204
5205 trans = btrfs_start_transaction(tree_root, 0);
5206 if (IS_ERR(trans)) {
5207 err = PTR_ERR(trans);
5208 goto out_free;
5209 }
5210
5211 err = btrfs_run_delayed_items(trans);
5212 if (err)
5213 goto out_end_trans;
5214
5215 if (block_rsv)
5216 trans->block_rsv = block_rsv;
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226 set_bit(BTRFS_ROOT_DELETING, &root->state);
5227 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5228 level = btrfs_header_level(root->node);
5229 path->nodes[level] = btrfs_lock_root_node(root);
5230 btrfs_set_lock_blocking_write(path->nodes[level]);
5231 path->slots[level] = 0;
5232 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5233 memset(&wc->update_progress, 0,
5234 sizeof(wc->update_progress));
5235 } else {
5236 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5237 memcpy(&wc->update_progress, &key,
5238 sizeof(wc->update_progress));
5239
5240 level = root_item->drop_level;
5241 BUG_ON(level == 0);
5242 path->lowest_level = level;
5243 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5244 path->lowest_level = 0;
5245 if (ret < 0) {
5246 err = ret;
5247 goto out_end_trans;
5248 }
5249 WARN_ON(ret > 0);
5250
5251
5252
5253
5254
5255 btrfs_unlock_up_safe(path, 0);
5256
5257 level = btrfs_header_level(root->node);
5258 while (1) {
5259 btrfs_tree_lock(path->nodes[level]);
5260 btrfs_set_lock_blocking_write(path->nodes[level]);
5261 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5262
5263 ret = btrfs_lookup_extent_info(trans, fs_info,
5264 path->nodes[level]->start,
5265 level, 1, &wc->refs[level],
5266 &wc->flags[level]);
5267 if (ret < 0) {
5268 err = ret;
5269 goto out_end_trans;
5270 }
5271 BUG_ON(wc->refs[level] == 0);
5272
5273 if (level == root_item->drop_level)
5274 break;
5275
5276 btrfs_tree_unlock(path->nodes[level]);
5277 path->locks[level] = 0;
5278 WARN_ON(wc->refs[level] != 1);
5279 level--;
5280 }
5281 }
5282
5283 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5284 wc->level = level;
5285 wc->shared_level = -1;
5286 wc->stage = DROP_REFERENCE;
5287 wc->update_ref = update_ref;
5288 wc->keep_locks = 0;
5289 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5290
5291 while (1) {
5292
5293 ret = walk_down_tree(trans, root, path, wc);
5294 if (ret < 0) {
5295 err = ret;
5296 break;
5297 }
5298
5299 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5300 if (ret < 0) {
5301 err = ret;
5302 break;
5303 }
5304
5305 if (ret > 0) {
5306 BUG_ON(wc->stage != DROP_REFERENCE);
5307 break;
5308 }
5309
5310 if (wc->stage == DROP_REFERENCE) {
5311 wc->drop_level = wc->level;
5312 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5313 &wc->drop_progress,
5314 path->slots[wc->drop_level]);
5315 }
5316 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5317 &wc->drop_progress);
5318 root_item->drop_level = wc->drop_level;
5319
5320 BUG_ON(wc->level == 0);
5321 if (btrfs_should_end_transaction(trans) ||
5322 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5323 ret = btrfs_update_root(trans, tree_root,
5324 &root->root_key,
5325 root_item);
5326 if (ret) {
5327 btrfs_abort_transaction(trans, ret);
5328 err = ret;
5329 goto out_end_trans;
5330 }
5331
5332 btrfs_end_transaction_throttle(trans);
5333 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5334 btrfs_debug(fs_info,
5335 "drop snapshot early exit");
5336 err = -EAGAIN;
5337 goto out_free;
5338 }
5339
5340 trans = btrfs_start_transaction(tree_root, 0);
5341 if (IS_ERR(trans)) {
5342 err = PTR_ERR(trans);
5343 goto out_free;
5344 }
5345 if (block_rsv)
5346 trans->block_rsv = block_rsv;
5347 }
5348 }
5349 btrfs_release_path(path);
5350 if (err)
5351 goto out_end_trans;
5352
5353 ret = btrfs_del_root(trans, &root->root_key);
5354 if (ret) {
5355 btrfs_abort_transaction(trans, ret);
5356 err = ret;
5357 goto out_end_trans;
5358 }
5359
5360 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5361 ret = btrfs_find_root(tree_root, &root->root_key, path,
5362 NULL, NULL);
5363 if (ret < 0) {
5364 btrfs_abort_transaction(trans, ret);
5365 err = ret;
5366 goto out_end_trans;
5367 } else if (ret > 0) {
5368
5369
5370
5371
5372
5373 btrfs_del_orphan_item(trans, tree_root,
5374 root->root_key.objectid);
5375 }
5376 }
5377
5378 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
5379 btrfs_add_dropped_root(trans, root);
5380 } else {
5381 free_extent_buffer(root->node);
5382 free_extent_buffer(root->commit_root);
5383 btrfs_put_fs_root(root);
5384 }
5385 root_dropped = true;
5386out_end_trans:
5387 btrfs_end_transaction_throttle(trans);
5388out_free:
5389 kfree(wc);
5390 btrfs_free_path(path);
5391out:
5392
5393
5394
5395
5396
5397
5398
5399 if (!for_reloc && !root_dropped)
5400 btrfs_add_dead_root(root);
5401 if (err && err != -EAGAIN)
5402 btrfs_handle_fs_error(fs_info, err, NULL);
5403 return err;
5404}
5405
5406
5407
5408
5409
5410
5411
5412int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5413 struct btrfs_root *root,
5414 struct extent_buffer *node,
5415 struct extent_buffer *parent)
5416{
5417 struct btrfs_fs_info *fs_info = root->fs_info;
5418 struct btrfs_path *path;
5419 struct walk_control *wc;
5420 int level;
5421 int parent_level;
5422 int ret = 0;
5423 int wret;
5424
5425 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5426
5427 path = btrfs_alloc_path();
5428 if (!path)
5429 return -ENOMEM;
5430
5431 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5432 if (!wc) {
5433 btrfs_free_path(path);
5434 return -ENOMEM;
5435 }
5436
5437 btrfs_assert_tree_locked(parent);
5438 parent_level = btrfs_header_level(parent);
5439 extent_buffer_get(parent);
5440 path->nodes[parent_level] = parent;
5441 path->slots[parent_level] = btrfs_header_nritems(parent);
5442
5443 btrfs_assert_tree_locked(node);
5444 level = btrfs_header_level(node);
5445 path->nodes[level] = node;
5446 path->slots[level] = 0;
5447 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5448
5449 wc->refs[parent_level] = 1;
5450 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5451 wc->level = level;
5452 wc->shared_level = -1;
5453 wc->stage = DROP_REFERENCE;
5454 wc->update_ref = 0;
5455 wc->keep_locks = 1;
5456 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5457
5458 while (1) {
5459 wret = walk_down_tree(trans, root, path, wc);
5460 if (wret < 0) {
5461 ret = wret;
5462 break;
5463 }
5464
5465 wret = walk_up_tree(trans, root, path, wc, parent_level);
5466 if (wret < 0)
5467 ret = wret;
5468 if (wret != 0)
5469 break;
5470 }
5471
5472 kfree(wc);
5473 btrfs_free_path(path);
5474 return ret;
5475}
5476
5477
5478
5479
5480
5481u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5482{
5483 struct btrfs_block_group_cache *block_group;
5484 u64 free_bytes = 0;
5485 int factor;
5486
5487
5488 if (list_empty(&sinfo->ro_bgs))
5489 return 0;
5490
5491 spin_lock(&sinfo->lock);
5492 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5493 spin_lock(&block_group->lock);
5494
5495 if (!block_group->ro) {
5496 spin_unlock(&block_group->lock);
5497 continue;
5498 }
5499
5500 factor = btrfs_bg_type_to_factor(block_group->flags);
5501 free_bytes += (block_group->key.offset -
5502 btrfs_block_group_used(&block_group->item)) *
5503 factor;
5504
5505 spin_unlock(&block_group->lock);
5506 }
5507 spin_unlock(&sinfo->lock);
5508
5509 return free_bytes;
5510}
5511
5512int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5513 u64 start, u64 end)
5514{
5515 return unpin_extent_range(fs_info, start, end, false);
5516}
5517
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
5536
5537
5538static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5539{
5540 u64 start = SZ_1M, len = 0, end = 0;
5541 int ret;
5542
5543 *trimmed = 0;
5544
5545
5546 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5547 return 0;
5548
5549
5550 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5551 return 0;
5552
5553
5554 if (device->total_bytes <= device->bytes_used)
5555 return 0;
5556
5557 ret = 0;
5558
5559 while (1) {
5560 struct btrfs_fs_info *fs_info = device->fs_info;
5561 u64 bytes;
5562
5563 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5564 if (ret)
5565 break;
5566
5567 find_first_clear_extent_bit(&device->alloc_state, start,
5568 &start, &end,
5569 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5570
5571
5572 start = max_t(u64, start, SZ_1M);
5573
5574
5575
5576
5577
5578
5579 end = min(end, device->total_bytes - 1);
5580
5581 len = end - start + 1;
5582
5583
5584 if (!len) {
5585 mutex_unlock(&fs_info->chunk_mutex);
5586 ret = 0;
5587 break;
5588 }
5589
5590 ret = btrfs_issue_discard(device->bdev, start, len,
5591 &bytes);
5592 if (!ret)
5593 set_extent_bits(&device->alloc_state, start,
5594 start + bytes - 1,
5595 CHUNK_TRIMMED);
5596 mutex_unlock(&fs_info->chunk_mutex);
5597
5598 if (ret)
5599 break;
5600
5601 start += len;
5602 *trimmed += bytes;
5603
5604 if (fatal_signal_pending(current)) {
5605 ret = -ERESTARTSYS;
5606 break;
5607 }
5608
5609 cond_resched();
5610 }
5611
5612 return ret;
5613}
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5625{
5626 struct btrfs_block_group_cache *cache = NULL;
5627 struct btrfs_device *device;
5628 struct list_head *devices;
5629 u64 group_trimmed;
5630 u64 range_end = U64_MAX;
5631 u64 start;
5632 u64 end;
5633 u64 trimmed = 0;
5634 u64 bg_failed = 0;
5635 u64 dev_failed = 0;
5636 int bg_ret = 0;
5637 int dev_ret = 0;
5638 int ret = 0;
5639
5640
5641
5642
5643
5644 if (range->len != U64_MAX &&
5645 check_add_overflow(range->start, range->len, &range_end))
5646 return -EINVAL;
5647
5648 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5649 for (; cache; cache = btrfs_next_block_group(cache)) {
5650 if (cache->key.objectid >= range_end) {
5651 btrfs_put_block_group(cache);
5652 break;
5653 }
5654
5655 start = max(range->start, cache->key.objectid);
5656 end = min(range_end, cache->key.objectid + cache->key.offset);
5657
5658 if (end - start >= range->minlen) {
5659 if (!btrfs_block_group_cache_done(cache)) {
5660 ret = btrfs_cache_block_group(cache, 0);
5661 if (ret) {
5662 bg_failed++;
5663 bg_ret = ret;
5664 continue;
5665 }
5666 ret = btrfs_wait_block_group_cache_done(cache);
5667 if (ret) {
5668 bg_failed++;
5669 bg_ret = ret;
5670 continue;
5671 }
5672 }
5673 ret = btrfs_trim_block_group(cache,
5674 &group_trimmed,
5675 start,
5676 end,
5677 range->minlen);
5678
5679 trimmed += group_trimmed;
5680 if (ret) {
5681 bg_failed++;
5682 bg_ret = ret;
5683 continue;
5684 }
5685 }
5686 }
5687
5688 if (bg_failed)
5689 btrfs_warn(fs_info,
5690 "failed to trim %llu block group(s), last error %d",
5691 bg_failed, bg_ret);
5692 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5693 devices = &fs_info->fs_devices->devices;
5694 list_for_each_entry(device, devices, dev_list) {
5695 ret = btrfs_trim_free_extents(device, &group_trimmed);
5696 if (ret) {
5697 dev_failed++;
5698 dev_ret = ret;
5699 break;
5700 }
5701
5702 trimmed += group_trimmed;
5703 }
5704 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5705
5706 if (dev_failed)
5707 btrfs_warn(fs_info,
5708 "failed to trim %llu device(s), last error %d",
5709 dev_failed, dev_ret);
5710 range->len = trimmed;
5711 if (bg_ret)
5712 return bg_ret;
5713 return dev_ret;
5714}
5715
5716
5717
5718
5719
5720
5721
5722
5723
5724void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
5725{
5726 percpu_counter_dec(&root->subv_writers->counter);
5727 cond_wake_up(&root->subv_writers->wait);
5728}
5729
5730int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
5731{
5732 if (atomic_read(&root->will_be_snapshotted))
5733 return 0;
5734
5735 percpu_counter_inc(&root->subv_writers->counter);
5736
5737
5738
5739 smp_mb();
5740 if (atomic_read(&root->will_be_snapshotted)) {
5741 btrfs_end_write_no_snapshotting(root);
5742 return 0;
5743 }
5744 return 1;
5745}
5746
5747void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
5748{
5749 while (true) {
5750 int ret;
5751
5752 ret = btrfs_start_write_no_snapshotting(root);
5753 if (ret)
5754 break;
5755 wait_var_event(&root->will_be_snapshotted,
5756 !atomic_read(&root->will_be_snapshotted));
5757 }
5758}
5759