// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
#include "misc.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
#include "space-info.h"
#include "block-rsv.h"
#include "delalloc-space.h"
#include "block-group.h"

#undef SCRAMBLE_DELAYED_REFS

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_node *node,
				     struct btrfs_delayed_extent_op *extent_op);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);

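/*
 * Return non-zero if all of the given allocation bits are set on the block
 * group (e.g. DATA | RAID1).  A subset match is not enough.
 */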
static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
			      u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE);
	set_extent_bits(&fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE);
	return 0;
}

void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 start, end;

	start = cache->start;
	end = start + cache->length - 1;

	clear_extent_bits(&fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE);
	clear_extent_bits(&fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE);
}

static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
{
	if (ref->type == BTRFS_REF_METADATA) {
		if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
			return BTRFS_BLOCK_GROUP_SYSTEM;
		else
			return BTRFS_BLOCK_GROUP_METADATA;
	}
	return BTRFS_BLOCK_GROUP_DATA;
}

static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_ref *ref)
{
	struct btrfs_space_info *space_info;
	u64 flags = generic_ref_to_space_flags(ref);

	space_info = btrfs_find_space_info(fs_info, flags);
	ASSERT(space_info);
	percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
				 BTRFS_TOTAL_BYTES_PINNED_BATCH);
}

static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_ref *ref)
{
	struct btrfs_space_info *space_info;
	u64 flags = generic_ref_to_space_flags(ref);

	space_info = btrfs_find_space_info(fs_info, flags);
	ASSERT(space_info);
	percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
				 BTRFS_TOTAL_BYTES_PINNED_BATCH);
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */
	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
			ret = -EINVAL;
			btrfs_print_v0_err(fs_info);
			if (trans)
				btrfs_abort_transaction(trans, ret);
			else
				btrfs_handle_fs_error(fs_info, ret, NULL);

			goto out_free;
		}

		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and
			 * try again
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref_head(head);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
 *
 * When a file extent is allocated, the implicit back refs is used.
 * the fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of key. The key offset for the implicit back refs is
 * objectid of block's owner tree. The key offset for the full back refs
 * is the first byte of parent block.
 *
 * When implicit back refs is used, information about the lowest key and
 * level of the tree block are required. These information are stored in
 * tree block info structure.
 */

/*
 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
 * is_data == BTRFS_REF_TYPE_DATA, data type is required,
 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
 */
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has parent tree
				 * block, which must be aligned to
				 * nodesize.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has parent tree
				 * block, which must be aligned to
				 * nodesize.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else {
			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
			return type;
		}
	}

	btrfs_print_leaf((struct extent_buffer *)eb);
	btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
		  eb->start, type);
	WARN_ON(1);

	return BTRFS_REF_TYPE_INVALID;
}

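/*
 * Hash a (root, inode objectid, file offset) triple into the 64bit key offset
 * used by implicit data back references.  Two crc32c sums are combined so that
 * the root lands in the high bits, keeping refs of the same root close
 * together in the extent tree.
 */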
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

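/*
 * Look up a keyed data back reference item.  For shared refs the parent block
 * number is the key offset; for implicit refs we search by the hash of
 * (root, owner, offset) and walk forward over hash collisions until an item
 * matching all three fields is found.
 */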
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_root *root = trans->fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

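/*
 * Insert or update a keyed data back reference item.  Hash collisions on the
 * implicit ref key are handled by bumping key.offset and retrying until a
 * matching or free slot is found.
 */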
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_root *root = trans->fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

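/* Drop refs_to_drop references from a keyed data back reference item. */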
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
		btrfs_print_v0_err(trans->fs_info);
		btrfs_abort_transaction(trans, -EINVAL);
		return -EINVAL;
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

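/*
 * Return the reference count recorded in a data back reference, either inline
 * (iref != NULL) or keyed at the current path position.
 */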
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;
	int type;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
	if (iref) {
		/*
		 * If type is invalid, we should have bailed out earlier than
		 * this call.
		 */
		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
		ASSERT(type != BTRFS_REF_TYPE_INVALID);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

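/* Look up a keyed tree block back reference item. */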
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_root *root = trans->fs_info->extent_root;
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
	return ret;
}

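/* Insert a keyed tree block back reference item (they carry no payload). */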
static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
				      path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

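/*
 * Map (parent, owner) to the back reference key type: tree blocks use
 * TREE_BLOCK/SHARED_BLOCK refs, file data uses EXTENT_DATA/SHARED_DATA refs,
 * and a non-zero parent selects the shared (full back ref) variant.
 */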
static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

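/*
 * Find the key of the slot that follows the current path position, searching
 * upward through the path's nodes.  Returns 1 if the path points at the last
 * slot of the tree.
 */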
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
	int needed;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owner is our level, so we can just add one to get the level for the
	 * block we are interested in.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	if (unlikely(item_size < sizeof(*ei))) {
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
		needed = BTRFS_REF_TYPE_DATA;
	else
		needed = BTRFS_REF_TYPE_BLOCK;

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
		if (type == BTRFS_REF_TYPE_INVALID) {
			err = -EUCLEAN;
			goto out;
		}

		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

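/*
 * Look up an extent back reference, trying the inline variant inside the
 * extent item first and falling back to the keyed items that follow it.
 */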
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	/*
	 * If type is invalid, we should have bailed out after
	 * lookup_inline_extent_backref().
	 */
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	ASSERT(type != BTRFS_REF_TYPE_INVALID);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(path, iref, refs_to_add,
					     extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

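/* Insert a keyed back reference item of the appropriate type. */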
static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset,
					     refs_to_add);
	}
	return ret;
}

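/*
 * Drop refs_to_drop references, inline or keyed.  *last_ref is set once the
 * last reference of this backref is gone.
 */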
static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
					     last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
	}
	return ret;
}

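/*
 * Issue a discard for the given byte range on one device, carefully skipping
 * any superblock mirrors that live inside the range.
 */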
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret = 0;
	u64 discarded_bytes = 0;
	u64 end = bytenr + num_bytes;
	u64 cur = bytenr;
	struct btrfs_bio *bbio = NULL;


	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are
	 * discarding.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	while (cur < end) {
		struct btrfs_bio_stripe *stripe;
		int i;

		num_bytes = end - cur;

		ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
				      &num_bytes, &bbio, 0);
		/*
		 * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
		 * -EOPNOTSUPP. For any such error, @num_bytes is not updated,
		 * thus we can't continue anyway.
		 */
		if (ret < 0)
			goto out;

		stripe = bbio->stripes;
		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			struct request_queue *req_q;

			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}
			req_q = bdev_get_queue(stripe->dev->bdev);
			if (!blk_queue_discard(req_q))
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret) {
				discarded_bytes += bytes;
			} else if (ret != -EOPNOTSUPP) {
				/*
				 * Logic errors or -ENOMEM, or -EIO, but
				 * unlikely to happen.
				 *
				 * And since there are two loops, explicitly
				 * go to out to avoid confusion.
				 */
				btrfs_put_bbio(bbio);
				goto out;
			}

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
		cur += num_bytes;
	}
out:
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;


	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_ref *generic_ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod, new_ref_mod;
	int ret;

	ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
	       generic_ref->action);
	BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
	       generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);

	if (generic_ref->type == BTRFS_REF_METADATA)
		ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
				NULL, &old_ref_mod, &new_ref_mod);
	else
		ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
						 &old_ref_mod, &new_ref_mod);

	btrfs_ref_tree_mod(fs_info, generic_ref);

	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
		sub_pinned_bytes(fs_info, generic_ref);

	return ret;
}

/*
 * __btrfs_inc_extent_ref - insert backreference for a given extent
 *
 * @trans:	    Handle of transaction
 *
 * @node:	    The delayed ref node used to get the bytenr/length for
 *		    extent whose references are incremented.
 *
 * @parent:	    If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
 *		    BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
 *		    bytenr of the parent block. Since new extents are always
 *		    created with indirect references, this will only be the
 *		    case when relocating a shared extent. In that case,
 *		    root_objectid will be BTRFS_TREE_RELOC_OBJECTID. Otherwise,
 *		    parent must be 0.
 *
 * @root_objectid:  The id of the root where this modification has originated,
 *		    this can be either one of the well-known metadata trees or
 *		    the subvolume id which references this extent.
 *
 * @owner:	    For data extents it is the inode number of the owning file.
 *		    For metadata extents this parameter holds the level in the
 *		    tree of the extent.
 *
 * @offset:	    For metadata extents the offset is ignored and is currently
 *		    always passed as 0. For data extents it is the fileoffset
 *		    this extent belongs to.
 *
 * @refs_to_add     Number of references to add
 *
 * @extent_op       Pointer to a structure, holding information necessary when
 *                  updating a tree block's flags
 *
 */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
					   parent, root_objectid, owner,
					   offset, refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert and
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}

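/* Run one delayed ref for a data extent, applying node->action. */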
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, parent, ref_root,
						 flags, ref->objectid,
						 ref->offset, &ins,
						 node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
					     ref->objectid, ref->offset,
					     node->ref_mod, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, node, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

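/* Apply a delayed extent op (flags and/or key update) to an extent item. */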
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

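/*
 * Find the extent item a delayed extent op applies to and run the op against
 * it, falling back from the skinny metadata item to the old style extent item
 * when needed.
 */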
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_delayed_ref_head *head,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = head->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = head->num_bytes;
	}

again:
	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == head->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == head->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = head->bytenr;
				key.offset = head->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);

	if (unlikely(item_size < sizeof(*ei))) {
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

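/* Run one delayed ref for a tree block; its ref_mod must always be 1. */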
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	u64 parent = 0;
	u64 ref_root = 0;

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->ref_mod != 1) {
		btrfs_err(trans->fs_info,
	"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
			  node->bytenr, node->ref_mod, node->action, ref_root,
			  parent);
		return -EIO;
	}
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, node, extent_op);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
					     ref->level, 0, 1, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, node, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}

/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret = 0;

	if (trans->aborted) {
		if (insert_reserved)
			btrfs_pin_extent(trans->fs_info, node->bytenr,
					 node->num_bytes, 1);
		return 0;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, node, extent_op,
					   insert_reserved);
	else
		BUG();
	if (ret && insert_reserved)
		btrfs_pin_extent(trans->fs_info, node->bytenr,
				 node->num_bytes, 1);
	return ret;
}

static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_node *ref;

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return NULL;

	/*
	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * This is to prevent a ref count from going down to zero, which deletes
	 * the extent item from the extent tree, when there still are references
	 * to add, which would fail because they would not find the extent item.
	 */
	if (!list_empty(&head->ref_add_list))
		return list_first_entry(&head->ref_add_list,
				struct btrfs_delayed_ref_node, add_list);

	ref = rb_entry(rb_first_cached(&head->ref_tree),
		       struct btrfs_delayed_ref_node, ref_node);
	ASSERT(list_empty(&ref->add_list));
	return ref;
}

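/* Return a locked head to the set of heads that still need processing. */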
static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
				      struct btrfs_delayed_ref_head *head)
{
	spin_lock(&delayed_refs->lock);
	head->processing = 0;
	delayed_refs->num_heads_ready++;
	spin_unlock(&delayed_refs->lock);
	btrfs_delayed_ref_unlock(head);
}

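/*
 * Detach the extent op from a head.  must_insert_reserved heads run their
 * flag updates at extent insertion time, so the op is freed right away.
 */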
static struct btrfs_delayed_extent_op *cleanup_extent_op(
				struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_extent_op *extent_op = head->extent_op;

	if (!extent_op)
		return NULL;

	if (head->must_insert_reserved) {
		head->extent_op = NULL;
		btrfs_free_delayed_extent_op(extent_op);
		return NULL;
	}
	return extent_op;
}

static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = cleanup_extent_op(head);
	if (!extent_op)
		return 0;
	head->extent_op = NULL;
	spin_unlock(&head->lock);
	ret = run_delayed_extent_op(trans, head, extent_op);
	btrfs_free_delayed_extent_op(extent_op);
	return ret ? ret : 1;
}
1793
1794void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1795 struct btrfs_delayed_ref_root *delayed_refs,
1796 struct btrfs_delayed_ref_head *head)
1797{
1798 int nr_items = 1;
1799
1800 if (head->total_ref_mod < 0) {
1801 struct btrfs_space_info *space_info;
1802 u64 flags;
1803
1804 if (head->is_data)
1805 flags = BTRFS_BLOCK_GROUP_DATA;
1806 else if (head->is_system)
1807 flags = BTRFS_BLOCK_GROUP_SYSTEM;
1808 else
1809 flags = BTRFS_BLOCK_GROUP_METADATA;
1810 space_info = btrfs_find_space_info(fs_info, flags);
1811 ASSERT(space_info);
1812 percpu_counter_add_batch(&space_info->total_bytes_pinned,
1813 -head->num_bytes,
1814 BTRFS_TOTAL_BYTES_PINNED_BATCH);
1815
1816
1817
1818
1819
1820
1821 if (head->is_data) {
1822 spin_lock(&delayed_refs->lock);
1823 delayed_refs->pending_csums -= head->num_bytes;
1824 spin_unlock(&delayed_refs->lock);
1825 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
1826 head->num_bytes);
1827 }
1828 }
1829
1830 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1831}
1832
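/*
 * Final processing of a delayed ref head once all of its refs have run: run
 * any remaining extent op, unlink the head and do the accounting.
 */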
static int cleanup_ref_head(struct btrfs_trans_handle *trans,
			    struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	delayed_refs = &trans->transaction->delayed_refs;

	ret = run_and_cleanup_extent_op(trans, head);
	if (ret < 0) {
		unselect_delayed_ref_head(delayed_refs, head);
		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
		return ret;
	} else if (ret) {
		return ret;
	}

	/*
	 * Need to drop our head ref lock and re-acquire the delayed ref lock
	 * in order to properly order our operations with dropping the head
	 * ref.
	 */
	spin_unlock(&head->lock);
	spin_lock(&delayed_refs->lock);
	spin_lock(&head->lock);
	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
		spin_unlock(&head->lock);
		spin_unlock(&delayed_refs->lock);
		return 1;
	}
	btrfs_delete_ref_head(delayed_refs, head);
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	if (head->must_insert_reserved) {
		btrfs_pin_extent(fs_info, head->bytenr,
				 head->num_bytes, 1);
		if (head->is_data) {
			ret = btrfs_del_csums(trans, fs_info->csum_root,
					      head->bytenr, head->num_bytes);
		}
	}

	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);

	trace_run_delayed_ref_head(fs_info, head, 0);
	btrfs_delayed_ref_unlock(head);
	btrfs_put_delayed_ref_head(head);
	return 0;
}

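/* Pick the next delayed ref head to process and lock it. */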
static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
					struct btrfs_trans_handle *trans)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_delayed_ref_head *head = NULL;
	int ret;

	spin_lock(&delayed_refs->lock);
	head = btrfs_select_ref_head(delayed_refs);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		return head;
	}

	/*
	 * Grab the lock that says we are going to process all the refs for
	 * this head
	 */
	ret = btrfs_delayed_ref_lock(delayed_refs, head);
	spin_unlock(&delayed_refs->lock);

	/*
	 * We may have dropped the spin lock to get the head mutex lock, and
	 * that might have given someone else time to free the head.  If that's
	 * true, it has been removed from our list and we can move on.
	 */
	if (ret == -EAGAIN)
		head = ERR_PTR(-EAGAIN);

	return head;
}
1917
1918static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1919 struct btrfs_delayed_ref_head *locked_ref,
1920 unsigned long *run_refs)
1921{
1922 struct btrfs_fs_info *fs_info = trans->fs_info;
1923 struct btrfs_delayed_ref_root *delayed_refs;
1924 struct btrfs_delayed_extent_op *extent_op;
1925 struct btrfs_delayed_ref_node *ref;
1926 int must_insert_reserved = 0;
1927 int ret;
1928
1929 delayed_refs = &trans->transaction->delayed_refs;
1930
1931 lockdep_assert_held(&locked_ref->mutex);
1932 lockdep_assert_held(&locked_ref->lock);
1933
1934 while ((ref = select_delayed_ref(locked_ref))) {
1935 if (ref->seq &&
1936 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1937 spin_unlock(&locked_ref->lock);
1938 unselect_delayed_ref_head(delayed_refs, locked_ref);
1939 return -EAGAIN;
1940 }
1941
1942 (*run_refs)++;
1943 ref->in_tree = 0;
1944 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1945 RB_CLEAR_NODE(&ref->ref_node);
1946 if (!list_empty(&ref->add_list))
1947 list_del(&ref->add_list);
1948
1949
1950
1951
1952 switch (ref->action) {
1953 case BTRFS_ADD_DELAYED_REF:
1954 case BTRFS_ADD_DELAYED_EXTENT:
1955 locked_ref->ref_mod -= ref->ref_mod;
1956 break;
1957 case BTRFS_DROP_DELAYED_REF:
1958 locked_ref->ref_mod += ref->ref_mod;
1959 break;
1960 default:
1961 WARN_ON(1);
1962 }
1963 atomic_dec(&delayed_refs->num_entries);
1964
1965
1966
1967
1968
1969 must_insert_reserved = locked_ref->must_insert_reserved;
1970 locked_ref->must_insert_reserved = 0;
1971
1972 extent_op = locked_ref->extent_op;
1973 locked_ref->extent_op = NULL;
1974 spin_unlock(&locked_ref->lock);
1975
1976 ret = run_one_delayed_ref(trans, ref, extent_op,
1977 must_insert_reserved);
1978
1979 btrfs_free_delayed_extent_op(extent_op);
1980 if (ret) {
1981 unselect_delayed_ref_head(delayed_refs, locked_ref);
1982 btrfs_put_delayed_ref(ref);
1983 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1984 ret);
1985 return ret;
1986 }
1987
1988 btrfs_put_delayed_ref(ref);
1989 cond_resched();
1990
1991 spin_lock(&locked_ref->lock);
1992 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1993 }
1994
1995 return 0;
1996}

/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
					     unsigned long nr)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	ktime_t start = ktime_get();
	int ret;
	unsigned long count = 0;
	unsigned long actual_count = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	do {
		if (!locked_ref) {
			locked_ref = btrfs_obtain_ref_head(trans);
			if (IS_ERR_OR_NULL(locked_ref)) {
				if (PTR_ERR(locked_ref) == -EAGAIN) {
					continue;
				} else {
					break;
				}
			}
			count++;
		}
		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 * Or we can get node references of the same type that weren't
		 * merged when created due to bumps in the tree mod seq, and
		 * we need to merge them together again before processing them
		 * for real deletion.
		 */
		spin_lock(&locked_ref->lock);
		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);

		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
						      &actual_count);
		if (ret < 0 && ret != -EAGAIN) {
			/*
			 * Error, btrfs_run_delayed_refs_for_head already
			 * unlocked everything so just bail out
			 */
			return ret;
		} else if (!ret) {
			/*
			 * Success, perform the usual cleanup of a processed
			 * head
			 */
			ret = cleanup_ref_head(trans, locked_ref);
			if (ret > 0) {
				/* We dropped our lock, we need to loop. */
				ret = 0;
				continue;
			} else if (ret) {
				return ret;
			}
		}

		/*
		 * Either success case or btrfs_run_delayed_refs_for_head
		 * returned -EAGAIN, meaning we need to select another head
		 */

		locked_ref = NULL;
		cond_resched();
	} while ((nr != -1 && count < nr) || locked_ref);

	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * one ref head's delayed refs, then unlock and see if we need to do
	 * anything else here.
	 */
	if (actual_count > 0) {
		u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
		u64 avg;

		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
		spin_lock(&delayed_refs->lock);
		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
		fs_info->avg_delayed_ref_runtime = avg >> 2;	/* div by 4 */
		spin_unlock(&delayed_refs->lock);
	}
	return 0;
}

#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	u64 middle;
	u64 first = 0, last = 0;

	n = rb_first(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		first = entry->bytenr;
	}
	n = rb_last(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		last = entry->bytenr;
	}
	n = root->rb_node;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		if (alt)
			n = n->rb_left;
		else
			n = n->rb_right;

		alt = 1 - alt;
	}
	return middle;
}
#endif

static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
{
	u64 num_bytes;

	num_bytes = heads * (sizeof(struct btrfs_extent_item) +
			     sizeof(struct btrfs_extent_inline_ref));
	if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		num_bytes += heads * sizeof(struct btrfs_tree_block_info);

	/*
	 * We don't ever fill up leaves all the way so multiply by 2 just to be
	 * closer to what we're really going to want to use.
	 */
	return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
}

/*
 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
 * would require to store the csums for that many bytes.
 */
u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
	u64 csum_size;
	u64 num_csums_per_leaf;
	u64 num_csums;

	csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
	num_csums_per_leaf = div64_u64(csum_size,
			(u64)btrfs_super_csum_size(fs_info->super_copy));
	num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
	num_csums += num_csums_per_leaf - 1;
	num_csums = div64_u64(num_csums, num_csums_per_leaf);
	return num_csums;
}

/*
 * this starts processing the delayed reference count updates and
 * extent allocations we've been waiting on. count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   unsigned long count)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	int ret;
	int run_all = count == (unsigned long)-1;

	/* We'll clean this up in btrfs_cleanup_transaction */
	if (trans->aborted)
		return 0;

	if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
		return 0;

	delayed_refs = &trans->transaction->delayed_refs;
	if (count == 0)
		count = atomic_read(&delayed_refs->num_entries) * 2;

again:
#ifdef SCRAMBLE_DELAYED_REFS
	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
	ret = __btrfs_run_delayed_refs(trans, count);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	if (run_all) {
		btrfs_create_pending_block_groups(trans);

		spin_lock(&delayed_refs->lock);
		node = rb_first_cached(&delayed_refs->href_root);
		if (!node) {
			spin_unlock(&delayed_refs->lock);
			goto out;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);

		/* Mutex was contended, block until it's released and retry. */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);

		btrfs_put_delayed_ref_head(head);
		cond_resched();
		goto again;
	}
out:
	return 0;
}

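/* Queue a delayed extent op that updates the on-disk extent flags. */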
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes, u64 flags,
				int level, int is_data)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = btrfs_alloc_delayed_extent_op();
	if (!extent_op)
		return -ENOMEM;

	extent_op->flags_to_set = flags;
	extent_op->update_flags = true;
	extent_op->update_key = false;
	extent_op->is_data = is_data ? true : false;
	extent_op->level = level;

	ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
	if (ret)
		btrfs_free_delayed_extent_op(extent_op);
	return ret;
}

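/*
 * Check the delayed refs of the running transaction for a reference to this
 * data extent that does not match (root, objectid, offset).  Returns 1 if one
 * exists, -EAGAIN if the head was contended and the caller should retry.
 */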
static noinline int check_delayed_ref(struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_transaction *cur_trans;
	struct rb_node *node;
	int ret = 0;

	spin_lock(&root->fs_info->trans_lock);
	cur_trans = root->fs_info->running_transaction;
	if (cur_trans)
		refcount_inc(&cur_trans->use_count);
	spin_unlock(&root->fs_info->trans_lock);
	if (!cur_trans)
		return 0;

	delayed_refs = &cur_trans->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		btrfs_put_transaction(cur_trans);
		return 0;
	}

	if (!mutex_trylock(&head->mutex)) {
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		btrfs_put_transaction(cur_trans);
		return -EAGAIN;
	}
	spin_unlock(&delayed_refs->lock);

	spin_lock(&head->lock);
	/*
	 * XXX: We should replace this with a proper search function in the
	 * future.
	 */
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		/* If it's a shared ref we know a cross reference exists */
		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			ret = 1;
			break;
		}

		data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
		if (data_ref->root != root->root_key.objectid ||
		    data_ref->objectid != objectid ||
		    data_ref->offset != offset) {
			ret = 1;
			break;
		}
	}
	spin_unlock(&head->lock);
	mutex_unlock(&head->mutex);
	btrfs_put_transaction(cur_trans);
	return ret;
}

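/*
 * Check the committed extent tree for other references to @bytenr.
 * Returns 0 if the extent is exclusively owned by (@root, @objectid,
 * @offset), 1 if it is (or may be) shared, and -ENOENT if no matching
 * extent item was found.
 */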
static noinline int check_committed_ref(struct btrfs_root *root,
					struct btrfs_path *path,
					u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 item_size;
	int type;
	int ret;

	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0); /* Corruption */

	ret = -ENOENT;
	if (path->slots[0] == 0)
		goto out;

	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
		goto out;

	ret = 1;
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);

	/* If extent item has more than 1 inline ref then it's shared */
	if (item_size != sizeof(*ei) +
	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
		goto out;

	/* If extent created before last snapshot => it's definitely shared */
	if (btrfs_extent_generation(leaf, ei) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	iref = (struct btrfs_extent_inline_ref *)(ei + 1);

	/* If this extent has SHARED_DATA_REF then it's shared */
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
	if (type != BTRFS_EXTENT_DATA_REF_KEY)
		goto out;

	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
	if (btrfs_extent_refs(leaf, ei) !=
	    btrfs_extent_data_ref_count(leaf, ref) ||
	    btrfs_extent_data_ref_root(leaf, ref) !=
	    root->root_key.objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		goto out;

	ret = 0;
out:
	return ret;
}

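/*
 * Returns 0 if the data extent at @bytenr is referenced only by
 * (@root, @objectid, @offset), non-zero otherwise.  Both the committed
 * extent tree and the delayed refs of the running transaction are
 * checked, retrying while check_delayed_ref() reports contention.
 */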
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
			  u64 bytenr)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	do {
		ret = check_committed_ref(root, path, objectid,
					  offset, bytenr);
		if (ret && ret != -ENOENT)
			goto out;

		ret = check_delayed_ref(root, path, objectid, offset, bytenr);
	} while (ret == -EAGAIN);

out:
	btrfs_free_path(path);
	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		WARN_ON(ret > 0);
	return ret;
}

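/*
 * Add or drop (according to @inc) one reference for each extent pointed
 * to by the items in @buf: file extent items in a leaf, or child block
 * pointers in a node.  Used when a tree block is COWed or freed.
 */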
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int full_backref, int inc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_ref generic_ref = { 0 };
	bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
	int i;
	int action;
	int level;
	int ret = 0;

	if (btrfs_is_testing(fs_info))
		return 0;

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
		return 0;

	if (full_backref)
		parent = buf->start;
	else
		parent = 0;
	if (inc)
		action = BTRFS_ADD_DELAYED_REF;
	else
		action = BTRFS_DROP_DELAYED_REF;

	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			key.offset -= btrfs_file_extent_offset(buf, fi);
			btrfs_init_generic_ref(&generic_ref, action, bytenr,
					       num_bytes, parent);
			generic_ref.real_root = root->root_key.objectid;
			btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
					    key.offset);
			generic_ref.skip_qgroup = for_reloc;
			if (inc)
				ret = btrfs_inc_extent_ref(trans, &generic_ref);
			else
				ret = btrfs_free_extent(trans, &generic_ref);
			if (ret)
				goto fail;
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = fs_info->nodesize;
			btrfs_init_generic_ref(&generic_ref, action, bytenr,
					       num_bytes, parent);
			generic_ref.real_root = root->root_key.objectid;
			btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
			generic_ref.skip_qgroup = for_reloc;
			if (inc)
				ret = btrfs_inc_extent_ref(trans, &generic_ref);
			else
				ret = btrfs_free_extent(trans, &generic_ref);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	return ret;
}

int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}

int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}

int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group *block_group;
	int readonly = 0;

	block_group = btrfs_lookup_block_group(fs_info, bytenr);
	if (!block_group || block_group->ro)
		readonly = 1;
	if (block_group)
		btrfs_put_block_group(block_group);
	return readonly;
}

static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 flags;
	u64 ret;

	if (data)
		flags = BTRFS_BLOCK_GROUP_DATA;
	else if (root == fs_info->chunk_root)
		flags = BTRFS_BLOCK_GROUP_SYSTEM;
	else
		flags = BTRFS_BLOCK_GROUP_METADATA;

	ret = btrfs_get_alloc_profile(fs_info, flags);
	return ret;
}

static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
{
	struct btrfs_block_group *cache;
	u64 bytenr;

	spin_lock(&fs_info->block_group_cache_lock);
	bytenr = fs_info->first_logical_byte;
	spin_unlock(&fs_info->block_group_cache_lock);

	if (bytenr < (u64)-1)
		return bytenr;

	cache = btrfs_lookup_first_block_group(fs_info, search_start);
	if (!cache)
		return 0;

	bytenr = cache->start;
	btrfs_put_block_group(cache);

	return bytenr;
}

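/*
 * Pin down @num_bytes at @bytenr in the given block group: account the
 * bytes as pinned in the block group and its space_info, optionally drop
 * them from the reserved counters, and mark the range dirty in the
 * pinned_extents tree so it gets unpinned at transaction commit.
 */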
static int pin_down_extent(struct btrfs_block_group *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;

	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
					     num_bytes);
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
				 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
	set_extent_dirty(fs_info->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	return 0;
}

/*
 * this function must be called within transaction
 */
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group *cache;

	ASSERT(fs_info->running_transaction);

	cache = btrfs_lookup_block_group(fs_info, bytenr);
	BUG_ON(!cache); /* Logic error */

	pin_down_extent(cache, bytenr, num_bytes, reserved);

	btrfs_put_block_group(cache);
	return 0;
}

/*
 * this function must be called within transaction
 */
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group *cache;
	int ret;

	cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!cache)
		return -EINVAL;

	/*
	 * pull in the free space cache (if any) so that our pin removes the
	 * free space from the cache.  We have load_only set to one because
	 * the slow code to read in the free extents does check the pinned
	 * extents.
	 */
	btrfs_cache_block_group(cache, 1);

	pin_down_extent(cache, bytenr, num_bytes, 0);

	/* remove us from the free space cache (if we're there at all) */
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
	btrfs_put_block_group(cache);
	return ret;
}

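/*
 * Remove @num_bytes starting at @start from the free space accounting of
 * the containing block group.  If the block group is still caching, the
 * portion past the caching progress is recorded as an excluded extent
 * instead, so the caching thread will skip it.
 */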
static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
				   u64 start, u64 num_bytes)
{
	int ret;
	struct btrfs_block_group *block_group;
	struct btrfs_caching_control *caching_ctl;

	block_group = btrfs_lookup_block_group(fs_info, start);
	if (!block_group)
		return -EINVAL;

	btrfs_cache_block_group(block_group, 0);
	caching_ctl = btrfs_get_caching_control(block_group);

	if (!caching_ctl) {
		/* Logic error */
		BUG_ON(!btrfs_block_group_done(block_group));
		ret = btrfs_remove_free_space(block_group, start, num_bytes);
	} else {
		mutex_lock(&caching_ctl->mutex);

		if (start >= caching_ctl->progress) {
			ret = btrfs_add_excluded_extent(fs_info, start,
							num_bytes);
		} else if (start + num_bytes <= caching_ctl->progress) {
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
		} else {
			num_bytes = caching_ctl->progress - start;
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
			if (ret)
				goto out_lock;

			num_bytes = (start + num_bytes) -
				caching_ctl->progress;
			start = caching_ctl->progress;
			ret = btrfs_add_excluded_extent(fs_info, start,
							num_bytes);
		}
out_lock:
		mutex_unlock(&caching_ctl->mutex);
		btrfs_put_caching_control(caching_ctl);
	}
	btrfs_put_block_group(block_group);
	return ret;
}

int btrfs_exclude_logged_extents(struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	struct btrfs_file_extent_item *item;
	struct btrfs_key key;
	int found_type;
	int i;
	int ret = 0;

	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
		return 0;

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(eb, item);
		if (found_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
			continue;
		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
		ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
		if (ret)
			break;
	}

	return ret;
}

static void
btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
{
	atomic_inc(&bg->reservations);
}

void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
	struct btrfs_caching_control *next;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_block_group *cache;

	down_write(&fs_info->commit_root_sem);

	list_for_each_entry_safe(caching_ctl, next,
				 &fs_info->caching_block_groups, list) {
		cache = caching_ctl->block_group;
		if (btrfs_block_group_done(cache)) {
			cache->last_byte_to_unpin = (u64)-1;
			list_del_init(&caching_ctl->list);
			btrfs_put_caching_control(caching_ctl);
		} else {
			cache->last_byte_to_unpin = caching_ctl->progress;
		}
	}

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];

	up_write(&fs_info->commit_root_sem);

	btrfs_update_global_block_rsv(fs_info);
}

/*
 * Returns the free cluster for the given space info and sets empty_cluster
 * to what it should be based on the mount options.
 */
static struct btrfs_free_cluster *
fetch_cluster_info(struct btrfs_fs_info *fs_info,
		   struct btrfs_space_info *space_info, u64 *empty_cluster)
{
	struct btrfs_free_cluster *ret = NULL;

	*empty_cluster = 0;
	if (btrfs_mixed_space_info(space_info))
		return ret;

	if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		ret = &fs_info->meta_alloc_cluster;
		if (btrfs_test_opt(fs_info, SSD))
			*empty_cluster = SZ_2M;
		else
			*empty_cluster = SZ_64K;
	} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
		   btrfs_test_opt(fs_info, SSD_SPREAD)) {
		*empty_cluster = SZ_2M;
		ret = &fs_info->data_alloc_cluster;
	}

	return ret;
}

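/*
 * Unpin the range [@start, @end], returning the space to the free space
 * cache when @return_free_space is true and refilling the global block
 * reserve from the unpinned bytes where possible.
 */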
static int unpin_extent_range(struct btrfs_fs_info *fs_info,
			      u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_block_group *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_free_cluster *cluster = NULL;
	u64 len;
	u64 total_unpinned = 0;
	u64 empty_cluster = 0;
	bool readonly;

	while (start <= end) {
		readonly = false;
		if (!cache ||
		    start >= cache->start + cache->length) {
			if (cache)
				btrfs_put_block_group(cache);
			total_unpinned = 0;
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache); /* Logic error */

			cluster = fetch_cluster_info(fs_info,
						     cache->space_info,
						     &empty_cluster);
			empty_cluster <<= 1;
		}

		len = cache->start + cache->length - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			if (return_free_space)
				btrfs_add_free_space(cache, start, len);
		}

		start += len;
		total_unpinned += len;
		space_info = cache->space_info;

		/*
		 * If this space cluster has been marked as fragmented and
		 * we've unpinned enough in this block group to potentially
		 * allow a cluster to be created inside of it go ahead and
		 * clear the fragmented check.
		 */
		if (cluster && cluster->fragmented &&
		    total_unpinned > empty_cluster) {
			spin_lock(&cluster->lock);
			cluster->fragmented = 0;
			spin_unlock(&cluster->lock);
		}

		spin_lock(&space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
		space_info->max_extent_size = 0;
		percpu_counter_add_batch(&space_info->total_bytes_pinned,
					 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
		if (cache->ro) {
			space_info->bytes_readonly += len;
			readonly = true;
		}
		spin_unlock(&cache->lock);
		if (!readonly && return_free_space &&
		    global_rsv->space_info == space_info) {
			u64 to_add = len;

			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				to_add = min(len, global_rsv->size -
					     global_rsv->reserved);
				global_rsv->reserved += to_add;
				btrfs_space_info_update_bytes_may_use(fs_info,
						space_info, to_add);
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
				len -= to_add;
			}
			spin_unlock(&global_rsv->lock);

			/* Add to any tickets we may have */
			if (len)
				btrfs_try_granting_tickets(fs_info,
							   space_info);
		}
		spin_unlock(&space_info->lock);
	}

	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}

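/*
 * Called during transaction commit: unpin all extent ranges freed in this
 * transaction, discard them if the discard mount option is set, and finish
 * discarding any block groups that were deleted.
 */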
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *block_group, *tmp;
	struct list_head *deleted_bgs;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (!trans->aborted) {
		struct extent_state *cached_state = NULL;

		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, &cached_state);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(fs_info, DISCARD))
			ret = btrfs_discard_extent(fs_info, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end, &cached_state);
		unpin_extent_range(fs_info, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		free_extent_state(cached_state);
		cond_resched();
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
	deleted_bgs = &trans->transaction->deleted_bgs;
	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
		u64 trimmed = 0;

		ret = -EROFS;
		if (!trans->aborted)
			ret = btrfs_discard_extent(fs_info,
						   block_group->start,
						   block_group->length,
						   &trimmed);

		list_del_init(&block_group->bg_list);
		btrfs_put_block_group_trimming(block_group);
		btrfs_put_block_group(block_group);

		if (ret) {
			const char *errstr = btrfs_decode_error(ret);
			btrfs_warn(fs_info,
			   "discard failed while removing blockgroup: errno=%d %s",
				   ret, errstr);
		}
	}

	return 0;
}

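/*
 * Drop @refs_to_drop references to the extent described by @node.  The
 * matching backref is removed, and when the last reference goes away the
 * extent item itself is deleted, csums are dropped and the block group
 * counters are updated.
 */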
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	int last_ref = 0;
	bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	BUG_ON(!is_data && refs_to_drop != 1);

	if (is_data)
		skinny_metadata = false;

	ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
				    parent, root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}

		if (!found_extent) {
			BUG_ON(iref);
			ret = remove_extent_backref(trans, path, NULL,
						    refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			btrfs_release_path(path);
			path->leave_spinning = 1;

			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				skinny_metadata = false;
				key.objectid = bytenr;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				btrfs_err(info,
					  "umm, got %d back from search, was looking for %llu",
					  ret, bytenr);
				if (ret > 0)
					btrfs_print_leaf(path->nodes[0]);
			}
			if (ret < 0) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (WARN_ON(ret == -ENOENT)) {
		btrfs_print_leaf(path->nodes[0]);
		btrfs_err(info,
			"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
			bytenr, parent, root_objectid, owner_objectid,
			owner_offset);
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
	if (unlikely(item_size < sizeof(*ei))) {
		ret = -EINVAL;
		btrfs_print_v0_err(info);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		btrfs_err(info,
			  "trying to drop %d refs but we only have %Lu for bytenr %Lu",
			  refs_to_drop, refs, bytenr);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, path, iref,
						    refs_to_drop, is_data,
						    &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
		}
	} else {
		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		last_ref = 1;
		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, info->csum_root, bytenr,
					      num_bytes);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
		}

		ret = add_to_free_space_tree(trans, bytenr, num_bytes);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	btrfs_release_path(path);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * when we free a block, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree
 * for a given extent, and if there are no other delayed refs to be
 * processed, it removes it from the tree.
 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head)
		goto out_delayed_unlock;

	spin_lock(&head->lock);
	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		goto out;

	if (cleanup_extent_op(head) != NULL)
		goto out;

	/*
	 * waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	btrfs_delete_ref_head(delayed_refs, head);
	head->processing = 0;

	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	BUG_ON(head->extent_op);
	if (head->must_insert_reserved)
		ret = 1;

	btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
	mutex_unlock(&head->mutex);
	btrfs_put_delayed_ref_head(head);
	return ret;
out:
	spin_unlock(&head->lock);

out_delayed_unlock:
	spin_unlock(&delayed_refs->lock);
	return 0;
}

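/*
 * Free the tree block @buf.  A delayed ref drop is queued unless this is a
 * log tree block, and if this was the last reference and the block was
 * allocated in the current transaction the space can be reclaimed
 * immediately instead of being pinned.
 */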
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ref generic_ref = { 0 };
	int pin = 1;
	int ret;

	btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
			       buf->start, buf->len, parent);
	btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
			    root->root_key.objectid);

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		int old_ref_mod, new_ref_mod;

		btrfs_ref_tree_mod(fs_info, &generic_ref);
		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
						 &old_ref_mod, &new_ref_mod);
		BUG_ON(ret); /* -ENOMEM */
		pin = old_ref_mod >= 0 && new_ref_mod < 0;
	}

	if (last_ref && btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group *cache;

		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, buf->start);
			if (!ret)
				goto out;
		}

		pin = 0;
		cache = btrfs_lookup_block_group(fs_info, buf->start);

		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_free_reserved_bytes(cache, buf->len, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
	}
out:
	if (pin)
		add_pinned_bytes(fs_info, &generic_ref);

	if (last_ref) {
		/*
		 * Deleting the buffer, clear the corrupt flag since it doesn't
		 * matter anymore.
		 */
		clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
	}
}

/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod, new_ref_mod;
	int ret;

	if (btrfs_is_testing(fs_info))
		return 0;

	/*
	 * tree log blocks never actually go into the extent allocation
	 * tree, just update pinning info and exit early.
	 */
	if ((ref->type == BTRFS_REF_METADATA &&
	     ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
	    (ref->type == BTRFS_REF_DATA &&
	     ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
		/* unlocks the pinned mutex */
		btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
		old_ref_mod = new_ref_mod = 0;
		ret = 0;
	} else if (ref->type == BTRFS_REF_METADATA) {
		ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
						 &old_ref_mod, &new_ref_mod);
	} else {
		ret = btrfs_add_delayed_data_ref(trans, ref, 0,
						 &old_ref_mod, &new_ref_mod);
	}

	if (!((ref->type == BTRFS_REF_METADATA &&
	       ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
	      (ref->type == BTRFS_REF_DATA &&
	       ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
		btrfs_ref_tree_mod(fs_info, ref);

	if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
		add_pinned_bytes(fs_info, ref);

	return ret;
}

enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT,
	LOOP_CACHING_WAIT,
	LOOP_ALLOC_CHUNK,
	LOOP_NO_EMPTY_SIZE,
};

static inline void
btrfs_lock_block_group(struct btrfs_block_group *cache,
		       int delalloc)
{
	if (delalloc)
		down_read(&cache->data_rwsem);
}

static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
					  int delalloc)
{
	btrfs_get_block_group(cache);
	if (delalloc)
		down_read(&cache->data_rwsem);
}

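/*
 * Grab a reference on the block group that currently backs @cluster and,
 * for delalloc allocations, take its data_rwsem in read mode.  Loops until
 * the cluster's block group is stable while the lock is held.
 */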
static struct btrfs_block_group *btrfs_lock_cluster(
		   struct btrfs_block_group *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group *used_bg = NULL;

	spin_lock(&cluster->refill_lock);
	while (1) {
		used_bg = cluster->block_group;
		if (!used_bg)
			return NULL;

		if (used_bg == block_group)
			return used_bg;

		btrfs_get_block_group(used_bg);

		if (!delalloc)
			return used_bg;

		if (down_read_trylock(&used_bg->data_rwsem))
			return used_bg;

		spin_unlock(&cluster->refill_lock);

		/* We should only have one-level nested. */
		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);

		spin_lock(&cluster->refill_lock);
		if (used_bg == cluster->block_group)
			return used_bg;

		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}
}

static inline void
btrfs_release_block_group(struct btrfs_block_group *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}

/*
 * Structure used internally for find_free_extent() function.  Wraps needed
 * parameters.
 */
struct find_free_extent_ctl {
	/* Basic allocation info */
	u64 ram_bytes;
	u64 num_bytes;
	u64 empty_size;
	u64 flags;
	int delalloc;

	/* Where to start the search inside the bg */
	u64 search_start;

	/* For clustered allocation */
	u64 empty_cluster;

	bool have_caching_bg;
	bool orig_have_caching_bg;

	/* RAID index, converted from flags */
	int index;

	/*
	 * Current loop number, check find_free_extent_update_loop() for details
	 */
	int loop;

	/*
	 * Whether we're refilling a cluster, if true we need to re-search
	 * current block group but don't try to refill the cluster again.
	 */
	bool retry_clustered;

	/*
	 * Whether we're updating free space cache, if true we need to re-search
	 * current block group but don't try updating free space cache again.
	 */
	bool retry_unclustered;

	/* If current block group is cached */
	int cached;

	/* Max contiguous hole found */
	u64 max_extent_size;

	/* Total free space from free space cache, not always contiguous */
	u64 total_free_space;

	/* Found result */
	u64 found_offset;
};

/*
 * Helper function for clustered allocation.
 *
 * Return -ENOENT to inform caller that we need fallback to unclustered mode.
 * Return -EAGAIN to inform caller that we need to re-search this block group.
 * Return >0 to inform caller that we find nothing.
 * Return 0 means we have found a location and set ffe_ctl->found_offset.
 */
static int find_free_extent_clustered(struct btrfs_block_group *bg,
		struct btrfs_free_cluster *last_ptr,
		struct find_free_extent_ctl *ffe_ctl,
		struct btrfs_block_group **cluster_bg_ret)
{
	struct btrfs_block_group *cluster_bg;
	u64 aligned_cluster;
	u64 offset;
	int ret;

	cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
	if (!cluster_bg)
		goto refill_cluster;
	if (cluster_bg != bg && (cluster_bg->ro ||
	    !block_group_bits(cluster_bg, ffe_ctl->flags)))
		goto release_cluster;

	offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
			ffe_ctl->num_bytes, cluster_bg->start,
			&ffe_ctl->max_extent_size);
	if (offset) {
		/* We have a block, we're done */
		spin_unlock(&last_ptr->refill_lock);
		trace_btrfs_reserve_extent_cluster(cluster_bg,
				ffe_ctl->search_start, ffe_ctl->num_bytes);
		*cluster_bg_ret = cluster_bg;
		ffe_ctl->found_offset = offset;
		return 0;
	}
	WARN_ON(last_ptr->block_group != cluster_bg);

release_cluster:
	/*
	 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so
	 * lets just skip it and let the allocator find whatever block it can
	 * find. If we reach this point, we will have tried the cluster
	 * allocator plenty of times and not have found anything, so we are
	 * likely way too fragmented for the clustering stuff to find anything.
	 *
	 * However, if the cluster is taken from the current block group,
	 * release the cluster first, so that we stand a better chance of
	 * succeeding in the unclustered allocation.
	 */
	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
		spin_unlock(&last_ptr->refill_lock);
		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
		return -ENOENT;
	}

	/* This cluster didn't work out, free it and start over */
	btrfs_return_cluster_to_free_space(NULL, last_ptr);

	if (cluster_bg != bg)
		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);

refill_cluster:
	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
		spin_unlock(&last_ptr->refill_lock);
		return -ENOENT;
	}

	aligned_cluster = max_t(u64,
			ffe_ctl->empty_cluster + ffe_ctl->empty_size,
			bg->full_stripe_len);
	ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
			ffe_ctl->num_bytes, aligned_cluster);
	if (ret == 0) {
		/* Now pull our allocation out of this cluster */
		offset = btrfs_alloc_from_cluster(bg, last_ptr,
				ffe_ctl->num_bytes, ffe_ctl->search_start,
				&ffe_ctl->max_extent_size);
		if (offset) {
			/* We found one, proceed */
			spin_unlock(&last_ptr->refill_lock);
			trace_btrfs_reserve_extent_cluster(bg,
					ffe_ctl->search_start,
					ffe_ctl->num_bytes);
			ffe_ctl->found_offset = offset;
			return 0;
		}
	} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
		   !ffe_ctl->retry_clustered) {
		spin_unlock(&last_ptr->refill_lock);

		ffe_ctl->retry_clustered = true;
		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
				ffe_ctl->empty_cluster + ffe_ctl->empty_size);
		return -EAGAIN;
	}

	/*
	 * At this point we either didn't find a cluster or we weren't able to
	 * allocate a block from our cluster.  Free the cluster we've been
	 * trying to use, and go to the next block group.
	 */
	btrfs_return_cluster_to_free_space(NULL, last_ptr);
	spin_unlock(&last_ptr->refill_lock);
	return 1;
}

/*
 * Return >0 to inform caller that we find nothing
 * Return 0 when we found a free extent and set ffe_ctl->found_offset
 * Return -EAGAIN to inform caller that we need to re-search this block group
 */
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
		struct btrfs_free_cluster *last_ptr,
		struct find_free_extent_ctl *ffe_ctl)
{
	u64 offset;

	/*
	 * We are doing an unclustered allocation, set the fragmented flag so
	 * we don't bother trying to setup a cluster again until we get more
	 * space.
	 */
	if (unlikely(last_ptr)) {
		spin_lock(&last_ptr->lock);
		last_ptr->fragmented = 1;
		spin_unlock(&last_ptr->lock);
	}
	if (ffe_ctl->cached) {
		struct btrfs_free_space_ctl *free_space_ctl;

		free_space_ctl = bg->free_space_ctl;
		spin_lock(&free_space_ctl->tree_lock);
		if (free_space_ctl->free_space <
		    ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
		    ffe_ctl->empty_size) {
			ffe_ctl->total_free_space = max_t(u64,
					ffe_ctl->total_free_space,
					free_space_ctl->free_space);
			spin_unlock(&free_space_ctl->tree_lock);
			return 1;
		}
		spin_unlock(&free_space_ctl->tree_lock);
	}

	offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
			ffe_ctl->num_bytes, ffe_ctl->empty_size,
			&ffe_ctl->max_extent_size);

	/*
	 * If we didn't find a chunk, and we haven't failed on this block group
	 * before, and this block group is in the middle of caching and we are
	 * ok with waiting, then go ahead and wait for progress to be made, and
	 * set @retry_unclustered to true.
	 *
	 * If @retry_unclustered is true then we've already waited on this
	 * block group once and should move on to the next block group.
	 */
	if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
	    ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
						      ffe_ctl->empty_size);
		ffe_ctl->retry_unclustered = true;
		return -EAGAIN;
	} else if (!offset) {
		return 1;
	}
	ffe_ctl->found_offset = offset;
	return 0;
}

/*
 * Return >0 means caller needs to re-search for free extent
 * Return 0 means we have the needed free extent.
 * Return <0 means we failed to locate any free extent.
 */
static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
					struct btrfs_free_cluster *last_ptr,
					struct btrfs_key *ins,
					struct find_free_extent_ctl *ffe_ctl,
					int full_search, bool use_cluster)
{
	struct btrfs_root *root = fs_info->extent_root;
	int ret;

	if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
	    ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
		ffe_ctl->orig_have_caching_bg = true;

	if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
	    ffe_ctl->have_caching_bg)
		return 1;

	if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
		return 1;

	if (ins->objectid) {
		if (!use_cluster && last_ptr) {
			spin_lock(&last_ptr->lock);
			last_ptr->window_start = ins->objectid;
			spin_unlock(&last_ptr->lock);
		}
		return 0;
	}

	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *		       again
	 */
	if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
		ffe_ctl->index = 0;
		if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
			/*
			 * We want to skip the LOOP_CACHING_WAIT step if we
			 * don't have any uncached bgs and we've already done a
			 * full search through.
			 */
			if (ffe_ctl->orig_have_caching_bg || !full_search)
				ffe_ctl->loop = LOOP_CACHING_WAIT;
			else
				ffe_ctl->loop = LOOP_ALLOC_CHUNK;
		} else {
			ffe_ctl->loop++;
		}

		if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
			struct btrfs_trans_handle *trans;
			int exist = 0;

			trans = current->journal_info;
			if (trans)
				exist = 1;
			else
				trans = btrfs_join_transaction(root);

			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				return ret;
			}

			ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
						CHUNK_ALLOC_FORCE);

			/*
			 * If we can't allocate a new chunk we've already looped
			 * through at least once, move on to the NO_EMPTY_SIZE
			 * case.
			 */
			if (ret == -ENOSPC)
				ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;

			/* Do not bail out on ENOSPC since we can do more. */
			if (ret < 0 && ret != -ENOSPC)
				btrfs_abort_transaction(trans, ret);
			else
				ret = 0;
			if (!exist)
				btrfs_end_transaction(trans);
			if (ret)
				return ret;
		}

		if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
			/*
			 * Don't loop again if we already have no empty_size and
			 * no empty_cluster.
			 */
			if (ffe_ctl->empty_size == 0 &&
			    ffe_ctl->empty_cluster == 0)
				return -ENOSPC;
			ffe_ctl->empty_size = 0;
			ffe_ctl->empty_cluster = 0;
		}
		return 1;
	}
	return -ENOSPC;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 *
 * The overall logic and call chain:
 *
 * find_free_extent()
 * |- Initialize find_free_extent_ctl
 * |- btrfs_lookup_block_group(hint_byte)
 * |- Iterate through all block groups
 * |  |- Get a valid block group
 * |  |- Try to do clustered allocation in that block group
 * |  |- Try to do unclustered allocation in that block group
 * |  |- Check if the result is valid
 * |  |  |- If valid, then exit
 * |  |- Jump to next block group
 * |
 * |- Push harder to find free extents
 *    |- If not found, re-iterate all block groups
 */
static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
				u64 ram_bytes, u64 num_bytes, u64 empty_size,
				u64 hint_byte, struct btrfs_key *ins,
				u64 flags, int delalloc)
{
	int ret = 0;
	int cache_block_group_error = 0;
	struct btrfs_free_cluster *last_ptr = NULL;
	struct btrfs_block_group *block_group = NULL;
	struct find_free_extent_ctl ffe_ctl = {0};
	struct btrfs_space_info *space_info;
	bool use_cluster = true;
	bool full_search = false;

	WARN_ON(num_bytes < fs_info->sectorsize);

	ffe_ctl.ram_bytes = ram_bytes;
	ffe_ctl.num_bytes = num_bytes;
	ffe_ctl.empty_size = empty_size;
	ffe_ctl.flags = flags;
	ffe_ctl.search_start = 0;
	ffe_ctl.retry_clustered = false;
	ffe_ctl.retry_unclustered = false;
	ffe_ctl.delalloc = delalloc;
	ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
	ffe_ctl.have_caching_bg = false;
	ffe_ctl.orig_have_caching_bg = false;
	ffe_ctl.found_offset = 0;

	ins->type = BTRFS_EXTENT_ITEM_KEY;
	ins->objectid = 0;
	ins->offset = 0;

	trace_find_free_extent(fs_info, num_bytes, empty_size, flags);

	space_info = btrfs_find_space_info(fs_info, flags);
	if (!space_info) {
		btrfs_err(fs_info, "No space info for %llu", flags);
		return -ENOSPC;
	}

	/*
	 * If our free space is heavily fragmented we may not be able to make
	 * big contiguous allocations, so instead of doing the expensive search
	 * for free space, simply return ENOSPC with our max_extent_size so we
	 * can go ahead and search for a more manageable chunk.
	 *
	 * If our max_extent_size is large enough for our allocation simply
	 * disable clustering since we will likely not be able to find enough
	 * space to create a cluster and induce latency trying.
	 */
	if (unlikely(space_info->max_extent_size)) {
		spin_lock(&space_info->lock);
		if (space_info->max_extent_size &&
		    num_bytes > space_info->max_extent_size) {
			ins->offset = space_info->max_extent_size;
			spin_unlock(&space_info->lock);
			return -ENOSPC;
		} else if (space_info->max_extent_size) {
			use_cluster = false;
		}
		spin_unlock(&space_info->lock);
	}

	last_ptr = fetch_cluster_info(fs_info, space_info,
				      &ffe_ctl.empty_cluster);
	if (last_ptr) {
		spin_lock(&last_ptr->lock);
		if (last_ptr->block_group)
			hint_byte = last_ptr->window_start;
		if (last_ptr->fragmented) {
			/*
			 * We still set window_start so we can keep track of the
			 * last place we found an allocation to try and save
			 * some cpu.
			 */
			hint_byte = last_ptr->window_start;
			use_cluster = false;
		}
		spin_unlock(&last_ptr->lock);
	}

	ffe_ctl.search_start = max(ffe_ctl.search_start,
				   first_logical_byte(fs_info, 0));
	ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
	if (ffe_ctl.search_start == hint_byte) {
		block_group = btrfs_lookup_block_group(fs_info,
						       ffe_ctl.search_start);
		/*
		 * we don't want to use the block group if it doesn't match our
		 * allocation bits, or if its not cached.
		 *
		 * However if we are re-searching with an ideal block group
		 * specified, we don't want to fill up the cache with only 0
		 * allocation candidates either.
		 */
		if (block_group && block_group_bits(block_group, flags) &&
		    block_group->cached != BTRFS_CACHE_NO) {
			down_read(&space_info->groups_sem);
			if (list_empty(&block_group->list) ||
			    block_group->ro) {
				/*
				 * someone is removing this block group,
				 * we can't jump into the have_block_group
				 * target because our list pointers are not
				 * valid
				 */
				btrfs_put_block_group(block_group);
				up_read(&space_info->groups_sem);
			} else {
				ffe_ctl.index = btrfs_bg_flags_to_raid_index(
						block_group->flags);
				btrfs_lock_block_group(block_group, delalloc);
				goto have_block_group;
			}
		} else if (block_group) {
			btrfs_put_block_group(block_group);
		}
	}
search:
	ffe_ctl.have_caching_bg = false;
	if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
	    ffe_ctl.index == 0)
		full_search = true;
	down_read(&space_info->groups_sem);
	list_for_each_entry(block_group,
			    &space_info->block_groups[ffe_ctl.index], list) {
		/* If the block group is read-only, we can skip it entirely. */
		if (unlikely(block_group->ro))
			continue;

		btrfs_grab_block_group(block_group, delalloc);
		ffe_ctl.search_start = block_group->start;

		/*
		 * this can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */
		if (!block_group_bits(block_group, flags)) {
			u64 extra = BTRFS_BLOCK_GROUP_DUP |
				BTRFS_BLOCK_GROUP_RAID1_MASK |
				BTRFS_BLOCK_GROUP_RAID56_MASK |
				BTRFS_BLOCK_GROUP_RAID10;

			/*
			 * if they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */
			if ((flags & extra) && !(block_group->flags & extra))
				goto loop;

			/*
			 * This block group has different flags than we want.
			 * It's possible that we have MIXED_GROUP flag but no
			 * block group is mixed.  Just skip such block group.
			 */
			btrfs_release_block_group(block_group, delalloc);
			continue;
		}

have_block_group:
		ffe_ctl.cached = btrfs_block_group_done(block_group);
		if (unlikely(!ffe_ctl.cached)) {
			ffe_ctl.have_caching_bg = true;
			ret = btrfs_cache_block_group(block_group, 0);

			/*
			 * If we get ENOMEM here or something else we want to
			 * try other block groups, because it may not be fatal.
			 * However if we can't find anything else we need to
			 * save our return here so that we return the actual
			 * error that caused problems, not -ENOSPC.
			 */
			if (ret < 0) {
				if (!cache_block_group_error)
					cache_block_group_error = ret;
				ret = 0;
				goto loop;
			}
			ret = 0;
		}

		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
			goto loop;

		/*
		 * Ok we want to try and use the cluster allocator, so
		 * lets look there
		 */
		if (last_ptr && use_cluster) {
			struct btrfs_block_group *cluster_bg = NULL;

			ret = find_free_extent_clustered(block_group, last_ptr,
							 &ffe_ctl, &cluster_bg);

			if (ret == 0) {
				if (cluster_bg && cluster_bg != block_group) {
					btrfs_release_block_group(block_group,
								  delalloc);
					block_group = cluster_bg;
				}
				goto checks;
			} else if (ret == -EAGAIN) {
				goto have_block_group;
			} else if (ret > 0) {
				goto loop;
			}
			/* ret == -ENOENT case falls through */
		}

		ret = find_free_extent_unclustered(block_group, last_ptr,
						   &ffe_ctl);
		if (ret == -EAGAIN)
			goto have_block_group;
		else if (ret > 0)
			goto loop;

checks:
		ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
					     fs_info->stripesize);

		/* move on to the next group */
		if (ffe_ctl.search_start + num_bytes >
		    block_group->start + block_group->length) {
			btrfs_add_free_space(block_group, ffe_ctl.found_offset,
					     num_bytes);
			goto loop;
		}

		if (ffe_ctl.found_offset < ffe_ctl.search_start)
			btrfs_add_free_space(block_group, ffe_ctl.found_offset,
				ffe_ctl.search_start - ffe_ctl.found_offset);

		ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
				num_bytes, delalloc);
		if (ret == -EAGAIN) {
			btrfs_add_free_space(block_group, ffe_ctl.found_offset,
					     num_bytes);
			goto loop;
		}
		btrfs_inc_block_group_reservations(block_group);

		/* we are all good, lets return */
		ins->objectid = ffe_ctl.search_start;
		ins->offset = num_bytes;

		trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
					   num_bytes);
		btrfs_release_block_group(block_group, delalloc);
		break;
loop:
		ffe_ctl.retry_clustered = false;
		ffe_ctl.retry_unclustered = false;
		BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
		       ffe_ctl.index);
		btrfs_release_block_group(block_group, delalloc);
		cond_resched();
	}
	up_read(&space_info->groups_sem);

	ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
					   full_search, use_cluster);
	if (ret > 0)
		goto search;

	if (ret == -ENOSPC && !cache_block_group_error) {
		/*
		 * Use ffe_ctl->total_free_space as fallback if we can't find
		 * any contiguous hole.
		 */
		if (!ffe_ctl.max_extent_size)
			ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
		spin_lock(&space_info->lock);
		space_info->max_extent_size = ffe_ctl.max_extent_size;
		spin_unlock(&space_info->lock);
		ins->offset = ffe_ctl.max_extent_size;
	} else if (ret == -ENOSPC) {
		ret = cache_block_group_error;
	}
	return ret;
}

/*
 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
 *			  hole that is at least as big as @num_bytes.
 *
 * @root           -	The root that will contain this extent
 *
 * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
 *			is used for accounting purposes. This value differs
 *			from @num_bytes only in the case of compressed extents.
 *
 * @num_bytes      -	Number of bytes to allocate on-disk.
 *
 * @min_alloc_size -	Indicates the minimum amount of space that the
 *			allocator should try to satisfy. In some cases
 *			@num_bytes may be larger than what is required and if
 *			the filesystem is fragmented then allocation fails.
 *			However, the presence of @min_alloc_size gives a
 *			chance to try and satisfy the smaller allocation.
 *
 * @empty_size     -	A hint that you plan on doing more COW. This is the
 *			size in bytes the allocator should try to find free
 *			next to the block it returns.  This is just a hint and
 *			may be ignored by the allocator.
 *
 * @hint_byte      -	Hint to the allocator to start searching above the byte
 *			address passed. It might be ignored.
 *
 * @ins            -	This key is modified to record the found hole. It will
 *			have the following values:
 *			ins->objectid == start position
 *			ins->flags = BTRFS_EXTENT_ITEM_KEY
 *			ins->offset == the size of the hole.
 *
 * @is_data        -	Boolean flag indicating whether an extent is
 *			allocated for data (true) or metadata (false)
 *
 * @delalloc       -	Boolean flag indicating whether this allocation is for
 *			delayed allocation. This is used to reserve the space
 *			reserved for delalloc
 *
 * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
 * case -ENOSPC is returned then @ins->offset will contain the size of the
 * largest available hole the allocator managed to find.
 */
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	bool final_tried = num_bytes == min_alloc_size;
	u64 flags;
	int ret;

	flags = get_alloc_profile_by_root(root, is_data);
again:
	WARN_ON(num_bytes < fs_info->sectorsize);
	ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
			       hint_byte, ins, flags, delalloc);
	if (!ret && !is_data) {
		btrfs_dec_block_group_reservations(fs_info, ins->objectid);
	} else if (ret == -ENOSPC) {
		if (!final_tried && ins->offset) {
			num_bytes = min(num_bytes >> 1, ins->offset);
			num_bytes = round_down(num_bytes,
					       fs_info->sectorsize);
			num_bytes = max(num_bytes, min_alloc_size);
			ram_bytes = num_bytes;
			if (num_bytes == min_alloc_size)
				final_tried = true;
			goto again;
		} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
			struct btrfs_space_info *sinfo;

			sinfo = btrfs_find_space_info(fs_info, flags);
			btrfs_err(fs_info,
				  "allocation failed flags %llu, wanted %llu",
				  flags, num_bytes);
			if (sinfo)
				btrfs_dump_space_info(fs_info, sinfo,
						      num_bytes, 1);
		}
	}

	return ret;
}

static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
					u64 start, u64 len,
					int pin, int delalloc)
{
	struct btrfs_block_group *cache;
	int ret = 0;

	cache = btrfs_lookup_block_group(fs_info, start);
	if (!cache) {
		btrfs_err(fs_info, "Unable to find block group for %llu",
			  start);
		return -ENOSPC;
	}

	if (pin)
		pin_down_extent(cache, start, len, 1);
	else {
		if (btrfs_test_opt(fs_info, DISCARD))
			ret = btrfs_discard_extent(fs_info, start, len, NULL);
		btrfs_add_free_space(cache, start, len);
		btrfs_free_reserved_bytes(cache, len, delalloc);
		trace_btrfs_reserved_extent_free(fs_info, start, len);
	}

	btrfs_put_block_group(cache);
	return ret;
}

int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 len, int delalloc)
{
	return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
}

int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
				       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
}

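/*
 * Insert the extent item and inline backref for a newly allocated data
 * extent into the extent tree, then update the free space tree and the
 * block group counters.
 */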
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
	if (ret)
		return ret;

	ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			ins->objectid, ins->offset);
		BUG();
	}
	trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
	return ret;
}

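/*
 * Insert the extent item (or skinny metadata item) and inline backref for
 * a newly allocated tree block into the extent tree, then update the free
 * space tree and the block group counters.
 */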
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_node *node,
				     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;
	struct btrfs_extent_item *extent_item;
	struct btrfs_key extent_key;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_delayed_tree_ref *ref;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	u64 num_bytes;
	u64 flags = extent_op->flags_to_set;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);

	extent_key.objectid = node->bytenr;
	if (skinny_metadata) {
		extent_key.offset = ref->level;
		extent_key.type = BTRFS_METADATA_ITEM_KEY;
		num_bytes = fs_info->nodesize;
	} else {
		extent_key.offset = node->num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		size += sizeof(*block_info);
		num_bytes = node->num_bytes;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      &extent_key, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
		btrfs_set_tree_block_level(leaf, block_info, ref->level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, extent_key.objectid,
					  num_bytes);
	if (ret)
		return ret;

	ret = btrfs_update_block_group(trans, extent_key.objectid,
				       fs_info->nodesize, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			extent_key.objectid, extent_key.offset);
		BUG();
	}

	trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
					  fs_info->nodesize);
	return ret;
}

int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins)
{
	struct btrfs_ref generic_ref = { 0 };
	int ret;

	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
			       ins->objectid, ins->offset, 0);
	btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
	btrfs_ref_tree_mod(root->fs_info, &generic_ref);
	ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
					 ram_bytes, NULL, NULL);
	return ret;
}

/*
 * this is used by the tree logging recovery code.  It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;
	struct btrfs_block_group *block_group;
	struct btrfs_space_info *space_info;

	/*
	 * Mixed block groups will exclude before processing the log so we only
	 * need to do the exclude dance if this fs isn't mixed.
	 */
	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(fs_info, ins->objectid,
					      ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	space_info->bytes_reserved += ins->offset;
	block_group->reserved += ins->offset;
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
					 offset, ins, 1);
	btrfs_put_block_group(block_group);
	return ret;
}

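/*
 * Initialize a freshly allocated tree block: take the tree lock, wipe the
 * header, stamp in level/bytenr/generation/owner and mark the buffer dirty
 * in the right extent io tree (log pages for log trees, transaction dirty
 * pages otherwise).
 */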
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, int level, u64 owner)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(fs_info, bytenr);
	if (IS_ERR(buf))
		return buf;

	/*
	 * Extra safety check in case the extent tree is corrupted and extent
	 * allocator chooses to use a tree block which is already used and
	 * locked.
	 */
	if (buf->lock_owner == current->pid) {
		btrfs_err_rl(fs_info,
"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
			buf->start, btrfs_header_owner(buf), current->pid);
		free_extent_buffer(buf);
		return ERR_PTR(-EUCLEAN);
	}

	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
	btrfs_tree_lock(buf);
	btrfs_clean_tree_block(buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

	btrfs_set_lock_blocking_write(buf);
	set_extent_buffer_uptodate(buf);

	memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(buf, level);
	btrfs_set_header_bytenr(buf, buf->start);
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(buf, owner);
	write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
	write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		buf->log_index = root->log_transid % 2;
		/*
		 * we allow two log transactions at a time, use different
		 * EXTENT bit to differentiate dirty pages.
		 */
		if (buf->log_index == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					 buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
				       buf->start + buf->len - 1);
	} else {
		buf->log_index = -1;
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
				 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->dirty = true;
	/* this returns a buffer locked for blocking */
	return buf;
}

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 */
4503struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4504 struct btrfs_root *root,
4505 u64 parent, u64 root_objectid,
4506 const struct btrfs_disk_key *key,
4507 int level, u64 hint,
4508 u64 empty_size)
4509{
4510 struct btrfs_fs_info *fs_info = root->fs_info;
4511 struct btrfs_key ins;
4512 struct btrfs_block_rsv *block_rsv;
4513 struct extent_buffer *buf;
4514 struct btrfs_delayed_extent_op *extent_op;
4515 struct btrfs_ref generic_ref = { 0 };
4516 u64 flags = 0;
4517 int ret;
4518 u32 blocksize = fs_info->nodesize;
4519 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4520
4521#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4522 if (btrfs_is_testing(fs_info)) {
4523 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4524 level, root_objectid);
4525 if (!IS_ERR(buf))
4526 root->alloc_bytenr += blocksize;
4527 return buf;
4528 }
4529#endif
4530
4531 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4532 if (IS_ERR(block_rsv))
4533 return ERR_CAST(block_rsv);
4534
4535 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4536 empty_size, hint, &ins, 0, 0);
4537 if (ret)
4538 goto out_unuse;
4539
4540 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4541 root_objectid);
4542 if (IS_ERR(buf)) {
4543 ret = PTR_ERR(buf);
4544 goto out_free_reserved;
4545 }
4546
4547 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4548 if (parent == 0)
4549 parent = ins.objectid;
4550 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4551 } else
4552 BUG_ON(parent > 0);
4553
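	/*
	 * For every tree other than the log tree, the new block is announced
	 * to the extent tree through a delayed ref; the extent_op carries the
	 * key and flags to set on the extent item when that ref is run.
	 */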
	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		extent_op = btrfs_alloc_delayed_extent_op();
		if (!extent_op) {
			ret = -ENOMEM;
			goto out_free_buf;
		}
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		extent_op->update_key = skinny_metadata ? false : true;
		extent_op->update_flags = true;
		extent_op->is_data = false;
		extent_op->level = level;

		btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
				       ins.objectid, ins.offset, parent);
		generic_ref.real_root = root->root_key.objectid;
		btrfs_init_tree_ref(&generic_ref, level, root_objectid);
		btrfs_ref_tree_mod(fs_info, &generic_ref);
		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
						 extent_op, NULL, NULL);
		if (ret)
			goto out_free_delayed;
	}
	return buf;

out_free_delayed:
	btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
	free_extent_buffer(buf);
out_free_reserved:
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
out_unuse:
	btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
	return ERR_PTR(ret);
}

struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];
	u64 flags[BTRFS_MAX_LEVEL];
	struct btrfs_key update_progress;
	struct btrfs_key drop_progress;
	int drop_level;
	int stage;
	int level;
	int shared_level;
	int update_ref;
	int keep_locks;
	int reada_slot;
	int reada_count;
	int restarted;
};

#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2

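/*
 * The snapshot-drop walk below runs in two stages.  In DROP_REFERENCE we walk
 * down the tree dropping one reference from every block that is only
 * referenced by this root.  When a shared block is found whose subtree still
 * needs its back references converted to full backrefs, the walk switches to
 * UPDATE_BACKREF for that subtree and then falls back to DROP_REFERENCE.
 */

/*
 * Read ahead the tree blocks that do_walk_down() is about to visit, adapting
 * wc->reada_count to how much of the previous batch was actually consumed.
 */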
static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct walk_control *wc,
				     struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 refs;
	u64 flags;
	u32 nritems;
	struct btrfs_key key;
	struct extent_buffer *eb;
	int ret;
	int slot;
	int nread = 0;

	if (path->slots[wc->level] < wc->reada_slot) {
		wc->reada_count = wc->reada_count * 2 / 3;
		wc->reada_count = max(wc->reada_count, 2);
	} else {
		wc->reada_count = wc->reada_count * 3 / 2;
		wc->reada_count = min_t(int, wc->reada_count,
					BTRFS_NODEPTRS_PER_BLOCK(fs_info));
	}

	eb = path->nodes[wc->level];
	nritems = btrfs_header_nritems(eb);

	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
		if (nread >= wc->reada_count)
			break;

		cond_resched();
		bytenr = btrfs_node_blockptr(eb, slot);
		generation = btrfs_node_ptr_generation(eb, slot);

		if (slot == path->slots[wc->level])
			goto reada;

		if (wc->stage == UPDATE_BACKREF &&
		    generation <= root->root_key.offset)
			continue;

		/* We don't lock the tree block, it's OK to be racy here */
		ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
					       wc->level - 1, 1, &refs,
					       &flags);
		/* We don't care about errors in readahead. */
		if (ret < 0)
			continue;
		BUG_ON(refs == 0);

		if (wc->stage == DROP_REFERENCE) {
			if (refs == 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		readahead_tree_block(fs_info, bytenr);
		nread++;
	}
	wc->reada_slot = slot;
}

/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, fs_info,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret);
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret);
		ret = btrfs_set_disk_extent_flags(trans, eb->start,
						  eb->len, flag,
						  btrfs_header_level(eb), 0);
		BUG_ON(ret);
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}

/*
 * Check whether a backref (keyed or shared, depending on @parent) still
 * exists in the extent tree for the given tree block in this root.
 * Returns 0 if no ref was found, 1 if it exists and < 0 on error.
 */
static int check_ref_exists(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr, u64 parent,
			    int level)
{
	struct btrfs_path *path;
	struct btrfs_extent_inline_ref *iref;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = lookup_extent_backref(trans, path, &iref, bytenr,
				    root->fs_info->nodesize, parent,
				    root->root_key.objectid, level, 0);
	btrfs_free_path(path);
	if (ret == -ENOENT)
		return 0;
	if (ret < 0)
		return ret;
	return 1;
}

/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, this function drops reference
 * for the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 parent;
	struct btrfs_key key;
	struct btrfs_key first_key;
	struct btrfs_ref ref = { 0 };
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);

	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	btrfs_node_key_to_cpu(path->nodes[level], &first_key,
			      path->slots[level]);

	next = find_extent_buffer(fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(fs_info, bytenr);
		if (IS_ERR(next))
			return PTR_ERR(next);

		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
					       level - 1);
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking_write(next);

	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0)
		goto out_unlock;

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(fs_info, "Missing references.");
		ret = -EIO;
		goto out_unlock;
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(fs_info, bytenr, generation, level - 1,
				       &first_key);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking_write(next);
	}

	level--;
	ASSERT(level == btrfs_header_level(next));
	if (level != btrfs_header_level(next)) {
		btrfs_err(root->fs_info, "mismatched level");
		ret = -EIO;
		goto out_unlock;
	}
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			ASSERT(root->root_key.objectid ==
			       btrfs_header_owner(path->nodes[level]));
			if (root->root_key.objectid !=
			    btrfs_header_owner(path->nodes[level])) {
				btrfs_err(root->fs_info,
					  "mismatched block owner");
				ret = -EIO;
				goto out_unlock;
			}
			parent = 0;
		}

		/*
		 * If we had a drop_progress we need to verify the refs are set
		 * as expected. If we find our ref then we know that from here
		 * on out everything should be correct, and we can clear the
		 * ->restarted flag.
		 */
		if (wc->restarted) {
			ret = check_ref_exists(trans, root, bytenr, parent,
					       level - 1);
			if (ret < 0)
				goto out_unlock;
			if (ret == 0)
				goto no_delete;
			ret = 0;
			wc->restarted = 0;
		}

		/*
		 * Reloc tree doesn't contribute to qgroup numbers, and we have
		 * already accounted them at merge time (replace_path),
		 * thus we could skip expensive subtree trace here.
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    need_account) {
			ret = btrfs_qgroup_trace_subtree(trans, next,
							 generation, level - 1);
			if (ret) {
				btrfs_err_rl(fs_info,
					     "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
					     ret);
			}
		}

		/*
		 * We need to update the next key in our walk control so we can
		 * update the drop_progress key accordingly.  We don't care if
		 * find_next_key doesn't find a key because that means we're at
		 * the end and are going to clean up now.
		 */
		wc->drop_level = level;
		find_next_key(path, level, &wc->drop_progress);

		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
				       fs_info->nodesize, parent);
		btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
		ret = btrfs_free_extent(trans, &ref);
		if (ret)
			goto out_unlock;
	}
no_delete:
	*lookup_info = 1;
	ret = 1;

out_unlock:
	btrfs_tree_unlock(next);
	free_extent_buffer(next);

	return ret;
}

/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking_write(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, fs_info,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret);
			if (is_fstree(root->root_key.objectid)) {
				ret = btrfs_qgroup_trace_leaf_items(trans, eb);
				if (ret) {
					btrfs_err_rl(fs_info,
	"error %d accounting leaf items, quota is out of sync, rescan required",
						     ret);
				}
			}
		}
		/* make block locked assertion in btrfs_clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking_write(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		}
		btrfs_clean_tree_block(eb);
	}

	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else if (root->root_key.objectid != btrfs_header_owner(eb))
			goto owner_mismatch;
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else if (root->root_key.objectid !=
			 btrfs_header_owner(path->nodes[level + 1]))
			goto owner_mismatch;
	}

	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;

owner_mismatch:
	btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
		     btrfs_header_owner(eb), root->root_key.objectid);
	return -EUCLEAN;
}

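/*
 * Walk from wc->level toward the leaves, processing each block with
 * walk_down_proc() and each child pointer with do_walk_down().  Returns 0
 * once the walk should continue upwards again (shared block, leaf or end of
 * node reached) and < 0 on error.
 */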
static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc)
{
	int level = wc->level;
	int lookup_info = 1;
	int ret;

	while (level >= 0) {
		ret = walk_down_proc(trans, root, path, wc, lookup_info);
		if (ret > 0)
			break;

		if (level == 0)
			break;

		if (path->slots[level] >=
		    btrfs_header_nritems(path->nodes[level]))
			break;

		ret = do_walk_down(trans, root, path, wc, &lookup_info);
		if (ret > 0) {
			path->slots[level]++;
			continue;
		} else if (ret < 0)
			return ret;
		level = wc->level;
	}
	return 0;
}

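/*
 * Walk back up the tree, advancing to the next slot at each level and
 * finishing fully-processed blocks via walk_up_proc().  Returns 0 when a new
 * slot to walk down was found, 1 when everything up to @max_level has been
 * processed, and < 0 on error.
 */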
static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int max_level)
{
	int level = wc->level;
	int ret;

	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
	while (level < max_level && path->nodes[level]) {
		wc->level = level;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			path->slots[level]++;
			return 0;
		} else {
			ret = walk_up_proc(trans, root, path, wc);
			if (ret > 0)
				return 0;
			if (ret < 0)
				return ret;

			if (path->locks[level]) {
				btrfs_tree_unlock_rw(path->nodes[level],
						     path->locks[level]);
				path->locks[level] = 0;
			}
			free_extent_buffer(path->nodes[level]);
			path->nodes[level] = NULL;
			level++;
		}
	}
	return 1;
}

/*
 * drop a subvolume tree.
 *
 * this function traverses the tree, freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one; if update_ref is true, it also makes sure
 * backrefs for the shared block and all lower level blocks are
 * properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN.
 */
int btrfs_drop_snapshot(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, int update_ref,
			int for_reloc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;
	bool root_dropped = false;

	btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		err = -ENOMEM;
		goto out;
	}

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	err = btrfs_run_delayed_items(trans);
	if (err)
		goto out_end_trans;

	if (block_rsv)
		trans->block_rsv = block_rsv;

	/*
	 * This will help us catch people modifying the fs tree while we're
	 * dropping it.  It is unsafe to mess with the fs tree while it's being
	 * dropped as we unlock the root node and parent nodes as we walk down
	 * the tree, assuming nothing will change.  If something does change
	 * then we'll have stale information and drop references to blocks
	 * we've already dropped.
	 */
	set_bit(BTRFS_ROOT_DELETING, &root->state);
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking_write(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out_end_trans;
		}
		WARN_ON(ret > 0);

		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		btrfs_unlock_up_safe(path, 0);

		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			btrfs_set_lock_blocking_write(path->nodes[level]);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, fs_info,
						path->nodes[level]->start,
						level, 1, &wc->refs[level],
						&wc->flags[level]);
			if (ret < 0) {
				err = ret;
				goto out_end_trans;
			}
			BUG_ON(wc->refs[level] == 0);

			if (level == root_item->drop_level)
				break;

			btrfs_tree_unlock(path->nodes[level]);
			path->locks[level] = 0;
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);

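	/*
	 * Main loop: walk down to the leaves dropping references, then walk
	 * back up.  drop_progress is recorded in the root item as we go, so
	 * the drop can be resumed after a remount or a transaction restart.
	 */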
	while (1) {
		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0) {
			err = ret;
			break;
		}

		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret < 0) {
			err = ret;
			break;
		}

		if (ret > 0) {
			BUG_ON(wc->stage != DROP_REFERENCE);
			break;
		}

		if (wc->stage == DROP_REFERENCE) {
			wc->drop_level = wc->level;
			btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
					      &wc->drop_progress,
					      path->slots[wc->drop_level]);
		}
		btrfs_cpu_key_to_disk(&root_item->drop_progress,
				      &wc->drop_progress);
		root_item->drop_level = wc->drop_level;

		BUG_ON(wc->level == 0);
		if (btrfs_should_end_transaction(trans) ||
		    (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
			ret = btrfs_update_root(trans, tree_root,
						&root->root_key,
						root_item);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				err = ret;
				goto out_end_trans;
			}

			btrfs_end_transaction_throttle(trans);
			if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
				btrfs_debug(fs_info,
					    "drop snapshot early exit");
				err = -EAGAIN;
				goto out_free;
			}

			trans = btrfs_start_transaction(tree_root, 0);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				goto out_free;
			}
			if (block_rsv)
				trans->block_rsv = block_rsv;
		}
	}
	btrfs_release_path(path);
	if (err)
		goto out_end_trans;

	ret = btrfs_del_root(trans, &root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		err = ret;
		goto out_end_trans;
	}

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		ret = btrfs_find_root(tree_root, &root->root_key, path,
				      NULL, NULL);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		} else if (ret > 0) {
			/* if we fail to delete the orphan item this time
			 * around, it'll get picked up the next time.
			 *
			 * The most common failure here is just -ENOENT.
			 */
			btrfs_del_orphan_item(trans, tree_root,
					      root->root_key.objectid);
		}
	}

	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
		btrfs_add_dropped_root(trans, root);
	} else {
		free_extent_buffer(root->node);
		free_extent_buffer(root->commit_root);
		btrfs_put_fs_root(root);
	}
	root_dropped = true;
out_end_trans:
	btrfs_end_transaction_throttle(trans);
out_free:
	kfree(wc);
	btrfs_free_path(path);
out:
	/*
	 * So if we need to stop dropping the snapshot for whatever reason we
	 * need to make sure to add it back to the dead root list so that we
	 * keep trying to do the work later.  This also cleans up roots if we
	 * don't have it in the radix (like when we recover after a power fail
	 * or unmount) so we don't leak memory.
	 */
	if (!for_reloc && !root_dropped)
		btrfs_add_dead_root(root);
	if (err && err != -EAGAIN)
		btrfs_handle_fs_error(fs_info, err, NULL);
	return err;
}

/*
 * drop subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'.
 * Only used by relocation code.
 */
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct extent_buffer *node,
			struct extent_buffer *parent)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct walk_control *wc;
	int level;
	int parent_level;
	int ret = 0;
	int wret;

	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	btrfs_assert_tree_locked(parent);
	parent_level = btrfs_header_level(parent);
	atomic_inc(&parent->refs);
	path->nodes[parent_level] = parent;
	path->slots[parent_level] = btrfs_header_nritems(parent);

	btrfs_assert_tree_locked(node);
	level = btrfs_header_level(node);
	path->nodes[level] = node;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

	wc->refs[parent_level] = 1;
	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = 0;
	wc->keep_locks = 1;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);

	while (1) {
		wret = walk_down_tree(trans, root, path, wc);
		if (wret < 0) {
			ret = wret;
			break;
		}

		wret = walk_up_tree(trans, root, path, wc, parent_level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}

	kfree(wc);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to account the unused space of all the readonly block groups in the
 * space_info.  takes mirrors into account.
 */
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
	struct btrfs_block_group *block_group;
	u64 free_bytes = 0;
	int factor;

	/* It's df, we don't care if it's racy */
	if (list_empty(&sinfo->ro_bgs))
		return 0;

	spin_lock(&sinfo->lock);
	list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
		spin_lock(&block_group->lock);

		if (!block_group->ro) {
			spin_unlock(&block_group->lock);
			continue;
		}

		factor = btrfs_bg_type_to_factor(block_group->flags);
		free_bytes += (block_group->length -
			       block_group->used) * factor;

		spin_unlock(&block_group->lock);
	}
	spin_unlock(&sinfo->lock);

	return free_bytes;
}

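/*
 * Error/cleanup-path wrapper: unpin the given extent range without adding the
 * space back to the free space cache (the last argument to
 * unpin_extent_range() is false).
 */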
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				   u64 start, u64 end)
{
	return unpin_extent_range(fs_info, start, end, false);
}

/*
 * It used to be that old block groups would be left around forever.
 * Iterating over them would be enough to trim unused space.  Since we
 * now automatically remove them, we also need to iterate over unallocated
 * space.
 *
 * We don't want a transaction for this since the discard may take a
 * substantial amount of time.  We don't require that a transaction be
 * running, but we do need to take a running transaction into account
 * to ensure that we're not discarding chunks that were released or
 * allocated in the current transaction.
 *
 * Holding the chunk_mutex across each iteration prevents other threads from
 * allocating a chunk in the range we're about to discard, and prevents a
 * completed chunk allocation from being missed.  The CHUNK_TRIMMED and
 * CHUNK_ALLOCATED bits in the device's alloc_state tree tell us which ranges
 * still need to be discarded.
 */
static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
{
	u64 start = SZ_1M, len = 0, end = 0;
	int ret;

	*trimmed = 0;

	/* Discard not supported = nothing to do. */
	if (!blk_queue_discard(bdev_get_queue(device->bdev)))
		return 0;

	/* Not writable = nothing to do. */
	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		return 0;

	/* No free space = nothing to do. */
	if (device->total_bytes <= device->bytes_used)
		return 0;

	ret = 0;

	while (1) {
		struct btrfs_fs_info *fs_info = device->fs_info;
		u64 bytes;

		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
		if (ret)
			break;

		find_first_clear_extent_bit(&device->alloc_state, start,
					    &start, &end,
					    CHUNK_TRIMMED | CHUNK_ALLOCATED);

		/* Ensure we skip the reserved area in the first 1M */
		start = max_t(u64, start, SZ_1M);

		/*
		 * If find_first_clear_extent_bit() finds a range that spans
		 * the end of the device it will set end to -1; in that case
		 * it's up to us to trim it to the size of the device.
		 */
		end = min(end, device->total_bytes - 1);

		len = end - start + 1;

		/* We didn't find any extents */
		if (!len) {
			mutex_unlock(&fs_info->chunk_mutex);
			ret = 0;
			break;
		}

		ret = btrfs_issue_discard(device->bdev, start, len,
					  &bytes);
		if (!ret)
			set_extent_bits(&device->alloc_state, start,
					start + bytes - 1,
					CHUNK_TRIMMED);
		mutex_unlock(&fs_info->chunk_mutex);

		if (ret)
			break;

		start += len;
		*trimmed += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}

/*
 * Trim the whole filesystem by:
 * 1) trimming the free space in each block group
 * 2) trimming the unallocated space on each device
 *
 * This will also continue trimming even if a block group or device encounters
 * an error.  The return value will be the last error, or 0 if nothing bad
 * happens.
 */
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
{
	struct btrfs_block_group *cache = NULL;
	struct btrfs_device *device;
	struct list_head *devices;
	u64 group_trimmed;
	u64 range_end = U64_MAX;
	u64 start;
	u64 end;
	u64 trimmed = 0;
	u64 bg_failed = 0;
	u64 dev_failed = 0;
	int bg_ret = 0;
	int dev_ret = 0;
	int ret = 0;

	/*
	 * Check range overflow if range->len is set.
	 * The default range->len is U64_MAX.
	 */
	if (range->len != U64_MAX &&
	    check_add_overflow(range->start, range->len, &range_end))
		return -EINVAL;

	cache = btrfs_lookup_first_block_group(fs_info, range->start);
	for (; cache; cache = btrfs_next_block_group(cache)) {
		if (cache->start >= range_end) {
			btrfs_put_block_group(cache);
			break;
		}

		start = max(range->start, cache->start);
		end = min(range_end, cache->start + cache->length);

		if (end - start >= range->minlen) {
			if (!btrfs_block_group_done(cache)) {
				ret = btrfs_cache_block_group(cache, 0);
				if (ret) {
					bg_failed++;
					bg_ret = ret;
					continue;
				}
				ret = btrfs_wait_block_group_cache_done(cache);
				if (ret) {
					bg_failed++;
					bg_ret = ret;
					continue;
				}
			}
			ret = btrfs_trim_block_group(cache,
						     &group_trimmed,
						     start,
						     end,
						     range->minlen);

			trimmed += group_trimmed;
			if (ret) {
				bg_failed++;
				bg_ret = ret;
				continue;
			}
		}
	}

	if (bg_failed)
		btrfs_warn(fs_info,
			"failed to trim %llu block group(s), last error %d",
			bg_failed, bg_ret);
	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	devices = &fs_info->fs_devices->devices;
	list_for_each_entry(device, devices, dev_list) {
		ret = btrfs_trim_free_extents(device, &group_trimmed);
		if (ret) {
			dev_failed++;
			dev_ret = ret;
			break;
		}

		trimmed += group_trimmed;
	}
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	if (dev_failed)
		btrfs_warn(fs_info,
			"failed to trim %llu device(s), last error %d",
			dev_failed, dev_ret);
	range->len = trimmed;
	if (bg_ret)
		return bg_ret;
	return dev_ret;
}

/*
 * btrfs_{start,end}_write_no_snapshotting() are similar to
 * mnt_{want,drop}_write(); they are used to prevent some tasks from writing
 * data into the page cache through nocow before the subvolume is snapshotted,
 * but flush the data onto disk after the snapshot creation, or to prevent
 * nocow writes while the subvolume is being snapshotted.
 */
void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
{
	percpu_counter_dec(&root->subv_writers->counter);
	cond_wake_up(&root->subv_writers->wait);
}

int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
{
	if (atomic_read(&root->will_be_snapshotted))
		return 0;

	percpu_counter_inc(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we check for snapshot creation.
	 */
	smp_mb();
	if (atomic_read(&root->will_be_snapshotted)) {
		btrfs_end_write_no_snapshotting(root);
		return 0;
	}
	return 1;
}

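/*
 * Block until it is safe to enter the no-snapshotting section, sleeping
 * whenever a snapshot of this root is pending.
 */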
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
{
	while (true) {
		int ret;

		ret = btrfs_start_write_no_snapshotting(root);
		if (ret)
			break;
		wait_var_event(&root->will_be_snapshotted,
			       !atomic_read(&root->will_be_snapshotted));
	}
}