1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35#include "discard.h"
36#include "rcu-string.h"
37
38#undef SCRAMBLE_DELAYED_REFS
39
40
41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
42 struct btrfs_delayed_ref_node *node, u64 parent,
43 u64 root_objectid, u64 owner_objectid,
44 u64 owner_offset, int refs_to_drop,
45 struct btrfs_delayed_extent_op *extra_op);
46static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
47 struct extent_buffer *leaf,
48 struct btrfs_extent_item *ei);
49static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
50 u64 parent, u64 root_objectid,
51 u64 flags, u64 owner, u64 offset,
52 struct btrfs_key *ins, int ref_mod);
53static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
54 struct btrfs_delayed_ref_node *node,
55 struct btrfs_delayed_extent_op *extent_op);
56static int find_next_key(struct btrfs_path *path, int level,
57 struct btrfs_key *key);
58
59static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
60{
61 return (cache->flags & bits) == bits;
62}
63
64int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
65 u64 start, u64 num_bytes)
66{
67 u64 end = start + num_bytes - 1;
68 set_extent_bits(&fs_info->excluded_extents, start, end,
69 EXTENT_UPTODATE);
70 return 0;
71}
72
73void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
74{
75 struct btrfs_fs_info *fs_info = cache->fs_info;
76 u64 start, end;
77
78 start = cache->start;
79 end = start + cache->length - 1;
80
81 clear_extent_bits(&fs_info->excluded_extents, start, end,
82 EXTENT_UPTODATE);
83}
84
85static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
86{
87 if (ref->type == BTRFS_REF_METADATA) {
88 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
89 return BTRFS_BLOCK_GROUP_SYSTEM;
90 else
91 return BTRFS_BLOCK_GROUP_METADATA;
92 }
93 return BTRFS_BLOCK_GROUP_DATA;
94}
95
96static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
97 struct btrfs_ref *ref)
98{
99 struct btrfs_space_info *space_info;
100 u64 flags = generic_ref_to_space_flags(ref);
101
102 space_info = btrfs_find_space_info(fs_info, flags);
103 ASSERT(space_info);
104 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
105 BTRFS_TOTAL_BYTES_PINNED_BATCH);
106}
107
108static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
109 struct btrfs_ref *ref)
110{
111 struct btrfs_space_info *space_info;
112 u64 flags = generic_ref_to_space_flags(ref);
113
114 space_info = btrfs_find_space_info(fs_info, flags);
115 ASSERT(space_info);
116 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
117 BTRFS_TOTAL_BYTES_PINNED_BATCH);
118}
119
120
121int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
122{
123 int ret;
124 struct btrfs_key key;
125 struct btrfs_path *path;
126
127 path = btrfs_alloc_path();
128 if (!path)
129 return -ENOMEM;
130
131 key.objectid = start;
132 key.offset = len;
133 key.type = BTRFS_EXTENT_ITEM_KEY;
134 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
135 btrfs_free_path(path);
136 return ret;
137}
138
139
140
141
142
143
144
145
146
147
148int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
149 struct btrfs_fs_info *fs_info, u64 bytenr,
150 u64 offset, int metadata, u64 *refs, u64 *flags)
151{
152 struct btrfs_delayed_ref_head *head;
153 struct btrfs_delayed_ref_root *delayed_refs;
154 struct btrfs_path *path;
155 struct btrfs_extent_item *ei;
156 struct extent_buffer *leaf;
157 struct btrfs_key key;
158 u32 item_size;
159 u64 num_refs;
160 u64 extent_flags;
161 int ret;
162
163
164
165
166
167 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
168 offset = fs_info->nodesize;
169 metadata = 0;
170 }
171
172 path = btrfs_alloc_path();
173 if (!path)
174 return -ENOMEM;
175
176 if (!trans) {
177 path->skip_locking = 1;
178 path->search_commit_root = 1;
179 }
180
181search_again:
182 key.objectid = bytenr;
183 key.offset = offset;
184 if (metadata)
185 key.type = BTRFS_METADATA_ITEM_KEY;
186 else
187 key.type = BTRFS_EXTENT_ITEM_KEY;
188
189 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
190 if (ret < 0)
191 goto out_free;
192
193 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
194 if (path->slots[0]) {
195 path->slots[0]--;
196 btrfs_item_key_to_cpu(path->nodes[0], &key,
197 path->slots[0]);
198 if (key.objectid == bytenr &&
199 key.type == BTRFS_EXTENT_ITEM_KEY &&
200 key.offset == fs_info->nodesize)
201 ret = 0;
202 }
203 }
204
205 if (ret == 0) {
206 leaf = path->nodes[0];
207 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
208 if (item_size >= sizeof(*ei)) {
209 ei = btrfs_item_ptr(leaf, path->slots[0],
210 struct btrfs_extent_item);
211 num_refs = btrfs_extent_refs(leaf, ei);
212 extent_flags = btrfs_extent_flags(leaf, ei);
213 } else {
214 ret = -EINVAL;
215 btrfs_print_v0_err(fs_info);
216 if (trans)
217 btrfs_abort_transaction(trans, ret);
218 else
219 btrfs_handle_fs_error(fs_info, ret, NULL);
220
221 goto out_free;
222 }
223
224 BUG_ON(num_refs == 0);
225 } else {
226 num_refs = 0;
227 extent_flags = 0;
228 ret = 0;
229 }
230
231 if (!trans)
232 goto out;
233
234 delayed_refs = &trans->transaction->delayed_refs;
235 spin_lock(&delayed_refs->lock);
236 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
237 if (head) {
238 if (!mutex_trylock(&head->mutex)) {
239 refcount_inc(&head->refs);
240 spin_unlock(&delayed_refs->lock);
241
242 btrfs_release_path(path);
243
244
245
246
247
248 mutex_lock(&head->mutex);
249 mutex_unlock(&head->mutex);
250 btrfs_put_delayed_ref_head(head);
251 goto search_again;
252 }
253 spin_lock(&head->lock);
254 if (head->extent_op && head->extent_op->update_flags)
255 extent_flags |= head->extent_op->flags_to_set;
256 else
257 BUG_ON(num_refs == 0);
258
259 num_refs += head->ref_mod;
260 spin_unlock(&head->lock);
261 mutex_unlock(&head->mutex);
262 }
263 spin_unlock(&delayed_refs->lock);
264out:
265 WARN_ON(num_refs == 0);
266 if (refs)
267 *refs = num_refs;
268 if (flags)
269 *flags = extent_flags;
270out_free:
271 btrfs_free_path(path);
272 return ret;
273}
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
387 struct btrfs_extent_inline_ref *iref,
388 enum btrfs_inline_ref_type is_data)
389{
390 int type = btrfs_extent_inline_ref_type(eb, iref);
391 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
392
393 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
394 type == BTRFS_SHARED_BLOCK_REF_KEY ||
395 type == BTRFS_SHARED_DATA_REF_KEY ||
396 type == BTRFS_EXTENT_DATA_REF_KEY) {
397 if (is_data == BTRFS_REF_TYPE_BLOCK) {
398 if (type == BTRFS_TREE_BLOCK_REF_KEY)
399 return type;
400 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
401 ASSERT(eb->fs_info);
402
403
404
405
406 if (offset &&
407 IS_ALIGNED(offset, eb->fs_info->sectorsize))
408 return type;
409 }
410 } else if (is_data == BTRFS_REF_TYPE_DATA) {
411 if (type == BTRFS_EXTENT_DATA_REF_KEY)
412 return type;
413 if (type == BTRFS_SHARED_DATA_REF_KEY) {
414 ASSERT(eb->fs_info);
415
416
417
418
419 if (offset &&
420 IS_ALIGNED(offset, eb->fs_info->sectorsize))
421 return type;
422 }
423 } else {
424 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
425 return type;
426 }
427 }
428
429 btrfs_print_leaf((struct extent_buffer *)eb);
430 btrfs_err(eb->fs_info,
431 "eb %llu iref 0x%lx invalid extent inline ref type %d",
432 eb->start, (unsigned long)iref, type);
433 WARN_ON(1);
434
435 return BTRFS_REF_TYPE_INVALID;
436}
437
438u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
439{
440 u32 high_crc = ~(u32)0;
441 u32 low_crc = ~(u32)0;
442 __le64 lenum;
443
444 lenum = cpu_to_le64(root_objectid);
445 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
446 lenum = cpu_to_le64(owner);
447 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
448 lenum = cpu_to_le64(offset);
449 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
450
451 return ((u64)high_crc << 31) ^ (u64)low_crc;
452}
453
454static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
455 struct btrfs_extent_data_ref *ref)
456{
457 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
458 btrfs_extent_data_ref_objectid(leaf, ref),
459 btrfs_extent_data_ref_offset(leaf, ref));
460}
461
462static int match_extent_data_ref(struct extent_buffer *leaf,
463 struct btrfs_extent_data_ref *ref,
464 u64 root_objectid, u64 owner, u64 offset)
465{
466 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
467 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
468 btrfs_extent_data_ref_offset(leaf, ref) != offset)
469 return 0;
470 return 1;
471}
472
473static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
474 struct btrfs_path *path,
475 u64 bytenr, u64 parent,
476 u64 root_objectid,
477 u64 owner, u64 offset)
478{
479 struct btrfs_root *root = trans->fs_info->extent_root;
480 struct btrfs_key key;
481 struct btrfs_extent_data_ref *ref;
482 struct extent_buffer *leaf;
483 u32 nritems;
484 int ret;
485 int recow;
486 int err = -ENOENT;
487
488 key.objectid = bytenr;
489 if (parent) {
490 key.type = BTRFS_SHARED_DATA_REF_KEY;
491 key.offset = parent;
492 } else {
493 key.type = BTRFS_EXTENT_DATA_REF_KEY;
494 key.offset = hash_extent_data_ref(root_objectid,
495 owner, offset);
496 }
497again:
498 recow = 0;
499 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
500 if (ret < 0) {
501 err = ret;
502 goto fail;
503 }
504
505 if (parent) {
506 if (!ret)
507 return 0;
508 goto fail;
509 }
510
511 leaf = path->nodes[0];
512 nritems = btrfs_header_nritems(leaf);
513 while (1) {
514 if (path->slots[0] >= nritems) {
515 ret = btrfs_next_leaf(root, path);
516 if (ret < 0)
517 err = ret;
518 if (ret)
519 goto fail;
520
521 leaf = path->nodes[0];
522 nritems = btrfs_header_nritems(leaf);
523 recow = 1;
524 }
525
526 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
527 if (key.objectid != bytenr ||
528 key.type != BTRFS_EXTENT_DATA_REF_KEY)
529 goto fail;
530
531 ref = btrfs_item_ptr(leaf, path->slots[0],
532 struct btrfs_extent_data_ref);
533
534 if (match_extent_data_ref(leaf, ref, root_objectid,
535 owner, offset)) {
536 if (recow) {
537 btrfs_release_path(path);
538 goto again;
539 }
540 err = 0;
541 break;
542 }
543 path->slots[0]++;
544 }
545fail:
546 return err;
547}
548
549static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
550 struct btrfs_path *path,
551 u64 bytenr, u64 parent,
552 u64 root_objectid, u64 owner,
553 u64 offset, int refs_to_add)
554{
555 struct btrfs_root *root = trans->fs_info->extent_root;
556 struct btrfs_key key;
557 struct extent_buffer *leaf;
558 u32 size;
559 u32 num_refs;
560 int ret;
561
562 key.objectid = bytenr;
563 if (parent) {
564 key.type = BTRFS_SHARED_DATA_REF_KEY;
565 key.offset = parent;
566 size = sizeof(struct btrfs_shared_data_ref);
567 } else {
568 key.type = BTRFS_EXTENT_DATA_REF_KEY;
569 key.offset = hash_extent_data_ref(root_objectid,
570 owner, offset);
571 size = sizeof(struct btrfs_extent_data_ref);
572 }
573
574 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
575 if (ret && ret != -EEXIST)
576 goto fail;
577
578 leaf = path->nodes[0];
579 if (parent) {
580 struct btrfs_shared_data_ref *ref;
581 ref = btrfs_item_ptr(leaf, path->slots[0],
582 struct btrfs_shared_data_ref);
583 if (ret == 0) {
584 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
585 } else {
586 num_refs = btrfs_shared_data_ref_count(leaf, ref);
587 num_refs += refs_to_add;
588 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
589 }
590 } else {
591 struct btrfs_extent_data_ref *ref;
592 while (ret == -EEXIST) {
593 ref = btrfs_item_ptr(leaf, path->slots[0],
594 struct btrfs_extent_data_ref);
595 if (match_extent_data_ref(leaf, ref, root_objectid,
596 owner, offset))
597 break;
598 btrfs_release_path(path);
599 key.offset++;
600 ret = btrfs_insert_empty_item(trans, root, path, &key,
601 size);
602 if (ret && ret != -EEXIST)
603 goto fail;
604
605 leaf = path->nodes[0];
606 }
607 ref = btrfs_item_ptr(leaf, path->slots[0],
608 struct btrfs_extent_data_ref);
609 if (ret == 0) {
610 btrfs_set_extent_data_ref_root(leaf, ref,
611 root_objectid);
612 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
613 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
614 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
615 } else {
616 num_refs = btrfs_extent_data_ref_count(leaf, ref);
617 num_refs += refs_to_add;
618 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
619 }
620 }
621 btrfs_mark_buffer_dirty(leaf);
622 ret = 0;
623fail:
624 btrfs_release_path(path);
625 return ret;
626}
627
628static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
629 struct btrfs_path *path,
630 int refs_to_drop, int *last_ref)
631{
632 struct btrfs_key key;
633 struct btrfs_extent_data_ref *ref1 = NULL;
634 struct btrfs_shared_data_ref *ref2 = NULL;
635 struct extent_buffer *leaf;
636 u32 num_refs = 0;
637 int ret = 0;
638
639 leaf = path->nodes[0];
640 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
641
642 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
643 ref1 = btrfs_item_ptr(leaf, path->slots[0],
644 struct btrfs_extent_data_ref);
645 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
646 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
647 ref2 = btrfs_item_ptr(leaf, path->slots[0],
648 struct btrfs_shared_data_ref);
649 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
650 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
651 btrfs_print_v0_err(trans->fs_info);
652 btrfs_abort_transaction(trans, -EINVAL);
653 return -EINVAL;
654 } else {
655 BUG();
656 }
657
658 BUG_ON(num_refs < refs_to_drop);
659 num_refs -= refs_to_drop;
660
661 if (num_refs == 0) {
662 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
663 *last_ref = 1;
664 } else {
665 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
666 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
667 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
668 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
669 btrfs_mark_buffer_dirty(leaf);
670 }
671 return ret;
672}
673
674static noinline u32 extent_data_ref_count(struct btrfs_path *path,
675 struct btrfs_extent_inline_ref *iref)
676{
677 struct btrfs_key key;
678 struct extent_buffer *leaf;
679 struct btrfs_extent_data_ref *ref1;
680 struct btrfs_shared_data_ref *ref2;
681 u32 num_refs = 0;
682 int type;
683
684 leaf = path->nodes[0];
685 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
686
687 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
688 if (iref) {
689
690
691
692
693 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
694 ASSERT(type != BTRFS_REF_TYPE_INVALID);
695 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
696 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
697 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
698 } else {
699 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
700 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
701 }
702 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
703 ref1 = btrfs_item_ptr(leaf, path->slots[0],
704 struct btrfs_extent_data_ref);
705 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
706 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
707 ref2 = btrfs_item_ptr(leaf, path->slots[0],
708 struct btrfs_shared_data_ref);
709 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
710 } else {
711 WARN_ON(1);
712 }
713 return num_refs;
714}
715
716static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
717 struct btrfs_path *path,
718 u64 bytenr, u64 parent,
719 u64 root_objectid)
720{
721 struct btrfs_root *root = trans->fs_info->extent_root;
722 struct btrfs_key key;
723 int ret;
724
725 key.objectid = bytenr;
726 if (parent) {
727 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
728 key.offset = parent;
729 } else {
730 key.type = BTRFS_TREE_BLOCK_REF_KEY;
731 key.offset = root_objectid;
732 }
733
734 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
735 if (ret > 0)
736 ret = -ENOENT;
737 return ret;
738}
739
740static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
741 struct btrfs_path *path,
742 u64 bytenr, u64 parent,
743 u64 root_objectid)
744{
745 struct btrfs_key key;
746 int ret;
747
748 key.objectid = bytenr;
749 if (parent) {
750 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
751 key.offset = parent;
752 } else {
753 key.type = BTRFS_TREE_BLOCK_REF_KEY;
754 key.offset = root_objectid;
755 }
756
757 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
758 path, &key, 0);
759 btrfs_release_path(path);
760 return ret;
761}
762
763static inline int extent_ref_type(u64 parent, u64 owner)
764{
765 int type;
766 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
767 if (parent > 0)
768 type = BTRFS_SHARED_BLOCK_REF_KEY;
769 else
770 type = BTRFS_TREE_BLOCK_REF_KEY;
771 } else {
772 if (parent > 0)
773 type = BTRFS_SHARED_DATA_REF_KEY;
774 else
775 type = BTRFS_EXTENT_DATA_REF_KEY;
776 }
777 return type;
778}
779
780static int find_next_key(struct btrfs_path *path, int level,
781 struct btrfs_key *key)
782
783{
784 for (; level < BTRFS_MAX_LEVEL; level++) {
785 if (!path->nodes[level])
786 break;
787 if (path->slots[level] + 1 >=
788 btrfs_header_nritems(path->nodes[level]))
789 continue;
790 if (level == 0)
791 btrfs_item_key_to_cpu(path->nodes[level], key,
792 path->slots[level] + 1);
793 else
794 btrfs_node_key_to_cpu(path->nodes[level], key,
795 path->slots[level] + 1);
796 return 0;
797 }
798 return 1;
799}
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814static noinline_for_stack
815int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
816 struct btrfs_path *path,
817 struct btrfs_extent_inline_ref **ref_ret,
818 u64 bytenr, u64 num_bytes,
819 u64 parent, u64 root_objectid,
820 u64 owner, u64 offset, int insert)
821{
822 struct btrfs_fs_info *fs_info = trans->fs_info;
823 struct btrfs_root *root = fs_info->extent_root;
824 struct btrfs_key key;
825 struct extent_buffer *leaf;
826 struct btrfs_extent_item *ei;
827 struct btrfs_extent_inline_ref *iref;
828 u64 flags;
829 u64 item_size;
830 unsigned long ptr;
831 unsigned long end;
832 int extra_size;
833 int type;
834 int want;
835 int ret;
836 int err = 0;
837 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
838 int needed;
839
840 key.objectid = bytenr;
841 key.type = BTRFS_EXTENT_ITEM_KEY;
842 key.offset = num_bytes;
843
844 want = extent_ref_type(parent, owner);
845 if (insert) {
846 extra_size = btrfs_extent_inline_ref_size(want);
847 path->keep_locks = 1;
848 } else
849 extra_size = -1;
850
851
852
853
854
855 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
856 key.type = BTRFS_METADATA_ITEM_KEY;
857 key.offset = owner;
858 }
859
860again:
861 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
862 if (ret < 0) {
863 err = ret;
864 goto out;
865 }
866
867
868
869
870
871 if (ret > 0 && skinny_metadata) {
872 skinny_metadata = false;
873 if (path->slots[0]) {
874 path->slots[0]--;
875 btrfs_item_key_to_cpu(path->nodes[0], &key,
876 path->slots[0]);
877 if (key.objectid == bytenr &&
878 key.type == BTRFS_EXTENT_ITEM_KEY &&
879 key.offset == num_bytes)
880 ret = 0;
881 }
882 if (ret) {
883 key.objectid = bytenr;
884 key.type = BTRFS_EXTENT_ITEM_KEY;
885 key.offset = num_bytes;
886 btrfs_release_path(path);
887 goto again;
888 }
889 }
890
891 if (ret && !insert) {
892 err = -ENOENT;
893 goto out;
894 } else if (WARN_ON(ret)) {
895 err = -EIO;
896 goto out;
897 }
898
899 leaf = path->nodes[0];
900 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
901 if (unlikely(item_size < sizeof(*ei))) {
902 err = -EINVAL;
903 btrfs_print_v0_err(fs_info);
904 btrfs_abort_transaction(trans, err);
905 goto out;
906 }
907
908 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
909 flags = btrfs_extent_flags(leaf, ei);
910
911 ptr = (unsigned long)(ei + 1);
912 end = (unsigned long)ei + item_size;
913
914 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
915 ptr += sizeof(struct btrfs_tree_block_info);
916 BUG_ON(ptr > end);
917 }
918
919 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
920 needed = BTRFS_REF_TYPE_DATA;
921 else
922 needed = BTRFS_REF_TYPE_BLOCK;
923
924 err = -ENOENT;
925 while (1) {
926 if (ptr >= end) {
927 WARN_ON(ptr > end);
928 break;
929 }
930 iref = (struct btrfs_extent_inline_ref *)ptr;
931 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
932 if (type == BTRFS_REF_TYPE_INVALID) {
933 err = -EUCLEAN;
934 goto out;
935 }
936
937 if (want < type)
938 break;
939 if (want > type) {
940 ptr += btrfs_extent_inline_ref_size(type);
941 continue;
942 }
943
944 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
945 struct btrfs_extent_data_ref *dref;
946 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
947 if (match_extent_data_ref(leaf, dref, root_objectid,
948 owner, offset)) {
949 err = 0;
950 break;
951 }
952 if (hash_extent_data_ref_item(leaf, dref) <
953 hash_extent_data_ref(root_objectid, owner, offset))
954 break;
955 } else {
956 u64 ref_offset;
957 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
958 if (parent > 0) {
959 if (parent == ref_offset) {
960 err = 0;
961 break;
962 }
963 if (ref_offset < parent)
964 break;
965 } else {
966 if (root_objectid == ref_offset) {
967 err = 0;
968 break;
969 }
970 if (ref_offset < root_objectid)
971 break;
972 }
973 }
974 ptr += btrfs_extent_inline_ref_size(type);
975 }
976 if (err == -ENOENT && insert) {
977 if (item_size + extra_size >=
978 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
979 err = -EAGAIN;
980 goto out;
981 }
982
983
984
985
986
987
988 if (find_next_key(path, 0, &key) == 0 &&
989 key.objectid == bytenr &&
990 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
991 err = -EAGAIN;
992 goto out;
993 }
994 }
995 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
996out:
997 if (insert) {
998 path->keep_locks = 0;
999 btrfs_unlock_up_safe(path, 1);
1000 }
1001 return err;
1002}
1003
1004
1005
1006
1007static noinline_for_stack
1008void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1009 struct btrfs_path *path,
1010 struct btrfs_extent_inline_ref *iref,
1011 u64 parent, u64 root_objectid,
1012 u64 owner, u64 offset, int refs_to_add,
1013 struct btrfs_delayed_extent_op *extent_op)
1014{
1015 struct extent_buffer *leaf;
1016 struct btrfs_extent_item *ei;
1017 unsigned long ptr;
1018 unsigned long end;
1019 unsigned long item_offset;
1020 u64 refs;
1021 int size;
1022 int type;
1023
1024 leaf = path->nodes[0];
1025 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1026 item_offset = (unsigned long)iref - (unsigned long)ei;
1027
1028 type = extent_ref_type(parent, owner);
1029 size = btrfs_extent_inline_ref_size(type);
1030
1031 btrfs_extend_item(path, size);
1032
1033 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1034 refs = btrfs_extent_refs(leaf, ei);
1035 refs += refs_to_add;
1036 btrfs_set_extent_refs(leaf, ei, refs);
1037 if (extent_op)
1038 __run_delayed_extent_op(extent_op, leaf, ei);
1039
1040 ptr = (unsigned long)ei + item_offset;
1041 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1042 if (ptr < end - size)
1043 memmove_extent_buffer(leaf, ptr + size, ptr,
1044 end - size - ptr);
1045
1046 iref = (struct btrfs_extent_inline_ref *)ptr;
1047 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1048 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1049 struct btrfs_extent_data_ref *dref;
1050 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1051 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1052 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1053 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1054 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1055 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1056 struct btrfs_shared_data_ref *sref;
1057 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1058 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1059 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1060 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1061 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1062 } else {
1063 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1064 }
1065 btrfs_mark_buffer_dirty(leaf);
1066}
1067
1068static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1069 struct btrfs_path *path,
1070 struct btrfs_extent_inline_ref **ref_ret,
1071 u64 bytenr, u64 num_bytes, u64 parent,
1072 u64 root_objectid, u64 owner, u64 offset)
1073{
1074 int ret;
1075
1076 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1077 num_bytes, parent, root_objectid,
1078 owner, offset, 0);
1079 if (ret != -ENOENT)
1080 return ret;
1081
1082 btrfs_release_path(path);
1083 *ref_ret = NULL;
1084
1085 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1086 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1087 root_objectid);
1088 } else {
1089 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1090 root_objectid, owner, offset);
1091 }
1092 return ret;
1093}
1094
1095
1096
1097
1098static noinline_for_stack
1099void update_inline_extent_backref(struct btrfs_path *path,
1100 struct btrfs_extent_inline_ref *iref,
1101 int refs_to_mod,
1102 struct btrfs_delayed_extent_op *extent_op,
1103 int *last_ref)
1104{
1105 struct extent_buffer *leaf = path->nodes[0];
1106 struct btrfs_extent_item *ei;
1107 struct btrfs_extent_data_ref *dref = NULL;
1108 struct btrfs_shared_data_ref *sref = NULL;
1109 unsigned long ptr;
1110 unsigned long end;
1111 u32 item_size;
1112 int size;
1113 int type;
1114 u64 refs;
1115
1116 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1117 refs = btrfs_extent_refs(leaf, ei);
1118 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1119 refs += refs_to_mod;
1120 btrfs_set_extent_refs(leaf, ei, refs);
1121 if (extent_op)
1122 __run_delayed_extent_op(extent_op, leaf, ei);
1123
1124
1125
1126
1127
1128 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1129 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1130
1131 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1132 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1133 refs = btrfs_extent_data_ref_count(leaf, dref);
1134 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1135 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1136 refs = btrfs_shared_data_ref_count(leaf, sref);
1137 } else {
1138 refs = 1;
1139 BUG_ON(refs_to_mod != -1);
1140 }
1141
1142 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1143 refs += refs_to_mod;
1144
1145 if (refs > 0) {
1146 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1147 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1148 else
1149 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1150 } else {
1151 *last_ref = 1;
1152 size = btrfs_extent_inline_ref_size(type);
1153 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1154 ptr = (unsigned long)iref;
1155 end = (unsigned long)ei + item_size;
1156 if (ptr + size < end)
1157 memmove_extent_buffer(leaf, ptr, ptr + size,
1158 end - ptr - size);
1159 item_size -= size;
1160 btrfs_truncate_item(path, item_size, 1);
1161 }
1162 btrfs_mark_buffer_dirty(leaf);
1163}
1164
1165static noinline_for_stack
1166int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1167 struct btrfs_path *path,
1168 u64 bytenr, u64 num_bytes, u64 parent,
1169 u64 root_objectid, u64 owner,
1170 u64 offset, int refs_to_add,
1171 struct btrfs_delayed_extent_op *extent_op)
1172{
1173 struct btrfs_extent_inline_ref *iref;
1174 int ret;
1175
1176 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1177 num_bytes, parent, root_objectid,
1178 owner, offset, 1);
1179 if (ret == 0) {
1180
1181
1182
1183
1184 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1185 btrfs_crit(trans->fs_info,
1186"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
1187 bytenr, num_bytes, root_objectid);
1188 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
1189 WARN_ON(1);
1190 btrfs_crit(trans->fs_info,
1191 "path->slots[0]=%d path->nodes[0]:", path->slots[0]);
1192 btrfs_print_leaf(path->nodes[0]);
1193 }
1194 return -EUCLEAN;
1195 }
1196 update_inline_extent_backref(path, iref, refs_to_add,
1197 extent_op, NULL);
1198 } else if (ret == -ENOENT) {
1199 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1200 root_objectid, owner, offset,
1201 refs_to_add, extent_op);
1202 ret = 0;
1203 }
1204 return ret;
1205}
1206
1207static int remove_extent_backref(struct btrfs_trans_handle *trans,
1208 struct btrfs_path *path,
1209 struct btrfs_extent_inline_ref *iref,
1210 int refs_to_drop, int is_data, int *last_ref)
1211{
1212 int ret = 0;
1213
1214 BUG_ON(!is_data && refs_to_drop != 1);
1215 if (iref) {
1216 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1217 last_ref);
1218 } else if (is_data) {
1219 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1220 last_ref);
1221 } else {
1222 *last_ref = 1;
1223 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1224 }
1225 return ret;
1226}
1227
1228static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1229 u64 *discarded_bytes)
1230{
1231 int j, ret = 0;
1232 u64 bytes_left, end;
1233 u64 aligned_start = ALIGN(start, 1 << 9);
1234
1235 if (WARN_ON(start != aligned_start)) {
1236 len -= aligned_start - start;
1237 len = round_down(len, 1 << 9);
1238 start = aligned_start;
1239 }
1240
1241 *discarded_bytes = 0;
1242
1243 if (!len)
1244 return 0;
1245
1246 end = start + len;
1247 bytes_left = len;
1248
1249
1250 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1251 u64 sb_start = btrfs_sb_offset(j);
1252 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1253 u64 size = sb_start - start;
1254
1255 if (!in_range(sb_start, start, bytes_left) &&
1256 !in_range(sb_end, start, bytes_left) &&
1257 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1258 continue;
1259
1260
1261
1262
1263
1264 if (sb_start <= start) {
1265 start += sb_end - start;
1266 if (start > end) {
1267 bytes_left = 0;
1268 break;
1269 }
1270 bytes_left = end - start;
1271 continue;
1272 }
1273
1274 if (size) {
1275 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1276 GFP_NOFS, 0);
1277 if (!ret)
1278 *discarded_bytes += size;
1279 else if (ret != -EOPNOTSUPP)
1280 return ret;
1281 }
1282
1283 start = sb_end;
1284 if (start > end) {
1285 bytes_left = 0;
1286 break;
1287 }
1288 bytes_left = end - start;
1289 }
1290
1291 if (bytes_left) {
1292 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1293 GFP_NOFS, 0);
1294 if (!ret)
1295 *discarded_bytes += bytes_left;
1296 }
1297 return ret;
1298}
1299
1300int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1301 u64 num_bytes, u64 *actual_bytes)
1302{
1303 int ret = 0;
1304 u64 discarded_bytes = 0;
1305 u64 end = bytenr + num_bytes;
1306 u64 cur = bytenr;
1307 struct btrfs_bio *bbio = NULL;
1308
1309
1310
1311
1312
1313
1314 btrfs_bio_counter_inc_blocked(fs_info);
1315 while (cur < end) {
1316 struct btrfs_bio_stripe *stripe;
1317 int i;
1318
1319 num_bytes = end - cur;
1320
1321 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
1322 &num_bytes, &bbio, 0);
1323
1324
1325
1326
1327
1328 if (ret < 0)
1329 goto out;
1330
1331 stripe = bbio->stripes;
1332 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1333 u64 bytes;
1334 struct request_queue *req_q;
1335
1336 if (!stripe->dev->bdev) {
1337 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1338 continue;
1339 }
1340 req_q = bdev_get_queue(stripe->dev->bdev);
1341 if (!blk_queue_discard(req_q))
1342 continue;
1343
1344 ret = btrfs_issue_discard(stripe->dev->bdev,
1345 stripe->physical,
1346 stripe->length,
1347 &bytes);
1348 if (!ret) {
1349 discarded_bytes += bytes;
1350 } else if (ret != -EOPNOTSUPP) {
1351
1352
1353
1354
1355
1356
1357
1358 btrfs_put_bbio(bbio);
1359 goto out;
1360 }
1361
1362
1363
1364
1365
1366
1367 ret = 0;
1368 }
1369 btrfs_put_bbio(bbio);
1370 cur += num_bytes;
1371 }
1372out:
1373 btrfs_bio_counter_dec(fs_info);
1374
1375 if (actual_bytes)
1376 *actual_bytes = discarded_bytes;
1377
1378
1379 if (ret == -EOPNOTSUPP)
1380 ret = 0;
1381 return ret;
1382}
1383
1384
1385int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1386 struct btrfs_ref *generic_ref)
1387{
1388 struct btrfs_fs_info *fs_info = trans->fs_info;
1389 int old_ref_mod, new_ref_mod;
1390 int ret;
1391
1392 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1393 generic_ref->action);
1394 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1395 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1396
1397 if (generic_ref->type == BTRFS_REF_METADATA)
1398 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
1399 NULL, &old_ref_mod, &new_ref_mod);
1400 else
1401 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
1402 &old_ref_mod, &new_ref_mod);
1403
1404 btrfs_ref_tree_mod(fs_info, generic_ref);
1405
1406 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
1407 sub_pinned_bytes(fs_info, generic_ref);
1408
1409 return ret;
1410}
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1450 struct btrfs_delayed_ref_node *node,
1451 u64 parent, u64 root_objectid,
1452 u64 owner, u64 offset, int refs_to_add,
1453 struct btrfs_delayed_extent_op *extent_op)
1454{
1455 struct btrfs_path *path;
1456 struct extent_buffer *leaf;
1457 struct btrfs_extent_item *item;
1458 struct btrfs_key key;
1459 u64 bytenr = node->bytenr;
1460 u64 num_bytes = node->num_bytes;
1461 u64 refs;
1462 int ret;
1463
1464 path = btrfs_alloc_path();
1465 if (!path)
1466 return -ENOMEM;
1467
1468 path->leave_spinning = 1;
1469
1470 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1471 parent, root_objectid, owner,
1472 offset, refs_to_add, extent_op);
1473 if ((ret < 0 && ret != -EAGAIN) || !ret)
1474 goto out;
1475
1476
1477
1478
1479
1480
1481 leaf = path->nodes[0];
1482 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1483 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1484 refs = btrfs_extent_refs(leaf, item);
1485 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1486 if (extent_op)
1487 __run_delayed_extent_op(extent_op, leaf, item);
1488
1489 btrfs_mark_buffer_dirty(leaf);
1490 btrfs_release_path(path);
1491
1492 path->leave_spinning = 1;
1493
1494 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1495 BUG_ON(refs_to_add != 1);
1496 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1497 root_objectid);
1498 } else {
1499 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1500 root_objectid, owner, offset,
1501 refs_to_add);
1502 }
1503 if (ret)
1504 btrfs_abort_transaction(trans, ret);
1505out:
1506 btrfs_free_path(path);
1507 return ret;
1508}
1509
1510static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1511 struct btrfs_delayed_ref_node *node,
1512 struct btrfs_delayed_extent_op *extent_op,
1513 int insert_reserved)
1514{
1515 int ret = 0;
1516 struct btrfs_delayed_data_ref *ref;
1517 struct btrfs_key ins;
1518 u64 parent = 0;
1519 u64 ref_root = 0;
1520 u64 flags = 0;
1521
1522 ins.objectid = node->bytenr;
1523 ins.offset = node->num_bytes;
1524 ins.type = BTRFS_EXTENT_ITEM_KEY;
1525
1526 ref = btrfs_delayed_node_to_data_ref(node);
1527 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1528
1529 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1530 parent = ref->parent;
1531 ref_root = ref->root;
1532
1533 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1534 if (extent_op)
1535 flags |= extent_op->flags_to_set;
1536 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1537 flags, ref->objectid,
1538 ref->offset, &ins,
1539 node->ref_mod);
1540 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1541 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1542 ref->objectid, ref->offset,
1543 node->ref_mod, extent_op);
1544 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1545 ret = __btrfs_free_extent(trans, node, parent,
1546 ref_root, ref->objectid,
1547 ref->offset, node->ref_mod,
1548 extent_op);
1549 } else {
1550 BUG();
1551 }
1552 return ret;
1553}
1554
1555static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1556 struct extent_buffer *leaf,
1557 struct btrfs_extent_item *ei)
1558{
1559 u64 flags = btrfs_extent_flags(leaf, ei);
1560 if (extent_op->update_flags) {
1561 flags |= extent_op->flags_to_set;
1562 btrfs_set_extent_flags(leaf, ei, flags);
1563 }
1564
1565 if (extent_op->update_key) {
1566 struct btrfs_tree_block_info *bi;
1567 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1568 bi = (struct btrfs_tree_block_info *)(ei + 1);
1569 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1570 }
1571}
1572
1573static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1574 struct btrfs_delayed_ref_head *head,
1575 struct btrfs_delayed_extent_op *extent_op)
1576{
1577 struct btrfs_fs_info *fs_info = trans->fs_info;
1578 struct btrfs_key key;
1579 struct btrfs_path *path;
1580 struct btrfs_extent_item *ei;
1581 struct extent_buffer *leaf;
1582 u32 item_size;
1583 int ret;
1584 int err = 0;
1585 int metadata = !extent_op->is_data;
1586
1587 if (TRANS_ABORTED(trans))
1588 return 0;
1589
1590 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1591 metadata = 0;
1592
1593 path = btrfs_alloc_path();
1594 if (!path)
1595 return -ENOMEM;
1596
1597 key.objectid = head->bytenr;
1598
1599 if (metadata) {
1600 key.type = BTRFS_METADATA_ITEM_KEY;
1601 key.offset = extent_op->level;
1602 } else {
1603 key.type = BTRFS_EXTENT_ITEM_KEY;
1604 key.offset = head->num_bytes;
1605 }
1606
1607again:
1608 path->leave_spinning = 1;
1609 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1610 if (ret < 0) {
1611 err = ret;
1612 goto out;
1613 }
1614 if (ret > 0) {
1615 if (metadata) {
1616 if (path->slots[0] > 0) {
1617 path->slots[0]--;
1618 btrfs_item_key_to_cpu(path->nodes[0], &key,
1619 path->slots[0]);
1620 if (key.objectid == head->bytenr &&
1621 key.type == BTRFS_EXTENT_ITEM_KEY &&
1622 key.offset == head->num_bytes)
1623 ret = 0;
1624 }
1625 if (ret > 0) {
1626 btrfs_release_path(path);
1627 metadata = 0;
1628
1629 key.objectid = head->bytenr;
1630 key.offset = head->num_bytes;
1631 key.type = BTRFS_EXTENT_ITEM_KEY;
1632 goto again;
1633 }
1634 } else {
1635 err = -EIO;
1636 goto out;
1637 }
1638 }
1639
1640 leaf = path->nodes[0];
1641 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1642
1643 if (unlikely(item_size < sizeof(*ei))) {
1644 err = -EINVAL;
1645 btrfs_print_v0_err(fs_info);
1646 btrfs_abort_transaction(trans, err);
1647 goto out;
1648 }
1649
1650 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1651 __run_delayed_extent_op(extent_op, leaf, ei);
1652
1653 btrfs_mark_buffer_dirty(leaf);
1654out:
1655 btrfs_free_path(path);
1656 return err;
1657}
1658
1659static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1660 struct btrfs_delayed_ref_node *node,
1661 struct btrfs_delayed_extent_op *extent_op,
1662 int insert_reserved)
1663{
1664 int ret = 0;
1665 struct btrfs_delayed_tree_ref *ref;
1666 u64 parent = 0;
1667 u64 ref_root = 0;
1668
1669 ref = btrfs_delayed_node_to_tree_ref(node);
1670 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1671
1672 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1673 parent = ref->parent;
1674 ref_root = ref->root;
1675
1676 if (node->ref_mod != 1) {
1677 btrfs_err(trans->fs_info,
1678 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1679 node->bytenr, node->ref_mod, node->action, ref_root,
1680 parent);
1681 return -EIO;
1682 }
1683 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1684 BUG_ON(!extent_op || !extent_op->update_flags);
1685 ret = alloc_reserved_tree_block(trans, node, extent_op);
1686 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1687 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1688 ref->level, 0, 1, extent_op);
1689 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1690 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1691 ref->level, 0, 1, extent_op);
1692 } else {
1693 BUG();
1694 }
1695 return ret;
1696}
1697
1698
1699static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1700 struct btrfs_delayed_ref_node *node,
1701 struct btrfs_delayed_extent_op *extent_op,
1702 int insert_reserved)
1703{
1704 int ret = 0;
1705
1706 if (TRANS_ABORTED(trans)) {
1707 if (insert_reserved)
1708 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1709 return 0;
1710 }
1711
1712 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1713 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1714 ret = run_delayed_tree_ref(trans, node, extent_op,
1715 insert_reserved);
1716 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1717 node->type == BTRFS_SHARED_DATA_REF_KEY)
1718 ret = run_delayed_data_ref(trans, node, extent_op,
1719 insert_reserved);
1720 else
1721 BUG();
1722 if (ret && insert_reserved)
1723 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1724 return ret;
1725}
1726
1727static inline struct btrfs_delayed_ref_node *
1728select_delayed_ref(struct btrfs_delayed_ref_head *head)
1729{
1730 struct btrfs_delayed_ref_node *ref;
1731
1732 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1733 return NULL;
1734
1735
1736
1737
1738
1739
1740
1741 if (!list_empty(&head->ref_add_list))
1742 return list_first_entry(&head->ref_add_list,
1743 struct btrfs_delayed_ref_node, add_list);
1744
1745 ref = rb_entry(rb_first_cached(&head->ref_tree),
1746 struct btrfs_delayed_ref_node, ref_node);
1747 ASSERT(list_empty(&ref->add_list));
1748 return ref;
1749}
1750
1751static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1752 struct btrfs_delayed_ref_head *head)
1753{
1754 spin_lock(&delayed_refs->lock);
1755 head->processing = 0;
1756 delayed_refs->num_heads_ready++;
1757 spin_unlock(&delayed_refs->lock);
1758 btrfs_delayed_ref_unlock(head);
1759}
1760
1761static struct btrfs_delayed_extent_op *cleanup_extent_op(
1762 struct btrfs_delayed_ref_head *head)
1763{
1764 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1765
1766 if (!extent_op)
1767 return NULL;
1768
1769 if (head->must_insert_reserved) {
1770 head->extent_op = NULL;
1771 btrfs_free_delayed_extent_op(extent_op);
1772 return NULL;
1773 }
1774 return extent_op;
1775}
1776
1777static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1778 struct btrfs_delayed_ref_head *head)
1779{
1780 struct btrfs_delayed_extent_op *extent_op;
1781 int ret;
1782
1783 extent_op = cleanup_extent_op(head);
1784 if (!extent_op)
1785 return 0;
1786 head->extent_op = NULL;
1787 spin_unlock(&head->lock);
1788 ret = run_delayed_extent_op(trans, head, extent_op);
1789 btrfs_free_delayed_extent_op(extent_op);
1790 return ret ? ret : 1;
1791}
1792
1793void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1794 struct btrfs_delayed_ref_root *delayed_refs,
1795 struct btrfs_delayed_ref_head *head)
1796{
1797 int nr_items = 1;
1798
1799 if (head->total_ref_mod < 0) {
1800 struct btrfs_space_info *space_info;
1801 u64 flags;
1802
1803 if (head->is_data)
1804 flags = BTRFS_BLOCK_GROUP_DATA;
1805 else if (head->is_system)
1806 flags = BTRFS_BLOCK_GROUP_SYSTEM;
1807 else
1808 flags = BTRFS_BLOCK_GROUP_METADATA;
1809 space_info = btrfs_find_space_info(fs_info, flags);
1810 ASSERT(space_info);
1811 percpu_counter_add_batch(&space_info->total_bytes_pinned,
1812 -head->num_bytes,
1813 BTRFS_TOTAL_BYTES_PINNED_BATCH);
1814
1815
1816
1817
1818
1819
1820 if (head->is_data) {
1821 spin_lock(&delayed_refs->lock);
1822 delayed_refs->pending_csums -= head->num_bytes;
1823 spin_unlock(&delayed_refs->lock);
1824 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
1825 head->num_bytes);
1826 }
1827 }
1828
1829 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1830}
1831
1832static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1833 struct btrfs_delayed_ref_head *head)
1834{
1835
1836 struct btrfs_fs_info *fs_info = trans->fs_info;
1837 struct btrfs_delayed_ref_root *delayed_refs;
1838 int ret;
1839
1840 delayed_refs = &trans->transaction->delayed_refs;
1841
1842 ret = run_and_cleanup_extent_op(trans, head);
1843 if (ret < 0) {
1844 unselect_delayed_ref_head(delayed_refs, head);
1845 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1846 return ret;
1847 } else if (ret) {
1848 return ret;
1849 }
1850
1851
1852
1853
1854
1855 spin_unlock(&head->lock);
1856 spin_lock(&delayed_refs->lock);
1857 spin_lock(&head->lock);
1858 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1859 spin_unlock(&head->lock);
1860 spin_unlock(&delayed_refs->lock);
1861 return 1;
1862 }
1863 btrfs_delete_ref_head(delayed_refs, head);
1864 spin_unlock(&head->lock);
1865 spin_unlock(&delayed_refs->lock);
1866
1867 if (head->must_insert_reserved) {
1868 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
1869 if (head->is_data) {
1870 ret = btrfs_del_csums(trans, fs_info->csum_root,
1871 head->bytenr, head->num_bytes);
1872 }
1873 }
1874
1875 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1876
1877 trace_run_delayed_ref_head(fs_info, head, 0);
1878 btrfs_delayed_ref_unlock(head);
1879 btrfs_put_delayed_ref_head(head);
1880 return 0;
1881}
1882
1883static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1884 struct btrfs_trans_handle *trans)
1885{
1886 struct btrfs_delayed_ref_root *delayed_refs =
1887 &trans->transaction->delayed_refs;
1888 struct btrfs_delayed_ref_head *head = NULL;
1889 int ret;
1890
1891 spin_lock(&delayed_refs->lock);
1892 head = btrfs_select_ref_head(delayed_refs);
1893 if (!head) {
1894 spin_unlock(&delayed_refs->lock);
1895 return head;
1896 }
1897
1898
1899
1900
1901
1902 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1903 spin_unlock(&delayed_refs->lock);
1904
1905
1906
1907
1908
1909
1910 if (ret == -EAGAIN)
1911 head = ERR_PTR(-EAGAIN);
1912
1913 return head;
1914}
1915
1916static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1917 struct btrfs_delayed_ref_head *locked_ref,
1918 unsigned long *run_refs)
1919{
1920 struct btrfs_fs_info *fs_info = trans->fs_info;
1921 struct btrfs_delayed_ref_root *delayed_refs;
1922 struct btrfs_delayed_extent_op *extent_op;
1923 struct btrfs_delayed_ref_node *ref;
1924 int must_insert_reserved = 0;
1925 int ret;
1926
1927 delayed_refs = &trans->transaction->delayed_refs;
1928
1929 lockdep_assert_held(&locked_ref->mutex);
1930 lockdep_assert_held(&locked_ref->lock);
1931
1932 while ((ref = select_delayed_ref(locked_ref))) {
1933 if (ref->seq &&
1934 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1935 spin_unlock(&locked_ref->lock);
1936 unselect_delayed_ref_head(delayed_refs, locked_ref);
1937 return -EAGAIN;
1938 }
1939
1940 (*run_refs)++;
1941 ref->in_tree = 0;
1942 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1943 RB_CLEAR_NODE(&ref->ref_node);
1944 if (!list_empty(&ref->add_list))
1945 list_del(&ref->add_list);
1946
1947
1948
1949
1950 switch (ref->action) {
1951 case BTRFS_ADD_DELAYED_REF:
1952 case BTRFS_ADD_DELAYED_EXTENT:
1953 locked_ref->ref_mod -= ref->ref_mod;
1954 break;
1955 case BTRFS_DROP_DELAYED_REF:
1956 locked_ref->ref_mod += ref->ref_mod;
1957 break;
1958 default:
1959 WARN_ON(1);
1960 }
1961 atomic_dec(&delayed_refs->num_entries);
1962
1963
1964
1965
1966
1967 must_insert_reserved = locked_ref->must_insert_reserved;
1968 locked_ref->must_insert_reserved = 0;
1969
1970 extent_op = locked_ref->extent_op;
1971 locked_ref->extent_op = NULL;
1972 spin_unlock(&locked_ref->lock);
1973
1974 ret = run_one_delayed_ref(trans, ref, extent_op,
1975 must_insert_reserved);
1976
1977 btrfs_free_delayed_extent_op(extent_op);
1978 if (ret) {
1979 unselect_delayed_ref_head(delayed_refs, locked_ref);
1980 btrfs_put_delayed_ref(ref);
1981 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1982 ret);
1983 return ret;
1984 }
1985
1986 btrfs_put_delayed_ref(ref);
1987 cond_resched();
1988
1989 spin_lock(&locked_ref->lock);
1990 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1991 }
1992
1993 return 0;
1994}
1995
1996
1997
1998
1999
2000static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2001 unsigned long nr)
2002{
2003 struct btrfs_fs_info *fs_info = trans->fs_info;
2004 struct btrfs_delayed_ref_root *delayed_refs;
2005 struct btrfs_delayed_ref_head *locked_ref = NULL;
2006 ktime_t start = ktime_get();
2007 int ret;
2008 unsigned long count = 0;
2009 unsigned long actual_count = 0;
2010
2011 delayed_refs = &trans->transaction->delayed_refs;
2012 do {
2013 if (!locked_ref) {
2014 locked_ref = btrfs_obtain_ref_head(trans);
2015 if (IS_ERR_OR_NULL(locked_ref)) {
2016 if (PTR_ERR(locked_ref) == -EAGAIN) {
2017 continue;
2018 } else {
2019 break;
2020 }
2021 }
2022 count++;
2023 }
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036 spin_lock(&locked_ref->lock);
2037 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2038
2039 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2040 &actual_count);
2041 if (ret < 0 && ret != -EAGAIN) {
2042
2043
2044
2045
2046 return ret;
2047 } else if (!ret) {
2048
2049
2050
2051
2052 ret = cleanup_ref_head(trans, locked_ref);
2053 if (ret > 0 ) {
2054
2055 ret = 0;
2056 continue;
2057 } else if (ret) {
2058 return ret;
2059 }
2060 }
2061
2062
2063
2064
2065
2066
2067 locked_ref = NULL;
2068 cond_resched();
2069 } while ((nr != -1 && count < nr) || locked_ref);
2070
2071
2072
2073
2074
2075
2076 if (actual_count > 0) {
2077 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2078 u64 avg;
2079
2080
2081
2082
2083
2084 spin_lock(&delayed_refs->lock);
2085 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2086 fs_info->avg_delayed_ref_runtime = avg >> 2;
2087 spin_unlock(&delayed_refs->lock);
2088 }
2089 return 0;
2090}
2091
2092#ifdef SCRAMBLE_DELAYED_REFS
2093
2094
2095
2096
2097
2098static u64 find_middle(struct rb_root *root)
2099{
2100 struct rb_node *n = root->rb_node;
2101 struct btrfs_delayed_ref_node *entry;
2102 int alt = 1;
2103 u64 middle;
2104 u64 first = 0, last = 0;
2105
2106 n = rb_first(root);
2107 if (n) {
2108 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2109 first = entry->bytenr;
2110 }
2111 n = rb_last(root);
2112 if (n) {
2113 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2114 last = entry->bytenr;
2115 }
2116 n = root->rb_node;
2117
2118 while (n) {
2119 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2120 WARN_ON(!entry->in_tree);
2121
2122 middle = entry->bytenr;
2123
2124 if (alt)
2125 n = n->rb_left;
2126 else
2127 n = n->rb_right;
2128
2129 alt = 1 - alt;
2130 }
2131 return middle;
2132}
2133#endif
2134
2135
2136
2137
2138
2139u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2140{
2141 u64 csum_size;
2142 u64 num_csums_per_leaf;
2143 u64 num_csums;
2144
2145 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2146 num_csums_per_leaf = div64_u64(csum_size,
2147 (u64)btrfs_super_csum_size(fs_info->super_copy));
2148 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2149 num_csums += num_csums_per_leaf - 1;
2150 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2151 return num_csums;
2152}
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2165 unsigned long count)
2166{
2167 struct btrfs_fs_info *fs_info = trans->fs_info;
2168 struct rb_node *node;
2169 struct btrfs_delayed_ref_root *delayed_refs;
2170 struct btrfs_delayed_ref_head *head;
2171 int ret;
2172 int run_all = count == (unsigned long)-1;
2173
2174
2175 if (TRANS_ABORTED(trans))
2176 return 0;
2177
2178 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2179 return 0;
2180
2181 delayed_refs = &trans->transaction->delayed_refs;
2182 if (count == 0)
2183 count = atomic_read(&delayed_refs->num_entries) * 2;
2184
2185again:
2186#ifdef SCRAMBLE_DELAYED_REFS
2187 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2188#endif
2189 ret = __btrfs_run_delayed_refs(trans, count);
2190 if (ret < 0) {
2191 btrfs_abort_transaction(trans, ret);
2192 return ret;
2193 }
2194
2195 if (run_all) {
2196 btrfs_create_pending_block_groups(trans);
2197
2198 spin_lock(&delayed_refs->lock);
2199 node = rb_first_cached(&delayed_refs->href_root);
2200 if (!node) {
2201 spin_unlock(&delayed_refs->lock);
2202 goto out;
2203 }
2204 head = rb_entry(node, struct btrfs_delayed_ref_head,
2205 href_node);
2206 refcount_inc(&head->refs);
2207 spin_unlock(&delayed_refs->lock);
2208
2209
2210 mutex_lock(&head->mutex);
2211 mutex_unlock(&head->mutex);
2212
2213 btrfs_put_delayed_ref_head(head);
2214 cond_resched();
2215 goto again;
2216 }
2217out:
2218 return 0;
2219}
2220
2221int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2222 struct extent_buffer *eb, u64 flags,
2223 int level, int is_data)
2224{
2225 struct btrfs_delayed_extent_op *extent_op;
2226 int ret;
2227
2228 extent_op = btrfs_alloc_delayed_extent_op();
2229 if (!extent_op)
2230 return -ENOMEM;
2231
2232 extent_op->flags_to_set = flags;
2233 extent_op->update_flags = true;
2234 extent_op->update_key = false;
2235 extent_op->is_data = is_data ? true : false;
2236 extent_op->level = level;
2237
2238 ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
2239 if (ret)
2240 btrfs_free_delayed_extent_op(extent_op);
2241 return ret;
2242}
2243
2244static noinline int check_delayed_ref(struct btrfs_root *root,
2245 struct btrfs_path *path,
2246 u64 objectid, u64 offset, u64 bytenr)
2247{
2248 struct btrfs_delayed_ref_head *head;
2249 struct btrfs_delayed_ref_node *ref;
2250 struct btrfs_delayed_data_ref *data_ref;
2251 struct btrfs_delayed_ref_root *delayed_refs;
2252 struct btrfs_transaction *cur_trans;
2253 struct rb_node *node;
2254 int ret = 0;
2255
2256 spin_lock(&root->fs_info->trans_lock);
2257 cur_trans = root->fs_info->running_transaction;
2258 if (cur_trans)
2259 refcount_inc(&cur_trans->use_count);
2260 spin_unlock(&root->fs_info->trans_lock);
2261 if (!cur_trans)
2262 return 0;
2263
2264 delayed_refs = &cur_trans->delayed_refs;
2265 spin_lock(&delayed_refs->lock);
2266 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2267 if (!head) {
2268 spin_unlock(&delayed_refs->lock);
2269 btrfs_put_transaction(cur_trans);
2270 return 0;
2271 }
2272
2273 if (!mutex_trylock(&head->mutex)) {
2274 refcount_inc(&head->refs);
2275 spin_unlock(&delayed_refs->lock);
2276
2277 btrfs_release_path(path);
2278
2279
2280
2281
2282
2283 mutex_lock(&head->mutex);
2284 mutex_unlock(&head->mutex);
2285 btrfs_put_delayed_ref_head(head);
2286 btrfs_put_transaction(cur_trans);
2287 return -EAGAIN;
2288 }
2289 spin_unlock(&delayed_refs->lock);
2290
2291 spin_lock(&head->lock);
2292
2293
2294
2295
2296 for (node = rb_first_cached(&head->ref_tree); node;
2297 node = rb_next(node)) {
2298 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2299
2300 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2301 ret = 1;
2302 break;
2303 }
2304
2305 data_ref = btrfs_delayed_node_to_data_ref(ref);
2306
2307
2308
2309
2310
2311 if (data_ref->root != root->root_key.objectid ||
2312 data_ref->objectid != objectid ||
2313 data_ref->offset != offset) {
2314 ret = 1;
2315 break;
2316 }
2317 }
2318 spin_unlock(&head->lock);
2319 mutex_unlock(&head->mutex);
2320 btrfs_put_transaction(cur_trans);
2321 return ret;
2322}
2323
2324static noinline int check_committed_ref(struct btrfs_root *root,
2325 struct btrfs_path *path,
2326 u64 objectid, u64 offset, u64 bytenr,
2327 bool strict)
2328{
2329 struct btrfs_fs_info *fs_info = root->fs_info;
2330 struct btrfs_root *extent_root = fs_info->extent_root;
2331 struct extent_buffer *leaf;
2332 struct btrfs_extent_data_ref *ref;
2333 struct btrfs_extent_inline_ref *iref;
2334 struct btrfs_extent_item *ei;
2335 struct btrfs_key key;
2336 u32 item_size;
2337 int type;
2338 int ret;
2339
2340 key.objectid = bytenr;
2341 key.offset = (u64)-1;
2342 key.type = BTRFS_EXTENT_ITEM_KEY;
2343
2344 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2345 if (ret < 0)
2346 goto out;
2347 BUG_ON(ret == 0);
2348
2349 ret = -ENOENT;
2350 if (path->slots[0] == 0)
2351 goto out;
2352
2353 path->slots[0]--;
2354 leaf = path->nodes[0];
2355 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2356
2357 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2358 goto out;
2359
2360 ret = 1;
2361 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2362 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2363
2364
2365 if (item_size != sizeof(*ei) +
2366 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2367 goto out;
2368
2369
2370
2371
2372
2373 if (!strict &&
2374 (btrfs_extent_generation(leaf, ei) <=
2375 btrfs_root_last_snapshot(&root->root_item)))
2376 goto out;
2377
2378 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2379
2380
2381 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2382 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2383 goto out;
2384
2385 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2386 if (btrfs_extent_refs(leaf, ei) !=
2387 btrfs_extent_data_ref_count(leaf, ref) ||
2388 btrfs_extent_data_ref_root(leaf, ref) !=
2389 root->root_key.objectid ||
2390 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2391 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2392 goto out;
2393
2394 ret = 0;
2395out:
2396 return ret;
2397}
2398
2399int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2400 u64 bytenr, bool strict)
2401{
2402 struct btrfs_path *path;
2403 int ret;
2404
2405 path = btrfs_alloc_path();
2406 if (!path)
2407 return -ENOMEM;
2408
2409 do {
2410 ret = check_committed_ref(root, path, objectid,
2411 offset, bytenr, strict);
2412 if (ret && ret != -ENOENT)
2413 goto out;
2414
2415 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2416 } while (ret == -EAGAIN);
2417
2418out:
2419 btrfs_free_path(path);
2420 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2421 WARN_ON(ret > 0);
2422 return ret;
2423}
2424
2425static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2426 struct btrfs_root *root,
2427 struct extent_buffer *buf,
2428 int full_backref, int inc)
2429{
2430 struct btrfs_fs_info *fs_info = root->fs_info;
2431 u64 bytenr;
2432 u64 num_bytes;
2433 u64 parent;
2434 u64 ref_root;
2435 u32 nritems;
2436 struct btrfs_key key;
2437 struct btrfs_file_extent_item *fi;
2438 struct btrfs_ref generic_ref = { 0 };
2439 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2440 int i;
2441 int action;
2442 int level;
2443 int ret = 0;
2444
2445 if (btrfs_is_testing(fs_info))
2446 return 0;
2447
2448 ref_root = btrfs_header_owner(buf);
2449 nritems = btrfs_header_nritems(buf);
2450 level = btrfs_header_level(buf);
2451
2452 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
2453 return 0;
2454
2455 if (full_backref)
2456 parent = buf->start;
2457 else
2458 parent = 0;
2459 if (inc)
2460 action = BTRFS_ADD_DELAYED_REF;
2461 else
2462 action = BTRFS_DROP_DELAYED_REF;
2463
2464 for (i = 0; i < nritems; i++) {
2465 if (level == 0) {
2466 btrfs_item_key_to_cpu(buf, &key, i);
2467 if (key.type != BTRFS_EXTENT_DATA_KEY)
2468 continue;
2469 fi = btrfs_item_ptr(buf, i,
2470 struct btrfs_file_extent_item);
2471 if (btrfs_file_extent_type(buf, fi) ==
2472 BTRFS_FILE_EXTENT_INLINE)
2473 continue;
2474 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2475 if (bytenr == 0)
2476 continue;
2477
2478 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2479 key.offset -= btrfs_file_extent_offset(buf, fi);
2480 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2481 num_bytes, parent);
2482 generic_ref.real_root = root->root_key.objectid;
2483 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2484 key.offset);
2485 generic_ref.skip_qgroup = for_reloc;
2486 if (inc)
2487 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2488 else
2489 ret = btrfs_free_extent(trans, &generic_ref);
2490 if (ret)
2491 goto fail;
2492 } else {
2493 bytenr = btrfs_node_blockptr(buf, i);
2494 num_bytes = fs_info->nodesize;
2495 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2496 num_bytes, parent);
2497 generic_ref.real_root = root->root_key.objectid;
2498 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2499 generic_ref.skip_qgroup = for_reloc;
2500 if (inc)
2501 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2502 else
2503 ret = btrfs_free_extent(trans, &generic_ref);
2504 if (ret)
2505 goto fail;
2506 }
2507 }
2508 return 0;
2509fail:
2510 return ret;
2511}
2512
2513int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2514 struct extent_buffer *buf, int full_backref)
2515{
2516 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2517}
2518
2519int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2520 struct extent_buffer *buf, int full_backref)
2521{
2522 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2523}
2524
2525int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
2526{
2527 struct btrfs_block_group *block_group;
2528 int readonly = 0;
2529
2530 block_group = btrfs_lookup_block_group(fs_info, bytenr);
2531 if (!block_group || block_group->ro)
2532 readonly = 1;
2533 if (block_group)
2534 btrfs_put_block_group(block_group);
2535 return readonly;
2536}
2537
2538static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2539{
2540 struct btrfs_fs_info *fs_info = root->fs_info;
2541 u64 flags;
2542 u64 ret;
2543
2544 if (data)
2545 flags = BTRFS_BLOCK_GROUP_DATA;
2546 else if (root == fs_info->chunk_root)
2547 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2548 else
2549 flags = BTRFS_BLOCK_GROUP_METADATA;
2550
2551 ret = btrfs_get_alloc_profile(fs_info, flags);
2552 return ret;
2553}
2554
2555static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2556{
2557 struct btrfs_block_group *cache;
2558 u64 bytenr;
2559
2560 spin_lock(&fs_info->block_group_cache_lock);
2561 bytenr = fs_info->first_logical_byte;
2562 spin_unlock(&fs_info->block_group_cache_lock);
2563
2564 if (bytenr < (u64)-1)
2565 return bytenr;
2566
2567 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2568 if (!cache)
2569 return 0;
2570
2571 bytenr = cache->start;
2572 btrfs_put_block_group(cache);
2573
2574 return bytenr;
2575}
2576
2577static int pin_down_extent(struct btrfs_trans_handle *trans,
2578 struct btrfs_block_group *cache,
2579 u64 bytenr, u64 num_bytes, int reserved)
2580{
2581 struct btrfs_fs_info *fs_info = cache->fs_info;
2582
2583 spin_lock(&cache->space_info->lock);
2584 spin_lock(&cache->lock);
2585 cache->pinned += num_bytes;
2586 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2587 num_bytes);
2588 if (reserved) {
2589 cache->reserved -= num_bytes;
2590 cache->space_info->bytes_reserved -= num_bytes;
2591 }
2592 spin_unlock(&cache->lock);
2593 spin_unlock(&cache->space_info->lock);
2594
2595 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
2596 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2597 set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
2598 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2599 return 0;
2600}
2601
2602int btrfs_pin_extent(struct btrfs_trans_handle *trans,
2603 u64 bytenr, u64 num_bytes, int reserved)
2604{
2605 struct btrfs_block_group *cache;
2606
2607 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2608 BUG_ON(!cache);
2609
2610 pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
2611
2612 btrfs_put_block_group(cache);
2613 return 0;
2614}
2615
2616
2617
2618
2619int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2620 u64 bytenr, u64 num_bytes)
2621{
2622 struct btrfs_block_group *cache;
2623 int ret;
2624
2625 btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes);
2626
2627 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2628 if (!cache)
2629 return -EINVAL;
2630
2631
2632
2633
2634
2635
2636
2637 btrfs_cache_block_group(cache, 1);
2638
2639 pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2640
2641
2642 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2643 btrfs_put_block_group(cache);
2644 return ret;
2645}
2646
2647static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2648 u64 start, u64 num_bytes)
2649{
2650 int ret;
2651 struct btrfs_block_group *block_group;
2652 struct btrfs_caching_control *caching_ctl;
2653
2654 block_group = btrfs_lookup_block_group(fs_info, start);
2655 if (!block_group)
2656 return -EINVAL;
2657
2658 btrfs_cache_block_group(block_group, 0);
2659 caching_ctl = btrfs_get_caching_control(block_group);
2660
2661 if (!caching_ctl) {
2662
2663 BUG_ON(!btrfs_block_group_done(block_group));
2664 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2665 } else {
2666 mutex_lock(&caching_ctl->mutex);
2667
2668 if (start >= caching_ctl->progress) {
2669 ret = btrfs_add_excluded_extent(fs_info, start,
2670 num_bytes);
2671 } else if (start + num_bytes <= caching_ctl->progress) {
2672 ret = btrfs_remove_free_space(block_group,
2673 start, num_bytes);
2674 } else {
2675 num_bytes = caching_ctl->progress - start;
2676 ret = btrfs_remove_free_space(block_group,
2677 start, num_bytes);
2678 if (ret)
2679 goto out_lock;
2680
2681 num_bytes = (start + num_bytes) -
2682 caching_ctl->progress;
2683 start = caching_ctl->progress;
2684 ret = btrfs_add_excluded_extent(fs_info, start,
2685 num_bytes);
2686 }
2687out_lock:
2688 mutex_unlock(&caching_ctl->mutex);
2689 btrfs_put_caching_control(caching_ctl);
2690 }
2691 btrfs_put_block_group(block_group);
2692 return ret;
2693}
2694
2695int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2696{
2697 struct btrfs_fs_info *fs_info = eb->fs_info;
2698 struct btrfs_file_extent_item *item;
2699 struct btrfs_key key;
2700 int found_type;
2701 int i;
2702 int ret = 0;
2703
2704 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2705 return 0;
2706
2707 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2708 btrfs_item_key_to_cpu(eb, &key, i);
2709 if (key.type != BTRFS_EXTENT_DATA_KEY)
2710 continue;
2711 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2712 found_type = btrfs_file_extent_type(eb, item);
2713 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2714 continue;
2715 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2716 continue;
2717 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2718 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2719 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2720 if (ret)
2721 break;
2722 }
2723
2724 return ret;
2725}
2726
2727static void
2728btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
2729{
2730 atomic_inc(&bg->reservations);
2731}
2732
2733void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
2734{
2735 struct btrfs_caching_control *next;
2736 struct btrfs_caching_control *caching_ctl;
2737 struct btrfs_block_group *cache;
2738
2739 down_write(&fs_info->commit_root_sem);
2740
2741 list_for_each_entry_safe(caching_ctl, next,
2742 &fs_info->caching_block_groups, list) {
2743 cache = caching_ctl->block_group;
2744 if (btrfs_block_group_done(cache)) {
2745 cache->last_byte_to_unpin = (u64)-1;
2746 list_del_init(&caching_ctl->list);
2747 btrfs_put_caching_control(caching_ctl);
2748 } else {
2749 cache->last_byte_to_unpin = caching_ctl->progress;
2750 }
2751 }
2752
2753 up_write(&fs_info->commit_root_sem);
2754
2755 btrfs_update_global_block_rsv(fs_info);
2756}
2757
2758
2759
2760
2761
2762static struct btrfs_free_cluster *
2763fetch_cluster_info(struct btrfs_fs_info *fs_info,
2764 struct btrfs_space_info *space_info, u64 *empty_cluster)
2765{
2766 struct btrfs_free_cluster *ret = NULL;
2767
2768 *empty_cluster = 0;
2769 if (btrfs_mixed_space_info(space_info))
2770 return ret;
2771
2772 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2773 ret = &fs_info->meta_alloc_cluster;
2774 if (btrfs_test_opt(fs_info, SSD))
2775 *empty_cluster = SZ_2M;
2776 else
2777 *empty_cluster = SZ_64K;
2778 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2779 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2780 *empty_cluster = SZ_2M;
2781 ret = &fs_info->data_alloc_cluster;
2782 }
2783
2784 return ret;
2785}
2786
2787static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2788 u64 start, u64 end,
2789 const bool return_free_space)
2790{
2791 struct btrfs_block_group *cache = NULL;
2792 struct btrfs_space_info *space_info;
2793 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2794 struct btrfs_free_cluster *cluster = NULL;
2795 u64 len;
2796 u64 total_unpinned = 0;
2797 u64 empty_cluster = 0;
2798 bool readonly;
2799
2800 while (start <= end) {
2801 readonly = false;
2802 if (!cache ||
2803 start >= cache->start + cache->length) {
2804 if (cache)
2805 btrfs_put_block_group(cache);
2806 total_unpinned = 0;
2807 cache = btrfs_lookup_block_group(fs_info, start);
2808 BUG_ON(!cache);
2809
2810 cluster = fetch_cluster_info(fs_info,
2811 cache->space_info,
2812 &empty_cluster);
2813 empty_cluster <<= 1;
2814 }
2815
2816 len = cache->start + cache->length - start;
2817 len = min(len, end + 1 - start);
2818
2819 if (start < cache->last_byte_to_unpin) {
2820 len = min(len, cache->last_byte_to_unpin - start);
2821 if (return_free_space)
2822 btrfs_add_free_space(cache, start, len);
2823 }
2824
2825 start += len;
2826 total_unpinned += len;
2827 space_info = cache->space_info;
2828
2829
2830
2831
2832
2833
2834
2835 if (cluster && cluster->fragmented &&
2836 total_unpinned > empty_cluster) {
2837 spin_lock(&cluster->lock);
2838 cluster->fragmented = 0;
2839 spin_unlock(&cluster->lock);
2840 }
2841
2842 spin_lock(&space_info->lock);
2843 spin_lock(&cache->lock);
2844 cache->pinned -= len;
2845 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2846 space_info->max_extent_size = 0;
2847 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2848 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2849 if (cache->ro) {
2850 space_info->bytes_readonly += len;
2851 readonly = true;
2852 }
2853 spin_unlock(&cache->lock);
2854 if (!readonly && return_free_space &&
2855 global_rsv->space_info == space_info) {
2856 u64 to_add = len;
2857
2858 spin_lock(&global_rsv->lock);
2859 if (!global_rsv->full) {
2860 to_add = min(len, global_rsv->size -
2861 global_rsv->reserved);
2862 global_rsv->reserved += to_add;
2863 btrfs_space_info_update_bytes_may_use(fs_info,
2864 space_info, to_add);
2865 if (global_rsv->reserved >= global_rsv->size)
2866 global_rsv->full = 1;
2867 len -= to_add;
2868 }
2869 spin_unlock(&global_rsv->lock);
2870 }
2871
2872 if (!readonly && return_free_space && len)
2873 btrfs_try_granting_tickets(fs_info, space_info);
2874 spin_unlock(&space_info->lock);
2875 }
2876
2877 if (cache)
2878 btrfs_put_block_group(cache);
2879 return 0;
2880}
2881
2882int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2883{
2884 struct btrfs_fs_info *fs_info = trans->fs_info;
2885 struct btrfs_block_group *block_group, *tmp;
2886 struct list_head *deleted_bgs;
2887 struct extent_io_tree *unpin;
2888 u64 start;
2889 u64 end;
2890 int ret;
2891
2892 unpin = &trans->transaction->pinned_extents;
2893
2894 while (!TRANS_ABORTED(trans)) {
2895 struct extent_state *cached_state = NULL;
2896
2897 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2898 ret = find_first_extent_bit(unpin, 0, &start, &end,
2899 EXTENT_DIRTY, &cached_state);
2900 if (ret) {
2901 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2902 break;
2903 }
2904 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
2905 clear_extent_bits(&fs_info->excluded_extents, start,
2906 end, EXTENT_UPTODATE);
2907
2908 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
2909 ret = btrfs_discard_extent(fs_info, start,
2910 end + 1 - start, NULL);
2911
2912 clear_extent_dirty(unpin, start, end, &cached_state);
2913 unpin_extent_range(fs_info, start, end, true);
2914 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2915 free_extent_state(cached_state);
2916 cond_resched();
2917 }
2918
2919 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2920 btrfs_discard_calc_delay(&fs_info->discard_ctl);
2921 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2922 }
2923
2924
2925
2926
2927
2928
2929 deleted_bgs = &trans->transaction->deleted_bgs;
2930 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2931 u64 trimmed = 0;
2932
2933 ret = -EROFS;
2934 if (!TRANS_ABORTED(trans))
2935 ret = btrfs_discard_extent(fs_info,
2936 block_group->start,
2937 block_group->length,
2938 &trimmed);
2939
2940 list_del_init(&block_group->bg_list);
2941 btrfs_unfreeze_block_group(block_group);
2942 btrfs_put_block_group(block_group);
2943
2944 if (ret) {
2945 const char *errstr = btrfs_decode_error(ret);
2946 btrfs_warn(fs_info,
2947 "discard failed while removing blockgroup: errno=%d %s",
2948 ret, errstr);
2949 }
2950 }
2951
2952 return 0;
2953}
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3015 struct btrfs_delayed_ref_node *node, u64 parent,
3016 u64 root_objectid, u64 owner_objectid,
3017 u64 owner_offset, int refs_to_drop,
3018 struct btrfs_delayed_extent_op *extent_op)
3019{
3020 struct btrfs_fs_info *info = trans->fs_info;
3021 struct btrfs_key key;
3022 struct btrfs_path *path;
3023 struct btrfs_root *extent_root = info->extent_root;
3024 struct extent_buffer *leaf;
3025 struct btrfs_extent_item *ei;
3026 struct btrfs_extent_inline_ref *iref;
3027 int ret;
3028 int is_data;
3029 int extent_slot = 0;
3030 int found_extent = 0;
3031 int num_to_del = 1;
3032 u32 item_size;
3033 u64 refs;
3034 u64 bytenr = node->bytenr;
3035 u64 num_bytes = node->num_bytes;
3036 int last_ref = 0;
3037 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
3038
3039 path = btrfs_alloc_path();
3040 if (!path)
3041 return -ENOMEM;
3042
3043 path->leave_spinning = 1;
3044
3045 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
3046
3047 if (!is_data && refs_to_drop != 1) {
3048 btrfs_crit(info,
3049"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
3050 node->bytenr, refs_to_drop);
3051 ret = -EINVAL;
3052 btrfs_abort_transaction(trans, ret);
3053 goto out;
3054 }
3055
3056 if (is_data)
3057 skinny_metadata = false;
3058
3059 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
3060 parent, root_objectid, owner_objectid,
3061 owner_offset);
3062 if (ret == 0) {
3063
3064
3065
3066
3067
3068
3069
3070 extent_slot = path->slots[0];
3071 while (extent_slot >= 0) {
3072 btrfs_item_key_to_cpu(path->nodes[0], &key,
3073 extent_slot);
3074 if (key.objectid != bytenr)
3075 break;
3076 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3077 key.offset == num_bytes) {
3078 found_extent = 1;
3079 break;
3080 }
3081 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3082 key.offset == owner_objectid) {
3083 found_extent = 1;
3084 break;
3085 }
3086
3087
3088 if (path->slots[0] - extent_slot > 5)
3089 break;
3090 extent_slot--;
3091 }
3092
3093 if (!found_extent) {
3094 if (iref) {
3095 btrfs_crit(info,
3096"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
3097 btrfs_abort_transaction(trans, -EUCLEAN);
3098 goto err_dump;
3099 }
3100
3101 ret = remove_extent_backref(trans, path, NULL,
3102 refs_to_drop,
3103 is_data, &last_ref);
3104 if (ret) {
3105 btrfs_abort_transaction(trans, ret);
3106 goto out;
3107 }
3108 btrfs_release_path(path);
3109 path->leave_spinning = 1;
3110
3111
3112 key.objectid = bytenr;
3113 key.type = BTRFS_EXTENT_ITEM_KEY;
3114 key.offset = num_bytes;
3115
3116 if (!is_data && skinny_metadata) {
3117 key.type = BTRFS_METADATA_ITEM_KEY;
3118 key.offset = owner_objectid;
3119 }
3120
3121 ret = btrfs_search_slot(trans, extent_root,
3122 &key, path, -1, 1);
3123 if (ret > 0 && skinny_metadata && path->slots[0]) {
3124
3125
3126
3127
3128 path->slots[0]--;
3129 btrfs_item_key_to_cpu(path->nodes[0], &key,
3130 path->slots[0]);
3131 if (key.objectid == bytenr &&
3132 key.type == BTRFS_EXTENT_ITEM_KEY &&
3133 key.offset == num_bytes)
3134 ret = 0;
3135 }
3136
3137 if (ret > 0 && skinny_metadata) {
3138 skinny_metadata = false;
3139 key.objectid = bytenr;
3140 key.type = BTRFS_EXTENT_ITEM_KEY;
3141 key.offset = num_bytes;
3142 btrfs_release_path(path);
3143 ret = btrfs_search_slot(trans, extent_root,
3144 &key, path, -1, 1);
3145 }
3146
3147 if (ret) {
3148 btrfs_err(info,
3149 "umm, got %d back from search, was looking for %llu",
3150 ret, bytenr);
3151 if (ret > 0)
3152 btrfs_print_leaf(path->nodes[0]);
3153 }
3154 if (ret < 0) {
3155 btrfs_abort_transaction(trans, ret);
3156 goto out;
3157 }
3158 extent_slot = path->slots[0];
3159 }
3160 } else if (WARN_ON(ret == -ENOENT)) {
3161 btrfs_print_leaf(path->nodes[0]);
3162 btrfs_err(info,
3163 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3164 bytenr, parent, root_objectid, owner_objectid,
3165 owner_offset);
3166 btrfs_abort_transaction(trans, ret);
3167 goto out;
3168 } else {
3169 btrfs_abort_transaction(trans, ret);
3170 goto out;
3171 }
3172
3173 leaf = path->nodes[0];
3174 item_size = btrfs_item_size_nr(leaf, extent_slot);
3175 if (unlikely(item_size < sizeof(*ei))) {
3176 ret = -EINVAL;
3177 btrfs_print_v0_err(info);
3178 btrfs_abort_transaction(trans, ret);
3179 goto out;
3180 }
3181 ei = btrfs_item_ptr(leaf, extent_slot,
3182 struct btrfs_extent_item);
3183 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3184 key.type == BTRFS_EXTENT_ITEM_KEY) {
3185 struct btrfs_tree_block_info *bi;
3186 if (item_size < sizeof(*ei) + sizeof(*bi)) {
3187 btrfs_crit(info,
3188"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
3189 key.objectid, key.type, key.offset,
3190 owner_objectid, item_size,
3191 sizeof(*ei) + sizeof(*bi));
3192 btrfs_abort_transaction(trans, -EUCLEAN);
3193 goto err_dump;
3194 }
3195 bi = (struct btrfs_tree_block_info *)(ei + 1);
3196 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3197 }
3198
3199 refs = btrfs_extent_refs(leaf, ei);
3200 if (refs < refs_to_drop) {
3201 btrfs_crit(info,
3202 "trying to drop %d refs but we only have %llu for bytenr %llu",
3203 refs_to_drop, refs, bytenr);
3204 btrfs_abort_transaction(trans, -EUCLEAN);
3205 goto err_dump;
3206 }
3207 refs -= refs_to_drop;
3208
3209 if (refs > 0) {
3210 if (extent_op)
3211 __run_delayed_extent_op(extent_op, leaf, ei);
3212
3213
3214
3215
3216 if (iref) {
3217 if (!found_extent) {
3218 btrfs_crit(info,
3219"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
3220 btrfs_abort_transaction(trans, -EUCLEAN);
3221 goto err_dump;
3222 }
3223 } else {
3224 btrfs_set_extent_refs(leaf, ei, refs);
3225 btrfs_mark_buffer_dirty(leaf);
3226 }
3227 if (found_extent) {
3228 ret = remove_extent_backref(trans, path, iref,
3229 refs_to_drop, is_data,
3230 &last_ref);
3231 if (ret) {
3232 btrfs_abort_transaction(trans, ret);
3233 goto out;
3234 }
3235 }
3236 } else {
3237
3238 if (found_extent) {
3239 if (is_data && refs_to_drop !=
3240 extent_data_ref_count(path, iref)) {
3241 btrfs_crit(info,
3242 "invalid refs_to_drop, current refs %u refs_to_drop %u",
3243 extent_data_ref_count(path, iref),
3244 refs_to_drop);
3245 btrfs_abort_transaction(trans, -EUCLEAN);
3246 goto err_dump;
3247 }
3248 if (iref) {
3249 if (path->slots[0] != extent_slot) {
3250 btrfs_crit(info,
3251"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
3252 key.objectid, key.type,
3253 key.offset);
3254 btrfs_abort_transaction(trans, -EUCLEAN);
3255 goto err_dump;
3256 }
3257 } else {
3258
3259
3260
3261
3262
3263
3264 if (path->slots[0] != extent_slot + 1) {
3265 btrfs_crit(info,
3266 "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
3267 btrfs_abort_transaction(trans, -EUCLEAN);
3268 goto err_dump;
3269 }
3270 path->slots[0] = extent_slot;
3271 num_to_del = 2;
3272 }
3273 }
3274
3275 last_ref = 1;
3276 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3277 num_to_del);
3278 if (ret) {
3279 btrfs_abort_transaction(trans, ret);
3280 goto out;
3281 }
3282 btrfs_release_path(path);
3283
3284 if (is_data) {
3285 ret = btrfs_del_csums(trans, info->csum_root, bytenr,
3286 num_bytes);
3287 if (ret) {
3288 btrfs_abort_transaction(trans, ret);
3289 goto out;
3290 }
3291 }
3292
3293 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3294 if (ret) {
3295 btrfs_abort_transaction(trans, ret);
3296 goto out;
3297 }
3298
3299 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3300 if (ret) {
3301 btrfs_abort_transaction(trans, ret);
3302 goto out;
3303 }
3304 }
3305 btrfs_release_path(path);
3306
3307out:
3308 btrfs_free_path(path);
3309 return ret;
3310err_dump:
3311
3312
3313
3314
3315 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
3316 btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
3317 path->slots[0], extent_slot);
3318 btrfs_print_leaf(path->nodes[0]);
3319 }
3320
3321 btrfs_free_path(path);
3322 return -EUCLEAN;
3323}
3324
3325
3326
3327
3328
3329
3330
3331static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3332 u64 bytenr)
3333{
3334 struct btrfs_delayed_ref_head *head;
3335 struct btrfs_delayed_ref_root *delayed_refs;
3336 int ret = 0;
3337
3338 delayed_refs = &trans->transaction->delayed_refs;
3339 spin_lock(&delayed_refs->lock);
3340 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3341 if (!head)
3342 goto out_delayed_unlock;
3343
3344 spin_lock(&head->lock);
3345 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3346 goto out;
3347
3348 if (cleanup_extent_op(head) != NULL)
3349 goto out;
3350
3351
3352
3353
3354
3355 if (!mutex_trylock(&head->mutex))
3356 goto out;
3357
3358 btrfs_delete_ref_head(delayed_refs, head);
3359 head->processing = 0;
3360
3361 spin_unlock(&head->lock);
3362 spin_unlock(&delayed_refs->lock);
3363
3364 BUG_ON(head->extent_op);
3365 if (head->must_insert_reserved)
3366 ret = 1;
3367
3368 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3369 mutex_unlock(&head->mutex);
3370 btrfs_put_delayed_ref_head(head);
3371 return ret;
3372out:
3373 spin_unlock(&head->lock);
3374
3375out_delayed_unlock:
3376 spin_unlock(&delayed_refs->lock);
3377 return 0;
3378}
3379
3380void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3381 struct btrfs_root *root,
3382 struct extent_buffer *buf,
3383 u64 parent, int last_ref)
3384{
3385 struct btrfs_fs_info *fs_info = root->fs_info;
3386 struct btrfs_ref generic_ref = { 0 };
3387 int pin = 1;
3388 int ret;
3389
3390 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3391 buf->start, buf->len, parent);
3392 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3393 root->root_key.objectid);
3394
3395 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3396 int old_ref_mod, new_ref_mod;
3397
3398 btrfs_ref_tree_mod(fs_info, &generic_ref);
3399 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
3400 &old_ref_mod, &new_ref_mod);
3401 BUG_ON(ret);
3402 pin = old_ref_mod >= 0 && new_ref_mod < 0;
3403 }
3404
3405 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3406 struct btrfs_block_group *cache;
3407
3408 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3409 ret = check_ref_cleanup(trans, buf->start);
3410 if (!ret)
3411 goto out;
3412 }
3413
3414 pin = 0;
3415 cache = btrfs_lookup_block_group(fs_info, buf->start);
3416
3417 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3418 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3419 btrfs_put_block_group(cache);
3420 goto out;
3421 }
3422
3423 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3424
3425 btrfs_add_free_space(cache, buf->start, buf->len);
3426 btrfs_free_reserved_bytes(cache, buf->len, 0);
3427 btrfs_put_block_group(cache);
3428 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3429 }
3430out:
3431 if (pin)
3432 add_pinned_bytes(fs_info, &generic_ref);
3433
3434 if (last_ref) {
3435
3436
3437
3438
3439 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3440 }
3441}
3442
3443
3444int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3445{
3446 struct btrfs_fs_info *fs_info = trans->fs_info;
3447 int old_ref_mod, new_ref_mod;
3448 int ret;
3449
3450 if (btrfs_is_testing(fs_info))
3451 return 0;
3452
3453
3454
3455
3456
3457 if ((ref->type == BTRFS_REF_METADATA &&
3458 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3459 (ref->type == BTRFS_REF_DATA &&
3460 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3461
3462 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
3463 old_ref_mod = new_ref_mod = 0;
3464 ret = 0;
3465 } else if (ref->type == BTRFS_REF_METADATA) {
3466 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
3467 &old_ref_mod, &new_ref_mod);
3468 } else {
3469 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
3470 &old_ref_mod, &new_ref_mod);
3471 }
3472
3473 if (!((ref->type == BTRFS_REF_METADATA &&
3474 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3475 (ref->type == BTRFS_REF_DATA &&
3476 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3477 btrfs_ref_tree_mod(fs_info, ref);
3478
3479 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
3480 add_pinned_bytes(fs_info, ref);
3481
3482 return ret;
3483}
3484
3485enum btrfs_loop_type {
3486 LOOP_CACHING_NOWAIT,
3487 LOOP_CACHING_WAIT,
3488 LOOP_ALLOC_CHUNK,
3489 LOOP_NO_EMPTY_SIZE,
3490};
3491
3492static inline void
3493btrfs_lock_block_group(struct btrfs_block_group *cache,
3494 int delalloc)
3495{
3496 if (delalloc)
3497 down_read(&cache->data_rwsem);
3498}
3499
3500static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3501 int delalloc)
3502{
3503 btrfs_get_block_group(cache);
3504 if (delalloc)
3505 down_read(&cache->data_rwsem);
3506}
3507
3508static struct btrfs_block_group *btrfs_lock_cluster(
3509 struct btrfs_block_group *block_group,
3510 struct btrfs_free_cluster *cluster,
3511 int delalloc)
3512 __acquires(&cluster->refill_lock)
3513{
3514 struct btrfs_block_group *used_bg = NULL;
3515
3516 spin_lock(&cluster->refill_lock);
3517 while (1) {
3518 used_bg = cluster->block_group;
3519 if (!used_bg)
3520 return NULL;
3521
3522 if (used_bg == block_group)
3523 return used_bg;
3524
3525 btrfs_get_block_group(used_bg);
3526
3527 if (!delalloc)
3528 return used_bg;
3529
3530 if (down_read_trylock(&used_bg->data_rwsem))
3531 return used_bg;
3532
3533 spin_unlock(&cluster->refill_lock);
3534
3535
3536 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3537
3538 spin_lock(&cluster->refill_lock);
3539 if (used_bg == cluster->block_group)
3540 return used_bg;
3541
3542 up_read(&used_bg->data_rwsem);
3543 btrfs_put_block_group(used_bg);
3544 }
3545}
3546
3547static inline void
3548btrfs_release_block_group(struct btrfs_block_group *cache,
3549 int delalloc)
3550{
3551 if (delalloc)
3552 up_read(&cache->data_rwsem);
3553 btrfs_put_block_group(cache);
3554}
3555
3556enum btrfs_extent_allocation_policy {
3557 BTRFS_EXTENT_ALLOC_CLUSTERED,
3558};
3559
3560
3561
3562
3563
3564struct find_free_extent_ctl {
3565
3566 u64 num_bytes;
3567 u64 empty_size;
3568 u64 flags;
3569 int delalloc;
3570
3571
3572 u64 search_start;
3573
3574
3575 u64 empty_cluster;
3576 struct btrfs_free_cluster *last_ptr;
3577 bool use_cluster;
3578
3579 bool have_caching_bg;
3580 bool orig_have_caching_bg;
3581
3582
3583 int index;
3584
3585
3586
3587
3588 int loop;
3589
3590
3591
3592
3593
3594 bool retry_clustered;
3595
3596
3597
3598
3599
3600 bool retry_unclustered;
3601
3602
3603 int cached;
3604
3605
3606 u64 max_extent_size;
3607
3608
3609 u64 total_free_space;
3610
3611
3612 u64 found_offset;
3613
3614
3615 u64 hint_byte;
3616
3617
3618 enum btrfs_extent_allocation_policy policy;
3619};
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630static int find_free_extent_clustered(struct btrfs_block_group *bg,
3631 struct find_free_extent_ctl *ffe_ctl,
3632 struct btrfs_block_group **cluster_bg_ret)
3633{
3634 struct btrfs_block_group *cluster_bg;
3635 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3636 u64 aligned_cluster;
3637 u64 offset;
3638 int ret;
3639
3640 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3641 if (!cluster_bg)
3642 goto refill_cluster;
3643 if (cluster_bg != bg && (cluster_bg->ro ||
3644 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3645 goto release_cluster;
3646
3647 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3648 ffe_ctl->num_bytes, cluster_bg->start,
3649 &ffe_ctl->max_extent_size);
3650 if (offset) {
3651
3652 spin_unlock(&last_ptr->refill_lock);
3653 trace_btrfs_reserve_extent_cluster(cluster_bg,
3654 ffe_ctl->search_start, ffe_ctl->num_bytes);
3655 *cluster_bg_ret = cluster_bg;
3656 ffe_ctl->found_offset = offset;
3657 return 0;
3658 }
3659 WARN_ON(last_ptr->block_group != cluster_bg);
3660
3661release_cluster:
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3674 spin_unlock(&last_ptr->refill_lock);
3675 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3676 return -ENOENT;
3677 }
3678
3679
3680 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3681
3682 if (cluster_bg != bg)
3683 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3684
3685refill_cluster:
3686 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3687 spin_unlock(&last_ptr->refill_lock);
3688 return -ENOENT;
3689 }
3690
3691 aligned_cluster = max_t(u64,
3692 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3693 bg->full_stripe_len);
3694 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3695 ffe_ctl->num_bytes, aligned_cluster);
3696 if (ret == 0) {
3697
3698 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3699 ffe_ctl->num_bytes, ffe_ctl->search_start,
3700 &ffe_ctl->max_extent_size);
3701 if (offset) {
3702
3703 spin_unlock(&last_ptr->refill_lock);
3704 trace_btrfs_reserve_extent_cluster(bg,
3705 ffe_ctl->search_start,
3706 ffe_ctl->num_bytes);
3707 ffe_ctl->found_offset = offset;
3708 return 0;
3709 }
3710 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3711 !ffe_ctl->retry_clustered) {
3712 spin_unlock(&last_ptr->refill_lock);
3713
3714 ffe_ctl->retry_clustered = true;
3715 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3716 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3717 return -EAGAIN;
3718 }
3719
3720
3721
3722
3723
3724 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3725 spin_unlock(&last_ptr->refill_lock);
3726 return 1;
3727}
3728
3729
3730
3731
3732
3733
3734static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3735 struct find_free_extent_ctl *ffe_ctl)
3736{
3737 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3738 u64 offset;
3739
3740
3741
3742
3743
3744
3745 if (unlikely(last_ptr)) {
3746 spin_lock(&last_ptr->lock);
3747 last_ptr->fragmented = 1;
3748 spin_unlock(&last_ptr->lock);
3749 }
3750 if (ffe_ctl->cached) {
3751 struct btrfs_free_space_ctl *free_space_ctl;
3752
3753 free_space_ctl = bg->free_space_ctl;
3754 spin_lock(&free_space_ctl->tree_lock);
3755 if (free_space_ctl->free_space <
3756 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3757 ffe_ctl->empty_size) {
3758 ffe_ctl->total_free_space = max_t(u64,
3759 ffe_ctl->total_free_space,
3760 free_space_ctl->free_space);
3761 spin_unlock(&free_space_ctl->tree_lock);
3762 return 1;
3763 }
3764 spin_unlock(&free_space_ctl->tree_lock);
3765 }
3766
3767 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3768 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3769 &ffe_ctl->max_extent_size);
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3781 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3782 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3783 ffe_ctl->empty_size);
3784 ffe_ctl->retry_unclustered = true;
3785 return -EAGAIN;
3786 } else if (!offset) {
3787 return 1;
3788 }
3789 ffe_ctl->found_offset = offset;
3790 return 0;
3791}
3792
3793static int do_allocation_clustered(struct btrfs_block_group *block_group,
3794 struct find_free_extent_ctl *ffe_ctl,
3795 struct btrfs_block_group **bg_ret)
3796{
3797 int ret;
3798
3799
3800 if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3801 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3802 if (ret >= 0 || ret == -EAGAIN)
3803 return ret;
3804
3805 }
3806
3807 return find_free_extent_unclustered(block_group, ffe_ctl);
3808}
3809
3810static int do_allocation(struct btrfs_block_group *block_group,
3811 struct find_free_extent_ctl *ffe_ctl,
3812 struct btrfs_block_group **bg_ret)
3813{
3814 switch (ffe_ctl->policy) {
3815 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3816 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
3817 default:
3818 BUG();
3819 }
3820}
3821
3822static void release_block_group(struct btrfs_block_group *block_group,
3823 struct find_free_extent_ctl *ffe_ctl,
3824 int delalloc)
3825{
3826 switch (ffe_ctl->policy) {
3827 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3828 ffe_ctl->retry_clustered = false;
3829 ffe_ctl->retry_unclustered = false;
3830 break;
3831 default:
3832 BUG();
3833 }
3834
3835 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3836 ffe_ctl->index);
3837 btrfs_release_block_group(block_group, delalloc);
3838}
3839
3840static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
3841 struct btrfs_key *ins)
3842{
3843 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3844
3845 if (!ffe_ctl->use_cluster && last_ptr) {
3846 spin_lock(&last_ptr->lock);
3847 last_ptr->window_start = ins->objectid;
3848 spin_unlock(&last_ptr->lock);
3849 }
3850}
3851
3852static void found_extent(struct find_free_extent_ctl *ffe_ctl,
3853 struct btrfs_key *ins)
3854{
3855 switch (ffe_ctl->policy) {
3856 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3857 found_extent_clustered(ffe_ctl, ins);
3858 break;
3859 default:
3860 BUG();
3861 }
3862}
3863
3864static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
3865{
3866 switch (ffe_ctl->policy) {
3867 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3868
3869
3870
3871
3872 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3873 return 0;
3874 default:
3875 BUG();
3876 }
3877}
3878
3879
3880
3881
3882
3883
3884static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3885 struct btrfs_key *ins,
3886 struct find_free_extent_ctl *ffe_ctl,
3887 bool full_search)
3888{
3889 struct btrfs_root *root = fs_info->extent_root;
3890 int ret;
3891
3892 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3893 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3894 ffe_ctl->orig_have_caching_bg = true;
3895
3896 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3897 ffe_ctl->have_caching_bg)
3898 return 1;
3899
3900 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3901 return 1;
3902
3903 if (ins->objectid) {
3904 found_extent(ffe_ctl, ins);
3905 return 0;
3906 }
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3917 ffe_ctl->index = 0;
3918 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3919
3920
3921
3922
3923
3924 if (ffe_ctl->orig_have_caching_bg || !full_search)
3925 ffe_ctl->loop = LOOP_CACHING_WAIT;
3926 else
3927 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3928 } else {
3929 ffe_ctl->loop++;
3930 }
3931
3932 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3933 struct btrfs_trans_handle *trans;
3934 int exist = 0;
3935
3936 trans = current->journal_info;
3937 if (trans)
3938 exist = 1;
3939 else
3940 trans = btrfs_join_transaction(root);
3941
3942 if (IS_ERR(trans)) {
3943 ret = PTR_ERR(trans);
3944 return ret;
3945 }
3946
3947 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3948 CHUNK_ALLOC_FORCE);
3949
3950
3951 if (ret == -ENOSPC)
3952 ret = chunk_allocation_failed(ffe_ctl);
3953 else if (ret < 0)
3954 btrfs_abort_transaction(trans, ret);
3955 else
3956 ret = 0;
3957 if (!exist)
3958 btrfs_end_transaction(trans);
3959 if (ret)
3960 return ret;
3961 }
3962
3963 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
3964 if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
3965 return -ENOSPC;
3966
3967
3968
3969
3970
3971 if (ffe_ctl->empty_size == 0 &&
3972 ffe_ctl->empty_cluster == 0)
3973 return -ENOSPC;
3974 ffe_ctl->empty_size = 0;
3975 ffe_ctl->empty_cluster = 0;
3976 }
3977 return 1;
3978 }
3979 return -ENOSPC;
3980}
3981
3982static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
3983 struct find_free_extent_ctl *ffe_ctl,
3984 struct btrfs_space_info *space_info,
3985 struct btrfs_key *ins)
3986{
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997 if (space_info->max_extent_size) {
3998 spin_lock(&space_info->lock);
3999 if (space_info->max_extent_size &&
4000 ffe_ctl->num_bytes > space_info->max_extent_size) {
4001 ins->offset = space_info->max_extent_size;
4002 spin_unlock(&space_info->lock);
4003 return -ENOSPC;
4004 } else if (space_info->max_extent_size) {
4005 ffe_ctl->use_cluster = false;
4006 }
4007 spin_unlock(&space_info->lock);
4008 }
4009
4010 ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
4011 &ffe_ctl->empty_cluster);
4012 if (ffe_ctl->last_ptr) {
4013 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
4014
4015 spin_lock(&last_ptr->lock);
4016 if (last_ptr->block_group)
4017 ffe_ctl->hint_byte = last_ptr->window_start;
4018 if (last_ptr->fragmented) {
4019
4020
4021
4022
4023
4024 ffe_ctl->hint_byte = last_ptr->window_start;
4025 ffe_ctl->use_cluster = false;
4026 }
4027 spin_unlock(&last_ptr->lock);
4028 }
4029
4030 return 0;
4031}
4032
4033static int prepare_allocation(struct btrfs_fs_info *fs_info,
4034 struct find_free_extent_ctl *ffe_ctl,
4035 struct btrfs_space_info *space_info,
4036 struct btrfs_key *ins)
4037{
4038 switch (ffe_ctl->policy) {
4039 case BTRFS_EXTENT_ALLOC_CLUSTERED:
4040 return prepare_allocation_clustered(fs_info, ffe_ctl,
4041 space_info, ins);
4042 default:
4043 BUG();
4044 }
4045}
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072static noinline int find_free_extent(struct btrfs_root *root,
4073 u64 ram_bytes, u64 num_bytes, u64 empty_size,
4074 u64 hint_byte_orig, struct btrfs_key *ins,
4075 u64 flags, int delalloc)
4076{
4077 struct btrfs_fs_info *fs_info = root->fs_info;
4078 int ret = 0;
4079 int cache_block_group_error = 0;
4080 struct btrfs_block_group *block_group = NULL;
4081 struct find_free_extent_ctl ffe_ctl = {0};
4082 struct btrfs_space_info *space_info;
4083 bool full_search = false;
4084
4085 WARN_ON(num_bytes < fs_info->sectorsize);
4086
4087 ffe_ctl.num_bytes = num_bytes;
4088 ffe_ctl.empty_size = empty_size;
4089 ffe_ctl.flags = flags;
4090 ffe_ctl.search_start = 0;
4091 ffe_ctl.delalloc = delalloc;
4092 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
4093 ffe_ctl.have_caching_bg = false;
4094 ffe_ctl.orig_have_caching_bg = false;
4095 ffe_ctl.found_offset = 0;
4096 ffe_ctl.hint_byte = hint_byte_orig;
4097 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
4098
4099
4100 ffe_ctl.retry_clustered = false;
4101 ffe_ctl.retry_unclustered = false;
4102 ffe_ctl.last_ptr = NULL;
4103 ffe_ctl.use_cluster = true;
4104
4105 ins->type = BTRFS_EXTENT_ITEM_KEY;
4106 ins->objectid = 0;
4107 ins->offset = 0;
4108
4109 trace_find_free_extent(root, num_bytes, empty_size, flags);
4110
4111 space_info = btrfs_find_space_info(fs_info, flags);
4112 if (!space_info) {
4113 btrfs_err(fs_info, "No space info for %llu", flags);
4114 return -ENOSPC;
4115 }
4116
4117 ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
4118 if (ret < 0)
4119 return ret;
4120
4121 ffe_ctl.search_start = max(ffe_ctl.search_start,
4122 first_logical_byte(fs_info, 0));
4123 ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
4124 if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
4125 block_group = btrfs_lookup_block_group(fs_info,
4126 ffe_ctl.search_start);
4127
4128
4129
4130
4131
4132
4133
4134 if (block_group && block_group_bits(block_group, flags) &&
4135 block_group->cached != BTRFS_CACHE_NO) {
4136 down_read(&space_info->groups_sem);
4137 if (list_empty(&block_group->list) ||
4138 block_group->ro) {
4139
4140
4141
4142
4143
4144
4145 btrfs_put_block_group(block_group);
4146 up_read(&space_info->groups_sem);
4147 } else {
4148 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
4149 block_group->flags);
4150 btrfs_lock_block_group(block_group, delalloc);
4151 goto have_block_group;
4152 }
4153 } else if (block_group) {
4154 btrfs_put_block_group(block_group);
4155 }
4156 }
4157search:
4158 ffe_ctl.have_caching_bg = false;
4159 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
4160 ffe_ctl.index == 0)
4161 full_search = true;
4162 down_read(&space_info->groups_sem);
4163 list_for_each_entry(block_group,
4164 &space_info->block_groups[ffe_ctl.index], list) {
4165 struct btrfs_block_group *bg_ret;
4166
4167
4168 if (unlikely(block_group->ro))
4169 continue;
4170
4171 btrfs_grab_block_group(block_group, delalloc);
4172 ffe_ctl.search_start = block_group->start;
4173
4174
4175
4176
4177
4178
4179 if (!block_group_bits(block_group, flags)) {
4180 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4181 BTRFS_BLOCK_GROUP_RAID1_MASK |
4182 BTRFS_BLOCK_GROUP_RAID56_MASK |
4183 BTRFS_BLOCK_GROUP_RAID10;
4184
4185
4186
4187
4188
4189
4190 if ((flags & extra) && !(block_group->flags & extra))
4191 goto loop;
4192
4193
4194
4195
4196
4197
4198 btrfs_release_block_group(block_group, delalloc);
4199 continue;
4200 }
4201
4202have_block_group:
4203 ffe_ctl.cached = btrfs_block_group_done(block_group);
4204 if (unlikely(!ffe_ctl.cached)) {
4205 ffe_ctl.have_caching_bg = true;
4206 ret = btrfs_cache_block_group(block_group, 0);
4207
4208
4209
4210
4211
4212
4213
4214
4215 if (ret < 0) {
4216 if (!cache_block_group_error)
4217 cache_block_group_error = ret;
4218 ret = 0;
4219 goto loop;
4220 }
4221 ret = 0;
4222 }
4223
4224 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
4225 goto loop;
4226
4227 bg_ret = NULL;
4228 ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
4229 if (ret == 0) {
4230 if (bg_ret && bg_ret != block_group) {
4231 btrfs_release_block_group(block_group, delalloc);
4232 block_group = bg_ret;
4233 }
4234 } else if (ret == -EAGAIN) {
4235 goto have_block_group;
4236 } else if (ret > 0) {
4237 goto loop;
4238 }
4239
4240
4241 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
4242 fs_info->stripesize);
4243
4244
4245 if (ffe_ctl.search_start + num_bytes >
4246 block_group->start + block_group->length) {
4247 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4248 num_bytes);
4249 goto loop;
4250 }
4251
4252 if (ffe_ctl.found_offset < ffe_ctl.search_start)
4253 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4254 ffe_ctl.search_start - ffe_ctl.found_offset);
4255
4256 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
4257 num_bytes, delalloc);
4258 if (ret == -EAGAIN) {
4259 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4260 num_bytes);
4261 goto loop;
4262 }
4263 btrfs_inc_block_group_reservations(block_group);
4264
4265
4266 ins->objectid = ffe_ctl.search_start;
4267 ins->offset = num_bytes;
4268
4269 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4270 num_bytes);
4271 btrfs_release_block_group(block_group, delalloc);
4272 break;
4273loop:
4274 release_block_group(block_group, &ffe_ctl, delalloc);
4275 cond_resched();
4276 }
4277 up_read(&space_info->groups_sem);
4278
4279 ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
4280 if (ret > 0)
4281 goto search;
4282
4283 if (ret == -ENOSPC && !cache_block_group_error) {
4284
4285
4286
4287
4288 if (!ffe_ctl.max_extent_size)
4289 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4290 spin_lock(&space_info->lock);
4291 space_info->max_extent_size = ffe_ctl.max_extent_size;
4292 spin_unlock(&space_info->lock);
4293 ins->offset = ffe_ctl.max_extent_size;
4294 } else if (ret == -ENOSPC) {
4295 ret = cache_block_group_error;
4296 }
4297 return ret;
4298}
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4346 u64 num_bytes, u64 min_alloc_size,
4347 u64 empty_size, u64 hint_byte,
4348 struct btrfs_key *ins, int is_data, int delalloc)
4349{
4350 struct btrfs_fs_info *fs_info = root->fs_info;
4351 bool final_tried = num_bytes == min_alloc_size;
4352 u64 flags;
4353 int ret;
4354
4355 flags = get_alloc_profile_by_root(root, is_data);
4356again:
4357 WARN_ON(num_bytes < fs_info->sectorsize);
4358 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
4359 hint_byte, ins, flags, delalloc);
4360 if (!ret && !is_data) {
4361 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4362 } else if (ret == -ENOSPC) {
4363 if (!final_tried && ins->offset) {
4364 num_bytes = min(num_bytes >> 1, ins->offset);
4365 num_bytes = round_down(num_bytes,
4366 fs_info->sectorsize);
4367 num_bytes = max(num_bytes, min_alloc_size);
4368 ram_bytes = num_bytes;
4369 if (num_bytes == min_alloc_size)
4370 final_tried = true;
4371 goto again;
4372 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4373 struct btrfs_space_info *sinfo;
4374
4375 sinfo = btrfs_find_space_info(fs_info, flags);
4376 btrfs_err(fs_info,
4377 "allocation failed flags %llu, wanted %llu",
4378 flags, num_bytes);
4379 if (sinfo)
4380 btrfs_dump_space_info(fs_info, sinfo,
4381 num_bytes, 1);
4382 }
4383 }
4384
4385 return ret;
4386}
4387
4388int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4389 u64 start, u64 len, int delalloc)
4390{
4391 struct btrfs_block_group *cache;
4392
4393 cache = btrfs_lookup_block_group(fs_info, start);
4394 if (!cache) {
4395 btrfs_err(fs_info, "Unable to find block group for %llu",
4396 start);
4397 return -ENOSPC;
4398 }
4399
4400 btrfs_add_free_space(cache, start, len);
4401 btrfs_free_reserved_bytes(cache, len, delalloc);
4402 trace_btrfs_reserved_extent_free(fs_info, start, len);
4403
4404 btrfs_put_block_group(cache);
4405 return 0;
4406}
4407
4408int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
4409 u64 len)
4410{
4411 struct btrfs_block_group *cache;
4412 int ret = 0;
4413
4414 cache = btrfs_lookup_block_group(trans->fs_info, start);
4415 if (!cache) {
4416 btrfs_err(trans->fs_info, "unable to find block group for %llu",
4417 start);
4418 return -ENOSPC;
4419 }
4420
4421 ret = pin_down_extent(trans, cache, start, len, 1);
4422 btrfs_put_block_group(cache);
4423 return ret;
4424}
4425
4426static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4427 u64 parent, u64 root_objectid,
4428 u64 flags, u64 owner, u64 offset,
4429 struct btrfs_key *ins, int ref_mod)
4430{
4431 struct btrfs_fs_info *fs_info = trans->fs_info;
4432 int ret;
4433 struct btrfs_extent_item *extent_item;
4434 struct btrfs_extent_inline_ref *iref;
4435 struct btrfs_path *path;
4436 struct extent_buffer *leaf;
4437 int type;
4438 u32 size;
4439
4440 if (parent > 0)
4441 type = BTRFS_SHARED_DATA_REF_KEY;
4442 else
4443 type = BTRFS_EXTENT_DATA_REF_KEY;
4444
4445 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4446
4447 path = btrfs_alloc_path();
4448 if (!path)
4449 return -ENOMEM;
4450
4451 path->leave_spinning = 1;
4452 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4453 ins, size);
4454 if (ret) {
4455 btrfs_free_path(path);
4456 return ret;
4457 }
4458
4459 leaf = path->nodes[0];
4460 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4461 struct btrfs_extent_item);
4462 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4463 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4464 btrfs_set_extent_flags(leaf, extent_item,
4465 flags | BTRFS_EXTENT_FLAG_DATA);
4466
4467 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4468 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4469 if (parent > 0) {
4470 struct btrfs_shared_data_ref *ref;
4471 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4472 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4473 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4474 } else {
4475 struct btrfs_extent_data_ref *ref;
4476 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4477 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4478 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4479 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4480 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4481 }
4482
4483 btrfs_mark_buffer_dirty(path->nodes[0]);
4484 btrfs_free_path(path);
4485
4486 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4487 if (ret)
4488 return ret;
4489
4490 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4491 if (ret) {
4492 btrfs_err(fs_info, "update block group failed for %llu %llu",
4493 ins->objectid, ins->offset);
4494 BUG();
4495 }
4496 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4497 return ret;
4498}
4499
4500static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4501 struct btrfs_delayed_ref_node *node,
4502 struct btrfs_delayed_extent_op *extent_op)
4503{
4504 struct btrfs_fs_info *fs_info = trans->fs_info;
4505 int ret;
4506 struct btrfs_extent_item *extent_item;
4507 struct btrfs_key extent_key;
4508 struct btrfs_tree_block_info *block_info;
4509 struct btrfs_extent_inline_ref *iref;
4510 struct btrfs_path *path;
4511 struct extent_buffer *leaf;
4512 struct btrfs_delayed_tree_ref *ref;
4513 u32 size = sizeof(*extent_item) + sizeof(*iref);
4514 u64 num_bytes;
4515 u64 flags = extent_op->flags_to_set;
4516 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4517
4518 ref = btrfs_delayed_node_to_tree_ref(node);
4519
4520 extent_key.objectid = node->bytenr;
4521 if (skinny_metadata) {
4522 extent_key.offset = ref->level;
4523 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4524 num_bytes = fs_info->nodesize;
4525 } else {
4526 extent_key.offset = node->num_bytes;
4527 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4528 size += sizeof(*block_info);
4529 num_bytes = node->num_bytes;
4530 }
4531
4532 path = btrfs_alloc_path();
4533 if (!path)
4534 return -ENOMEM;
4535
4536 path->leave_spinning = 1;
4537 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4538 &extent_key, size);
4539 if (ret) {
4540 btrfs_free_path(path);
4541 return ret;
4542 }
4543
4544 leaf = path->nodes[0];
4545 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4546 struct btrfs_extent_item);
4547 btrfs_set_extent_refs(leaf, extent_item, 1);
4548 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4549 btrfs_set_extent_flags(leaf, extent_item,
4550 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4551
4552 if (skinny_metadata) {
4553 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4554 } else {
4555 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4556 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4557 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4558 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4559 }
4560
4561 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4562 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4563 btrfs_set_extent_inline_ref_type(leaf, iref,
4564 BTRFS_SHARED_BLOCK_REF_KEY);
4565 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4566 } else {
4567 btrfs_set_extent_inline_ref_type(leaf, iref,
4568 BTRFS_TREE_BLOCK_REF_KEY);
4569 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4570 }
4571
4572 btrfs_mark_buffer_dirty(leaf);
4573 btrfs_free_path(path);
4574
4575 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4576 num_bytes);
4577 if (ret)
4578 return ret;
4579
4580 ret = btrfs_update_block_group(trans, extent_key.objectid,
4581 fs_info->nodesize, 1);
4582 if (ret) {
4583 btrfs_err(fs_info, "update block group failed for %llu %llu",
4584 extent_key.objectid, extent_key.offset);
4585 BUG();
4586 }
4587
4588 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4589 fs_info->nodesize);
4590 return ret;
4591}
4592
4593int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4594 struct btrfs_root *root, u64 owner,
4595 u64 offset, u64 ram_bytes,
4596 struct btrfs_key *ins)
4597{
4598 struct btrfs_ref generic_ref = { 0 };
4599 int ret;
4600
4601 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4602
4603 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4604 ins->objectid, ins->offset, 0);
4605 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4606 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4607 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
4608 ram_bytes, NULL, NULL);
4609 return ret;
4610}
4611
4612
4613
4614
4615
4616
4617int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4618 u64 root_objectid, u64 owner, u64 offset,
4619 struct btrfs_key *ins)
4620{
4621 struct btrfs_fs_info *fs_info = trans->fs_info;
4622 int ret;
4623 struct btrfs_block_group *block_group;
4624 struct btrfs_space_info *space_info;
4625
4626
4627
4628
4629
4630 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4631 ret = __exclude_logged_extent(fs_info, ins->objectid,
4632 ins->offset);
4633 if (ret)
4634 return ret;
4635 }
4636
4637 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4638 if (!block_group)
4639 return -EINVAL;
4640
4641 space_info = block_group->space_info;
4642 spin_lock(&space_info->lock);
4643 spin_lock(&block_group->lock);
4644 space_info->bytes_reserved += ins->offset;
4645 block_group->reserved += ins->offset;
4646 spin_unlock(&block_group->lock);
4647 spin_unlock(&space_info->lock);
4648
4649 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4650 offset, ins, 1);
4651 if (ret)
4652 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4653 btrfs_put_block_group(block_group);
4654 return ret;
4655}
4656
4657static struct extent_buffer *
4658btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4659 u64 bytenr, int level, u64 owner,
4660 enum btrfs_lock_nesting nest)
4661{
4662 struct btrfs_fs_info *fs_info = root->fs_info;
4663 struct extent_buffer *buf;
4664
4665 buf = btrfs_find_create_tree_block(fs_info, bytenr);
4666 if (IS_ERR(buf))
4667 return buf;
4668
4669
4670
4671
4672
4673
4674 if (buf->lock_owner == current->pid) {
4675 btrfs_err_rl(fs_info,
4676"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4677 buf->start, btrfs_header_owner(buf), current->pid);
4678 free_extent_buffer(buf);
4679 return ERR_PTR(-EUCLEAN);
4680 }
4681
4682 btrfs_set_buffer_lockdep_class(owner, buf, level);
4683 __btrfs_tree_lock(buf, nest);
4684 btrfs_clean_tree_block(buf);
4685 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4686
4687 btrfs_set_lock_blocking_write(buf);
4688 set_extent_buffer_uptodate(buf);
4689
4690 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4691 btrfs_set_header_level(buf, level);
4692 btrfs_set_header_bytenr(buf, buf->start);
4693 btrfs_set_header_generation(buf, trans->transid);
4694 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4695 btrfs_set_header_owner(buf, owner);
4696 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4697 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4698 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4699 buf->log_index = root->log_transid % 2;
4700
4701
4702
4703
4704 if (buf->log_index == 0)
4705 set_extent_dirty(&root->dirty_log_pages, buf->start,
4706 buf->start + buf->len - 1, GFP_NOFS);
4707 else
4708 set_extent_new(&root->dirty_log_pages, buf->start,
4709 buf->start + buf->len - 1);
4710 } else {
4711 buf->log_index = -1;
4712 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4713 buf->start + buf->len - 1, GFP_NOFS);
4714 }
4715 trans->dirty = true;
4716
4717 return buf;
4718}
4719
4720
4721
4722
4723
4724struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4725 struct btrfs_root *root,
4726 u64 parent, u64 root_objectid,
4727 const struct btrfs_disk_key *key,
4728 int level, u64 hint,
4729 u64 empty_size,
4730 enum btrfs_lock_nesting nest)
4731{
4732 struct btrfs_fs_info *fs_info = root->fs_info;
4733 struct btrfs_key ins;
4734 struct btrfs_block_rsv *block_rsv;
4735 struct extent_buffer *buf;
4736 struct btrfs_delayed_extent_op *extent_op;
4737 struct btrfs_ref generic_ref = { 0 };
4738 u64 flags = 0;
4739 int ret;
4740 u32 blocksize = fs_info->nodesize;
4741 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4742
4743#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4744 if (btrfs_is_testing(fs_info)) {
4745 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4746 level, root_objectid, nest);
4747 if (!IS_ERR(buf))
4748 root->alloc_bytenr += blocksize;
4749 return buf;
4750 }
4751#endif
4752
4753 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4754 if (IS_ERR(block_rsv))
4755 return ERR_CAST(block_rsv);
4756
4757 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4758 empty_size, hint, &ins, 0, 0);
4759 if (ret)
4760 goto out_unuse;
4761
4762 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4763 root_objectid, nest);
4764 if (IS_ERR(buf)) {
4765 ret = PTR_ERR(buf);
4766 goto out_free_reserved;
4767 }
4768
4769 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4770 if (parent == 0)
4771 parent = ins.objectid;
4772 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4773 } else
4774 BUG_ON(parent > 0);
4775
4776 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4777 extent_op = btrfs_alloc_delayed_extent_op();
4778 if (!extent_op) {
4779 ret = -ENOMEM;
4780 goto out_free_buf;
4781 }
4782 if (key)
4783 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4784 else
4785 memset(&extent_op->key, 0, sizeof(extent_op->key));
4786 extent_op->flags_to_set = flags;
4787 extent_op->update_key = skinny_metadata ? false : true;
4788 extent_op->update_flags = true;
4789 extent_op->is_data = false;
4790 extent_op->level = level;
4791
4792 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4793 ins.objectid, ins.offset, parent);
4794 generic_ref.real_root = root->root_key.objectid;
4795 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4796 btrfs_ref_tree_mod(fs_info, &generic_ref);
4797 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
4798 extent_op, NULL, NULL);
4799 if (ret)
4800 goto out_free_delayed;
4801 }
4802 return buf;
4803
4804out_free_delayed:
4805 btrfs_free_delayed_extent_op(extent_op);
4806out_free_buf:
4807 free_extent_buffer(buf);
4808out_free_reserved:
4809 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4810out_unuse:
4811 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4812 return ERR_PTR(ret);
4813}
4814
4815struct walk_control {
4816 u64 refs[BTRFS_MAX_LEVEL];
4817 u64 flags[BTRFS_MAX_LEVEL];
4818 struct btrfs_key update_progress;
4819 struct btrfs_key drop_progress;
4820 int drop_level;
4821 int stage;
4822 int level;
4823 int shared_level;
4824 int update_ref;
4825 int keep_locks;
4826 int reada_slot;
4827 int reada_count;
4828 int restarted;
4829};
4830
4831#define DROP_REFERENCE 1
4832#define UPDATE_BACKREF 2
4833
4834static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4835 struct btrfs_root *root,
4836 struct walk_control *wc,
4837 struct btrfs_path *path)
4838{
4839 struct btrfs_fs_info *fs_info = root->fs_info;
4840 u64 bytenr;
4841 u64 generation;
4842 u64 refs;
4843 u64 flags;
4844 u32 nritems;
4845 struct btrfs_key key;
4846 struct extent_buffer *eb;
4847 int ret;
4848 int slot;
4849 int nread = 0;
4850
4851 if (path->slots[wc->level] < wc->reada_slot) {
4852 wc->reada_count = wc->reada_count * 2 / 3;
4853 wc->reada_count = max(wc->reada_count, 2);
4854 } else {
4855 wc->reada_count = wc->reada_count * 3 / 2;
4856 wc->reada_count = min_t(int, wc->reada_count,
4857 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4858 }
4859
4860 eb = path->nodes[wc->level];
4861 nritems = btrfs_header_nritems(eb);
4862
4863 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4864 if (nread >= wc->reada_count)
4865 break;
4866
4867 cond_resched();
4868 bytenr = btrfs_node_blockptr(eb, slot);
4869 generation = btrfs_node_ptr_generation(eb, slot);
4870
4871 if (slot == path->slots[wc->level])
4872 goto reada;
4873
4874 if (wc->stage == UPDATE_BACKREF &&
4875 generation <= root->root_key.offset)
4876 continue;
4877
4878
4879 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4880 wc->level - 1, 1, &refs,
4881 &flags);
4882
4883 if (ret < 0)
4884 continue;
4885 BUG_ON(refs == 0);
4886
4887 if (wc->stage == DROP_REFERENCE) {
4888 if (refs == 1)
4889 goto reada;
4890
4891 if (wc->level == 1 &&
4892 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4893 continue;
4894 if (!wc->update_ref ||
4895 generation <= root->root_key.offset)
4896 continue;
4897 btrfs_node_key_to_cpu(eb, &key, slot);
4898 ret = btrfs_comp_cpu_keys(&key,
4899 &wc->update_progress);
4900 if (ret < 0)
4901 continue;
4902 } else {
4903 if (wc->level == 1 &&
4904 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4905 continue;
4906 }
4907reada:
4908 readahead_tree_block(fs_info, bytenr);
4909 nread++;
4910 }
4911 wc->reada_slot = slot;
4912}
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4923 struct btrfs_root *root,
4924 struct btrfs_path *path,
4925 struct walk_control *wc, int lookup_info)
4926{
4927 struct btrfs_fs_info *fs_info = root->fs_info;
4928 int level = wc->level;
4929 struct extent_buffer *eb = path->nodes[level];
4930 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4931 int ret;
4932
4933 if (wc->stage == UPDATE_BACKREF &&
4934 btrfs_header_owner(eb) != root->root_key.objectid)
4935 return 1;
4936
4937
4938
4939
4940
4941 if (lookup_info &&
4942 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4943 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4944 BUG_ON(!path->locks[level]);
4945 ret = btrfs_lookup_extent_info(trans, fs_info,
4946 eb->start, level, 1,
4947 &wc->refs[level],
4948 &wc->flags[level]);
4949 BUG_ON(ret == -ENOMEM);
4950 if (ret)
4951 return ret;
4952 BUG_ON(wc->refs[level] == 0);
4953 }
4954
4955 if (wc->stage == DROP_REFERENCE) {
4956 if (wc->refs[level] > 1)
4957 return 1;
4958
4959 if (path->locks[level] && !wc->keep_locks) {
4960 btrfs_tree_unlock_rw(eb, path->locks[level]);
4961 path->locks[level] = 0;
4962 }
4963 return 0;
4964 }
4965
4966
4967 if (!(wc->flags[level] & flag)) {
4968 BUG_ON(!path->locks[level]);
4969 ret = btrfs_inc_ref(trans, root, eb, 1);
4970 BUG_ON(ret);
4971 ret = btrfs_dec_ref(trans, root, eb, 0);
4972 BUG_ON(ret);
4973 ret = btrfs_set_disk_extent_flags(trans, eb, flag,
4974 btrfs_header_level(eb), 0);
4975 BUG_ON(ret);
4976 wc->flags[level] |= flag;
4977 }
4978
4979
4980
4981
4982
4983 if (path->locks[level] && level > 0) {
4984 btrfs_tree_unlock_rw(eb, path->locks[level]);
4985 path->locks[level] = 0;
4986 }
4987 return 0;
4988}
4989
4990
4991
4992
4993
4994static int check_ref_exists(struct btrfs_trans_handle *trans,
4995 struct btrfs_root *root, u64 bytenr, u64 parent,
4996 int level)
4997{
4998 struct btrfs_path *path;
4999 struct btrfs_extent_inline_ref *iref;
5000 int ret;
5001
5002 path = btrfs_alloc_path();
5003 if (!path)
5004 return -ENOMEM;
5005
5006 ret = lookup_extent_backref(trans, path, &iref, bytenr,
5007 root->fs_info->nodesize, parent,
5008 root->root_key.objectid, level, 0);
5009 btrfs_free_path(path);
5010 if (ret == -ENOENT)
5011 return 0;
5012 if (ret < 0)
5013 return ret;
5014 return 1;
5015}
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5031 struct btrfs_root *root,
5032 struct btrfs_path *path,
5033 struct walk_control *wc, int *lookup_info)
5034{
5035 struct btrfs_fs_info *fs_info = root->fs_info;
5036 u64 bytenr;
5037 u64 generation;
5038 u64 parent;
5039 struct btrfs_key key;
5040 struct btrfs_key first_key;
5041 struct btrfs_ref ref = { 0 };
5042 struct extent_buffer *next;
5043 int level = wc->level;
5044 int reada = 0;
5045 int ret = 0;
5046 bool need_account = false;
5047
5048 generation = btrfs_node_ptr_generation(path->nodes[level],
5049 path->slots[level]);
5050
5051
5052
5053
5054
5055 if (wc->stage == UPDATE_BACKREF &&
5056 generation <= root->root_key.offset) {
5057 *lookup_info = 1;
5058 return 1;
5059 }
5060
5061 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5062 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
5063 path->slots[level]);
5064
5065 next = find_extent_buffer(fs_info, bytenr);
5066 if (!next) {
5067 next = btrfs_find_create_tree_block(fs_info, bytenr);
5068 if (IS_ERR(next))
5069 return PTR_ERR(next);
5070
5071 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
5072 level - 1);
5073 reada = 1;
5074 }
5075 btrfs_tree_lock(next);
5076 btrfs_set_lock_blocking_write(next);
5077
5078 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
5079 &wc->refs[level - 1],
5080 &wc->flags[level - 1]);
5081 if (ret < 0)
5082 goto out_unlock;
5083
5084 if (unlikely(wc->refs[level - 1] == 0)) {
5085 btrfs_err(fs_info, "Missing references.");
5086 ret = -EIO;
5087 goto out_unlock;
5088 }
5089 *lookup_info = 0;
5090
5091 if (wc->stage == DROP_REFERENCE) {
5092 if (wc->refs[level - 1] > 1) {
5093 need_account = true;
5094 if (level == 1 &&
5095 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5096 goto skip;
5097
5098 if (!wc->update_ref ||
5099 generation <= root->root_key.offset)
5100 goto skip;
5101
5102 btrfs_node_key_to_cpu(path->nodes[level], &key,
5103 path->slots[level]);
5104 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
5105 if (ret < 0)
5106 goto skip;
5107
5108 wc->stage = UPDATE_BACKREF;
5109 wc->shared_level = level - 1;
5110 }
5111 } else {
5112 if (level == 1 &&
5113 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5114 goto skip;
5115 }
5116
5117 if (!btrfs_buffer_uptodate(next, generation, 0)) {
5118 btrfs_tree_unlock(next);
5119 free_extent_buffer(next);
5120 next = NULL;
5121 *lookup_info = 1;
5122 }
5123
5124 if (!next) {
5125 if (reada && level == 1)
5126 reada_walk_down(trans, root, wc, path);
5127 next = read_tree_block(fs_info, bytenr, generation, level - 1,
5128 &first_key);
5129 if (IS_ERR(next)) {
5130 return PTR_ERR(next);
5131 } else if (!extent_buffer_uptodate(next)) {
5132 free_extent_buffer(next);
5133 return -EIO;
5134 }
5135 btrfs_tree_lock(next);
5136 btrfs_set_lock_blocking_write(next);
5137 }
5138
5139 level--;
5140 ASSERT(level == btrfs_header_level(next));
5141 if (level != btrfs_header_level(next)) {
5142 btrfs_err(root->fs_info, "mismatched level");
5143 ret = -EIO;
5144 goto out_unlock;
5145 }
5146 path->nodes[level] = next;
5147 path->slots[level] = 0;
5148 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5149 wc->level = level;
5150 if (wc->level == 1)
5151 wc->reada_slot = 0;
5152 return 0;
5153skip:
5154 wc->refs[level - 1] = 0;
5155 wc->flags[level - 1] = 0;
5156 if (wc->stage == DROP_REFERENCE) {
5157 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5158 parent = path->nodes[level]->start;
5159 } else {
5160 ASSERT(root->root_key.objectid ==
5161 btrfs_header_owner(path->nodes[level]));
5162 if (root->root_key.objectid !=
5163 btrfs_header_owner(path->nodes[level])) {
5164 btrfs_err(root->fs_info,
5165 "mismatched block owner");
5166 ret = -EIO;
5167 goto out_unlock;
5168 }
5169 parent = 0;
5170 }
5171
5172
5173
5174
5175
5176
5177
5178 if (wc->restarted) {
5179 ret = check_ref_exists(trans, root, bytenr, parent,
5180 level - 1);
5181 if (ret < 0)
5182 goto out_unlock;
5183 if (ret == 0)
5184 goto no_delete;
5185 ret = 0;
5186 wc->restarted = 0;
5187 }
5188
5189
5190
5191
5192
5193
5194 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
5195 need_account) {
5196 ret = btrfs_qgroup_trace_subtree(trans, next,
5197 generation, level - 1);
5198 if (ret) {
5199 btrfs_err_rl(fs_info,
5200 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
5201 ret);
5202 }
5203 }
5204
5205
5206
5207
5208
5209
5210
5211 wc->drop_level = level;
5212 find_next_key(path, level, &wc->drop_progress);
5213
5214 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5215 fs_info->nodesize, parent);
5216 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
5217 ret = btrfs_free_extent(trans, &ref);
5218 if (ret)
5219 goto out_unlock;
5220 }
5221no_delete:
5222 *lookup_info = 1;
5223 ret = 1;
5224
5225out_unlock:
5226 btrfs_tree_unlock(next);
5227 free_extent_buffer(next);
5228
5229 return ret;
5230}
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5245 struct btrfs_root *root,
5246 struct btrfs_path *path,
5247 struct walk_control *wc)
5248{
5249 struct btrfs_fs_info *fs_info = root->fs_info;
5250 int ret;
5251 int level = wc->level;
5252 struct extent_buffer *eb = path->nodes[level];
5253 u64 parent = 0;
5254
5255 if (wc->stage == UPDATE_BACKREF) {
5256 BUG_ON(wc->shared_level < level);
5257 if (level < wc->shared_level)
5258 goto out;
5259
5260 ret = find_next_key(path, level + 1, &wc->update_progress);
5261 if (ret > 0)
5262 wc->update_ref = 0;
5263
5264 wc->stage = DROP_REFERENCE;
5265 wc->shared_level = -1;
5266 path->slots[level] = 0;
5267
5268
5269
5270
5271
5272
5273 if (!path->locks[level]) {
5274 BUG_ON(level == 0);
5275 btrfs_tree_lock(eb);
5276 btrfs_set_lock_blocking_write(eb);
5277 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5278
5279 ret = btrfs_lookup_extent_info(trans, fs_info,
5280 eb->start, level, 1,
5281 &wc->refs[level],
5282 &wc->flags[level]);
5283 if (ret < 0) {
5284 btrfs_tree_unlock_rw(eb, path->locks[level]);
5285 path->locks[level] = 0;
5286 return ret;
5287 }
5288 BUG_ON(wc->refs[level] == 0);
5289 if (wc->refs[level] == 1) {
5290 btrfs_tree_unlock_rw(eb, path->locks[level]);
5291 path->locks[level] = 0;
5292 return 1;
5293 }
5294 }
5295 }
5296
5297
5298 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5299
5300 if (wc->refs[level] == 1) {
5301 if (level == 0) {
5302 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5303 ret = btrfs_dec_ref(trans, root, eb, 1);
5304 else
5305 ret = btrfs_dec_ref(trans, root, eb, 0);
5306 BUG_ON(ret);
5307 if (is_fstree(root->root_key.objectid)) {
5308 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5309 if (ret) {
5310 btrfs_err_rl(fs_info,
5311 "error %d accounting leaf items, quota is out of sync, rescan required",
5312 ret);
5313 }
5314 }
5315 }
5316
5317 if (!path->locks[level] &&
5318 btrfs_header_generation(eb) == trans->transid) {
5319 btrfs_tree_lock(eb);
5320 btrfs_set_lock_blocking_write(eb);
5321 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5322 }
5323 btrfs_clean_tree_block(eb);
5324 }
5325
5326 if (eb == root->node) {
5327 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5328 parent = eb->start;
5329 else if (root->root_key.objectid != btrfs_header_owner(eb))
5330 goto owner_mismatch;
5331 } else {
5332 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5333 parent = path->nodes[level + 1]->start;
5334 else if (root->root_key.objectid !=
5335 btrfs_header_owner(path->nodes[level + 1]))
5336 goto owner_mismatch;
5337 }
5338
5339 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5340out:
5341 wc->refs[level] = 0;
5342 wc->flags[level] = 0;
5343 return 0;
5344
5345owner_mismatch:
5346 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5347 btrfs_header_owner(eb), root->root_key.objectid);
5348 return -EUCLEAN;
5349}
5350
5351static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5352 struct btrfs_root *root,
5353 struct btrfs_path *path,
5354 struct walk_control *wc)
5355{
5356 int level = wc->level;
5357 int lookup_info = 1;
5358 int ret;
5359
5360 while (level >= 0) {
5361 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5362 if (ret > 0)
5363 break;
5364
5365 if (level == 0)
5366 break;
5367
5368 if (path->slots[level] >=
5369 btrfs_header_nritems(path->nodes[level]))
5370 break;
5371
5372 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5373 if (ret > 0) {
5374 path->slots[level]++;
5375 continue;
5376 } else if (ret < 0)
5377 return ret;
5378 level = wc->level;
5379 }
5380 return 0;
5381}
5382
5383static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5384 struct btrfs_root *root,
5385 struct btrfs_path *path,
5386 struct walk_control *wc, int max_level)
5387{
5388 int level = wc->level;
5389 int ret;
5390
5391 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5392 while (level < max_level && path->nodes[level]) {
5393 wc->level = level;
5394 if (path->slots[level] + 1 <
5395 btrfs_header_nritems(path->nodes[level])) {
5396 path->slots[level]++;
5397 return 0;
5398 } else {
5399 ret = walk_up_proc(trans, root, path, wc);
5400 if (ret > 0)
5401 return 0;
5402 if (ret < 0)
5403 return ret;
5404
5405 if (path->locks[level]) {
5406 btrfs_tree_unlock_rw(path->nodes[level],
5407 path->locks[level]);
5408 path->locks[level] = 0;
5409 }
5410 free_extent_buffer(path->nodes[level]);
5411 path->nodes[level] = NULL;
5412 level++;
5413 }
5414 }
5415 return 1;
5416}
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
5432{
5433 struct btrfs_fs_info *fs_info = root->fs_info;
5434 struct btrfs_path *path;
5435 struct btrfs_trans_handle *trans;
5436 struct btrfs_root *tree_root = fs_info->tree_root;
5437 struct btrfs_root_item *root_item = &root->root_item;
5438 struct walk_control *wc;
5439 struct btrfs_key key;
5440 int err = 0;
5441 int ret;
5442 int level;
5443 bool root_dropped = false;
5444
5445 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5446
5447 path = btrfs_alloc_path();
5448 if (!path) {
5449 err = -ENOMEM;
5450 goto out;
5451 }
5452
5453 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5454 if (!wc) {
5455 btrfs_free_path(path);
5456 err = -ENOMEM;
5457 goto out;
5458 }
5459
5460
5461
5462
5463
5464 if (for_reloc)
5465 trans = btrfs_join_transaction(tree_root);
5466 else
5467 trans = btrfs_start_transaction(tree_root, 0);
5468 if (IS_ERR(trans)) {
5469 err = PTR_ERR(trans);
5470 goto out_free;
5471 }
5472
5473 err = btrfs_run_delayed_items(trans);
5474 if (err)
5475 goto out_end_trans;
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485 set_bit(BTRFS_ROOT_DELETING, &root->state);
5486 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5487 level = btrfs_header_level(root->node);
5488 path->nodes[level] = btrfs_lock_root_node(root);
5489 btrfs_set_lock_blocking_write(path->nodes[level]);
5490 path->slots[level] = 0;
5491 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5492 memset(&wc->update_progress, 0,
5493 sizeof(wc->update_progress));
5494 } else {
5495 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5496 memcpy(&wc->update_progress, &key,
5497 sizeof(wc->update_progress));
5498
5499 level = root_item->drop_level;
5500 BUG_ON(level == 0);
5501 path->lowest_level = level;
5502 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5503 path->lowest_level = 0;
5504 if (ret < 0) {
5505 err = ret;
5506 goto out_end_trans;
5507 }
5508 WARN_ON(ret > 0);
5509
5510
5511
5512
5513
5514 btrfs_unlock_up_safe(path, 0);
5515
5516 level = btrfs_header_level(root->node);
5517 while (1) {
5518 btrfs_tree_lock(path->nodes[level]);
5519 btrfs_set_lock_blocking_write(path->nodes[level]);
5520 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5521
5522 ret = btrfs_lookup_extent_info(trans, fs_info,
5523 path->nodes[level]->start,
5524 level, 1, &wc->refs[level],
5525 &wc->flags[level]);
5526 if (ret < 0) {
5527 err = ret;
5528 goto out_end_trans;
5529 }
5530 BUG_ON(wc->refs[level] == 0);
5531
5532 if (level == root_item->drop_level)
5533 break;
5534
5535 btrfs_tree_unlock(path->nodes[level]);
5536 path->locks[level] = 0;
5537 WARN_ON(wc->refs[level] != 1);
5538 level--;
5539 }
5540 }
5541
5542 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5543 wc->level = level;
5544 wc->shared_level = -1;
5545 wc->stage = DROP_REFERENCE;
5546 wc->update_ref = update_ref;
5547 wc->keep_locks = 0;
5548 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5549
5550 while (1) {
5551
5552 ret = walk_down_tree(trans, root, path, wc);
5553 if (ret < 0) {
5554 err = ret;
5555 break;
5556 }
5557
5558 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5559 if (ret < 0) {
5560 err = ret;
5561 break;
5562 }
5563
5564 if (ret > 0) {
5565 BUG_ON(wc->stage != DROP_REFERENCE);
5566 break;
5567 }
5568
5569 if (wc->stage == DROP_REFERENCE) {
5570 wc->drop_level = wc->level;
5571 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5572 &wc->drop_progress,
5573 path->slots[wc->drop_level]);
5574 }
5575 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5576 &wc->drop_progress);
5577 root_item->drop_level = wc->drop_level;
5578
5579 BUG_ON(wc->level == 0);
5580 if (btrfs_should_end_transaction(trans) ||
5581 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5582 ret = btrfs_update_root(trans, tree_root,
5583 &root->root_key,
5584 root_item);
5585 if (ret) {
5586 btrfs_abort_transaction(trans, ret);
5587 err = ret;
5588 goto out_end_trans;
5589 }
5590
5591 btrfs_end_transaction_throttle(trans);
5592 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5593 btrfs_debug(fs_info,
5594 "drop snapshot early exit");
5595 err = -EAGAIN;
5596 goto out_free;
5597 }
5598
5599 trans = btrfs_start_transaction(tree_root, 0);
5600 if (IS_ERR(trans)) {
5601 err = PTR_ERR(trans);
5602 goto out_free;
5603 }
5604 }
5605 }
5606 btrfs_release_path(path);
5607 if (err)
5608 goto out_end_trans;
5609
5610 ret = btrfs_del_root(trans, &root->root_key);
5611 if (ret) {
5612 btrfs_abort_transaction(trans, ret);
5613 err = ret;
5614 goto out_end_trans;
5615 }
5616
5617 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5618 ret = btrfs_find_root(tree_root, &root->root_key, path,
5619 NULL, NULL);
5620 if (ret < 0) {
5621 btrfs_abort_transaction(trans, ret);
5622 err = ret;
5623 goto out_end_trans;
5624 } else if (ret > 0) {
5625
5626
5627
5628
5629
5630 btrfs_del_orphan_item(trans, tree_root,
5631 root->root_key.objectid);
5632 }
5633 }
5634
5635
5636
5637
5638
5639
5640 btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
5641 btrfs_qgroup_free_meta_all_pertrans(root);
5642
5643 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
5644 btrfs_add_dropped_root(trans, root);
5645 else
5646 btrfs_put_root(root);
5647 root_dropped = true;
5648out_end_trans:
5649 btrfs_end_transaction_throttle(trans);
5650out_free:
5651 kfree(wc);
5652 btrfs_free_path(path);
5653out:
5654
5655
5656
5657
5658
5659
5660
5661 if (!for_reloc && !root_dropped)
5662 btrfs_add_dead_root(root);
5663 return err;
5664}
5665
5666
5667
5668
5669
5670
5671
5672int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5673 struct btrfs_root *root,
5674 struct extent_buffer *node,
5675 struct extent_buffer *parent)
5676{
5677 struct btrfs_fs_info *fs_info = root->fs_info;
5678 struct btrfs_path *path;
5679 struct walk_control *wc;
5680 int level;
5681 int parent_level;
5682 int ret = 0;
5683 int wret;
5684
5685 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5686
5687 path = btrfs_alloc_path();
5688 if (!path)
5689 return -ENOMEM;
5690
5691 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5692 if (!wc) {
5693 btrfs_free_path(path);
5694 return -ENOMEM;
5695 }
5696
5697 btrfs_assert_tree_locked(parent);
5698 parent_level = btrfs_header_level(parent);
5699 atomic_inc(&parent->refs);
5700 path->nodes[parent_level] = parent;
5701 path->slots[parent_level] = btrfs_header_nritems(parent);
5702
5703 btrfs_assert_tree_locked(node);
5704 level = btrfs_header_level(node);
5705 path->nodes[level] = node;
5706 path->slots[level] = 0;
5707 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5708
5709 wc->refs[parent_level] = 1;
5710 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5711 wc->level = level;
5712 wc->shared_level = -1;
5713 wc->stage = DROP_REFERENCE;
5714 wc->update_ref = 0;
5715 wc->keep_locks = 1;
5716 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5717
5718 while (1) {
5719 wret = walk_down_tree(trans, root, path, wc);
5720 if (wret < 0) {
5721 ret = wret;
5722 break;
5723 }
5724
5725 wret = walk_up_tree(trans, root, path, wc, parent_level);
5726 if (wret < 0)
5727 ret = wret;
5728 if (wret != 0)
5729 break;
5730 }
5731
5732 kfree(wc);
5733 btrfs_free_path(path);
5734 return ret;
5735}
5736
5737
5738
5739
5740
5741u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5742{
5743 struct btrfs_block_group *block_group;
5744 u64 free_bytes = 0;
5745 int factor;
5746
5747
5748 if (list_empty(&sinfo->ro_bgs))
5749 return 0;
5750
5751 spin_lock(&sinfo->lock);
5752 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5753 spin_lock(&block_group->lock);
5754
5755 if (!block_group->ro) {
5756 spin_unlock(&block_group->lock);
5757 continue;
5758 }
5759
5760 factor = btrfs_bg_type_to_factor(block_group->flags);
5761 free_bytes += (block_group->length -
5762 block_group->used) * factor;
5763
5764 spin_unlock(&block_group->lock);
5765 }
5766 spin_unlock(&sinfo->lock);
5767
5768 return free_bytes;
5769}
5770
5771int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5772 u64 start, u64 end)
5773{
5774 return unpin_extent_range(fs_info, start, end, false);
5775}
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
5794
5795
5796
5797static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5798{
5799 u64 start = SZ_1M, len = 0, end = 0;
5800 int ret;
5801
5802 *trimmed = 0;
5803
5804
5805 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5806 return 0;
5807
5808
5809 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5810 return 0;
5811
5812
5813 if (device->total_bytes <= device->bytes_used)
5814 return 0;
5815
5816 ret = 0;
5817
5818 while (1) {
5819 struct btrfs_fs_info *fs_info = device->fs_info;
5820 u64 bytes;
5821
5822 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5823 if (ret)
5824 break;
5825
5826 find_first_clear_extent_bit(&device->alloc_state, start,
5827 &start, &end,
5828 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5829
5830
5831 if (start > device->total_bytes) {
5832 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
5833 btrfs_warn_in_rcu(fs_info,
5834"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
5835 start, end - start + 1,
5836 rcu_str_deref(device->name),
5837 device->total_bytes);
5838 mutex_unlock(&fs_info->chunk_mutex);
5839 ret = 0;
5840 break;
5841 }
5842
5843
5844 start = max_t(u64, start, SZ_1M);
5845
5846
5847
5848
5849
5850
5851 end = min(end, device->total_bytes - 1);
5852
5853 len = end - start + 1;
5854
5855
5856 if (!len) {
5857 mutex_unlock(&fs_info->chunk_mutex);
5858 ret = 0;
5859 break;
5860 }
5861
5862 ret = btrfs_issue_discard(device->bdev, start, len,
5863 &bytes);
5864 if (!ret)
5865 set_extent_bits(&device->alloc_state, start,
5866 start + bytes - 1,
5867 CHUNK_TRIMMED);
5868 mutex_unlock(&fs_info->chunk_mutex);
5869
5870 if (ret)
5871 break;
5872
5873 start += len;
5874 *trimmed += bytes;
5875
5876 if (fatal_signal_pending(current)) {
5877 ret = -ERESTARTSYS;
5878 break;
5879 }
5880
5881 cond_resched();
5882 }
5883
5884 return ret;
5885}
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5897{
5898 struct btrfs_block_group *cache = NULL;
5899 struct btrfs_device *device;
5900 struct list_head *devices;
5901 u64 group_trimmed;
5902 u64 range_end = U64_MAX;
5903 u64 start;
5904 u64 end;
5905 u64 trimmed = 0;
5906 u64 bg_failed = 0;
5907 u64 dev_failed = 0;
5908 int bg_ret = 0;
5909 int dev_ret = 0;
5910 int ret = 0;
5911
5912
5913
5914
5915
5916 if (range->len != U64_MAX &&
5917 check_add_overflow(range->start, range->len, &range_end))
5918 return -EINVAL;
5919
5920 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5921 for (; cache; cache = btrfs_next_block_group(cache)) {
5922 if (cache->start >= range_end) {
5923 btrfs_put_block_group(cache);
5924 break;
5925 }
5926
5927 start = max(range->start, cache->start);
5928 end = min(range_end, cache->start + cache->length);
5929
5930 if (end - start >= range->minlen) {
5931 if (!btrfs_block_group_done(cache)) {
5932 ret = btrfs_cache_block_group(cache, 0);
5933 if (ret) {
5934 bg_failed++;
5935 bg_ret = ret;
5936 continue;
5937 }
5938 ret = btrfs_wait_block_group_cache_done(cache);
5939 if (ret) {
5940 bg_failed++;
5941 bg_ret = ret;
5942 continue;
5943 }
5944 }
5945 ret = btrfs_trim_block_group(cache,
5946 &group_trimmed,
5947 start,
5948 end,
5949 range->minlen);
5950
5951 trimmed += group_trimmed;
5952 if (ret) {
5953 bg_failed++;
5954 bg_ret = ret;
5955 continue;
5956 }
5957 }
5958 }
5959
5960 if (bg_failed)
5961 btrfs_warn(fs_info,
5962 "failed to trim %llu block group(s), last error %d",
5963 bg_failed, bg_ret);
5964 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5965 devices = &fs_info->fs_devices->devices;
5966 list_for_each_entry(device, devices, dev_list) {
5967 ret = btrfs_trim_free_extents(device, &group_trimmed);
5968 if (ret) {
5969 dev_failed++;
5970 dev_ret = ret;
5971 break;
5972 }
5973
5974 trimmed += group_trimmed;
5975 }
5976 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5977
5978 if (dev_failed)
5979 btrfs_warn(fs_info,
5980 "failed to trim %llu device(s), last error %d",
5981 dev_failed, dev_ret);
5982 range->len = trimmed;
5983 if (bg_ret)
5984 return bg_ret;
5985 return dev_ret;
5986}
5987