1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "misc.h"
20#include "tree-log.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "locking.h"
26#include "free-space-cache.h"
27#include "free-space-tree.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
34#include "block-group.h"
35#include "discard.h"
36
37#undef SCRAMBLE_DELAYED_REFS
38
39
40static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
41 struct btrfs_delayed_ref_node *node, u64 parent,
42 u64 root_objectid, u64 owner_objectid,
43 u64 owner_offset, int refs_to_drop,
44 struct btrfs_delayed_extent_op *extra_op);
45static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
46 struct extent_buffer *leaf,
47 struct btrfs_extent_item *ei);
48static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
49 u64 parent, u64 root_objectid,
50 u64 flags, u64 owner, u64 offset,
51 struct btrfs_key *ins, int ref_mod);
52static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
53 struct btrfs_delayed_ref_node *node,
54 struct btrfs_delayed_extent_op *extent_op);
55static int find_next_key(struct btrfs_path *path, int level,
56 struct btrfs_key *key);
57
58static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
59{
60 return (cache->flags & bits) == bits;
61}
62
63int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
64 u64 start, u64 num_bytes)
65{
66 u64 end = start + num_bytes - 1;
67 set_extent_bits(&fs_info->excluded_extents, start, end,
68 EXTENT_UPTODATE);
69 return 0;
70}
71
72void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
73{
74 struct btrfs_fs_info *fs_info = cache->fs_info;
75 u64 start, end;
76
77 start = cache->start;
78 end = start + cache->length - 1;
79
80 clear_extent_bits(&fs_info->excluded_extents, start, end,
81 EXTENT_UPTODATE);
82}
83
84static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
85{
86 if (ref->type == BTRFS_REF_METADATA) {
87 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
88 return BTRFS_BLOCK_GROUP_SYSTEM;
89 else
90 return BTRFS_BLOCK_GROUP_METADATA;
91 }
92 return BTRFS_BLOCK_GROUP_DATA;
93}
94
95static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
96 struct btrfs_ref *ref)
97{
98 struct btrfs_space_info *space_info;
99 u64 flags = generic_ref_to_space_flags(ref);
100
101 space_info = btrfs_find_space_info(fs_info, flags);
102 ASSERT(space_info);
103 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
104 BTRFS_TOTAL_BYTES_PINNED_BATCH);
105}
106
107static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
108 struct btrfs_ref *ref)
109{
110 struct btrfs_space_info *space_info;
111 u64 flags = generic_ref_to_space_flags(ref);
112
113 space_info = btrfs_find_space_info(fs_info, flags);
114 ASSERT(space_info);
115 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
116 BTRFS_TOTAL_BYTES_PINNED_BATCH);
117}
118
119
120int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
121{
122 int ret;
123 struct btrfs_key key;
124 struct btrfs_path *path;
125
126 path = btrfs_alloc_path();
127 if (!path)
128 return -ENOMEM;
129
130 key.objectid = start;
131 key.offset = len;
132 key.type = BTRFS_EXTENT_ITEM_KEY;
133 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
134 btrfs_free_path(path);
135 return ret;
136}
137
138
139
140
141
142
143
144
145
146
147int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
148 struct btrfs_fs_info *fs_info, u64 bytenr,
149 u64 offset, int metadata, u64 *refs, u64 *flags)
150{
151 struct btrfs_delayed_ref_head *head;
152 struct btrfs_delayed_ref_root *delayed_refs;
153 struct btrfs_path *path;
154 struct btrfs_extent_item *ei;
155 struct extent_buffer *leaf;
156 struct btrfs_key key;
157 u32 item_size;
158 u64 num_refs;
159 u64 extent_flags;
160 int ret;
161
162
163
164
165
166 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
167 offset = fs_info->nodesize;
168 metadata = 0;
169 }
170
171 path = btrfs_alloc_path();
172 if (!path)
173 return -ENOMEM;
174
175 if (!trans) {
176 path->skip_locking = 1;
177 path->search_commit_root = 1;
178 }
179
180search_again:
181 key.objectid = bytenr;
182 key.offset = offset;
183 if (metadata)
184 key.type = BTRFS_METADATA_ITEM_KEY;
185 else
186 key.type = BTRFS_EXTENT_ITEM_KEY;
187
188 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
189 if (ret < 0)
190 goto out_free;
191
192 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
193 if (path->slots[0]) {
194 path->slots[0]--;
195 btrfs_item_key_to_cpu(path->nodes[0], &key,
196 path->slots[0]);
197 if (key.objectid == bytenr &&
198 key.type == BTRFS_EXTENT_ITEM_KEY &&
199 key.offset == fs_info->nodesize)
200 ret = 0;
201 }
202 }
203
204 if (ret == 0) {
205 leaf = path->nodes[0];
206 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
207 if (item_size >= sizeof(*ei)) {
208 ei = btrfs_item_ptr(leaf, path->slots[0],
209 struct btrfs_extent_item);
210 num_refs = btrfs_extent_refs(leaf, ei);
211 extent_flags = btrfs_extent_flags(leaf, ei);
212 } else {
213 ret = -EINVAL;
214 btrfs_print_v0_err(fs_info);
215 if (trans)
216 btrfs_abort_transaction(trans, ret);
217 else
218 btrfs_handle_fs_error(fs_info, ret, NULL);
219
220 goto out_free;
221 }
222
223 BUG_ON(num_refs == 0);
224 } else {
225 num_refs = 0;
226 extent_flags = 0;
227 ret = 0;
228 }
229
230 if (!trans)
231 goto out;
232
233 delayed_refs = &trans->transaction->delayed_refs;
234 spin_lock(&delayed_refs->lock);
235 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
236 if (head) {
237 if (!mutex_trylock(&head->mutex)) {
238 refcount_inc(&head->refs);
239 spin_unlock(&delayed_refs->lock);
240
241 btrfs_release_path(path);
242
243
244
245
246
247 mutex_lock(&head->mutex);
248 mutex_unlock(&head->mutex);
249 btrfs_put_delayed_ref_head(head);
250 goto search_again;
251 }
252 spin_lock(&head->lock);
253 if (head->extent_op && head->extent_op->update_flags)
254 extent_flags |= head->extent_op->flags_to_set;
255 else
256 BUG_ON(num_refs == 0);
257
258 num_refs += head->ref_mod;
259 spin_unlock(&head->lock);
260 mutex_unlock(&head->mutex);
261 }
262 spin_unlock(&delayed_refs->lock);
263out:
264 WARN_ON(num_refs == 0);
265 if (refs)
266 *refs = num_refs;
267 if (flags)
268 *flags = extent_flags;
269out_free:
270 btrfs_free_path(path);
271 return ret;
272}
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
386 struct btrfs_extent_inline_ref *iref,
387 enum btrfs_inline_ref_type is_data)
388{
389 int type = btrfs_extent_inline_ref_type(eb, iref);
390 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
391
392 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
393 type == BTRFS_SHARED_BLOCK_REF_KEY ||
394 type == BTRFS_SHARED_DATA_REF_KEY ||
395 type == BTRFS_EXTENT_DATA_REF_KEY) {
396 if (is_data == BTRFS_REF_TYPE_BLOCK) {
397 if (type == BTRFS_TREE_BLOCK_REF_KEY)
398 return type;
399 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
400 ASSERT(eb->fs_info);
401
402
403
404
405
406 if (offset &&
407 IS_ALIGNED(offset, eb->fs_info->nodesize))
408 return type;
409 }
410 } else if (is_data == BTRFS_REF_TYPE_DATA) {
411 if (type == BTRFS_EXTENT_DATA_REF_KEY)
412 return type;
413 if (type == BTRFS_SHARED_DATA_REF_KEY) {
414 ASSERT(eb->fs_info);
415
416
417
418
419
420 if (offset &&
421 IS_ALIGNED(offset, eb->fs_info->nodesize))
422 return type;
423 }
424 } else {
425 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
426 return type;
427 }
428 }
429
430 btrfs_print_leaf((struct extent_buffer *)eb);
431 btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
432 eb->start, type);
433 WARN_ON(1);
434
435 return BTRFS_REF_TYPE_INVALID;
436}
437
438u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
439{
440 u32 high_crc = ~(u32)0;
441 u32 low_crc = ~(u32)0;
442 __le64 lenum;
443
444 lenum = cpu_to_le64(root_objectid);
445 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
446 lenum = cpu_to_le64(owner);
447 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
448 lenum = cpu_to_le64(offset);
449 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
450
451 return ((u64)high_crc << 31) ^ (u64)low_crc;
452}
453
454static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
455 struct btrfs_extent_data_ref *ref)
456{
457 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
458 btrfs_extent_data_ref_objectid(leaf, ref),
459 btrfs_extent_data_ref_offset(leaf, ref));
460}
461
462static int match_extent_data_ref(struct extent_buffer *leaf,
463 struct btrfs_extent_data_ref *ref,
464 u64 root_objectid, u64 owner, u64 offset)
465{
466 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
467 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
468 btrfs_extent_data_ref_offset(leaf, ref) != offset)
469 return 0;
470 return 1;
471}
472
473static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
474 struct btrfs_path *path,
475 u64 bytenr, u64 parent,
476 u64 root_objectid,
477 u64 owner, u64 offset)
478{
479 struct btrfs_root *root = trans->fs_info->extent_root;
480 struct btrfs_key key;
481 struct btrfs_extent_data_ref *ref;
482 struct extent_buffer *leaf;
483 u32 nritems;
484 int ret;
485 int recow;
486 int err = -ENOENT;
487
488 key.objectid = bytenr;
489 if (parent) {
490 key.type = BTRFS_SHARED_DATA_REF_KEY;
491 key.offset = parent;
492 } else {
493 key.type = BTRFS_EXTENT_DATA_REF_KEY;
494 key.offset = hash_extent_data_ref(root_objectid,
495 owner, offset);
496 }
497again:
498 recow = 0;
499 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
500 if (ret < 0) {
501 err = ret;
502 goto fail;
503 }
504
505 if (parent) {
506 if (!ret)
507 return 0;
508 goto fail;
509 }
510
511 leaf = path->nodes[0];
512 nritems = btrfs_header_nritems(leaf);
513 while (1) {
514 if (path->slots[0] >= nritems) {
515 ret = btrfs_next_leaf(root, path);
516 if (ret < 0)
517 err = ret;
518 if (ret)
519 goto fail;
520
521 leaf = path->nodes[0];
522 nritems = btrfs_header_nritems(leaf);
523 recow = 1;
524 }
525
526 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
527 if (key.objectid != bytenr ||
528 key.type != BTRFS_EXTENT_DATA_REF_KEY)
529 goto fail;
530
531 ref = btrfs_item_ptr(leaf, path->slots[0],
532 struct btrfs_extent_data_ref);
533
534 if (match_extent_data_ref(leaf, ref, root_objectid,
535 owner, offset)) {
536 if (recow) {
537 btrfs_release_path(path);
538 goto again;
539 }
540 err = 0;
541 break;
542 }
543 path->slots[0]++;
544 }
545fail:
546 return err;
547}
548
549static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
550 struct btrfs_path *path,
551 u64 bytenr, u64 parent,
552 u64 root_objectid, u64 owner,
553 u64 offset, int refs_to_add)
554{
555 struct btrfs_root *root = trans->fs_info->extent_root;
556 struct btrfs_key key;
557 struct extent_buffer *leaf;
558 u32 size;
559 u32 num_refs;
560 int ret;
561
562 key.objectid = bytenr;
563 if (parent) {
564 key.type = BTRFS_SHARED_DATA_REF_KEY;
565 key.offset = parent;
566 size = sizeof(struct btrfs_shared_data_ref);
567 } else {
568 key.type = BTRFS_EXTENT_DATA_REF_KEY;
569 key.offset = hash_extent_data_ref(root_objectid,
570 owner, offset);
571 size = sizeof(struct btrfs_extent_data_ref);
572 }
573
574 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
575 if (ret && ret != -EEXIST)
576 goto fail;
577
578 leaf = path->nodes[0];
579 if (parent) {
580 struct btrfs_shared_data_ref *ref;
581 ref = btrfs_item_ptr(leaf, path->slots[0],
582 struct btrfs_shared_data_ref);
583 if (ret == 0) {
584 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
585 } else {
586 num_refs = btrfs_shared_data_ref_count(leaf, ref);
587 num_refs += refs_to_add;
588 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
589 }
590 } else {
591 struct btrfs_extent_data_ref *ref;
592 while (ret == -EEXIST) {
593 ref = btrfs_item_ptr(leaf, path->slots[0],
594 struct btrfs_extent_data_ref);
595 if (match_extent_data_ref(leaf, ref, root_objectid,
596 owner, offset))
597 break;
598 btrfs_release_path(path);
599 key.offset++;
600 ret = btrfs_insert_empty_item(trans, root, path, &key,
601 size);
602 if (ret && ret != -EEXIST)
603 goto fail;
604
605 leaf = path->nodes[0];
606 }
607 ref = btrfs_item_ptr(leaf, path->slots[0],
608 struct btrfs_extent_data_ref);
609 if (ret == 0) {
610 btrfs_set_extent_data_ref_root(leaf, ref,
611 root_objectid);
612 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
613 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
614 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
615 } else {
616 num_refs = btrfs_extent_data_ref_count(leaf, ref);
617 num_refs += refs_to_add;
618 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
619 }
620 }
621 btrfs_mark_buffer_dirty(leaf);
622 ret = 0;
623fail:
624 btrfs_release_path(path);
625 return ret;
626}
627
628static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
629 struct btrfs_path *path,
630 int refs_to_drop, int *last_ref)
631{
632 struct btrfs_key key;
633 struct btrfs_extent_data_ref *ref1 = NULL;
634 struct btrfs_shared_data_ref *ref2 = NULL;
635 struct extent_buffer *leaf;
636 u32 num_refs = 0;
637 int ret = 0;
638
639 leaf = path->nodes[0];
640 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
641
642 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
643 ref1 = btrfs_item_ptr(leaf, path->slots[0],
644 struct btrfs_extent_data_ref);
645 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
646 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
647 ref2 = btrfs_item_ptr(leaf, path->slots[0],
648 struct btrfs_shared_data_ref);
649 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
650 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
651 btrfs_print_v0_err(trans->fs_info);
652 btrfs_abort_transaction(trans, -EINVAL);
653 return -EINVAL;
654 } else {
655 BUG();
656 }
657
658 BUG_ON(num_refs < refs_to_drop);
659 num_refs -= refs_to_drop;
660
661 if (num_refs == 0) {
662 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
663 *last_ref = 1;
664 } else {
665 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
666 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
667 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
668 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
669 btrfs_mark_buffer_dirty(leaf);
670 }
671 return ret;
672}
673
674static noinline u32 extent_data_ref_count(struct btrfs_path *path,
675 struct btrfs_extent_inline_ref *iref)
676{
677 struct btrfs_key key;
678 struct extent_buffer *leaf;
679 struct btrfs_extent_data_ref *ref1;
680 struct btrfs_shared_data_ref *ref2;
681 u32 num_refs = 0;
682 int type;
683
684 leaf = path->nodes[0];
685 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
686
687 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
688 if (iref) {
689
690
691
692
693 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
694 ASSERT(type != BTRFS_REF_TYPE_INVALID);
695 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
696 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
697 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
698 } else {
699 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
700 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
701 }
702 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
703 ref1 = btrfs_item_ptr(leaf, path->slots[0],
704 struct btrfs_extent_data_ref);
705 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
706 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
707 ref2 = btrfs_item_ptr(leaf, path->slots[0],
708 struct btrfs_shared_data_ref);
709 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
710 } else {
711 WARN_ON(1);
712 }
713 return num_refs;
714}
715
716static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
717 struct btrfs_path *path,
718 u64 bytenr, u64 parent,
719 u64 root_objectid)
720{
721 struct btrfs_root *root = trans->fs_info->extent_root;
722 struct btrfs_key key;
723 int ret;
724
725 key.objectid = bytenr;
726 if (parent) {
727 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
728 key.offset = parent;
729 } else {
730 key.type = BTRFS_TREE_BLOCK_REF_KEY;
731 key.offset = root_objectid;
732 }
733
734 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
735 if (ret > 0)
736 ret = -ENOENT;
737 return ret;
738}
739
740static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
741 struct btrfs_path *path,
742 u64 bytenr, u64 parent,
743 u64 root_objectid)
744{
745 struct btrfs_key key;
746 int ret;
747
748 key.objectid = bytenr;
749 if (parent) {
750 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
751 key.offset = parent;
752 } else {
753 key.type = BTRFS_TREE_BLOCK_REF_KEY;
754 key.offset = root_objectid;
755 }
756
757 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
758 path, &key, 0);
759 btrfs_release_path(path);
760 return ret;
761}
762
763static inline int extent_ref_type(u64 parent, u64 owner)
764{
765 int type;
766 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
767 if (parent > 0)
768 type = BTRFS_SHARED_BLOCK_REF_KEY;
769 else
770 type = BTRFS_TREE_BLOCK_REF_KEY;
771 } else {
772 if (parent > 0)
773 type = BTRFS_SHARED_DATA_REF_KEY;
774 else
775 type = BTRFS_EXTENT_DATA_REF_KEY;
776 }
777 return type;
778}
779
780static int find_next_key(struct btrfs_path *path, int level,
781 struct btrfs_key *key)
782
783{
784 for (; level < BTRFS_MAX_LEVEL; level++) {
785 if (!path->nodes[level])
786 break;
787 if (path->slots[level] + 1 >=
788 btrfs_header_nritems(path->nodes[level]))
789 continue;
790 if (level == 0)
791 btrfs_item_key_to_cpu(path->nodes[level], key,
792 path->slots[level] + 1);
793 else
794 btrfs_node_key_to_cpu(path->nodes[level], key,
795 path->slots[level] + 1);
796 return 0;
797 }
798 return 1;
799}
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814static noinline_for_stack
815int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
816 struct btrfs_path *path,
817 struct btrfs_extent_inline_ref **ref_ret,
818 u64 bytenr, u64 num_bytes,
819 u64 parent, u64 root_objectid,
820 u64 owner, u64 offset, int insert)
821{
822 struct btrfs_fs_info *fs_info = trans->fs_info;
823 struct btrfs_root *root = fs_info->extent_root;
824 struct btrfs_key key;
825 struct extent_buffer *leaf;
826 struct btrfs_extent_item *ei;
827 struct btrfs_extent_inline_ref *iref;
828 u64 flags;
829 u64 item_size;
830 unsigned long ptr;
831 unsigned long end;
832 int extra_size;
833 int type;
834 int want;
835 int ret;
836 int err = 0;
837 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
838 int needed;
839
840 key.objectid = bytenr;
841 key.type = BTRFS_EXTENT_ITEM_KEY;
842 key.offset = num_bytes;
843
844 want = extent_ref_type(parent, owner);
845 if (insert) {
846 extra_size = btrfs_extent_inline_ref_size(want);
847 path->keep_locks = 1;
848 } else
849 extra_size = -1;
850
851
852
853
854
855 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
856 key.type = BTRFS_METADATA_ITEM_KEY;
857 key.offset = owner;
858 }
859
860again:
861 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
862 if (ret < 0) {
863 err = ret;
864 goto out;
865 }
866
867
868
869
870
871 if (ret > 0 && skinny_metadata) {
872 skinny_metadata = false;
873 if (path->slots[0]) {
874 path->slots[0]--;
875 btrfs_item_key_to_cpu(path->nodes[0], &key,
876 path->slots[0]);
877 if (key.objectid == bytenr &&
878 key.type == BTRFS_EXTENT_ITEM_KEY &&
879 key.offset == num_bytes)
880 ret = 0;
881 }
882 if (ret) {
883 key.objectid = bytenr;
884 key.type = BTRFS_EXTENT_ITEM_KEY;
885 key.offset = num_bytes;
886 btrfs_release_path(path);
887 goto again;
888 }
889 }
890
891 if (ret && !insert) {
892 err = -ENOENT;
893 goto out;
894 } else if (WARN_ON(ret)) {
895 err = -EIO;
896 goto out;
897 }
898
899 leaf = path->nodes[0];
900 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
901 if (unlikely(item_size < sizeof(*ei))) {
902 err = -EINVAL;
903 btrfs_print_v0_err(fs_info);
904 btrfs_abort_transaction(trans, err);
905 goto out;
906 }
907
908 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
909 flags = btrfs_extent_flags(leaf, ei);
910
911 ptr = (unsigned long)(ei + 1);
912 end = (unsigned long)ei + item_size;
913
914 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
915 ptr += sizeof(struct btrfs_tree_block_info);
916 BUG_ON(ptr > end);
917 }
918
919 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
920 needed = BTRFS_REF_TYPE_DATA;
921 else
922 needed = BTRFS_REF_TYPE_BLOCK;
923
924 err = -ENOENT;
925 while (1) {
926 if (ptr >= end) {
927 WARN_ON(ptr > end);
928 break;
929 }
930 iref = (struct btrfs_extent_inline_ref *)ptr;
931 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
932 if (type == BTRFS_REF_TYPE_INVALID) {
933 err = -EUCLEAN;
934 goto out;
935 }
936
937 if (want < type)
938 break;
939 if (want > type) {
940 ptr += btrfs_extent_inline_ref_size(type);
941 continue;
942 }
943
944 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
945 struct btrfs_extent_data_ref *dref;
946 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
947 if (match_extent_data_ref(leaf, dref, root_objectid,
948 owner, offset)) {
949 err = 0;
950 break;
951 }
952 if (hash_extent_data_ref_item(leaf, dref) <
953 hash_extent_data_ref(root_objectid, owner, offset))
954 break;
955 } else {
956 u64 ref_offset;
957 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
958 if (parent > 0) {
959 if (parent == ref_offset) {
960 err = 0;
961 break;
962 }
963 if (ref_offset < parent)
964 break;
965 } else {
966 if (root_objectid == ref_offset) {
967 err = 0;
968 break;
969 }
970 if (ref_offset < root_objectid)
971 break;
972 }
973 }
974 ptr += btrfs_extent_inline_ref_size(type);
975 }
976 if (err == -ENOENT && insert) {
977 if (item_size + extra_size >=
978 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
979 err = -EAGAIN;
980 goto out;
981 }
982
983
984
985
986
987
988 if (find_next_key(path, 0, &key) == 0 &&
989 key.objectid == bytenr &&
990 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
991 err = -EAGAIN;
992 goto out;
993 }
994 }
995 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
996out:
997 if (insert) {
998 path->keep_locks = 0;
999 btrfs_unlock_up_safe(path, 1);
1000 }
1001 return err;
1002}
1003
1004
1005
1006
1007static noinline_for_stack
1008void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1009 struct btrfs_path *path,
1010 struct btrfs_extent_inline_ref *iref,
1011 u64 parent, u64 root_objectid,
1012 u64 owner, u64 offset, int refs_to_add,
1013 struct btrfs_delayed_extent_op *extent_op)
1014{
1015 struct extent_buffer *leaf;
1016 struct btrfs_extent_item *ei;
1017 unsigned long ptr;
1018 unsigned long end;
1019 unsigned long item_offset;
1020 u64 refs;
1021 int size;
1022 int type;
1023
1024 leaf = path->nodes[0];
1025 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1026 item_offset = (unsigned long)iref - (unsigned long)ei;
1027
1028 type = extent_ref_type(parent, owner);
1029 size = btrfs_extent_inline_ref_size(type);
1030
1031 btrfs_extend_item(path, size);
1032
1033 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1034 refs = btrfs_extent_refs(leaf, ei);
1035 refs += refs_to_add;
1036 btrfs_set_extent_refs(leaf, ei, refs);
1037 if (extent_op)
1038 __run_delayed_extent_op(extent_op, leaf, ei);
1039
1040 ptr = (unsigned long)ei + item_offset;
1041 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1042 if (ptr < end - size)
1043 memmove_extent_buffer(leaf, ptr + size, ptr,
1044 end - size - ptr);
1045
1046 iref = (struct btrfs_extent_inline_ref *)ptr;
1047 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1048 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1049 struct btrfs_extent_data_ref *dref;
1050 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1051 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1052 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1053 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1054 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1055 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1056 struct btrfs_shared_data_ref *sref;
1057 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1058 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1059 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1060 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1061 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1062 } else {
1063 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1064 }
1065 btrfs_mark_buffer_dirty(leaf);
1066}
1067
1068static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1069 struct btrfs_path *path,
1070 struct btrfs_extent_inline_ref **ref_ret,
1071 u64 bytenr, u64 num_bytes, u64 parent,
1072 u64 root_objectid, u64 owner, u64 offset)
1073{
1074 int ret;
1075
1076 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1077 num_bytes, parent, root_objectid,
1078 owner, offset, 0);
1079 if (ret != -ENOENT)
1080 return ret;
1081
1082 btrfs_release_path(path);
1083 *ref_ret = NULL;
1084
1085 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1086 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1087 root_objectid);
1088 } else {
1089 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1090 root_objectid, owner, offset);
1091 }
1092 return ret;
1093}
1094
1095
1096
1097
1098static noinline_for_stack
1099void update_inline_extent_backref(struct btrfs_path *path,
1100 struct btrfs_extent_inline_ref *iref,
1101 int refs_to_mod,
1102 struct btrfs_delayed_extent_op *extent_op,
1103 int *last_ref)
1104{
1105 struct extent_buffer *leaf = path->nodes[0];
1106 struct btrfs_extent_item *ei;
1107 struct btrfs_extent_data_ref *dref = NULL;
1108 struct btrfs_shared_data_ref *sref = NULL;
1109 unsigned long ptr;
1110 unsigned long end;
1111 u32 item_size;
1112 int size;
1113 int type;
1114 u64 refs;
1115
1116 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1117 refs = btrfs_extent_refs(leaf, ei);
1118 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1119 refs += refs_to_mod;
1120 btrfs_set_extent_refs(leaf, ei, refs);
1121 if (extent_op)
1122 __run_delayed_extent_op(extent_op, leaf, ei);
1123
1124
1125
1126
1127
1128 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1129 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1130
1131 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1132 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1133 refs = btrfs_extent_data_ref_count(leaf, dref);
1134 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1135 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1136 refs = btrfs_shared_data_ref_count(leaf, sref);
1137 } else {
1138 refs = 1;
1139 BUG_ON(refs_to_mod != -1);
1140 }
1141
1142 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1143 refs += refs_to_mod;
1144
1145 if (refs > 0) {
1146 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1147 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1148 else
1149 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1150 } else {
1151 *last_ref = 1;
1152 size = btrfs_extent_inline_ref_size(type);
1153 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1154 ptr = (unsigned long)iref;
1155 end = (unsigned long)ei + item_size;
1156 if (ptr + size < end)
1157 memmove_extent_buffer(leaf, ptr, ptr + size,
1158 end - ptr - size);
1159 item_size -= size;
1160 btrfs_truncate_item(path, item_size, 1);
1161 }
1162 btrfs_mark_buffer_dirty(leaf);
1163}
1164
1165static noinline_for_stack
1166int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1167 struct btrfs_path *path,
1168 u64 bytenr, u64 num_bytes, u64 parent,
1169 u64 root_objectid, u64 owner,
1170 u64 offset, int refs_to_add,
1171 struct btrfs_delayed_extent_op *extent_op)
1172{
1173 struct btrfs_extent_inline_ref *iref;
1174 int ret;
1175
1176 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1177 num_bytes, parent, root_objectid,
1178 owner, offset, 1);
1179 if (ret == 0) {
1180 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1181 update_inline_extent_backref(path, iref, refs_to_add,
1182 extent_op, NULL);
1183 } else if (ret == -ENOENT) {
1184 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1185 root_objectid, owner, offset,
1186 refs_to_add, extent_op);
1187 ret = 0;
1188 }
1189 return ret;
1190}
1191
1192static int remove_extent_backref(struct btrfs_trans_handle *trans,
1193 struct btrfs_path *path,
1194 struct btrfs_extent_inline_ref *iref,
1195 int refs_to_drop, int is_data, int *last_ref)
1196{
1197 int ret = 0;
1198
1199 BUG_ON(!is_data && refs_to_drop != 1);
1200 if (iref) {
1201 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1202 last_ref);
1203 } else if (is_data) {
1204 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1205 last_ref);
1206 } else {
1207 *last_ref = 1;
1208 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1209 }
1210 return ret;
1211}
1212
1213static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1214 u64 *discarded_bytes)
1215{
1216 int j, ret = 0;
1217 u64 bytes_left, end;
1218 u64 aligned_start = ALIGN(start, 1 << 9);
1219
1220 if (WARN_ON(start != aligned_start)) {
1221 len -= aligned_start - start;
1222 len = round_down(len, 1 << 9);
1223 start = aligned_start;
1224 }
1225
1226 *discarded_bytes = 0;
1227
1228 if (!len)
1229 return 0;
1230
1231 end = start + len;
1232 bytes_left = len;
1233
1234
1235 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1236 u64 sb_start = btrfs_sb_offset(j);
1237 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1238 u64 size = sb_start - start;
1239
1240 if (!in_range(sb_start, start, bytes_left) &&
1241 !in_range(sb_end, start, bytes_left) &&
1242 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1243 continue;
1244
1245
1246
1247
1248
1249 if (sb_start <= start) {
1250 start += sb_end - start;
1251 if (start > end) {
1252 bytes_left = 0;
1253 break;
1254 }
1255 bytes_left = end - start;
1256 continue;
1257 }
1258
1259 if (size) {
1260 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1261 GFP_NOFS, 0);
1262 if (!ret)
1263 *discarded_bytes += size;
1264 else if (ret != -EOPNOTSUPP)
1265 return ret;
1266 }
1267
1268 start = sb_end;
1269 if (start > end) {
1270 bytes_left = 0;
1271 break;
1272 }
1273 bytes_left = end - start;
1274 }
1275
1276 if (bytes_left) {
1277 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1278 GFP_NOFS, 0);
1279 if (!ret)
1280 *discarded_bytes += bytes_left;
1281 }
1282 return ret;
1283}
1284
1285int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1286 u64 num_bytes, u64 *actual_bytes)
1287{
1288 int ret = 0;
1289 u64 discarded_bytes = 0;
1290 u64 end = bytenr + num_bytes;
1291 u64 cur = bytenr;
1292 struct btrfs_bio *bbio = NULL;
1293
1294
1295
1296
1297
1298
1299 btrfs_bio_counter_inc_blocked(fs_info);
1300 while (cur < end) {
1301 struct btrfs_bio_stripe *stripe;
1302 int i;
1303
1304 num_bytes = end - cur;
1305
1306 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
1307 &num_bytes, &bbio, 0);
1308
1309
1310
1311
1312
1313 if (ret < 0)
1314 goto out;
1315
1316 stripe = bbio->stripes;
1317 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
1318 u64 bytes;
1319 struct request_queue *req_q;
1320
1321 if (!stripe->dev->bdev) {
1322 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
1323 continue;
1324 }
1325 req_q = bdev_get_queue(stripe->dev->bdev);
1326 if (!blk_queue_discard(req_q))
1327 continue;
1328
1329 ret = btrfs_issue_discard(stripe->dev->bdev,
1330 stripe->physical,
1331 stripe->length,
1332 &bytes);
1333 if (!ret) {
1334 discarded_bytes += bytes;
1335 } else if (ret != -EOPNOTSUPP) {
1336
1337
1338
1339
1340
1341
1342
1343 btrfs_put_bbio(bbio);
1344 goto out;
1345 }
1346
1347
1348
1349
1350
1351
1352 ret = 0;
1353 }
1354 btrfs_put_bbio(bbio);
1355 cur += num_bytes;
1356 }
1357out:
1358 btrfs_bio_counter_dec(fs_info);
1359
1360 if (actual_bytes)
1361 *actual_bytes = discarded_bytes;
1362
1363
1364 if (ret == -EOPNOTSUPP)
1365 ret = 0;
1366 return ret;
1367}
1368
1369
1370int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1371 struct btrfs_ref *generic_ref)
1372{
1373 struct btrfs_fs_info *fs_info = trans->fs_info;
1374 int old_ref_mod, new_ref_mod;
1375 int ret;
1376
1377 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
1378 generic_ref->action);
1379 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1380 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
1381
1382 if (generic_ref->type == BTRFS_REF_METADATA)
1383 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
1384 NULL, &old_ref_mod, &new_ref_mod);
1385 else
1386 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
1387 &old_ref_mod, &new_ref_mod);
1388
1389 btrfs_ref_tree_mod(fs_info, generic_ref);
1390
1391 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
1392 sub_pinned_bytes(fs_info, generic_ref);
1393
1394 return ret;
1395}
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1432 struct btrfs_delayed_ref_node *node,
1433 u64 parent, u64 root_objectid,
1434 u64 owner, u64 offset, int refs_to_add,
1435 struct btrfs_delayed_extent_op *extent_op)
1436{
1437 struct btrfs_path *path;
1438 struct extent_buffer *leaf;
1439 struct btrfs_extent_item *item;
1440 struct btrfs_key key;
1441 u64 bytenr = node->bytenr;
1442 u64 num_bytes = node->num_bytes;
1443 u64 refs;
1444 int ret;
1445
1446 path = btrfs_alloc_path();
1447 if (!path)
1448 return -ENOMEM;
1449
1450 path->leave_spinning = 1;
1451
1452 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1453 parent, root_objectid, owner,
1454 offset, refs_to_add, extent_op);
1455 if ((ret < 0 && ret != -EAGAIN) || !ret)
1456 goto out;
1457
1458
1459
1460
1461
1462
1463 leaf = path->nodes[0];
1464 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1465 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1466 refs = btrfs_extent_refs(leaf, item);
1467 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1468 if (extent_op)
1469 __run_delayed_extent_op(extent_op, leaf, item);
1470
1471 btrfs_mark_buffer_dirty(leaf);
1472 btrfs_release_path(path);
1473
1474 path->leave_spinning = 1;
1475
1476 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1477 BUG_ON(refs_to_add != 1);
1478 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1479 root_objectid);
1480 } else {
1481 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1482 root_objectid, owner, offset,
1483 refs_to_add);
1484 }
1485 if (ret)
1486 btrfs_abort_transaction(trans, ret);
1487out:
1488 btrfs_free_path(path);
1489 return ret;
1490}
1491
1492static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1493 struct btrfs_delayed_ref_node *node,
1494 struct btrfs_delayed_extent_op *extent_op,
1495 int insert_reserved)
1496{
1497 int ret = 0;
1498 struct btrfs_delayed_data_ref *ref;
1499 struct btrfs_key ins;
1500 u64 parent = 0;
1501 u64 ref_root = 0;
1502 u64 flags = 0;
1503
1504 ins.objectid = node->bytenr;
1505 ins.offset = node->num_bytes;
1506 ins.type = BTRFS_EXTENT_ITEM_KEY;
1507
1508 ref = btrfs_delayed_node_to_data_ref(node);
1509 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
1510
1511 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1512 parent = ref->parent;
1513 ref_root = ref->root;
1514
1515 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1516 if (extent_op)
1517 flags |= extent_op->flags_to_set;
1518 ret = alloc_reserved_file_extent(trans, parent, ref_root,
1519 flags, ref->objectid,
1520 ref->offset, &ins,
1521 node->ref_mod);
1522 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1523 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1524 ref->objectid, ref->offset,
1525 node->ref_mod, extent_op);
1526 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1527 ret = __btrfs_free_extent(trans, node, parent,
1528 ref_root, ref->objectid,
1529 ref->offset, node->ref_mod,
1530 extent_op);
1531 } else {
1532 BUG();
1533 }
1534 return ret;
1535}
1536
1537static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1538 struct extent_buffer *leaf,
1539 struct btrfs_extent_item *ei)
1540{
1541 u64 flags = btrfs_extent_flags(leaf, ei);
1542 if (extent_op->update_flags) {
1543 flags |= extent_op->flags_to_set;
1544 btrfs_set_extent_flags(leaf, ei, flags);
1545 }
1546
1547 if (extent_op->update_key) {
1548 struct btrfs_tree_block_info *bi;
1549 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1550 bi = (struct btrfs_tree_block_info *)(ei + 1);
1551 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1552 }
1553}
1554
1555static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1556 struct btrfs_delayed_ref_head *head,
1557 struct btrfs_delayed_extent_op *extent_op)
1558{
1559 struct btrfs_fs_info *fs_info = trans->fs_info;
1560 struct btrfs_key key;
1561 struct btrfs_path *path;
1562 struct btrfs_extent_item *ei;
1563 struct extent_buffer *leaf;
1564 u32 item_size;
1565 int ret;
1566 int err = 0;
1567 int metadata = !extent_op->is_data;
1568
1569 if (TRANS_ABORTED(trans))
1570 return 0;
1571
1572 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1573 metadata = 0;
1574
1575 path = btrfs_alloc_path();
1576 if (!path)
1577 return -ENOMEM;
1578
1579 key.objectid = head->bytenr;
1580
1581 if (metadata) {
1582 key.type = BTRFS_METADATA_ITEM_KEY;
1583 key.offset = extent_op->level;
1584 } else {
1585 key.type = BTRFS_EXTENT_ITEM_KEY;
1586 key.offset = head->num_bytes;
1587 }
1588
1589again:
1590 path->leave_spinning = 1;
1591 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
1592 if (ret < 0) {
1593 err = ret;
1594 goto out;
1595 }
1596 if (ret > 0) {
1597 if (metadata) {
1598 if (path->slots[0] > 0) {
1599 path->slots[0]--;
1600 btrfs_item_key_to_cpu(path->nodes[0], &key,
1601 path->slots[0]);
1602 if (key.objectid == head->bytenr &&
1603 key.type == BTRFS_EXTENT_ITEM_KEY &&
1604 key.offset == head->num_bytes)
1605 ret = 0;
1606 }
1607 if (ret > 0) {
1608 btrfs_release_path(path);
1609 metadata = 0;
1610
1611 key.objectid = head->bytenr;
1612 key.offset = head->num_bytes;
1613 key.type = BTRFS_EXTENT_ITEM_KEY;
1614 goto again;
1615 }
1616 } else {
1617 err = -EIO;
1618 goto out;
1619 }
1620 }
1621
1622 leaf = path->nodes[0];
1623 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1624
1625 if (unlikely(item_size < sizeof(*ei))) {
1626 err = -EINVAL;
1627 btrfs_print_v0_err(fs_info);
1628 btrfs_abort_transaction(trans, err);
1629 goto out;
1630 }
1631
1632 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1633 __run_delayed_extent_op(extent_op, leaf, ei);
1634
1635 btrfs_mark_buffer_dirty(leaf);
1636out:
1637 btrfs_free_path(path);
1638 return err;
1639}
1640
1641static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1642 struct btrfs_delayed_ref_node *node,
1643 struct btrfs_delayed_extent_op *extent_op,
1644 int insert_reserved)
1645{
1646 int ret = 0;
1647 struct btrfs_delayed_tree_ref *ref;
1648 u64 parent = 0;
1649 u64 ref_root = 0;
1650
1651 ref = btrfs_delayed_node_to_tree_ref(node);
1652 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
1653
1654 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1655 parent = ref->parent;
1656 ref_root = ref->root;
1657
1658 if (node->ref_mod != 1) {
1659 btrfs_err(trans->fs_info,
1660 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
1661 node->bytenr, node->ref_mod, node->action, ref_root,
1662 parent);
1663 return -EIO;
1664 }
1665 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1666 BUG_ON(!extent_op || !extent_op->update_flags);
1667 ret = alloc_reserved_tree_block(trans, node, extent_op);
1668 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1669 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
1670 ref->level, 0, 1, extent_op);
1671 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1672 ret = __btrfs_free_extent(trans, node, parent, ref_root,
1673 ref->level, 0, 1, extent_op);
1674 } else {
1675 BUG();
1676 }
1677 return ret;
1678}
1679
1680
1681static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1682 struct btrfs_delayed_ref_node *node,
1683 struct btrfs_delayed_extent_op *extent_op,
1684 int insert_reserved)
1685{
1686 int ret = 0;
1687
1688 if (TRANS_ABORTED(trans)) {
1689 if (insert_reserved)
1690 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1691 return 0;
1692 }
1693
1694 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1695 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1696 ret = run_delayed_tree_ref(trans, node, extent_op,
1697 insert_reserved);
1698 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1699 node->type == BTRFS_SHARED_DATA_REF_KEY)
1700 ret = run_delayed_data_ref(trans, node, extent_op,
1701 insert_reserved);
1702 else
1703 BUG();
1704 if (ret && insert_reserved)
1705 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
1706 return ret;
1707}
1708
1709static inline struct btrfs_delayed_ref_node *
1710select_delayed_ref(struct btrfs_delayed_ref_head *head)
1711{
1712 struct btrfs_delayed_ref_node *ref;
1713
1714 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1715 return NULL;
1716
1717
1718
1719
1720
1721
1722
1723 if (!list_empty(&head->ref_add_list))
1724 return list_first_entry(&head->ref_add_list,
1725 struct btrfs_delayed_ref_node, add_list);
1726
1727 ref = rb_entry(rb_first_cached(&head->ref_tree),
1728 struct btrfs_delayed_ref_node, ref_node);
1729 ASSERT(list_empty(&ref->add_list));
1730 return ref;
1731}
1732
1733static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
1734 struct btrfs_delayed_ref_head *head)
1735{
1736 spin_lock(&delayed_refs->lock);
1737 head->processing = 0;
1738 delayed_refs->num_heads_ready++;
1739 spin_unlock(&delayed_refs->lock);
1740 btrfs_delayed_ref_unlock(head);
1741}
1742
1743static struct btrfs_delayed_extent_op *cleanup_extent_op(
1744 struct btrfs_delayed_ref_head *head)
1745{
1746 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1747
1748 if (!extent_op)
1749 return NULL;
1750
1751 if (head->must_insert_reserved) {
1752 head->extent_op = NULL;
1753 btrfs_free_delayed_extent_op(extent_op);
1754 return NULL;
1755 }
1756 return extent_op;
1757}
1758
1759static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1760 struct btrfs_delayed_ref_head *head)
1761{
1762 struct btrfs_delayed_extent_op *extent_op;
1763 int ret;
1764
1765 extent_op = cleanup_extent_op(head);
1766 if (!extent_op)
1767 return 0;
1768 head->extent_op = NULL;
1769 spin_unlock(&head->lock);
1770 ret = run_delayed_extent_op(trans, head, extent_op);
1771 btrfs_free_delayed_extent_op(extent_op);
1772 return ret ? ret : 1;
1773}
1774
1775void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
1776 struct btrfs_delayed_ref_root *delayed_refs,
1777 struct btrfs_delayed_ref_head *head)
1778{
1779 int nr_items = 1;
1780
1781 if (head->total_ref_mod < 0) {
1782 struct btrfs_space_info *space_info;
1783 u64 flags;
1784
1785 if (head->is_data)
1786 flags = BTRFS_BLOCK_GROUP_DATA;
1787 else if (head->is_system)
1788 flags = BTRFS_BLOCK_GROUP_SYSTEM;
1789 else
1790 flags = BTRFS_BLOCK_GROUP_METADATA;
1791 space_info = btrfs_find_space_info(fs_info, flags);
1792 ASSERT(space_info);
1793 percpu_counter_add_batch(&space_info->total_bytes_pinned,
1794 -head->num_bytes,
1795 BTRFS_TOTAL_BYTES_PINNED_BATCH);
1796
1797
1798
1799
1800
1801
1802 if (head->is_data) {
1803 spin_lock(&delayed_refs->lock);
1804 delayed_refs->pending_csums -= head->num_bytes;
1805 spin_unlock(&delayed_refs->lock);
1806 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
1807 head->num_bytes);
1808 }
1809 }
1810
1811 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
1812}
1813
1814static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1815 struct btrfs_delayed_ref_head *head)
1816{
1817
1818 struct btrfs_fs_info *fs_info = trans->fs_info;
1819 struct btrfs_delayed_ref_root *delayed_refs;
1820 int ret;
1821
1822 delayed_refs = &trans->transaction->delayed_refs;
1823
1824 ret = run_and_cleanup_extent_op(trans, head);
1825 if (ret < 0) {
1826 unselect_delayed_ref_head(delayed_refs, head);
1827 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1828 return ret;
1829 } else if (ret) {
1830 return ret;
1831 }
1832
1833
1834
1835
1836
1837 spin_unlock(&head->lock);
1838 spin_lock(&delayed_refs->lock);
1839 spin_lock(&head->lock);
1840 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1841 spin_unlock(&head->lock);
1842 spin_unlock(&delayed_refs->lock);
1843 return 1;
1844 }
1845 btrfs_delete_ref_head(delayed_refs, head);
1846 spin_unlock(&head->lock);
1847 spin_unlock(&delayed_refs->lock);
1848
1849 if (head->must_insert_reserved) {
1850 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
1851 if (head->is_data) {
1852 ret = btrfs_del_csums(trans, fs_info->csum_root,
1853 head->bytenr, head->num_bytes);
1854 }
1855 }
1856
1857 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
1858
1859 trace_run_delayed_ref_head(fs_info, head, 0);
1860 btrfs_delayed_ref_unlock(head);
1861 btrfs_put_delayed_ref_head(head);
1862 return 0;
1863}
1864
1865static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1866 struct btrfs_trans_handle *trans)
1867{
1868 struct btrfs_delayed_ref_root *delayed_refs =
1869 &trans->transaction->delayed_refs;
1870 struct btrfs_delayed_ref_head *head = NULL;
1871 int ret;
1872
1873 spin_lock(&delayed_refs->lock);
1874 head = btrfs_select_ref_head(delayed_refs);
1875 if (!head) {
1876 spin_unlock(&delayed_refs->lock);
1877 return head;
1878 }
1879
1880
1881
1882
1883
1884 ret = btrfs_delayed_ref_lock(delayed_refs, head);
1885 spin_unlock(&delayed_refs->lock);
1886
1887
1888
1889
1890
1891
1892 if (ret == -EAGAIN)
1893 head = ERR_PTR(-EAGAIN);
1894
1895 return head;
1896}
1897
1898static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1899 struct btrfs_delayed_ref_head *locked_ref,
1900 unsigned long *run_refs)
1901{
1902 struct btrfs_fs_info *fs_info = trans->fs_info;
1903 struct btrfs_delayed_ref_root *delayed_refs;
1904 struct btrfs_delayed_extent_op *extent_op;
1905 struct btrfs_delayed_ref_node *ref;
1906 int must_insert_reserved = 0;
1907 int ret;
1908
1909 delayed_refs = &trans->transaction->delayed_refs;
1910
1911 lockdep_assert_held(&locked_ref->mutex);
1912 lockdep_assert_held(&locked_ref->lock);
1913
1914 while ((ref = select_delayed_ref(locked_ref))) {
1915 if (ref->seq &&
1916 btrfs_check_delayed_seq(fs_info, ref->seq)) {
1917 spin_unlock(&locked_ref->lock);
1918 unselect_delayed_ref_head(delayed_refs, locked_ref);
1919 return -EAGAIN;
1920 }
1921
1922 (*run_refs)++;
1923 ref->in_tree = 0;
1924 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1925 RB_CLEAR_NODE(&ref->ref_node);
1926 if (!list_empty(&ref->add_list))
1927 list_del(&ref->add_list);
1928
1929
1930
1931
1932 switch (ref->action) {
1933 case BTRFS_ADD_DELAYED_REF:
1934 case BTRFS_ADD_DELAYED_EXTENT:
1935 locked_ref->ref_mod -= ref->ref_mod;
1936 break;
1937 case BTRFS_DROP_DELAYED_REF:
1938 locked_ref->ref_mod += ref->ref_mod;
1939 break;
1940 default:
1941 WARN_ON(1);
1942 }
1943 atomic_dec(&delayed_refs->num_entries);
1944
1945
1946
1947
1948
1949 must_insert_reserved = locked_ref->must_insert_reserved;
1950 locked_ref->must_insert_reserved = 0;
1951
1952 extent_op = locked_ref->extent_op;
1953 locked_ref->extent_op = NULL;
1954 spin_unlock(&locked_ref->lock);
1955
1956 ret = run_one_delayed_ref(trans, ref, extent_op,
1957 must_insert_reserved);
1958
1959 btrfs_free_delayed_extent_op(extent_op);
1960 if (ret) {
1961 unselect_delayed_ref_head(delayed_refs, locked_ref);
1962 btrfs_put_delayed_ref(ref);
1963 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
1964 ret);
1965 return ret;
1966 }
1967
1968 btrfs_put_delayed_ref(ref);
1969 cond_resched();
1970
1971 spin_lock(&locked_ref->lock);
1972 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
1973 }
1974
1975 return 0;
1976}
1977
1978
1979
1980
1981
1982static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1983 unsigned long nr)
1984{
1985 struct btrfs_fs_info *fs_info = trans->fs_info;
1986 struct btrfs_delayed_ref_root *delayed_refs;
1987 struct btrfs_delayed_ref_head *locked_ref = NULL;
1988 ktime_t start = ktime_get();
1989 int ret;
1990 unsigned long count = 0;
1991 unsigned long actual_count = 0;
1992
1993 delayed_refs = &trans->transaction->delayed_refs;
1994 do {
1995 if (!locked_ref) {
1996 locked_ref = btrfs_obtain_ref_head(trans);
1997 if (IS_ERR_OR_NULL(locked_ref)) {
1998 if (PTR_ERR(locked_ref) == -EAGAIN) {
1999 continue;
2000 } else {
2001 break;
2002 }
2003 }
2004 count++;
2005 }
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018 spin_lock(&locked_ref->lock);
2019 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2020
2021 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2022 &actual_count);
2023 if (ret < 0 && ret != -EAGAIN) {
2024
2025
2026
2027
2028 return ret;
2029 } else if (!ret) {
2030
2031
2032
2033
2034 ret = cleanup_ref_head(trans, locked_ref);
2035 if (ret > 0 ) {
2036
2037 ret = 0;
2038 continue;
2039 } else if (ret) {
2040 return ret;
2041 }
2042 }
2043
2044
2045
2046
2047
2048
2049 locked_ref = NULL;
2050 cond_resched();
2051 } while ((nr != -1 && count < nr) || locked_ref);
2052
2053
2054
2055
2056
2057
2058 if (actual_count > 0) {
2059 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2060 u64 avg;
2061
2062
2063
2064
2065
2066 spin_lock(&delayed_refs->lock);
2067 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2068 fs_info->avg_delayed_ref_runtime = avg >> 2;
2069 spin_unlock(&delayed_refs->lock);
2070 }
2071 return 0;
2072}
2073
2074#ifdef SCRAMBLE_DELAYED_REFS
2075
2076
2077
2078
2079
2080static u64 find_middle(struct rb_root *root)
2081{
2082 struct rb_node *n = root->rb_node;
2083 struct btrfs_delayed_ref_node *entry;
2084 int alt = 1;
2085 u64 middle;
2086 u64 first = 0, last = 0;
2087
2088 n = rb_first(root);
2089 if (n) {
2090 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2091 first = entry->bytenr;
2092 }
2093 n = rb_last(root);
2094 if (n) {
2095 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2096 last = entry->bytenr;
2097 }
2098 n = root->rb_node;
2099
2100 while (n) {
2101 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2102 WARN_ON(!entry->in_tree);
2103
2104 middle = entry->bytenr;
2105
2106 if (alt)
2107 n = n->rb_left;
2108 else
2109 n = n->rb_right;
2110
2111 alt = 1 - alt;
2112 }
2113 return middle;
2114}
2115#endif
2116
2117static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2118{
2119 u64 num_bytes;
2120
2121 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2122 sizeof(struct btrfs_extent_inline_ref));
2123 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2124 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2125
2126
2127
2128
2129
2130 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2131}
2132
2133
2134
2135
2136
2137u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2138{
2139 u64 csum_size;
2140 u64 num_csums_per_leaf;
2141 u64 num_csums;
2142
2143 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2144 num_csums_per_leaf = div64_u64(csum_size,
2145 (u64)btrfs_super_csum_size(fs_info->super_copy));
2146 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2147 num_csums += num_csums_per_leaf - 1;
2148 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2149 return num_csums;
2150}
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2163 unsigned long count)
2164{
2165 struct btrfs_fs_info *fs_info = trans->fs_info;
2166 struct rb_node *node;
2167 struct btrfs_delayed_ref_root *delayed_refs;
2168 struct btrfs_delayed_ref_head *head;
2169 int ret;
2170 int run_all = count == (unsigned long)-1;
2171
2172
2173 if (TRANS_ABORTED(trans))
2174 return 0;
2175
2176 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2177 return 0;
2178
2179 delayed_refs = &trans->transaction->delayed_refs;
2180 if (count == 0)
2181 count = atomic_read(&delayed_refs->num_entries) * 2;
2182
2183again:
2184#ifdef SCRAMBLE_DELAYED_REFS
2185 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2186#endif
2187 ret = __btrfs_run_delayed_refs(trans, count);
2188 if (ret < 0) {
2189 btrfs_abort_transaction(trans, ret);
2190 return ret;
2191 }
2192
2193 if (run_all) {
2194 btrfs_create_pending_block_groups(trans);
2195
2196 spin_lock(&delayed_refs->lock);
2197 node = rb_first_cached(&delayed_refs->href_root);
2198 if (!node) {
2199 spin_unlock(&delayed_refs->lock);
2200 goto out;
2201 }
2202 head = rb_entry(node, struct btrfs_delayed_ref_head,
2203 href_node);
2204 refcount_inc(&head->refs);
2205 spin_unlock(&delayed_refs->lock);
2206
2207
2208 mutex_lock(&head->mutex);
2209 mutex_unlock(&head->mutex);
2210
2211 btrfs_put_delayed_ref_head(head);
2212 cond_resched();
2213 goto again;
2214 }
2215out:
2216 return 0;
2217}
2218
2219int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2220 struct extent_buffer *eb, u64 flags,
2221 int level, int is_data)
2222{
2223 struct btrfs_delayed_extent_op *extent_op;
2224 int ret;
2225
2226 extent_op = btrfs_alloc_delayed_extent_op();
2227 if (!extent_op)
2228 return -ENOMEM;
2229
2230 extent_op->flags_to_set = flags;
2231 extent_op->update_flags = true;
2232 extent_op->update_key = false;
2233 extent_op->is_data = is_data ? true : false;
2234 extent_op->level = level;
2235
2236 ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
2237 if (ret)
2238 btrfs_free_delayed_extent_op(extent_op);
2239 return ret;
2240}
2241
2242static noinline int check_delayed_ref(struct btrfs_root *root,
2243 struct btrfs_path *path,
2244 u64 objectid, u64 offset, u64 bytenr)
2245{
2246 struct btrfs_delayed_ref_head *head;
2247 struct btrfs_delayed_ref_node *ref;
2248 struct btrfs_delayed_data_ref *data_ref;
2249 struct btrfs_delayed_ref_root *delayed_refs;
2250 struct btrfs_transaction *cur_trans;
2251 struct rb_node *node;
2252 int ret = 0;
2253
2254 spin_lock(&root->fs_info->trans_lock);
2255 cur_trans = root->fs_info->running_transaction;
2256 if (cur_trans)
2257 refcount_inc(&cur_trans->use_count);
2258 spin_unlock(&root->fs_info->trans_lock);
2259 if (!cur_trans)
2260 return 0;
2261
2262 delayed_refs = &cur_trans->delayed_refs;
2263 spin_lock(&delayed_refs->lock);
2264 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2265 if (!head) {
2266 spin_unlock(&delayed_refs->lock);
2267 btrfs_put_transaction(cur_trans);
2268 return 0;
2269 }
2270
2271 if (!mutex_trylock(&head->mutex)) {
2272 refcount_inc(&head->refs);
2273 spin_unlock(&delayed_refs->lock);
2274
2275 btrfs_release_path(path);
2276
2277
2278
2279
2280
2281 mutex_lock(&head->mutex);
2282 mutex_unlock(&head->mutex);
2283 btrfs_put_delayed_ref_head(head);
2284 btrfs_put_transaction(cur_trans);
2285 return -EAGAIN;
2286 }
2287 spin_unlock(&delayed_refs->lock);
2288
2289 spin_lock(&head->lock);
2290
2291
2292
2293
2294 for (node = rb_first_cached(&head->ref_tree); node;
2295 node = rb_next(node)) {
2296 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2297
2298 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2299 ret = 1;
2300 break;
2301 }
2302
2303 data_ref = btrfs_delayed_node_to_data_ref(ref);
2304
2305
2306
2307
2308
2309 if (data_ref->root != root->root_key.objectid ||
2310 data_ref->objectid != objectid ||
2311 data_ref->offset != offset) {
2312 ret = 1;
2313 break;
2314 }
2315 }
2316 spin_unlock(&head->lock);
2317 mutex_unlock(&head->mutex);
2318 btrfs_put_transaction(cur_trans);
2319 return ret;
2320}
2321
2322static noinline int check_committed_ref(struct btrfs_root *root,
2323 struct btrfs_path *path,
2324 u64 objectid, u64 offset, u64 bytenr)
2325{
2326 struct btrfs_fs_info *fs_info = root->fs_info;
2327 struct btrfs_root *extent_root = fs_info->extent_root;
2328 struct extent_buffer *leaf;
2329 struct btrfs_extent_data_ref *ref;
2330 struct btrfs_extent_inline_ref *iref;
2331 struct btrfs_extent_item *ei;
2332 struct btrfs_key key;
2333 u32 item_size;
2334 int type;
2335 int ret;
2336
2337 key.objectid = bytenr;
2338 key.offset = (u64)-1;
2339 key.type = BTRFS_EXTENT_ITEM_KEY;
2340
2341 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2342 if (ret < 0)
2343 goto out;
2344 BUG_ON(ret == 0);
2345
2346 ret = -ENOENT;
2347 if (path->slots[0] == 0)
2348 goto out;
2349
2350 path->slots[0]--;
2351 leaf = path->nodes[0];
2352 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2353
2354 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2355 goto out;
2356
2357 ret = 1;
2358 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2359 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2360
2361
2362 if (item_size != sizeof(*ei) +
2363 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2364 goto out;
2365
2366
2367 if (btrfs_extent_generation(leaf, ei) <=
2368 btrfs_root_last_snapshot(&root->root_item))
2369 goto out;
2370
2371 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2372
2373
2374 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
2375 if (type != BTRFS_EXTENT_DATA_REF_KEY)
2376 goto out;
2377
2378 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2379 if (btrfs_extent_refs(leaf, ei) !=
2380 btrfs_extent_data_ref_count(leaf, ref) ||
2381 btrfs_extent_data_ref_root(leaf, ref) !=
2382 root->root_key.objectid ||
2383 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2384 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2385 goto out;
2386
2387 ret = 0;
2388out:
2389 return ret;
2390}
2391
2392int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
2393 u64 bytenr)
2394{
2395 struct btrfs_path *path;
2396 int ret;
2397
2398 path = btrfs_alloc_path();
2399 if (!path)
2400 return -ENOMEM;
2401
2402 do {
2403 ret = check_committed_ref(root, path, objectid,
2404 offset, bytenr);
2405 if (ret && ret != -ENOENT)
2406 goto out;
2407
2408 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
2409 } while (ret == -EAGAIN);
2410
2411out:
2412 btrfs_free_path(path);
2413 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2414 WARN_ON(ret > 0);
2415 return ret;
2416}
2417
2418static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2419 struct btrfs_root *root,
2420 struct extent_buffer *buf,
2421 int full_backref, int inc)
2422{
2423 struct btrfs_fs_info *fs_info = root->fs_info;
2424 u64 bytenr;
2425 u64 num_bytes;
2426 u64 parent;
2427 u64 ref_root;
2428 u32 nritems;
2429 struct btrfs_key key;
2430 struct btrfs_file_extent_item *fi;
2431 struct btrfs_ref generic_ref = { 0 };
2432 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
2433 int i;
2434 int action;
2435 int level;
2436 int ret = 0;
2437
2438 if (btrfs_is_testing(fs_info))
2439 return 0;
2440
2441 ref_root = btrfs_header_owner(buf);
2442 nritems = btrfs_header_nritems(buf);
2443 level = btrfs_header_level(buf);
2444
2445 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
2446 return 0;
2447
2448 if (full_backref)
2449 parent = buf->start;
2450 else
2451 parent = 0;
2452 if (inc)
2453 action = BTRFS_ADD_DELAYED_REF;
2454 else
2455 action = BTRFS_DROP_DELAYED_REF;
2456
2457 for (i = 0; i < nritems; i++) {
2458 if (level == 0) {
2459 btrfs_item_key_to_cpu(buf, &key, i);
2460 if (key.type != BTRFS_EXTENT_DATA_KEY)
2461 continue;
2462 fi = btrfs_item_ptr(buf, i,
2463 struct btrfs_file_extent_item);
2464 if (btrfs_file_extent_type(buf, fi) ==
2465 BTRFS_FILE_EXTENT_INLINE)
2466 continue;
2467 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2468 if (bytenr == 0)
2469 continue;
2470
2471 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2472 key.offset -= btrfs_file_extent_offset(buf, fi);
2473 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2474 num_bytes, parent);
2475 generic_ref.real_root = root->root_key.objectid;
2476 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2477 key.offset);
2478 generic_ref.skip_qgroup = for_reloc;
2479 if (inc)
2480 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2481 else
2482 ret = btrfs_free_extent(trans, &generic_ref);
2483 if (ret)
2484 goto fail;
2485 } else {
2486 bytenr = btrfs_node_blockptr(buf, i);
2487 num_bytes = fs_info->nodesize;
2488 btrfs_init_generic_ref(&generic_ref, action, bytenr,
2489 num_bytes, parent);
2490 generic_ref.real_root = root->root_key.objectid;
2491 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
2492 generic_ref.skip_qgroup = for_reloc;
2493 if (inc)
2494 ret = btrfs_inc_extent_ref(trans, &generic_ref);
2495 else
2496 ret = btrfs_free_extent(trans, &generic_ref);
2497 if (ret)
2498 goto fail;
2499 }
2500 }
2501 return 0;
2502fail:
2503 return ret;
2504}
2505
2506int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2507 struct extent_buffer *buf, int full_backref)
2508{
2509 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2510}
2511
2512int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2513 struct extent_buffer *buf, int full_backref)
2514{
2515 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2516}
2517
2518int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
2519{
2520 struct btrfs_block_group *block_group;
2521 int readonly = 0;
2522
2523 block_group = btrfs_lookup_block_group(fs_info, bytenr);
2524 if (!block_group || block_group->ro)
2525 readonly = 1;
2526 if (block_group)
2527 btrfs_put_block_group(block_group);
2528 return readonly;
2529}
2530
2531static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2532{
2533 struct btrfs_fs_info *fs_info = root->fs_info;
2534 u64 flags;
2535 u64 ret;
2536
2537 if (data)
2538 flags = BTRFS_BLOCK_GROUP_DATA;
2539 else if (root == fs_info->chunk_root)
2540 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2541 else
2542 flags = BTRFS_BLOCK_GROUP_METADATA;
2543
2544 ret = btrfs_get_alloc_profile(fs_info, flags);
2545 return ret;
2546}
2547
2548static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
2549{
2550 struct btrfs_block_group *cache;
2551 u64 bytenr;
2552
2553 spin_lock(&fs_info->block_group_cache_lock);
2554 bytenr = fs_info->first_logical_byte;
2555 spin_unlock(&fs_info->block_group_cache_lock);
2556
2557 if (bytenr < (u64)-1)
2558 return bytenr;
2559
2560 cache = btrfs_lookup_first_block_group(fs_info, search_start);
2561 if (!cache)
2562 return 0;
2563
2564 bytenr = cache->start;
2565 btrfs_put_block_group(cache);
2566
2567 return bytenr;
2568}
2569
2570static int pin_down_extent(struct btrfs_trans_handle *trans,
2571 struct btrfs_block_group *cache,
2572 u64 bytenr, u64 num_bytes, int reserved)
2573{
2574 struct btrfs_fs_info *fs_info = cache->fs_info;
2575
2576 spin_lock(&cache->space_info->lock);
2577 spin_lock(&cache->lock);
2578 cache->pinned += num_bytes;
2579 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
2580 num_bytes);
2581 if (reserved) {
2582 cache->reserved -= num_bytes;
2583 cache->space_info->bytes_reserved -= num_bytes;
2584 }
2585 spin_unlock(&cache->lock);
2586 spin_unlock(&cache->space_info->lock);
2587
2588 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
2589 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2590 set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
2591 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
2592 return 0;
2593}
2594
2595int btrfs_pin_extent(struct btrfs_trans_handle *trans,
2596 u64 bytenr, u64 num_bytes, int reserved)
2597{
2598 struct btrfs_block_group *cache;
2599
2600 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2601 BUG_ON(!cache);
2602
2603 pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
2604
2605 btrfs_put_block_group(cache);
2606 return 0;
2607}
2608
2609
2610
2611
2612int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2613 u64 bytenr, u64 num_bytes)
2614{
2615 struct btrfs_block_group *cache;
2616 int ret;
2617
2618 btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes);
2619
2620 cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2621 if (!cache)
2622 return -EINVAL;
2623
2624
2625
2626
2627
2628
2629
2630 btrfs_cache_block_group(cache, 1);
2631
2632 pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2633
2634
2635 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
2636 btrfs_put_block_group(cache);
2637 return ret;
2638}
2639
2640static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
2641 u64 start, u64 num_bytes)
2642{
2643 int ret;
2644 struct btrfs_block_group *block_group;
2645 struct btrfs_caching_control *caching_ctl;
2646
2647 block_group = btrfs_lookup_block_group(fs_info, start);
2648 if (!block_group)
2649 return -EINVAL;
2650
2651 btrfs_cache_block_group(block_group, 0);
2652 caching_ctl = btrfs_get_caching_control(block_group);
2653
2654 if (!caching_ctl) {
2655
2656 BUG_ON(!btrfs_block_group_done(block_group));
2657 ret = btrfs_remove_free_space(block_group, start, num_bytes);
2658 } else {
2659 mutex_lock(&caching_ctl->mutex);
2660
2661 if (start >= caching_ctl->progress) {
2662 ret = btrfs_add_excluded_extent(fs_info, start,
2663 num_bytes);
2664 } else if (start + num_bytes <= caching_ctl->progress) {
2665 ret = btrfs_remove_free_space(block_group,
2666 start, num_bytes);
2667 } else {
2668 num_bytes = caching_ctl->progress - start;
2669 ret = btrfs_remove_free_space(block_group,
2670 start, num_bytes);
2671 if (ret)
2672 goto out_lock;
2673
2674 num_bytes = (start + num_bytes) -
2675 caching_ctl->progress;
2676 start = caching_ctl->progress;
2677 ret = btrfs_add_excluded_extent(fs_info, start,
2678 num_bytes);
2679 }
2680out_lock:
2681 mutex_unlock(&caching_ctl->mutex);
2682 btrfs_put_caching_control(caching_ctl);
2683 }
2684 btrfs_put_block_group(block_group);
2685 return ret;
2686}
2687
2688int btrfs_exclude_logged_extents(struct extent_buffer *eb)
2689{
2690 struct btrfs_fs_info *fs_info = eb->fs_info;
2691 struct btrfs_file_extent_item *item;
2692 struct btrfs_key key;
2693 int found_type;
2694 int i;
2695 int ret = 0;
2696
2697 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
2698 return 0;
2699
2700 for (i = 0; i < btrfs_header_nritems(eb); i++) {
2701 btrfs_item_key_to_cpu(eb, &key, i);
2702 if (key.type != BTRFS_EXTENT_DATA_KEY)
2703 continue;
2704 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
2705 found_type = btrfs_file_extent_type(eb, item);
2706 if (found_type == BTRFS_FILE_EXTENT_INLINE)
2707 continue;
2708 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
2709 continue;
2710 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
2711 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2712 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2713 if (ret)
2714 break;
2715 }
2716
2717 return ret;
2718}
2719
2720static void
2721btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
2722{
2723 atomic_inc(&bg->reservations);
2724}
2725
2726void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
2727{
2728 struct btrfs_caching_control *next;
2729 struct btrfs_caching_control *caching_ctl;
2730 struct btrfs_block_group *cache;
2731
2732 down_write(&fs_info->commit_root_sem);
2733
2734 list_for_each_entry_safe(caching_ctl, next,
2735 &fs_info->caching_block_groups, list) {
2736 cache = caching_ctl->block_group;
2737 if (btrfs_block_group_done(cache)) {
2738 cache->last_byte_to_unpin = (u64)-1;
2739 list_del_init(&caching_ctl->list);
2740 btrfs_put_caching_control(caching_ctl);
2741 } else {
2742 cache->last_byte_to_unpin = caching_ctl->progress;
2743 }
2744 }
2745
2746 up_write(&fs_info->commit_root_sem);
2747
2748 btrfs_update_global_block_rsv(fs_info);
2749}
2750
2751
2752
2753
2754
2755static struct btrfs_free_cluster *
2756fetch_cluster_info(struct btrfs_fs_info *fs_info,
2757 struct btrfs_space_info *space_info, u64 *empty_cluster)
2758{
2759 struct btrfs_free_cluster *ret = NULL;
2760
2761 *empty_cluster = 0;
2762 if (btrfs_mixed_space_info(space_info))
2763 return ret;
2764
2765 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
2766 ret = &fs_info->meta_alloc_cluster;
2767 if (btrfs_test_opt(fs_info, SSD))
2768 *empty_cluster = SZ_2M;
2769 else
2770 *empty_cluster = SZ_64K;
2771 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2772 btrfs_test_opt(fs_info, SSD_SPREAD)) {
2773 *empty_cluster = SZ_2M;
2774 ret = &fs_info->data_alloc_cluster;
2775 }
2776
2777 return ret;
2778}
2779
2780static int unpin_extent_range(struct btrfs_fs_info *fs_info,
2781 u64 start, u64 end,
2782 const bool return_free_space)
2783{
2784 struct btrfs_block_group *cache = NULL;
2785 struct btrfs_space_info *space_info;
2786 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2787 struct btrfs_free_cluster *cluster = NULL;
2788 u64 len;
2789 u64 total_unpinned = 0;
2790 u64 empty_cluster = 0;
2791 bool readonly;
2792
2793 while (start <= end) {
2794 readonly = false;
2795 if (!cache ||
2796 start >= cache->start + cache->length) {
2797 if (cache)
2798 btrfs_put_block_group(cache);
2799 total_unpinned = 0;
2800 cache = btrfs_lookup_block_group(fs_info, start);
2801 BUG_ON(!cache);
2802
2803 cluster = fetch_cluster_info(fs_info,
2804 cache->space_info,
2805 &empty_cluster);
2806 empty_cluster <<= 1;
2807 }
2808
2809 len = cache->start + cache->length - start;
2810 len = min(len, end + 1 - start);
2811
2812 if (start < cache->last_byte_to_unpin) {
2813 len = min(len, cache->last_byte_to_unpin - start);
2814 if (return_free_space)
2815 btrfs_add_free_space(cache, start, len);
2816 }
2817
2818 start += len;
2819 total_unpinned += len;
2820 space_info = cache->space_info;
2821
2822
2823
2824
2825
2826
2827
2828 if (cluster && cluster->fragmented &&
2829 total_unpinned > empty_cluster) {
2830 spin_lock(&cluster->lock);
2831 cluster->fragmented = 0;
2832 spin_unlock(&cluster->lock);
2833 }
2834
2835 spin_lock(&space_info->lock);
2836 spin_lock(&cache->lock);
2837 cache->pinned -= len;
2838 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
2839 space_info->max_extent_size = 0;
2840 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2841 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
2842 if (cache->ro) {
2843 space_info->bytes_readonly += len;
2844 readonly = true;
2845 }
2846 spin_unlock(&cache->lock);
2847 if (!readonly && return_free_space &&
2848 global_rsv->space_info == space_info) {
2849 u64 to_add = len;
2850
2851 spin_lock(&global_rsv->lock);
2852 if (!global_rsv->full) {
2853 to_add = min(len, global_rsv->size -
2854 global_rsv->reserved);
2855 global_rsv->reserved += to_add;
2856 btrfs_space_info_update_bytes_may_use(fs_info,
2857 space_info, to_add);
2858 if (global_rsv->reserved >= global_rsv->size)
2859 global_rsv->full = 1;
2860 len -= to_add;
2861 }
2862 spin_unlock(&global_rsv->lock);
2863
2864 if (len)
2865 btrfs_try_granting_tickets(fs_info,
2866 space_info);
2867 }
2868 spin_unlock(&space_info->lock);
2869 }
2870
2871 if (cache)
2872 btrfs_put_block_group(cache);
2873 return 0;
2874}
2875
2876int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2877{
2878 struct btrfs_fs_info *fs_info = trans->fs_info;
2879 struct btrfs_block_group *block_group, *tmp;
2880 struct list_head *deleted_bgs;
2881 struct extent_io_tree *unpin;
2882 u64 start;
2883 u64 end;
2884 int ret;
2885
2886 unpin = &trans->transaction->pinned_extents;
2887
2888 while (!TRANS_ABORTED(trans)) {
2889 struct extent_state *cached_state = NULL;
2890
2891 mutex_lock(&fs_info->unused_bg_unpin_mutex);
2892 ret = find_first_extent_bit(unpin, 0, &start, &end,
2893 EXTENT_DIRTY, &cached_state);
2894 if (ret) {
2895 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2896 break;
2897 }
2898 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
2899 clear_extent_bits(&fs_info->excluded_extents, start,
2900 end, EXTENT_UPTODATE);
2901
2902 if (btrfs_test_opt(fs_info, DISCARD_SYNC))
2903 ret = btrfs_discard_extent(fs_info, start,
2904 end + 1 - start, NULL);
2905
2906 clear_extent_dirty(unpin, start, end, &cached_state);
2907 unpin_extent_range(fs_info, start, end, true);
2908 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2909 free_extent_state(cached_state);
2910 cond_resched();
2911 }
2912
2913 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2914 btrfs_discard_calc_delay(&fs_info->discard_ctl);
2915 btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2916 }
2917
2918
2919
2920
2921
2922
2923 deleted_bgs = &trans->transaction->deleted_bgs;
2924 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2925 u64 trimmed = 0;
2926
2927 ret = -EROFS;
2928 if (!TRANS_ABORTED(trans))
2929 ret = btrfs_discard_extent(fs_info,
2930 block_group->start,
2931 block_group->length,
2932 &trimmed);
2933
2934 list_del_init(&block_group->bg_list);
2935 btrfs_put_block_group_trimming(block_group);
2936 btrfs_put_block_group(block_group);
2937
2938 if (ret) {
2939 const char *errstr = btrfs_decode_error(ret);
2940 btrfs_warn(fs_info,
2941 "discard failed while removing blockgroup: errno=%d %s",
2942 ret, errstr);
2943 }
2944 }
2945
2946 return 0;
2947}
2948
2949static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2950 struct btrfs_delayed_ref_node *node, u64 parent,
2951 u64 root_objectid, u64 owner_objectid,
2952 u64 owner_offset, int refs_to_drop,
2953 struct btrfs_delayed_extent_op *extent_op)
2954{
2955 struct btrfs_fs_info *info = trans->fs_info;
2956 struct btrfs_key key;
2957 struct btrfs_path *path;
2958 struct btrfs_root *extent_root = info->extent_root;
2959 struct extent_buffer *leaf;
2960 struct btrfs_extent_item *ei;
2961 struct btrfs_extent_inline_ref *iref;
2962 int ret;
2963 int is_data;
2964 int extent_slot = 0;
2965 int found_extent = 0;
2966 int num_to_del = 1;
2967 u32 item_size;
2968 u64 refs;
2969 u64 bytenr = node->bytenr;
2970 u64 num_bytes = node->num_bytes;
2971 int last_ref = 0;
2972 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2973
2974 path = btrfs_alloc_path();
2975 if (!path)
2976 return -ENOMEM;
2977
2978 path->leave_spinning = 1;
2979
2980 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2981 BUG_ON(!is_data && refs_to_drop != 1);
2982
2983 if (is_data)
2984 skinny_metadata = false;
2985
2986 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2987 parent, root_objectid, owner_objectid,
2988 owner_offset);
2989 if (ret == 0) {
2990 extent_slot = path->slots[0];
2991 while (extent_slot >= 0) {
2992 btrfs_item_key_to_cpu(path->nodes[0], &key,
2993 extent_slot);
2994 if (key.objectid != bytenr)
2995 break;
2996 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2997 key.offset == num_bytes) {
2998 found_extent = 1;
2999 break;
3000 }
3001 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3002 key.offset == owner_objectid) {
3003 found_extent = 1;
3004 break;
3005 }
3006 if (path->slots[0] - extent_slot > 5)
3007 break;
3008 extent_slot--;
3009 }
3010
3011 if (!found_extent) {
3012 BUG_ON(iref);
3013 ret = remove_extent_backref(trans, path, NULL,
3014 refs_to_drop,
3015 is_data, &last_ref);
3016 if (ret) {
3017 btrfs_abort_transaction(trans, ret);
3018 goto out;
3019 }
3020 btrfs_release_path(path);
3021 path->leave_spinning = 1;
3022
3023 key.objectid = bytenr;
3024 key.type = BTRFS_EXTENT_ITEM_KEY;
3025 key.offset = num_bytes;
3026
3027 if (!is_data && skinny_metadata) {
3028 key.type = BTRFS_METADATA_ITEM_KEY;
3029 key.offset = owner_objectid;
3030 }
3031
3032 ret = btrfs_search_slot(trans, extent_root,
3033 &key, path, -1, 1);
3034 if (ret > 0 && skinny_metadata && path->slots[0]) {
3035
3036
3037
3038
3039 path->slots[0]--;
3040 btrfs_item_key_to_cpu(path->nodes[0], &key,
3041 path->slots[0]);
3042 if (key.objectid == bytenr &&
3043 key.type == BTRFS_EXTENT_ITEM_KEY &&
3044 key.offset == num_bytes)
3045 ret = 0;
3046 }
3047
3048 if (ret > 0 && skinny_metadata) {
3049 skinny_metadata = false;
3050 key.objectid = bytenr;
3051 key.type = BTRFS_EXTENT_ITEM_KEY;
3052 key.offset = num_bytes;
3053 btrfs_release_path(path);
3054 ret = btrfs_search_slot(trans, extent_root,
3055 &key, path, -1, 1);
3056 }
3057
3058 if (ret) {
3059 btrfs_err(info,
3060 "umm, got %d back from search, was looking for %llu",
3061 ret, bytenr);
3062 if (ret > 0)
3063 btrfs_print_leaf(path->nodes[0]);
3064 }
3065 if (ret < 0) {
3066 btrfs_abort_transaction(trans, ret);
3067 goto out;
3068 }
3069 extent_slot = path->slots[0];
3070 }
3071 } else if (WARN_ON(ret == -ENOENT)) {
3072 btrfs_print_leaf(path->nodes[0]);
3073 btrfs_err(info,
3074 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
3075 bytenr, parent, root_objectid, owner_objectid,
3076 owner_offset);
3077 btrfs_abort_transaction(trans, ret);
3078 goto out;
3079 } else {
3080 btrfs_abort_transaction(trans, ret);
3081 goto out;
3082 }
3083
3084 leaf = path->nodes[0];
3085 item_size = btrfs_item_size_nr(leaf, extent_slot);
3086 if (unlikely(item_size < sizeof(*ei))) {
3087 ret = -EINVAL;
3088 btrfs_print_v0_err(info);
3089 btrfs_abort_transaction(trans, ret);
3090 goto out;
3091 }
3092 ei = btrfs_item_ptr(leaf, extent_slot,
3093 struct btrfs_extent_item);
3094 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
3095 key.type == BTRFS_EXTENT_ITEM_KEY) {
3096 struct btrfs_tree_block_info *bi;
3097 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
3098 bi = (struct btrfs_tree_block_info *)(ei + 1);
3099 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
3100 }
3101
3102 refs = btrfs_extent_refs(leaf, ei);
3103 if (refs < refs_to_drop) {
3104 btrfs_err(info,
3105 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
3106 refs_to_drop, refs, bytenr);
3107 ret = -EINVAL;
3108 btrfs_abort_transaction(trans, ret);
3109 goto out;
3110 }
3111 refs -= refs_to_drop;
3112
3113 if (refs > 0) {
3114 if (extent_op)
3115 __run_delayed_extent_op(extent_op, leaf, ei);
3116
3117
3118
3119
3120 if (iref) {
3121 BUG_ON(!found_extent);
3122 } else {
3123 btrfs_set_extent_refs(leaf, ei, refs);
3124 btrfs_mark_buffer_dirty(leaf);
3125 }
3126 if (found_extent) {
3127 ret = remove_extent_backref(trans, path, iref,
3128 refs_to_drop, is_data,
3129 &last_ref);
3130 if (ret) {
3131 btrfs_abort_transaction(trans, ret);
3132 goto out;
3133 }
3134 }
3135 } else {
3136 if (found_extent) {
3137 BUG_ON(is_data && refs_to_drop !=
3138 extent_data_ref_count(path, iref));
3139 if (iref) {
3140 BUG_ON(path->slots[0] != extent_slot);
3141 } else {
3142 BUG_ON(path->slots[0] != extent_slot + 1);
3143 path->slots[0] = extent_slot;
3144 num_to_del = 2;
3145 }
3146 }
3147
3148 last_ref = 1;
3149 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3150 num_to_del);
3151 if (ret) {
3152 btrfs_abort_transaction(trans, ret);
3153 goto out;
3154 }
3155 btrfs_release_path(path);
3156
3157 if (is_data) {
3158 ret = btrfs_del_csums(trans, info->csum_root, bytenr,
3159 num_bytes);
3160 if (ret) {
3161 btrfs_abort_transaction(trans, ret);
3162 goto out;
3163 }
3164 }
3165
3166 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
3167 if (ret) {
3168 btrfs_abort_transaction(trans, ret);
3169 goto out;
3170 }
3171
3172 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
3173 if (ret) {
3174 btrfs_abort_transaction(trans, ret);
3175 goto out;
3176 }
3177 }
3178 btrfs_release_path(path);
3179
3180out:
3181 btrfs_free_path(path);
3182 return ret;
3183}
3184
3185
3186
3187
3188
3189
3190
3191static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3192 u64 bytenr)
3193{
3194 struct btrfs_delayed_ref_head *head;
3195 struct btrfs_delayed_ref_root *delayed_refs;
3196 int ret = 0;
3197
3198 delayed_refs = &trans->transaction->delayed_refs;
3199 spin_lock(&delayed_refs->lock);
3200 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3201 if (!head)
3202 goto out_delayed_unlock;
3203
3204 spin_lock(&head->lock);
3205 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
3206 goto out;
3207
3208 if (cleanup_extent_op(head) != NULL)
3209 goto out;
3210
3211
3212
3213
3214
3215 if (!mutex_trylock(&head->mutex))
3216 goto out;
3217
3218 btrfs_delete_ref_head(delayed_refs, head);
3219 head->processing = 0;
3220
3221 spin_unlock(&head->lock);
3222 spin_unlock(&delayed_refs->lock);
3223
3224 BUG_ON(head->extent_op);
3225 if (head->must_insert_reserved)
3226 ret = 1;
3227
3228 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3229 mutex_unlock(&head->mutex);
3230 btrfs_put_delayed_ref_head(head);
3231 return ret;
3232out:
3233 spin_unlock(&head->lock);
3234
3235out_delayed_unlock:
3236 spin_unlock(&delayed_refs->lock);
3237 return 0;
3238}
3239
3240void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct extent_buffer *buf,
3243 u64 parent, int last_ref)
3244{
3245 struct btrfs_fs_info *fs_info = root->fs_info;
3246 struct btrfs_ref generic_ref = { 0 };
3247 int pin = 1;
3248 int ret;
3249
3250 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
3251 buf->start, buf->len, parent);
3252 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
3253 root->root_key.objectid);
3254
3255 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3256 int old_ref_mod, new_ref_mod;
3257
3258 btrfs_ref_tree_mod(fs_info, &generic_ref);
3259 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
3260 &old_ref_mod, &new_ref_mod);
3261 BUG_ON(ret);
3262 pin = old_ref_mod >= 0 && new_ref_mod < 0;
3263 }
3264
3265 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
3266 struct btrfs_block_group *cache;
3267
3268 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
3269 ret = check_ref_cleanup(trans, buf->start);
3270 if (!ret)
3271 goto out;
3272 }
3273
3274 pin = 0;
3275 cache = btrfs_lookup_block_group(fs_info, buf->start);
3276
3277 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3278 pin_down_extent(trans, cache, buf->start, buf->len, 1);
3279 btrfs_put_block_group(cache);
3280 goto out;
3281 }
3282
3283 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
3284
3285 btrfs_add_free_space(cache, buf->start, buf->len);
3286 btrfs_free_reserved_bytes(cache, buf->len, 0);
3287 btrfs_put_block_group(cache);
3288 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
3289 }
3290out:
3291 if (pin)
3292 add_pinned_bytes(fs_info, &generic_ref);
3293
3294 if (last_ref) {
3295
3296
3297
3298
3299 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
3300 }
3301}
3302
3303
3304int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3305{
3306 struct btrfs_fs_info *fs_info = trans->fs_info;
3307 int old_ref_mod, new_ref_mod;
3308 int ret;
3309
3310 if (btrfs_is_testing(fs_info))
3311 return 0;
3312
3313
3314
3315
3316
3317 if ((ref->type == BTRFS_REF_METADATA &&
3318 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3319 (ref->type == BTRFS_REF_DATA &&
3320 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
3321
3322 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
3323 old_ref_mod = new_ref_mod = 0;
3324 ret = 0;
3325 } else if (ref->type == BTRFS_REF_METADATA) {
3326 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
3327 &old_ref_mod, &new_ref_mod);
3328 } else {
3329 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
3330 &old_ref_mod, &new_ref_mod);
3331 }
3332
3333 if (!((ref->type == BTRFS_REF_METADATA &&
3334 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
3335 (ref->type == BTRFS_REF_DATA &&
3336 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
3337 btrfs_ref_tree_mod(fs_info, ref);
3338
3339 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
3340 add_pinned_bytes(fs_info, ref);
3341
3342 return ret;
3343}
3344
3345enum btrfs_loop_type {
3346 LOOP_CACHING_NOWAIT,
3347 LOOP_CACHING_WAIT,
3348 LOOP_ALLOC_CHUNK,
3349 LOOP_NO_EMPTY_SIZE,
3350};
3351
3352static inline void
3353btrfs_lock_block_group(struct btrfs_block_group *cache,
3354 int delalloc)
3355{
3356 if (delalloc)
3357 down_read(&cache->data_rwsem);
3358}
3359
3360static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3361 int delalloc)
3362{
3363 btrfs_get_block_group(cache);
3364 if (delalloc)
3365 down_read(&cache->data_rwsem);
3366}
3367
3368static struct btrfs_block_group *btrfs_lock_cluster(
3369 struct btrfs_block_group *block_group,
3370 struct btrfs_free_cluster *cluster,
3371 int delalloc)
3372{
3373 struct btrfs_block_group *used_bg = NULL;
3374
3375 spin_lock(&cluster->refill_lock);
3376 while (1) {
3377 used_bg = cluster->block_group;
3378 if (!used_bg)
3379 return NULL;
3380
3381 if (used_bg == block_group)
3382 return used_bg;
3383
3384 btrfs_get_block_group(used_bg);
3385
3386 if (!delalloc)
3387 return used_bg;
3388
3389 if (down_read_trylock(&used_bg->data_rwsem))
3390 return used_bg;
3391
3392 spin_unlock(&cluster->refill_lock);
3393
3394
3395 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
3396
3397 spin_lock(&cluster->refill_lock);
3398 if (used_bg == cluster->block_group)
3399 return used_bg;
3400
3401 up_read(&used_bg->data_rwsem);
3402 btrfs_put_block_group(used_bg);
3403 }
3404}
3405
3406static inline void
3407btrfs_release_block_group(struct btrfs_block_group *cache,
3408 int delalloc)
3409{
3410 if (delalloc)
3411 up_read(&cache->data_rwsem);
3412 btrfs_put_block_group(cache);
3413}
3414
3415enum btrfs_extent_allocation_policy {
3416 BTRFS_EXTENT_ALLOC_CLUSTERED,
3417};
3418
3419
3420
3421
3422
3423struct find_free_extent_ctl {
3424
3425 u64 num_bytes;
3426 u64 empty_size;
3427 u64 flags;
3428 int delalloc;
3429
3430
3431 u64 search_start;
3432
3433
3434 u64 empty_cluster;
3435 struct btrfs_free_cluster *last_ptr;
3436 bool use_cluster;
3437
3438 bool have_caching_bg;
3439 bool orig_have_caching_bg;
3440
3441
3442 int index;
3443
3444
3445
3446
3447 int loop;
3448
3449
3450
3451
3452
3453 bool retry_clustered;
3454
3455
3456
3457
3458
3459 bool retry_unclustered;
3460
3461
3462 int cached;
3463
3464
3465 u64 max_extent_size;
3466
3467
3468 u64 total_free_space;
3469
3470
3471 u64 found_offset;
3472
3473
3474 u64 hint_byte;
3475
3476
3477 enum btrfs_extent_allocation_policy policy;
3478};
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489static int find_free_extent_clustered(struct btrfs_block_group *bg,
3490 struct find_free_extent_ctl *ffe_ctl,
3491 struct btrfs_block_group **cluster_bg_ret)
3492{
3493 struct btrfs_block_group *cluster_bg;
3494 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3495 u64 aligned_cluster;
3496 u64 offset;
3497 int ret;
3498
3499 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3500 if (!cluster_bg)
3501 goto refill_cluster;
3502 if (cluster_bg != bg && (cluster_bg->ro ||
3503 !block_group_bits(cluster_bg, ffe_ctl->flags)))
3504 goto release_cluster;
3505
3506 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3507 ffe_ctl->num_bytes, cluster_bg->start,
3508 &ffe_ctl->max_extent_size);
3509 if (offset) {
3510
3511 spin_unlock(&last_ptr->refill_lock);
3512 trace_btrfs_reserve_extent_cluster(cluster_bg,
3513 ffe_ctl->search_start, ffe_ctl->num_bytes);
3514 *cluster_bg_ret = cluster_bg;
3515 ffe_ctl->found_offset = offset;
3516 return 0;
3517 }
3518 WARN_ON(last_ptr->block_group != cluster_bg);
3519
3520release_cluster:
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3533 spin_unlock(&last_ptr->refill_lock);
3534 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3535 return -ENOENT;
3536 }
3537
3538
3539 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3540
3541 if (cluster_bg != bg)
3542 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3543
3544refill_cluster:
3545 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3546 spin_unlock(&last_ptr->refill_lock);
3547 return -ENOENT;
3548 }
3549
3550 aligned_cluster = max_t(u64,
3551 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3552 bg->full_stripe_len);
3553 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
3554 ffe_ctl->num_bytes, aligned_cluster);
3555 if (ret == 0) {
3556
3557 offset = btrfs_alloc_from_cluster(bg, last_ptr,
3558 ffe_ctl->num_bytes, ffe_ctl->search_start,
3559 &ffe_ctl->max_extent_size);
3560 if (offset) {
3561
3562 spin_unlock(&last_ptr->refill_lock);
3563 trace_btrfs_reserve_extent_cluster(bg,
3564 ffe_ctl->search_start,
3565 ffe_ctl->num_bytes);
3566 ffe_ctl->found_offset = offset;
3567 return 0;
3568 }
3569 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3570 !ffe_ctl->retry_clustered) {
3571 spin_unlock(&last_ptr->refill_lock);
3572
3573 ffe_ctl->retry_clustered = true;
3574 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3575 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3576 return -EAGAIN;
3577 }
3578
3579
3580
3581
3582
3583 btrfs_return_cluster_to_free_space(NULL, last_ptr);
3584 spin_unlock(&last_ptr->refill_lock);
3585 return 1;
3586}
3587
3588
3589
3590
3591
3592
3593static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3594 struct find_free_extent_ctl *ffe_ctl)
3595{
3596 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3597 u64 offset;
3598
3599
3600
3601
3602
3603
3604 if (unlikely(last_ptr)) {
3605 spin_lock(&last_ptr->lock);
3606 last_ptr->fragmented = 1;
3607 spin_unlock(&last_ptr->lock);
3608 }
3609 if (ffe_ctl->cached) {
3610 struct btrfs_free_space_ctl *free_space_ctl;
3611
3612 free_space_ctl = bg->free_space_ctl;
3613 spin_lock(&free_space_ctl->tree_lock);
3614 if (free_space_ctl->free_space <
3615 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3616 ffe_ctl->empty_size) {
3617 ffe_ctl->total_free_space = max_t(u64,
3618 ffe_ctl->total_free_space,
3619 free_space_ctl->free_space);
3620 spin_unlock(&free_space_ctl->tree_lock);
3621 return 1;
3622 }
3623 spin_unlock(&free_space_ctl->tree_lock);
3624 }
3625
3626 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3627 ffe_ctl->num_bytes, ffe_ctl->empty_size,
3628 &ffe_ctl->max_extent_size);
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3640 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3641 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3642 ffe_ctl->empty_size);
3643 ffe_ctl->retry_unclustered = true;
3644 return -EAGAIN;
3645 } else if (!offset) {
3646 return 1;
3647 }
3648 ffe_ctl->found_offset = offset;
3649 return 0;
3650}
3651
3652static int do_allocation_clustered(struct btrfs_block_group *block_group,
3653 struct find_free_extent_ctl *ffe_ctl,
3654 struct btrfs_block_group **bg_ret)
3655{
3656 int ret;
3657
3658
3659 if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3660 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3661 if (ret >= 0 || ret == -EAGAIN)
3662 return ret;
3663
3664 }
3665
3666 return find_free_extent_unclustered(block_group, ffe_ctl);
3667}
3668
3669static int do_allocation(struct btrfs_block_group *block_group,
3670 struct find_free_extent_ctl *ffe_ctl,
3671 struct btrfs_block_group **bg_ret)
3672{
3673 switch (ffe_ctl->policy) {
3674 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3675 return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
3676 default:
3677 BUG();
3678 }
3679}
3680
3681static void release_block_group(struct btrfs_block_group *block_group,
3682 struct find_free_extent_ctl *ffe_ctl,
3683 int delalloc)
3684{
3685 switch (ffe_ctl->policy) {
3686 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3687 ffe_ctl->retry_clustered = false;
3688 ffe_ctl->retry_unclustered = false;
3689 break;
3690 default:
3691 BUG();
3692 }
3693
3694 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3695 ffe_ctl->index);
3696 btrfs_release_block_group(block_group, delalloc);
3697}
3698
3699static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
3700 struct btrfs_key *ins)
3701{
3702 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3703
3704 if (!ffe_ctl->use_cluster && last_ptr) {
3705 spin_lock(&last_ptr->lock);
3706 last_ptr->window_start = ins->objectid;
3707 spin_unlock(&last_ptr->lock);
3708 }
3709}
3710
3711static void found_extent(struct find_free_extent_ctl *ffe_ctl,
3712 struct btrfs_key *ins)
3713{
3714 switch (ffe_ctl->policy) {
3715 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3716 found_extent_clustered(ffe_ctl, ins);
3717 break;
3718 default:
3719 BUG();
3720 }
3721}
3722
3723static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
3724{
3725 switch (ffe_ctl->policy) {
3726 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3727
3728
3729
3730
3731 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
3732 return 0;
3733 default:
3734 BUG();
3735 }
3736}
3737
3738
3739
3740
3741
3742
3743static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3744 struct btrfs_key *ins,
3745 struct find_free_extent_ctl *ffe_ctl,
3746 bool full_search)
3747{
3748 struct btrfs_root *root = fs_info->extent_root;
3749 int ret;
3750
3751 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3752 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3753 ffe_ctl->orig_have_caching_bg = true;
3754
3755 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
3756 ffe_ctl->have_caching_bg)
3757 return 1;
3758
3759 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
3760 return 1;
3761
3762 if (ins->objectid) {
3763 found_extent(ffe_ctl, ins);
3764 return 0;
3765 }
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
3776 ffe_ctl->index = 0;
3777 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
3778
3779
3780
3781
3782
3783 if (ffe_ctl->orig_have_caching_bg || !full_search)
3784 ffe_ctl->loop = LOOP_CACHING_WAIT;
3785 else
3786 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
3787 } else {
3788 ffe_ctl->loop++;
3789 }
3790
3791 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
3792 struct btrfs_trans_handle *trans;
3793 int exist = 0;
3794
3795 trans = current->journal_info;
3796 if (trans)
3797 exist = 1;
3798 else
3799 trans = btrfs_join_transaction(root);
3800
3801 if (IS_ERR(trans)) {
3802 ret = PTR_ERR(trans);
3803 return ret;
3804 }
3805
3806 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
3807 CHUNK_ALLOC_FORCE);
3808
3809
3810 if (ret == -ENOSPC)
3811 ret = chunk_allocation_failed(ffe_ctl);
3812 else if (ret < 0)
3813 btrfs_abort_transaction(trans, ret);
3814 else
3815 ret = 0;
3816 if (!exist)
3817 btrfs_end_transaction(trans);
3818 if (ret)
3819 return ret;
3820 }
3821
3822 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
3823 if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
3824 return -ENOSPC;
3825
3826
3827
3828
3829
3830 if (ffe_ctl->empty_size == 0 &&
3831 ffe_ctl->empty_cluster == 0)
3832 return -ENOSPC;
3833 ffe_ctl->empty_size = 0;
3834 ffe_ctl->empty_cluster = 0;
3835 }
3836 return 1;
3837 }
3838 return -ENOSPC;
3839}
3840
3841static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
3842 struct find_free_extent_ctl *ffe_ctl,
3843 struct btrfs_space_info *space_info,
3844 struct btrfs_key *ins)
3845{
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856 if (space_info->max_extent_size) {
3857 spin_lock(&space_info->lock);
3858 if (space_info->max_extent_size &&
3859 ffe_ctl->num_bytes > space_info->max_extent_size) {
3860 ins->offset = space_info->max_extent_size;
3861 spin_unlock(&space_info->lock);
3862 return -ENOSPC;
3863 } else if (space_info->max_extent_size) {
3864 ffe_ctl->use_cluster = false;
3865 }
3866 spin_unlock(&space_info->lock);
3867 }
3868
3869 ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
3870 &ffe_ctl->empty_cluster);
3871 if (ffe_ctl->last_ptr) {
3872 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3873
3874 spin_lock(&last_ptr->lock);
3875 if (last_ptr->block_group)
3876 ffe_ctl->hint_byte = last_ptr->window_start;
3877 if (last_ptr->fragmented) {
3878
3879
3880
3881
3882
3883 ffe_ctl->hint_byte = last_ptr->window_start;
3884 ffe_ctl->use_cluster = false;
3885 }
3886 spin_unlock(&last_ptr->lock);
3887 }
3888
3889 return 0;
3890}
3891
3892static int prepare_allocation(struct btrfs_fs_info *fs_info,
3893 struct find_free_extent_ctl *ffe_ctl,
3894 struct btrfs_space_info *space_info,
3895 struct btrfs_key *ins)
3896{
3897 switch (ffe_ctl->policy) {
3898 case BTRFS_EXTENT_ALLOC_CLUSTERED:
3899 return prepare_allocation_clustered(fs_info, ffe_ctl,
3900 space_info, ins);
3901 default:
3902 BUG();
3903 }
3904}
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
3932 u64 ram_bytes, u64 num_bytes, u64 empty_size,
3933 u64 hint_byte_orig, struct btrfs_key *ins,
3934 u64 flags, int delalloc)
3935{
3936 int ret = 0;
3937 int cache_block_group_error = 0;
3938 struct btrfs_block_group *block_group = NULL;
3939 struct find_free_extent_ctl ffe_ctl = {0};
3940 struct btrfs_space_info *space_info;
3941 bool full_search = false;
3942
3943 WARN_ON(num_bytes < fs_info->sectorsize);
3944
3945 ffe_ctl.num_bytes = num_bytes;
3946 ffe_ctl.empty_size = empty_size;
3947 ffe_ctl.flags = flags;
3948 ffe_ctl.search_start = 0;
3949 ffe_ctl.delalloc = delalloc;
3950 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
3951 ffe_ctl.have_caching_bg = false;
3952 ffe_ctl.orig_have_caching_bg = false;
3953 ffe_ctl.found_offset = 0;
3954 ffe_ctl.hint_byte = hint_byte_orig;
3955 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
3956
3957
3958 ffe_ctl.retry_clustered = false;
3959 ffe_ctl.retry_unclustered = false;
3960 ffe_ctl.last_ptr = NULL;
3961 ffe_ctl.use_cluster = true;
3962
3963 ins->type = BTRFS_EXTENT_ITEM_KEY;
3964 ins->objectid = 0;
3965 ins->offset = 0;
3966
3967 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
3968
3969 space_info = btrfs_find_space_info(fs_info, flags);
3970 if (!space_info) {
3971 btrfs_err(fs_info, "No space info for %llu", flags);
3972 return -ENOSPC;
3973 }
3974
3975 ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
3976 if (ret < 0)
3977 return ret;
3978
3979 ffe_ctl.search_start = max(ffe_ctl.search_start,
3980 first_logical_byte(fs_info, 0));
3981 ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
3982 if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
3983 block_group = btrfs_lookup_block_group(fs_info,
3984 ffe_ctl.search_start);
3985
3986
3987
3988
3989
3990
3991
3992 if (block_group && block_group_bits(block_group, flags) &&
3993 block_group->cached != BTRFS_CACHE_NO) {
3994 down_read(&space_info->groups_sem);
3995 if (list_empty(&block_group->list) ||
3996 block_group->ro) {
3997
3998
3999
4000
4001
4002
4003 btrfs_put_block_group(block_group);
4004 up_read(&space_info->groups_sem);
4005 } else {
4006 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
4007 block_group->flags);
4008 btrfs_lock_block_group(block_group, delalloc);
4009 goto have_block_group;
4010 }
4011 } else if (block_group) {
4012 btrfs_put_block_group(block_group);
4013 }
4014 }
4015search:
4016 ffe_ctl.have_caching_bg = false;
4017 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
4018 ffe_ctl.index == 0)
4019 full_search = true;
4020 down_read(&space_info->groups_sem);
4021 list_for_each_entry(block_group,
4022 &space_info->block_groups[ffe_ctl.index], list) {
4023 struct btrfs_block_group *bg_ret;
4024
4025
4026 if (unlikely(block_group->ro))
4027 continue;
4028
4029 btrfs_grab_block_group(block_group, delalloc);
4030 ffe_ctl.search_start = block_group->start;
4031
4032
4033
4034
4035
4036
4037 if (!block_group_bits(block_group, flags)) {
4038 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4039 BTRFS_BLOCK_GROUP_RAID1_MASK |
4040 BTRFS_BLOCK_GROUP_RAID56_MASK |
4041 BTRFS_BLOCK_GROUP_RAID10;
4042
4043
4044
4045
4046
4047
4048 if ((flags & extra) && !(block_group->flags & extra))
4049 goto loop;
4050
4051
4052
4053
4054
4055
4056 btrfs_release_block_group(block_group, delalloc);
4057 continue;
4058 }
4059
4060have_block_group:
4061 ffe_ctl.cached = btrfs_block_group_done(block_group);
4062 if (unlikely(!ffe_ctl.cached)) {
4063 ffe_ctl.have_caching_bg = true;
4064 ret = btrfs_cache_block_group(block_group, 0);
4065
4066
4067
4068
4069
4070
4071
4072
4073 if (ret < 0) {
4074 if (!cache_block_group_error)
4075 cache_block_group_error = ret;
4076 ret = 0;
4077 goto loop;
4078 }
4079 ret = 0;
4080 }
4081
4082 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
4083 goto loop;
4084
4085 bg_ret = NULL;
4086 ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
4087 if (ret == 0) {
4088 if (bg_ret && bg_ret != block_group) {
4089 btrfs_release_block_group(block_group, delalloc);
4090 block_group = bg_ret;
4091 }
4092 } else if (ret == -EAGAIN) {
4093 goto have_block_group;
4094 } else if (ret > 0) {
4095 goto loop;
4096 }
4097
4098
4099 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
4100 fs_info->stripesize);
4101
4102
4103 if (ffe_ctl.search_start + num_bytes >
4104 block_group->start + block_group->length) {
4105 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4106 num_bytes);
4107 goto loop;
4108 }
4109
4110 if (ffe_ctl.found_offset < ffe_ctl.search_start)
4111 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4112 ffe_ctl.search_start - ffe_ctl.found_offset);
4113
4114 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
4115 num_bytes, delalloc);
4116 if (ret == -EAGAIN) {
4117 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
4118 num_bytes);
4119 goto loop;
4120 }
4121 btrfs_inc_block_group_reservations(block_group);
4122
4123
4124 ins->objectid = ffe_ctl.search_start;
4125 ins->offset = num_bytes;
4126
4127 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
4128 num_bytes);
4129 btrfs_release_block_group(block_group, delalloc);
4130 break;
4131loop:
4132 release_block_group(block_group, &ffe_ctl, delalloc);
4133 cond_resched();
4134 }
4135 up_read(&space_info->groups_sem);
4136
4137 ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
4138 if (ret > 0)
4139 goto search;
4140
4141 if (ret == -ENOSPC && !cache_block_group_error) {
4142
4143
4144
4145
4146 if (!ffe_ctl.max_extent_size)
4147 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
4148 spin_lock(&space_info->lock);
4149 space_info->max_extent_size = ffe_ctl.max_extent_size;
4150 spin_unlock(&space_info->lock);
4151 ins->offset = ffe_ctl.max_extent_size;
4152 } else if (ret == -ENOSPC) {
4153 ret = cache_block_group_error;
4154 }
4155 return ret;
4156}
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
4204 u64 num_bytes, u64 min_alloc_size,
4205 u64 empty_size, u64 hint_byte,
4206 struct btrfs_key *ins, int is_data, int delalloc)
4207{
4208 struct btrfs_fs_info *fs_info = root->fs_info;
4209 bool final_tried = num_bytes == min_alloc_size;
4210 u64 flags;
4211 int ret;
4212
4213 flags = get_alloc_profile_by_root(root, is_data);
4214again:
4215 WARN_ON(num_bytes < fs_info->sectorsize);
4216 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
4217 hint_byte, ins, flags, delalloc);
4218 if (!ret && !is_data) {
4219 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
4220 } else if (ret == -ENOSPC) {
4221 if (!final_tried && ins->offset) {
4222 num_bytes = min(num_bytes >> 1, ins->offset);
4223 num_bytes = round_down(num_bytes,
4224 fs_info->sectorsize);
4225 num_bytes = max(num_bytes, min_alloc_size);
4226 ram_bytes = num_bytes;
4227 if (num_bytes == min_alloc_size)
4228 final_tried = true;
4229 goto again;
4230 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4231 struct btrfs_space_info *sinfo;
4232
4233 sinfo = btrfs_find_space_info(fs_info, flags);
4234 btrfs_err(fs_info,
4235 "allocation failed flags %llu, wanted %llu",
4236 flags, num_bytes);
4237 if (sinfo)
4238 btrfs_dump_space_info(fs_info, sinfo,
4239 num_bytes, 1);
4240 }
4241 }
4242
4243 return ret;
4244}
4245
4246int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
4247 u64 start, u64 len, int delalloc)
4248{
4249 struct btrfs_block_group *cache;
4250
4251 cache = btrfs_lookup_block_group(fs_info, start);
4252 if (!cache) {
4253 btrfs_err(fs_info, "Unable to find block group for %llu",
4254 start);
4255 return -ENOSPC;
4256 }
4257
4258 btrfs_add_free_space(cache, start, len);
4259 btrfs_free_reserved_bytes(cache, len, delalloc);
4260 trace_btrfs_reserved_extent_free(fs_info, start, len);
4261
4262 btrfs_put_block_group(cache);
4263 return 0;
4264}
4265
4266int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
4267 u64 len)
4268{
4269 struct btrfs_block_group *cache;
4270 int ret = 0;
4271
4272 cache = btrfs_lookup_block_group(trans->fs_info, start);
4273 if (!cache) {
4274 btrfs_err(trans->fs_info, "unable to find block group for %llu",
4275 start);
4276 return -ENOSPC;
4277 }
4278
4279 ret = pin_down_extent(trans, cache, start, len, 1);
4280 btrfs_put_block_group(cache);
4281 return ret;
4282}
4283
4284static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4285 u64 parent, u64 root_objectid,
4286 u64 flags, u64 owner, u64 offset,
4287 struct btrfs_key *ins, int ref_mod)
4288{
4289 struct btrfs_fs_info *fs_info = trans->fs_info;
4290 int ret;
4291 struct btrfs_extent_item *extent_item;
4292 struct btrfs_extent_inline_ref *iref;
4293 struct btrfs_path *path;
4294 struct extent_buffer *leaf;
4295 int type;
4296 u32 size;
4297
4298 if (parent > 0)
4299 type = BTRFS_SHARED_DATA_REF_KEY;
4300 else
4301 type = BTRFS_EXTENT_DATA_REF_KEY;
4302
4303 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
4304
4305 path = btrfs_alloc_path();
4306 if (!path)
4307 return -ENOMEM;
4308
4309 path->leave_spinning = 1;
4310 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4311 ins, size);
4312 if (ret) {
4313 btrfs_free_path(path);
4314 return ret;
4315 }
4316
4317 leaf = path->nodes[0];
4318 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4319 struct btrfs_extent_item);
4320 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
4321 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4322 btrfs_set_extent_flags(leaf, extent_item,
4323 flags | BTRFS_EXTENT_FLAG_DATA);
4324
4325 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4326 btrfs_set_extent_inline_ref_type(leaf, iref, type);
4327 if (parent > 0) {
4328 struct btrfs_shared_data_ref *ref;
4329 ref = (struct btrfs_shared_data_ref *)(iref + 1);
4330 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
4331 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
4332 } else {
4333 struct btrfs_extent_data_ref *ref;
4334 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
4335 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
4336 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
4337 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
4338 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
4339 }
4340
4341 btrfs_mark_buffer_dirty(path->nodes[0]);
4342 btrfs_free_path(path);
4343
4344 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
4345 if (ret)
4346 return ret;
4347
4348 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
4349 if (ret) {
4350 btrfs_err(fs_info, "update block group failed for %llu %llu",
4351 ins->objectid, ins->offset);
4352 BUG();
4353 }
4354 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
4355 return ret;
4356}
4357
4358static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4359 struct btrfs_delayed_ref_node *node,
4360 struct btrfs_delayed_extent_op *extent_op)
4361{
4362 struct btrfs_fs_info *fs_info = trans->fs_info;
4363 int ret;
4364 struct btrfs_extent_item *extent_item;
4365 struct btrfs_key extent_key;
4366 struct btrfs_tree_block_info *block_info;
4367 struct btrfs_extent_inline_ref *iref;
4368 struct btrfs_path *path;
4369 struct extent_buffer *leaf;
4370 struct btrfs_delayed_tree_ref *ref;
4371 u32 size = sizeof(*extent_item) + sizeof(*iref);
4372 u64 num_bytes;
4373 u64 flags = extent_op->flags_to_set;
4374 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4375
4376 ref = btrfs_delayed_node_to_tree_ref(node);
4377
4378 extent_key.objectid = node->bytenr;
4379 if (skinny_metadata) {
4380 extent_key.offset = ref->level;
4381 extent_key.type = BTRFS_METADATA_ITEM_KEY;
4382 num_bytes = fs_info->nodesize;
4383 } else {
4384 extent_key.offset = node->num_bytes;
4385 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4386 size += sizeof(*block_info);
4387 num_bytes = node->num_bytes;
4388 }
4389
4390 path = btrfs_alloc_path();
4391 if (!path)
4392 return -ENOMEM;
4393
4394 path->leave_spinning = 1;
4395 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
4396 &extent_key, size);
4397 if (ret) {
4398 btrfs_free_path(path);
4399 return ret;
4400 }
4401
4402 leaf = path->nodes[0];
4403 extent_item = btrfs_item_ptr(leaf, path->slots[0],
4404 struct btrfs_extent_item);
4405 btrfs_set_extent_refs(leaf, extent_item, 1);
4406 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
4407 btrfs_set_extent_flags(leaf, extent_item,
4408 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
4409
4410 if (skinny_metadata) {
4411 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
4412 } else {
4413 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
4414 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
4415 btrfs_set_tree_block_level(leaf, block_info, ref->level);
4416 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
4417 }
4418
4419 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
4420 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4421 btrfs_set_extent_inline_ref_type(leaf, iref,
4422 BTRFS_SHARED_BLOCK_REF_KEY);
4423 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
4424 } else {
4425 btrfs_set_extent_inline_ref_type(leaf, iref,
4426 BTRFS_TREE_BLOCK_REF_KEY);
4427 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
4428 }
4429
4430 btrfs_mark_buffer_dirty(leaf);
4431 btrfs_free_path(path);
4432
4433 ret = remove_from_free_space_tree(trans, extent_key.objectid,
4434 num_bytes);
4435 if (ret)
4436 return ret;
4437
4438 ret = btrfs_update_block_group(trans, extent_key.objectid,
4439 fs_info->nodesize, 1);
4440 if (ret) {
4441 btrfs_err(fs_info, "update block group failed for %llu %llu",
4442 extent_key.objectid, extent_key.offset);
4443 BUG();
4444 }
4445
4446 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
4447 fs_info->nodesize);
4448 return ret;
4449}
4450
4451int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4452 struct btrfs_root *root, u64 owner,
4453 u64 offset, u64 ram_bytes,
4454 struct btrfs_key *ins)
4455{
4456 struct btrfs_ref generic_ref = { 0 };
4457 int ret;
4458
4459 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4460
4461 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4462 ins->objectid, ins->offset, 0);
4463 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
4464 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
4465 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
4466 ram_bytes, NULL, NULL);
4467 return ret;
4468}
4469
4470
4471
4472
4473
4474
4475int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4476 u64 root_objectid, u64 owner, u64 offset,
4477 struct btrfs_key *ins)
4478{
4479 struct btrfs_fs_info *fs_info = trans->fs_info;
4480 int ret;
4481 struct btrfs_block_group *block_group;
4482 struct btrfs_space_info *space_info;
4483
4484
4485
4486
4487
4488 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
4489 ret = __exclude_logged_extent(fs_info, ins->objectid,
4490 ins->offset);
4491 if (ret)
4492 return ret;
4493 }
4494
4495 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
4496 if (!block_group)
4497 return -EINVAL;
4498
4499 space_info = block_group->space_info;
4500 spin_lock(&space_info->lock);
4501 spin_lock(&block_group->lock);
4502 space_info->bytes_reserved += ins->offset;
4503 block_group->reserved += ins->offset;
4504 spin_unlock(&block_group->lock);
4505 spin_unlock(&space_info->lock);
4506
4507 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4508 offset, ins, 1);
4509 if (ret)
4510 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4511 btrfs_put_block_group(block_group);
4512 return ret;
4513}
4514
4515static struct extent_buffer *
4516btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4517 u64 bytenr, int level, u64 owner)
4518{
4519 struct btrfs_fs_info *fs_info = root->fs_info;
4520 struct extent_buffer *buf;
4521
4522 buf = btrfs_find_create_tree_block(fs_info, bytenr);
4523 if (IS_ERR(buf))
4524 return buf;
4525
4526
4527
4528
4529
4530
4531 if (buf->lock_owner == current->pid) {
4532 btrfs_err_rl(fs_info,
4533"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4534 buf->start, btrfs_header_owner(buf), current->pid);
4535 free_extent_buffer(buf);
4536 return ERR_PTR(-EUCLEAN);
4537 }
4538
4539 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
4540 btrfs_tree_lock(buf);
4541 btrfs_clean_tree_block(buf);
4542 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4543
4544 btrfs_set_lock_blocking_write(buf);
4545 set_extent_buffer_uptodate(buf);
4546
4547 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4548 btrfs_set_header_level(buf, level);
4549 btrfs_set_header_bytenr(buf, buf->start);
4550 btrfs_set_header_generation(buf, trans->transid);
4551 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4552 btrfs_set_header_owner(buf, owner);
4553 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4554 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4555 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4556 buf->log_index = root->log_transid % 2;
4557
4558
4559
4560
4561 if (buf->log_index == 0)
4562 set_extent_dirty(&root->dirty_log_pages, buf->start,
4563 buf->start + buf->len - 1, GFP_NOFS);
4564 else
4565 set_extent_new(&root->dirty_log_pages, buf->start,
4566 buf->start + buf->len - 1);
4567 } else {
4568 buf->log_index = -1;
4569 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
4570 buf->start + buf->len - 1, GFP_NOFS);
4571 }
4572 trans->dirty = true;
4573
4574 return buf;
4575}
4576
4577
4578
4579
4580
4581struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
4582 struct btrfs_root *root,
4583 u64 parent, u64 root_objectid,
4584 const struct btrfs_disk_key *key,
4585 int level, u64 hint,
4586 u64 empty_size)
4587{
4588 struct btrfs_fs_info *fs_info = root->fs_info;
4589 struct btrfs_key ins;
4590 struct btrfs_block_rsv *block_rsv;
4591 struct extent_buffer *buf;
4592 struct btrfs_delayed_extent_op *extent_op;
4593 struct btrfs_ref generic_ref = { 0 };
4594 u64 flags = 0;
4595 int ret;
4596 u32 blocksize = fs_info->nodesize;
4597 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4598
4599#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4600 if (btrfs_is_testing(fs_info)) {
4601 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
4602 level, root_objectid);
4603 if (!IS_ERR(buf))
4604 root->alloc_bytenr += blocksize;
4605 return buf;
4606 }
4607#endif
4608
4609 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4610 if (IS_ERR(block_rsv))
4611 return ERR_CAST(block_rsv);
4612
4613 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4614 empty_size, hint, &ins, 0, 0);
4615 if (ret)
4616 goto out_unuse;
4617
4618 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
4619 root_objectid);
4620 if (IS_ERR(buf)) {
4621 ret = PTR_ERR(buf);
4622 goto out_free_reserved;
4623 }
4624
4625 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4626 if (parent == 0)
4627 parent = ins.objectid;
4628 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4629 } else
4630 BUG_ON(parent > 0);
4631
4632 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4633 extent_op = btrfs_alloc_delayed_extent_op();
4634 if (!extent_op) {
4635 ret = -ENOMEM;
4636 goto out_free_buf;
4637 }
4638 if (key)
4639 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4640 else
4641 memset(&extent_op->key, 0, sizeof(extent_op->key));
4642 extent_op->flags_to_set = flags;
4643 extent_op->update_key = skinny_metadata ? false : true;
4644 extent_op->update_flags = true;
4645 extent_op->is_data = false;
4646 extent_op->level = level;
4647
4648 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4649 ins.objectid, ins.offset, parent);
4650 generic_ref.real_root = root->root_key.objectid;
4651 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
4652 btrfs_ref_tree_mod(fs_info, &generic_ref);
4653 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
4654 extent_op, NULL, NULL);
4655 if (ret)
4656 goto out_free_delayed;
4657 }
4658 return buf;
4659
4660out_free_delayed:
4661 btrfs_free_delayed_extent_op(extent_op);
4662out_free_buf:
4663 free_extent_buffer(buf);
4664out_free_reserved:
4665 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
4666out_unuse:
4667 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
4668 return ERR_PTR(ret);
4669}
4670
4671struct walk_control {
4672 u64 refs[BTRFS_MAX_LEVEL];
4673 u64 flags[BTRFS_MAX_LEVEL];
4674 struct btrfs_key update_progress;
4675 struct btrfs_key drop_progress;
4676 int drop_level;
4677 int stage;
4678 int level;
4679 int shared_level;
4680 int update_ref;
4681 int keep_locks;
4682 int reada_slot;
4683 int reada_count;
4684 int restarted;
4685};
4686
4687#define DROP_REFERENCE 1
4688#define UPDATE_BACKREF 2
4689
4690static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4691 struct btrfs_root *root,
4692 struct walk_control *wc,
4693 struct btrfs_path *path)
4694{
4695 struct btrfs_fs_info *fs_info = root->fs_info;
4696 u64 bytenr;
4697 u64 generation;
4698 u64 refs;
4699 u64 flags;
4700 u32 nritems;
4701 struct btrfs_key key;
4702 struct extent_buffer *eb;
4703 int ret;
4704 int slot;
4705 int nread = 0;
4706
4707 if (path->slots[wc->level] < wc->reada_slot) {
4708 wc->reada_count = wc->reada_count * 2 / 3;
4709 wc->reada_count = max(wc->reada_count, 2);
4710 } else {
4711 wc->reada_count = wc->reada_count * 3 / 2;
4712 wc->reada_count = min_t(int, wc->reada_count,
4713 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
4714 }
4715
4716 eb = path->nodes[wc->level];
4717 nritems = btrfs_header_nritems(eb);
4718
4719 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
4720 if (nread >= wc->reada_count)
4721 break;
4722
4723 cond_resched();
4724 bytenr = btrfs_node_blockptr(eb, slot);
4725 generation = btrfs_node_ptr_generation(eb, slot);
4726
4727 if (slot == path->slots[wc->level])
4728 goto reada;
4729
4730 if (wc->stage == UPDATE_BACKREF &&
4731 generation <= root->root_key.offset)
4732 continue;
4733
4734
4735 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
4736 wc->level - 1, 1, &refs,
4737 &flags);
4738
4739 if (ret < 0)
4740 continue;
4741 BUG_ON(refs == 0);
4742
4743 if (wc->stage == DROP_REFERENCE) {
4744 if (refs == 1)
4745 goto reada;
4746
4747 if (wc->level == 1 &&
4748 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4749 continue;
4750 if (!wc->update_ref ||
4751 generation <= root->root_key.offset)
4752 continue;
4753 btrfs_node_key_to_cpu(eb, &key, slot);
4754 ret = btrfs_comp_cpu_keys(&key,
4755 &wc->update_progress);
4756 if (ret < 0)
4757 continue;
4758 } else {
4759 if (wc->level == 1 &&
4760 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4761 continue;
4762 }
4763reada:
4764 readahead_tree_block(fs_info, bytenr);
4765 nread++;
4766 }
4767 wc->reada_slot = slot;
4768}
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4779 struct btrfs_root *root,
4780 struct btrfs_path *path,
4781 struct walk_control *wc, int lookup_info)
4782{
4783 struct btrfs_fs_info *fs_info = root->fs_info;
4784 int level = wc->level;
4785 struct extent_buffer *eb = path->nodes[level];
4786 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4787 int ret;
4788
4789 if (wc->stage == UPDATE_BACKREF &&
4790 btrfs_header_owner(eb) != root->root_key.objectid)
4791 return 1;
4792
4793
4794
4795
4796
4797 if (lookup_info &&
4798 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4799 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4800 BUG_ON(!path->locks[level]);
4801 ret = btrfs_lookup_extent_info(trans, fs_info,
4802 eb->start, level, 1,
4803 &wc->refs[level],
4804 &wc->flags[level]);
4805 BUG_ON(ret == -ENOMEM);
4806 if (ret)
4807 return ret;
4808 BUG_ON(wc->refs[level] == 0);
4809 }
4810
4811 if (wc->stage == DROP_REFERENCE) {
4812 if (wc->refs[level] > 1)
4813 return 1;
4814
4815 if (path->locks[level] && !wc->keep_locks) {
4816 btrfs_tree_unlock_rw(eb, path->locks[level]);
4817 path->locks[level] = 0;
4818 }
4819 return 0;
4820 }
4821
4822
4823 if (!(wc->flags[level] & flag)) {
4824 BUG_ON(!path->locks[level]);
4825 ret = btrfs_inc_ref(trans, root, eb, 1);
4826 BUG_ON(ret);
4827 ret = btrfs_dec_ref(trans, root, eb, 0);
4828 BUG_ON(ret);
4829 ret = btrfs_set_disk_extent_flags(trans, eb, flag,
4830 btrfs_header_level(eb), 0);
4831 BUG_ON(ret);
4832 wc->flags[level] |= flag;
4833 }
4834
4835
4836
4837
4838
4839 if (path->locks[level] && level > 0) {
4840 btrfs_tree_unlock_rw(eb, path->locks[level]);
4841 path->locks[level] = 0;
4842 }
4843 return 0;
4844}
4845
4846
4847
4848
4849
4850static int check_ref_exists(struct btrfs_trans_handle *trans,
4851 struct btrfs_root *root, u64 bytenr, u64 parent,
4852 int level)
4853{
4854 struct btrfs_path *path;
4855 struct btrfs_extent_inline_ref *iref;
4856 int ret;
4857
4858 path = btrfs_alloc_path();
4859 if (!path)
4860 return -ENOMEM;
4861
4862 ret = lookup_extent_backref(trans, path, &iref, bytenr,
4863 root->fs_info->nodesize, parent,
4864 root->root_key.objectid, level, 0);
4865 btrfs_free_path(path);
4866 if (ret == -ENOENT)
4867 return 0;
4868 if (ret < 0)
4869 return ret;
4870 return 1;
4871}
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4887 struct btrfs_root *root,
4888 struct btrfs_path *path,
4889 struct walk_control *wc, int *lookup_info)
4890{
4891 struct btrfs_fs_info *fs_info = root->fs_info;
4892 u64 bytenr;
4893 u64 generation;
4894 u64 parent;
4895 struct btrfs_key key;
4896 struct btrfs_key first_key;
4897 struct btrfs_ref ref = { 0 };
4898 struct extent_buffer *next;
4899 int level = wc->level;
4900 int reada = 0;
4901 int ret = 0;
4902 bool need_account = false;
4903
4904 generation = btrfs_node_ptr_generation(path->nodes[level],
4905 path->slots[level]);
4906
4907
4908
4909
4910
4911 if (wc->stage == UPDATE_BACKREF &&
4912 generation <= root->root_key.offset) {
4913 *lookup_info = 1;
4914 return 1;
4915 }
4916
4917 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4918 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
4919 path->slots[level]);
4920
4921 next = find_extent_buffer(fs_info, bytenr);
4922 if (!next) {
4923 next = btrfs_find_create_tree_block(fs_info, bytenr);
4924 if (IS_ERR(next))
4925 return PTR_ERR(next);
4926
4927 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
4928 level - 1);
4929 reada = 1;
4930 }
4931 btrfs_tree_lock(next);
4932 btrfs_set_lock_blocking_write(next);
4933
4934 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
4935 &wc->refs[level - 1],
4936 &wc->flags[level - 1]);
4937 if (ret < 0)
4938 goto out_unlock;
4939
4940 if (unlikely(wc->refs[level - 1] == 0)) {
4941 btrfs_err(fs_info, "Missing references.");
4942 ret = -EIO;
4943 goto out_unlock;
4944 }
4945 *lookup_info = 0;
4946
4947 if (wc->stage == DROP_REFERENCE) {
4948 if (wc->refs[level - 1] > 1) {
4949 need_account = true;
4950 if (level == 1 &&
4951 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4952 goto skip;
4953
4954 if (!wc->update_ref ||
4955 generation <= root->root_key.offset)
4956 goto skip;
4957
4958 btrfs_node_key_to_cpu(path->nodes[level], &key,
4959 path->slots[level]);
4960 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
4961 if (ret < 0)
4962 goto skip;
4963
4964 wc->stage = UPDATE_BACKREF;
4965 wc->shared_level = level - 1;
4966 }
4967 } else {
4968 if (level == 1 &&
4969 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4970 goto skip;
4971 }
4972
4973 if (!btrfs_buffer_uptodate(next, generation, 0)) {
4974 btrfs_tree_unlock(next);
4975 free_extent_buffer(next);
4976 next = NULL;
4977 *lookup_info = 1;
4978 }
4979
4980 if (!next) {
4981 if (reada && level == 1)
4982 reada_walk_down(trans, root, wc, path);
4983 next = read_tree_block(fs_info, bytenr, generation, level - 1,
4984 &first_key);
4985 if (IS_ERR(next)) {
4986 return PTR_ERR(next);
4987 } else if (!extent_buffer_uptodate(next)) {
4988 free_extent_buffer(next);
4989 return -EIO;
4990 }
4991 btrfs_tree_lock(next);
4992 btrfs_set_lock_blocking_write(next);
4993 }
4994
4995 level--;
4996 ASSERT(level == btrfs_header_level(next));
4997 if (level != btrfs_header_level(next)) {
4998 btrfs_err(root->fs_info, "mismatched level");
4999 ret = -EIO;
5000 goto out_unlock;
5001 }
5002 path->nodes[level] = next;
5003 path->slots[level] = 0;
5004 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5005 wc->level = level;
5006 if (wc->level == 1)
5007 wc->reada_slot = 0;
5008 return 0;
5009skip:
5010 wc->refs[level - 1] = 0;
5011 wc->flags[level - 1] = 0;
5012 if (wc->stage == DROP_REFERENCE) {
5013 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5014 parent = path->nodes[level]->start;
5015 } else {
5016 ASSERT(root->root_key.objectid ==
5017 btrfs_header_owner(path->nodes[level]));
5018 if (root->root_key.objectid !=
5019 btrfs_header_owner(path->nodes[level])) {
5020 btrfs_err(root->fs_info,
5021 "mismatched block owner");
5022 ret = -EIO;
5023 goto out_unlock;
5024 }
5025 parent = 0;
5026 }
5027
5028
5029
5030
5031
5032
5033
5034 if (wc->restarted) {
5035 ret = check_ref_exists(trans, root, bytenr, parent,
5036 level - 1);
5037 if (ret < 0)
5038 goto out_unlock;
5039 if (ret == 0)
5040 goto no_delete;
5041 ret = 0;
5042 wc->restarted = 0;
5043 }
5044
5045
5046
5047
5048
5049
5050 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
5051 need_account) {
5052 ret = btrfs_qgroup_trace_subtree(trans, next,
5053 generation, level - 1);
5054 if (ret) {
5055 btrfs_err_rl(fs_info,
5056 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
5057 ret);
5058 }
5059 }
5060
5061
5062
5063
5064
5065
5066
5067 wc->drop_level = level;
5068 find_next_key(path, level, &wc->drop_progress);
5069
5070 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5071 fs_info->nodesize, parent);
5072 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
5073 ret = btrfs_free_extent(trans, &ref);
5074 if (ret)
5075 goto out_unlock;
5076 }
5077no_delete:
5078 *lookup_info = 1;
5079 ret = 1;
5080
5081out_unlock:
5082 btrfs_tree_unlock(next);
5083 free_extent_buffer(next);
5084
5085 return ret;
5086}
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5101 struct btrfs_root *root,
5102 struct btrfs_path *path,
5103 struct walk_control *wc)
5104{
5105 struct btrfs_fs_info *fs_info = root->fs_info;
5106 int ret;
5107 int level = wc->level;
5108 struct extent_buffer *eb = path->nodes[level];
5109 u64 parent = 0;
5110
5111 if (wc->stage == UPDATE_BACKREF) {
5112 BUG_ON(wc->shared_level < level);
5113 if (level < wc->shared_level)
5114 goto out;
5115
5116 ret = find_next_key(path, level + 1, &wc->update_progress);
5117 if (ret > 0)
5118 wc->update_ref = 0;
5119
5120 wc->stage = DROP_REFERENCE;
5121 wc->shared_level = -1;
5122 path->slots[level] = 0;
5123
5124
5125
5126
5127
5128
5129 if (!path->locks[level]) {
5130 BUG_ON(level == 0);
5131 btrfs_tree_lock(eb);
5132 btrfs_set_lock_blocking_write(eb);
5133 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5134
5135 ret = btrfs_lookup_extent_info(trans, fs_info,
5136 eb->start, level, 1,
5137 &wc->refs[level],
5138 &wc->flags[level]);
5139 if (ret < 0) {
5140 btrfs_tree_unlock_rw(eb, path->locks[level]);
5141 path->locks[level] = 0;
5142 return ret;
5143 }
5144 BUG_ON(wc->refs[level] == 0);
5145 if (wc->refs[level] == 1) {
5146 btrfs_tree_unlock_rw(eb, path->locks[level]);
5147 path->locks[level] = 0;
5148 return 1;
5149 }
5150 }
5151 }
5152
5153
5154 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
5155
5156 if (wc->refs[level] == 1) {
5157 if (level == 0) {
5158 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5159 ret = btrfs_dec_ref(trans, root, eb, 1);
5160 else
5161 ret = btrfs_dec_ref(trans, root, eb, 0);
5162 BUG_ON(ret);
5163 if (is_fstree(root->root_key.objectid)) {
5164 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
5165 if (ret) {
5166 btrfs_err_rl(fs_info,
5167 "error %d accounting leaf items, quota is out of sync, rescan required",
5168 ret);
5169 }
5170 }
5171 }
5172
5173 if (!path->locks[level] &&
5174 btrfs_header_generation(eb) == trans->transid) {
5175 btrfs_tree_lock(eb);
5176 btrfs_set_lock_blocking_write(eb);
5177 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5178 }
5179 btrfs_clean_tree_block(eb);
5180 }
5181
5182 if (eb == root->node) {
5183 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5184 parent = eb->start;
5185 else if (root->root_key.objectid != btrfs_header_owner(eb))
5186 goto owner_mismatch;
5187 } else {
5188 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5189 parent = path->nodes[level + 1]->start;
5190 else if (root->root_key.objectid !=
5191 btrfs_header_owner(path->nodes[level + 1]))
5192 goto owner_mismatch;
5193 }
5194
5195 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5196out:
5197 wc->refs[level] = 0;
5198 wc->flags[level] = 0;
5199 return 0;
5200
5201owner_mismatch:
5202 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
5203 btrfs_header_owner(eb), root->root_key.objectid);
5204 return -EUCLEAN;
5205}
5206
5207static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5208 struct btrfs_root *root,
5209 struct btrfs_path *path,
5210 struct walk_control *wc)
5211{
5212 int level = wc->level;
5213 int lookup_info = 1;
5214 int ret;
5215
5216 while (level >= 0) {
5217 ret = walk_down_proc(trans, root, path, wc, lookup_info);
5218 if (ret > 0)
5219 break;
5220
5221 if (level == 0)
5222 break;
5223
5224 if (path->slots[level] >=
5225 btrfs_header_nritems(path->nodes[level]))
5226 break;
5227
5228 ret = do_walk_down(trans, root, path, wc, &lookup_info);
5229 if (ret > 0) {
5230 path->slots[level]++;
5231 continue;
5232 } else if (ret < 0)
5233 return ret;
5234 level = wc->level;
5235 }
5236 return 0;
5237}
5238
5239static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5240 struct btrfs_root *root,
5241 struct btrfs_path *path,
5242 struct walk_control *wc, int max_level)
5243{
5244 int level = wc->level;
5245 int ret;
5246
5247 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
5248 while (level < max_level && path->nodes[level]) {
5249 wc->level = level;
5250 if (path->slots[level] + 1 <
5251 btrfs_header_nritems(path->nodes[level])) {
5252 path->slots[level]++;
5253 return 0;
5254 } else {
5255 ret = walk_up_proc(trans, root, path, wc);
5256 if (ret > 0)
5257 return 0;
5258 if (ret < 0)
5259 return ret;
5260
5261 if (path->locks[level]) {
5262 btrfs_tree_unlock_rw(path->nodes[level],
5263 path->locks[level]);
5264 path->locks[level] = 0;
5265 }
5266 free_extent_buffer(path->nodes[level]);
5267 path->nodes[level] = NULL;
5268 level++;
5269 }
5270 }
5271 return 1;
5272}
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
5288{
5289 struct btrfs_fs_info *fs_info = root->fs_info;
5290 struct btrfs_path *path;
5291 struct btrfs_trans_handle *trans;
5292 struct btrfs_root *tree_root = fs_info->tree_root;
5293 struct btrfs_root_item *root_item = &root->root_item;
5294 struct walk_control *wc;
5295 struct btrfs_key key;
5296 int err = 0;
5297 int ret;
5298 int level;
5299 bool root_dropped = false;
5300
5301 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
5302
5303 path = btrfs_alloc_path();
5304 if (!path) {
5305 err = -ENOMEM;
5306 goto out;
5307 }
5308
5309 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5310 if (!wc) {
5311 btrfs_free_path(path);
5312 err = -ENOMEM;
5313 goto out;
5314 }
5315
5316 trans = btrfs_start_transaction(tree_root, 0);
5317 if (IS_ERR(trans)) {
5318 err = PTR_ERR(trans);
5319 goto out_free;
5320 }
5321
5322 err = btrfs_run_delayed_items(trans);
5323 if (err)
5324 goto out_end_trans;
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334 set_bit(BTRFS_ROOT_DELETING, &root->state);
5335 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5336 level = btrfs_header_level(root->node);
5337 path->nodes[level] = btrfs_lock_root_node(root);
5338 btrfs_set_lock_blocking_write(path->nodes[level]);
5339 path->slots[level] = 0;
5340 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5341 memset(&wc->update_progress, 0,
5342 sizeof(wc->update_progress));
5343 } else {
5344 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5345 memcpy(&wc->update_progress, &key,
5346 sizeof(wc->update_progress));
5347
5348 level = root_item->drop_level;
5349 BUG_ON(level == 0);
5350 path->lowest_level = level;
5351 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5352 path->lowest_level = 0;
5353 if (ret < 0) {
5354 err = ret;
5355 goto out_end_trans;
5356 }
5357 WARN_ON(ret > 0);
5358
5359
5360
5361
5362
5363 btrfs_unlock_up_safe(path, 0);
5364
5365 level = btrfs_header_level(root->node);
5366 while (1) {
5367 btrfs_tree_lock(path->nodes[level]);
5368 btrfs_set_lock_blocking_write(path->nodes[level]);
5369 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5370
5371 ret = btrfs_lookup_extent_info(trans, fs_info,
5372 path->nodes[level]->start,
5373 level, 1, &wc->refs[level],
5374 &wc->flags[level]);
5375 if (ret < 0) {
5376 err = ret;
5377 goto out_end_trans;
5378 }
5379 BUG_ON(wc->refs[level] == 0);
5380
5381 if (level == root_item->drop_level)
5382 break;
5383
5384 btrfs_tree_unlock(path->nodes[level]);
5385 path->locks[level] = 0;
5386 WARN_ON(wc->refs[level] != 1);
5387 level--;
5388 }
5389 }
5390
5391 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
5392 wc->level = level;
5393 wc->shared_level = -1;
5394 wc->stage = DROP_REFERENCE;
5395 wc->update_ref = update_ref;
5396 wc->keep_locks = 0;
5397 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5398
5399 while (1) {
5400
5401 ret = walk_down_tree(trans, root, path, wc);
5402 if (ret < 0) {
5403 err = ret;
5404 break;
5405 }
5406
5407 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
5408 if (ret < 0) {
5409 err = ret;
5410 break;
5411 }
5412
5413 if (ret > 0) {
5414 BUG_ON(wc->stage != DROP_REFERENCE);
5415 break;
5416 }
5417
5418 if (wc->stage == DROP_REFERENCE) {
5419 wc->drop_level = wc->level;
5420 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5421 &wc->drop_progress,
5422 path->slots[wc->drop_level]);
5423 }
5424 btrfs_cpu_key_to_disk(&root_item->drop_progress,
5425 &wc->drop_progress);
5426 root_item->drop_level = wc->drop_level;
5427
5428 BUG_ON(wc->level == 0);
5429 if (btrfs_should_end_transaction(trans) ||
5430 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
5431 ret = btrfs_update_root(trans, tree_root,
5432 &root->root_key,
5433 root_item);
5434 if (ret) {
5435 btrfs_abort_transaction(trans, ret);
5436 err = ret;
5437 goto out_end_trans;
5438 }
5439
5440 btrfs_end_transaction_throttle(trans);
5441 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5442 btrfs_debug(fs_info,
5443 "drop snapshot early exit");
5444 err = -EAGAIN;
5445 goto out_free;
5446 }
5447
5448 trans = btrfs_start_transaction(tree_root, 0);
5449 if (IS_ERR(trans)) {
5450 err = PTR_ERR(trans);
5451 goto out_free;
5452 }
5453 }
5454 }
5455 btrfs_release_path(path);
5456 if (err)
5457 goto out_end_trans;
5458
5459 ret = btrfs_del_root(trans, &root->root_key);
5460 if (ret) {
5461 btrfs_abort_transaction(trans, ret);
5462 err = ret;
5463 goto out_end_trans;
5464 }
5465
5466 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
5467 ret = btrfs_find_root(tree_root, &root->root_key, path,
5468 NULL, NULL);
5469 if (ret < 0) {
5470 btrfs_abort_transaction(trans, ret);
5471 err = ret;
5472 goto out_end_trans;
5473 } else if (ret > 0) {
5474
5475
5476
5477
5478
5479 btrfs_del_orphan_item(trans, tree_root,
5480 root->root_key.objectid);
5481 }
5482 }
5483
5484 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
5485 btrfs_add_dropped_root(trans, root);
5486 else
5487 btrfs_put_root(root);
5488 root_dropped = true;
5489out_end_trans:
5490 btrfs_end_transaction_throttle(trans);
5491out_free:
5492 kfree(wc);
5493 btrfs_free_path(path);
5494out:
5495
5496
5497
5498
5499
5500
5501
5502 if (!for_reloc && !root_dropped)
5503 btrfs_add_dead_root(root);
5504 if (err && err != -EAGAIN)
5505 btrfs_handle_fs_error(fs_info, err, NULL);
5506 return err;
5507}
5508
5509
5510
5511
5512
5513
5514
5515int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5516 struct btrfs_root *root,
5517 struct extent_buffer *node,
5518 struct extent_buffer *parent)
5519{
5520 struct btrfs_fs_info *fs_info = root->fs_info;
5521 struct btrfs_path *path;
5522 struct walk_control *wc;
5523 int level;
5524 int parent_level;
5525 int ret = 0;
5526 int wret;
5527
5528 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5529
5530 path = btrfs_alloc_path();
5531 if (!path)
5532 return -ENOMEM;
5533
5534 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5535 if (!wc) {
5536 btrfs_free_path(path);
5537 return -ENOMEM;
5538 }
5539
5540 btrfs_assert_tree_locked(parent);
5541 parent_level = btrfs_header_level(parent);
5542 atomic_inc(&parent->refs);
5543 path->nodes[parent_level] = parent;
5544 path->slots[parent_level] = btrfs_header_nritems(parent);
5545
5546 btrfs_assert_tree_locked(node);
5547 level = btrfs_header_level(node);
5548 path->nodes[level] = node;
5549 path->slots[level] = 0;
5550 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
5551
5552 wc->refs[parent_level] = 1;
5553 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5554 wc->level = level;
5555 wc->shared_level = -1;
5556 wc->stage = DROP_REFERENCE;
5557 wc->update_ref = 0;
5558 wc->keep_locks = 1;
5559 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5560
5561 while (1) {
5562 wret = walk_down_tree(trans, root, path, wc);
5563 if (wret < 0) {
5564 ret = wret;
5565 break;
5566 }
5567
5568 wret = walk_up_tree(trans, root, path, wc, parent_level);
5569 if (wret < 0)
5570 ret = wret;
5571 if (wret != 0)
5572 break;
5573 }
5574
5575 kfree(wc);
5576 btrfs_free_path(path);
5577 return ret;
5578}
5579
5580
5581
5582
5583
5584u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
5585{
5586 struct btrfs_block_group *block_group;
5587 u64 free_bytes = 0;
5588 int factor;
5589
5590
5591 if (list_empty(&sinfo->ro_bgs))
5592 return 0;
5593
5594 spin_lock(&sinfo->lock);
5595 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
5596 spin_lock(&block_group->lock);
5597
5598 if (!block_group->ro) {
5599 spin_unlock(&block_group->lock);
5600 continue;
5601 }
5602
5603 factor = btrfs_bg_type_to_factor(block_group->flags);
5604 free_bytes += (block_group->length -
5605 block_group->used) * factor;
5606
5607 spin_unlock(&block_group->lock);
5608 }
5609 spin_unlock(&sinfo->lock);
5610
5611 return free_bytes;
5612}
5613
5614int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
5615 u64 start, u64 end)
5616{
5617 return unpin_extent_range(fs_info, start, end, false);
5618}
5619
5620
5621
5622
5623
5624
5625
5626
5627
5628
5629
5630
5631
5632
5633
5634
5635
5636
5637
5638
5639
5640static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5641{
5642 u64 start = SZ_1M, len = 0, end = 0;
5643 int ret;
5644
5645 *trimmed = 0;
5646
5647
5648 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
5649 return 0;
5650
5651
5652 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
5653 return 0;
5654
5655
5656 if (device->total_bytes <= device->bytes_used)
5657 return 0;
5658
5659 ret = 0;
5660
5661 while (1) {
5662 struct btrfs_fs_info *fs_info = device->fs_info;
5663 u64 bytes;
5664
5665 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
5666 if (ret)
5667 break;
5668
5669 find_first_clear_extent_bit(&device->alloc_state, start,
5670 &start, &end,
5671 CHUNK_TRIMMED | CHUNK_ALLOCATED);
5672
5673
5674 start = max_t(u64, start, SZ_1M);
5675
5676
5677
5678
5679
5680
5681 end = min(end, device->total_bytes - 1);
5682
5683 len = end - start + 1;
5684
5685
5686 if (!len) {
5687 mutex_unlock(&fs_info->chunk_mutex);
5688 ret = 0;
5689 break;
5690 }
5691
5692 ret = btrfs_issue_discard(device->bdev, start, len,
5693 &bytes);
5694 if (!ret)
5695 set_extent_bits(&device->alloc_state, start,
5696 start + bytes - 1,
5697 CHUNK_TRIMMED);
5698 mutex_unlock(&fs_info->chunk_mutex);
5699
5700 if (ret)
5701 break;
5702
5703 start += len;
5704 *trimmed += bytes;
5705
5706 if (fatal_signal_pending(current)) {
5707 ret = -ERESTARTSYS;
5708 break;
5709 }
5710
5711 cond_resched();
5712 }
5713
5714 return ret;
5715}
5716
5717
5718
5719
5720
5721
5722
5723
5724
5725
5726int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
5727{
5728 struct btrfs_block_group *cache = NULL;
5729 struct btrfs_device *device;
5730 struct list_head *devices;
5731 u64 group_trimmed;
5732 u64 range_end = U64_MAX;
5733 u64 start;
5734 u64 end;
5735 u64 trimmed = 0;
5736 u64 bg_failed = 0;
5737 u64 dev_failed = 0;
5738 int bg_ret = 0;
5739 int dev_ret = 0;
5740 int ret = 0;
5741
5742
5743
5744
5745
5746 if (range->len != U64_MAX &&
5747 check_add_overflow(range->start, range->len, &range_end))
5748 return -EINVAL;
5749
5750 cache = btrfs_lookup_first_block_group(fs_info, range->start);
5751 for (; cache; cache = btrfs_next_block_group(cache)) {
5752 if (cache->start >= range_end) {
5753 btrfs_put_block_group(cache);
5754 break;
5755 }
5756
5757 start = max(range->start, cache->start);
5758 end = min(range_end, cache->start + cache->length);
5759
5760 if (end - start >= range->minlen) {
5761 if (!btrfs_block_group_done(cache)) {
5762 ret = btrfs_cache_block_group(cache, 0);
5763 if (ret) {
5764 bg_failed++;
5765 bg_ret = ret;
5766 continue;
5767 }
5768 ret = btrfs_wait_block_group_cache_done(cache);
5769 if (ret) {
5770 bg_failed++;
5771 bg_ret = ret;
5772 continue;
5773 }
5774 }
5775 ret = btrfs_trim_block_group(cache,
5776 &group_trimmed,
5777 start,
5778 end,
5779 range->minlen);
5780
5781 trimmed += group_trimmed;
5782 if (ret) {
5783 bg_failed++;
5784 bg_ret = ret;
5785 continue;
5786 }
5787 }
5788 }
5789
5790 if (bg_failed)
5791 btrfs_warn(fs_info,
5792 "failed to trim %llu block group(s), last error %d",
5793 bg_failed, bg_ret);
5794 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5795 devices = &fs_info->fs_devices->devices;
5796 list_for_each_entry(device, devices, dev_list) {
5797 ret = btrfs_trim_free_extents(device, &group_trimmed);
5798 if (ret) {
5799 dev_failed++;
5800 dev_ret = ret;
5801 break;
5802 }
5803
5804 trimmed += group_trimmed;
5805 }
5806 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5807
5808 if (dev_failed)
5809 btrfs_warn(fs_info,
5810 "failed to trim %llu device(s), last error %d",
5811 dev_failed, dev_ret);
5812 range->len = trimmed;
5813 if (bg_ret)
5814 return bg_ret;
5815 return dev_ret;
5816}
5817