// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

6#include <linux/sched.h>
7#include <linux/sched/signal.h>
8#include <linux/pagemap.h>
9#include <linux/writeback.h>
10#include <linux/blkdev.h>
11#include <linux/sort.h>
12#include <linux/rcupdate.h>
13#include <linux/kthread.h>
14#include <linux/slab.h>
15#include <linux/ratelimit.h>
16#include <linux/percpu_counter.h>
17#include <linux/lockdep.h>
18#include <linux/crc32c.h>
19#include "tree-log.h"
20#include "disk-io.h"
21#include "print-tree.h"
22#include "volumes.h"
23#include "raid56.h"
24#include "locking.h"
25#include "free-space-cache.h"
26#include "free-space-tree.h"
27#include "math.h"
28#include "sysfs.h"
29#include "qgroup.h"
30#include "ref-verify.h"
31
32#undef SCRAMBLE_DELAYED_REFS

/*
 * Control flags for do_chunk_alloc()'s force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
 * chunks already allocated.  This is used as part of the clustering code to
 * help make sure we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 */
48enum {
49 CHUNK_ALLOC_NO_FORCE = 0,
50 CHUNK_ALLOC_LIMITED = 1,
51 CHUNK_ALLOC_FORCE = 2,
52};

/*
 * Declare a helper to detect underflow of various space info members.
 */
57#define DECLARE_SPACE_INFO_UPDATE(name) \
58static inline void update_##name(struct btrfs_space_info *sinfo, \
59 s64 bytes) \
60{ \
61 if (bytes < 0 && sinfo->name < -bytes) { \
62 WARN_ON(1); \
63 sinfo->name = 0; \
64 return; \
65 } \
66 sinfo->name += bytes; \
67}
68
69DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
70DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
71
72static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
73 struct btrfs_delayed_ref_node *node, u64 parent,
74 u64 root_objectid, u64 owner_objectid,
75 u64 owner_offset, int refs_to_drop,
76 struct btrfs_delayed_extent_op *extra_op);
77static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
78 struct extent_buffer *leaf,
79 struct btrfs_extent_item *ei);
80static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
81 u64 parent, u64 root_objectid,
82 u64 flags, u64 owner, u64 offset,
83 struct btrfs_key *ins, int ref_mod);
84static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
85 struct btrfs_delayed_ref_node *node,
86 struct btrfs_delayed_extent_op *extent_op);
87static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
88 int force);
89static int find_next_key(struct btrfs_path *path, int level,
90 struct btrfs_key *key);
91static void dump_space_info(struct btrfs_fs_info *fs_info,
92 struct btrfs_space_info *info, u64 bytes,
93 int dump_block_groups);
94static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
95 u64 num_bytes);
96static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
97 struct btrfs_space_info *space_info,
98 u64 num_bytes);
99static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
100 struct btrfs_space_info *space_info,
101 u64 num_bytes);
102
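/*
 * Return non-zero once caching of this block group's free space has either
 * finished or failed with an error.
 */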
103static noinline int
104block_group_cache_done(struct btrfs_block_group_cache *cache)
105{
106 smp_mb();
107 return cache->cached == BTRFS_CACHE_FINISHED ||
108 cache->cached == BTRFS_CACHE_ERROR;
109}
110
111static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
112{
113 return (cache->flags & bits) == bits;
114}
115
116void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
117{
118 atomic_inc(&cache->count);
119}
120
121void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
122{
123 if (atomic_dec_and_test(&cache->count)) {
124 WARN_ON(cache->pinned > 0);
125 WARN_ON(cache->reserved > 0);

 /*
  * If not empty, someone is still holding the mutex of full_stripe_lock,
  * which can only be released by the caller.  Freeing the cache here
  * would cause a use-after-free when the caller tries to release the
  * full stripe lock, so there is no better option than to warn.
  */
135 WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
136 kfree(cache->free_space_ctl);
137 kfree(cache);
138 }
139}

/*
 * This adds the block group to the fs_info rb tree for the block group cache.
 */
145static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
146 struct btrfs_block_group_cache *block_group)
147{
148 struct rb_node **p;
149 struct rb_node *parent = NULL;
150 struct btrfs_block_group_cache *cache;
151
152 spin_lock(&info->block_group_cache_lock);
153 p = &info->block_group_cache_tree.rb_node;
154
155 while (*p) {
156 parent = *p;
157 cache = rb_entry(parent, struct btrfs_block_group_cache,
158 cache_node);
159 if (block_group->key.objectid < cache->key.objectid) {
160 p = &(*p)->rb_left;
161 } else if (block_group->key.objectid > cache->key.objectid) {
162 p = &(*p)->rb_right;
163 } else {
164 spin_unlock(&info->block_group_cache_lock);
165 return -EEXIST;
166 }
167 }
168
169 rb_link_node(&block_group->cache_node, parent, p);
170 rb_insert_color(&block_group->cache_node,
171 &info->block_group_cache_tree);
172
173 if (info->first_logical_byte > block_group->key.objectid)
174 info->first_logical_byte = block_group->key.objectid;
175
176 spin_unlock(&info->block_group_cache_lock);
177
178 return 0;
179}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr.
 */
185static struct btrfs_block_group_cache *
186block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
187 int contains)
188{
189 struct btrfs_block_group_cache *cache, *ret = NULL;
190 struct rb_node *n;
191 u64 end, start;
192
193 spin_lock(&info->block_group_cache_lock);
194 n = info->block_group_cache_tree.rb_node;
195
196 while (n) {
197 cache = rb_entry(n, struct btrfs_block_group_cache,
198 cache_node);
199 end = cache->key.objectid + cache->key.offset - 1;
200 start = cache->key.objectid;
201
202 if (bytenr < start) {
203 if (!contains && (!ret || start < ret->key.objectid))
204 ret = cache;
205 n = n->rb_left;
206 } else if (bytenr > start) {
207 if (contains && bytenr <= end) {
208 ret = cache;
209 break;
210 }
211 n = n->rb_right;
212 } else {
213 ret = cache;
214 break;
215 }
216 }
217 if (ret) {
218 btrfs_get_block_group(ret);
219 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
220 info->first_logical_byte = ret->key.objectid;
221 }
222 spin_unlock(&info->block_group_cache_lock);
223
224 return ret;
225}
226
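/*
 * Mark a range as excluded in both freed_extents trees (using the
 * EXTENT_UPTODATE bit) so the free space caching code never hands it out,
 * e.g. the space covered by superblock mirrors.
 */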
227static int add_excluded_extent(struct btrfs_fs_info *fs_info,
228 u64 start, u64 num_bytes)
229{
230 u64 end = start + num_bytes - 1;
231 set_extent_bits(&fs_info->freed_extents[0],
232 start, end, EXTENT_UPTODATE);
233 set_extent_bits(&fs_info->freed_extents[1],
234 start, end, EXTENT_UPTODATE);
235 return 0;
236}
237
238static void free_excluded_extents(struct btrfs_block_group_cache *cache)
239{
240 struct btrfs_fs_info *fs_info = cache->fs_info;
241 u64 start, end;
242
243 start = cache->key.objectid;
244 end = start + cache->key.offset - 1;
245
246 clear_extent_bits(&fs_info->freed_extents[0],
247 start, end, EXTENT_UPTODATE);
248 clear_extent_bits(&fs_info->freed_extents[1],
249 start, end, EXTENT_UPTODATE);
250}
251
252static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
253{
254 struct btrfs_fs_info *fs_info = cache->fs_info;
255 u64 bytenr;
256 u64 *logical;
257 int stripe_len;
258 int i, nr, ret;
259
260 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
261 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
262 cache->bytes_super += stripe_len;
263 ret = add_excluded_extent(fs_info, cache->key.objectid,
264 stripe_len);
265 if (ret)
266 return ret;
267 }
268
269 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
270 bytenr = btrfs_sb_offset(i);
271 ret = btrfs_rmap_block(fs_info, cache->key.objectid,
272 bytenr, &logical, &nr, &stripe_len);
273 if (ret)
274 return ret;
275
276 while (nr--) {
277 u64 start, len;
278
279 if (logical[nr] > cache->key.objectid +
280 cache->key.offset)
281 continue;
282
283 if (logical[nr] + stripe_len <= cache->key.objectid)
284 continue;
285
286 start = logical[nr];
287 if (start < cache->key.objectid) {
288 start = cache->key.objectid;
289 len = (logical[nr] + stripe_len) - start;
290 } else {
291 len = min_t(u64, stripe_len,
292 cache->key.objectid +
293 cache->key.offset - start);
294 }
295
296 cache->bytes_super += len;
297 ret = add_excluded_extent(fs_info, start, len);
298 if (ret) {
299 kfree(logical);
300 return ret;
301 }
302 }
303
304 kfree(logical);
305 }
306 return 0;
307}
308
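/*
 * Grab a reference on the block group's caching control structure, or return
 * NULL if caching is not in progress.  Callers must drop the reference with
 * put_caching_control().
 */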
309static struct btrfs_caching_control *
310get_caching_control(struct btrfs_block_group_cache *cache)
311{
312 struct btrfs_caching_control *ctl;
313
314 spin_lock(&cache->lock);
315 if (!cache->caching_ctl) {
316 spin_unlock(&cache->lock);
317 return NULL;
318 }
319
320 ctl = cache->caching_ctl;
321 refcount_inc(&ctl->count);
322 spin_unlock(&cache->lock);
323 return ctl;
324}
325
326static void put_caching_control(struct btrfs_caching_control *ctl)
327{
328 if (refcount_dec_and_test(&ctl->count))
329 kfree(ctl);
330}
331
332#ifdef CONFIG_BTRFS_DEBUG
333static void fragment_free_space(struct btrfs_block_group_cache *block_group)
334{
335 struct btrfs_fs_info *fs_info = block_group->fs_info;
336 u64 start = block_group->key.objectid;
337 u64 len = block_group->key.offset;
338 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
339 fs_info->nodesize : fs_info->sectorsize;
340 u64 step = chunk << 1;
341
342 while (len > chunk) {
343 btrfs_remove_free_space(block_group, start, chunk);
344 start += step;
345 if (len < step)
346 len = 0;
347 else
348 len -= step;
349 }
350}
351#endif

/*
 * Add the free space in the range [start, end) to the block group's free
 * space cache, skipping anything that is still pinned (pinned extents only
 * become free once the running transaction commits).  Returns the number of
 * bytes added.
 */
358u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
359 u64 start, u64 end)
360{
361 struct btrfs_fs_info *info = block_group->fs_info;
362 u64 extent_start, extent_end, size, total_added = 0;
363 int ret;
364
365 while (start < end) {
366 ret = find_first_extent_bit(info->pinned_extents, start,
367 &extent_start, &extent_end,
368 EXTENT_DIRTY | EXTENT_UPTODATE,
369 NULL);
370 if (ret)
371 break;
372
373 if (extent_start <= start) {
374 start = extent_end + 1;
375 } else if (extent_start > start && extent_start < end) {
376 size = extent_start - start;
377 total_added += size;
378 ret = btrfs_add_free_space(block_group, start,
379 size);
380 BUG_ON(ret);
381 start = extent_end + 1;
382 } else {
383 break;
384 }
385 }
386
387 if (start < end) {
388 size = end - start;
389 total_added += size;
390 ret = btrfs_add_free_space(block_group, start, size);
391 BUG_ON(ret);
392 }
393
394 return total_added;
395}
396
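/*
 * Populate the free space cache for a block group by walking the extent tree
 * (commit root) and adding every hole between allocated extents via
 * add_new_free_space().
 */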
397static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
398{
399 struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
400 struct btrfs_fs_info *fs_info = block_group->fs_info;
401 struct btrfs_root *extent_root = fs_info->extent_root;
402 struct btrfs_path *path;
403 struct extent_buffer *leaf;
404 struct btrfs_key key;
405 u64 total_found = 0;
406 u64 last = 0;
407 u32 nritems;
408 int ret;
409 bool wakeup = true;
410
411 path = btrfs_alloc_path();
412 if (!path)
413 return -ENOMEM;
414
415 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
416
417#ifdef CONFIG_BTRFS_DEBUG
 /*
  * If we're fragmenting, disable the progress updates and wakeups so the
  * allocator doesn't start using this block group before its free space
  * has been fragmented.
  */
423 if (btrfs_should_fragment_free_space(block_group))
424 wakeup = false;
425#endif

 /*
  * We don't want to deadlock with somebody trying to allocate a new
  * extent for the extent root while also trying to search the extent
  * root to add free space, so we skip locking and search the commit
  * root, since it is read-only.
  */
432 path->skip_locking = 1;
433 path->search_commit_root = 1;
434 path->reada = READA_FORWARD;
435
436 key.objectid = last;
437 key.offset = 0;
438 key.type = BTRFS_EXTENT_ITEM_KEY;
439
440next:
441 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
442 if (ret < 0)
443 goto out;
444
445 leaf = path->nodes[0];
446 nritems = btrfs_header_nritems(leaf);
447
448 while (1) {
449 if (btrfs_fs_closing(fs_info) > 1) {
450 last = (u64)-1;
451 break;
452 }
453
454 if (path->slots[0] < nritems) {
455 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
456 } else {
457 ret = find_next_key(path, 0, &key);
458 if (ret)
459 break;
460
461 if (need_resched() ||
462 rwsem_is_contended(&fs_info->commit_root_sem)) {
463 if (wakeup)
464 caching_ctl->progress = last;
465 btrfs_release_path(path);
466 up_read(&fs_info->commit_root_sem);
467 mutex_unlock(&caching_ctl->mutex);
468 cond_resched();
469 mutex_lock(&caching_ctl->mutex);
470 down_read(&fs_info->commit_root_sem);
471 goto next;
472 }
473
474 ret = btrfs_next_leaf(extent_root, path);
475 if (ret < 0)
476 goto out;
477 if (ret)
478 break;
479 leaf = path->nodes[0];
480 nritems = btrfs_header_nritems(leaf);
481 continue;
482 }
483
484 if (key.objectid < last) {
485 key.objectid = last;
486 key.offset = 0;
487 key.type = BTRFS_EXTENT_ITEM_KEY;
488
489 if (wakeup)
490 caching_ctl->progress = last;
491 btrfs_release_path(path);
492 goto next;
493 }
494
495 if (key.objectid < block_group->key.objectid) {
496 path->slots[0]++;
497 continue;
498 }
499
500 if (key.objectid >= block_group->key.objectid +
501 block_group->key.offset)
502 break;
503
504 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
505 key.type == BTRFS_METADATA_ITEM_KEY) {
506 total_found += add_new_free_space(block_group, last,
507 key.objectid);
508 if (key.type == BTRFS_METADATA_ITEM_KEY)
509 last = key.objectid +
510 fs_info->nodesize;
511 else
512 last = key.objectid + key.offset;
513
514 if (total_found > CACHING_CTL_WAKE_UP) {
515 total_found = 0;
516 if (wakeup)
517 wake_up(&caching_ctl->wait);
518 }
519 }
520 path->slots[0]++;
521 }
522 ret = 0;
523
524 total_found += add_new_free_space(block_group, last,
525 block_group->key.objectid +
526 block_group->key.offset);
527 caching_ctl->progress = (u64)-1;
528
529out:
530 btrfs_free_path(path);
531 return ret;
532}
533
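/*
 * Worker that fills in the free space cache for a block group, either from
 * the free space tree or by scanning the extent tree, then marks the block
 * group as cached and wakes up any waiters.
 */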
534static noinline void caching_thread(struct btrfs_work *work)
535{
536 struct btrfs_block_group_cache *block_group;
537 struct btrfs_fs_info *fs_info;
538 struct btrfs_caching_control *caching_ctl;
539 int ret;
540
541 caching_ctl = container_of(work, struct btrfs_caching_control, work);
542 block_group = caching_ctl->block_group;
543 fs_info = block_group->fs_info;
544
545 mutex_lock(&caching_ctl->mutex);
546 down_read(&fs_info->commit_root_sem);
547
548 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
549 ret = load_free_space_tree(caching_ctl);
550 else
551 ret = load_extent_tree_free(caching_ctl);
552
553 spin_lock(&block_group->lock);
554 block_group->caching_ctl = NULL;
555 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
556 spin_unlock(&block_group->lock);
557
558#ifdef CONFIG_BTRFS_DEBUG
559 if (btrfs_should_fragment_free_space(block_group)) {
560 u64 bytes_used;
561
562 spin_lock(&block_group->space_info->lock);
563 spin_lock(&block_group->lock);
564 bytes_used = block_group->key.offset -
565 btrfs_block_group_used(&block_group->item);
566 block_group->space_info->bytes_used += bytes_used >> 1;
567 spin_unlock(&block_group->lock);
568 spin_unlock(&block_group->space_info->lock);
569 fragment_free_space(block_group);
570 }
571#endif
572
573 caching_ctl->progress = (u64)-1;
574
575 up_read(&fs_info->commit_root_sem);
576 free_excluded_extents(block_group);
577 mutex_unlock(&caching_ctl->mutex);
578
579 wake_up(&caching_ctl->wait);
580
581 put_caching_control(caching_ctl);
582 btrfs_put_block_group(block_group);
583}
584
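/*
 * Start caching the free space of a block group.  If the on-disk free space
 * cache is usable it is loaded synchronously; otherwise a caching thread is
 * queued, unless load_cache_only is set, in which case the block group is
 * left uncached.
 */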
585static int cache_block_group(struct btrfs_block_group_cache *cache,
586 int load_cache_only)
587{
588 DEFINE_WAIT(wait);
589 struct btrfs_fs_info *fs_info = cache->fs_info;
590 struct btrfs_caching_control *caching_ctl;
591 int ret = 0;
592
593 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
594 if (!caching_ctl)
595 return -ENOMEM;
596
597 INIT_LIST_HEAD(&caching_ctl->list);
598 mutex_init(&caching_ctl->mutex);
599 init_waitqueue_head(&caching_ctl->wait);
600 caching_ctl->block_group = cache;
601 caching_ctl->progress = cache->key.objectid;
602 refcount_set(&caching_ctl->count, 1);
603 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
604 caching_thread, NULL, NULL);
605
606 spin_lock(&cache->lock);

 /*
  * This should be a rare occasion, but it can happen when one thread
  * starts to load the space cache info and another thread starts a
  * transaction commit which tries to do an allocation while the first
  * thread is still loading.  The previous loop should have kept us from
  * choosing this block group, but if we have moved to the state where we
  * will wait on caching block groups, we need to first check whether a
  * fast load is in progress so we can wait for it to finish; otherwise we
  * could end up allocating from a block group whose cache gets evicted
  * for one reason or another.
  */
619 while (cache->cached == BTRFS_CACHE_FAST) {
620 struct btrfs_caching_control *ctl;
621
622 ctl = cache->caching_ctl;
623 refcount_inc(&ctl->count);
624 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
625 spin_unlock(&cache->lock);
626
627 schedule();
628
629 finish_wait(&ctl->wait, &wait);
630 put_caching_control(ctl);
631 spin_lock(&cache->lock);
632 }
633
634 if (cache->cached != BTRFS_CACHE_NO) {
635 spin_unlock(&cache->lock);
636 kfree(caching_ctl);
637 return 0;
638 }
639 WARN_ON(cache->caching_ctl);
640 cache->caching_ctl = caching_ctl;
641 cache->cached = BTRFS_CACHE_FAST;
642 spin_unlock(&cache->lock);
643
644 if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
645 mutex_lock(&caching_ctl->mutex);
646 ret = load_free_space_cache(cache);
647
648 spin_lock(&cache->lock);
649 if (ret == 1) {
650 cache->caching_ctl = NULL;
651 cache->cached = BTRFS_CACHE_FINISHED;
652 cache->last_byte_to_unpin = (u64)-1;
653 caching_ctl->progress = (u64)-1;
654 } else {
655 if (load_cache_only) {
656 cache->caching_ctl = NULL;
657 cache->cached = BTRFS_CACHE_NO;
658 } else {
659 cache->cached = BTRFS_CACHE_STARTED;
660 cache->has_caching_ctl = 1;
661 }
662 }
663 spin_unlock(&cache->lock);
664#ifdef CONFIG_BTRFS_DEBUG
665 if (ret == 1 &&
666 btrfs_should_fragment_free_space(cache)) {
667 u64 bytes_used;
668
669 spin_lock(&cache->space_info->lock);
670 spin_lock(&cache->lock);
671 bytes_used = cache->key.offset -
672 btrfs_block_group_used(&cache->item);
673 cache->space_info->bytes_used += bytes_used >> 1;
674 spin_unlock(&cache->lock);
675 spin_unlock(&cache->space_info->lock);
676 fragment_free_space(cache);
677 }
678#endif
679 mutex_unlock(&caching_ctl->mutex);
680
681 wake_up(&caching_ctl->wait);
682 if (ret == 1) {
683 put_caching_control(caching_ctl);
684 free_excluded_extents(cache);
685 return 0;
686 }
687 } else {
 /*
  * We're either using the free space tree or no caching at all.
  * Set cached to the appropriate value and wake up any waiters.
  */
692 spin_lock(&cache->lock);
693 if (load_cache_only) {
694 cache->caching_ctl = NULL;
695 cache->cached = BTRFS_CACHE_NO;
696 } else {
697 cache->cached = BTRFS_CACHE_STARTED;
698 cache->has_caching_ctl = 1;
699 }
700 spin_unlock(&cache->lock);
701 wake_up(&caching_ctl->wait);
702 }
703
704 if (load_cache_only) {
705 put_caching_control(caching_ctl);
706 return 0;
707 }
708
709 down_write(&fs_info->commit_root_sem);
710 refcount_inc(&caching_ctl->count);
711 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
712 up_write(&fs_info->commit_root_sem);
713
714 btrfs_get_block_group(cache);
715
716 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
717
718 return ret;
719}

/*
 * Return the block group that starts at or after bytenr.
 */
724static struct btrfs_block_group_cache *
725btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
726{
727 return block_group_cache_tree_search(info, bytenr, 0);
728}
729

/*
 * Return the block group that contains the given bytenr.
 */
733struct btrfs_block_group_cache *btrfs_lookup_block_group(
734 struct btrfs_fs_info *info,
735 u64 bytenr)
736{
737 return block_group_cache_tree_search(info, bytenr, 1);
738}
739
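/*
 * Find the space_info describing the given block group type (data, metadata
 * or system).  Returns NULL if it does not exist yet.
 */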
740static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
741 u64 flags)
742{
743 struct list_head *head = &info->space_info;
744 struct btrfs_space_info *found;
745
746 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
747
748 rcu_read_lock();
749 list_for_each_entry_rcu(found, head, list) {
750 if (found->flags & flags) {
751 rcu_read_unlock();
752 return found;
753 }
754 }
755 rcu_read_unlock();
756 return NULL;
757}
758
759static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
760 struct btrfs_ref *ref, int sign)
761{
762 struct btrfs_space_info *space_info;
763 s64 num_bytes;
764 u64 flags;
765
766 ASSERT(sign == 1 || sign == -1);
767 num_bytes = sign * ref->len;
768 if (ref->type == BTRFS_REF_METADATA) {
769 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
770 flags = BTRFS_BLOCK_GROUP_SYSTEM;
771 else
772 flags = BTRFS_BLOCK_GROUP_METADATA;
773 } else {
774 flags = BTRFS_BLOCK_GROUP_DATA;
775 }
776
777 space_info = __find_space_info(fs_info, flags);
778 ASSERT(space_info);
779 percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
780 BTRFS_TOTAL_BYTES_PINNED_BATCH);
781}

/*
 * After adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
787void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
788{
789 struct list_head *head = &info->space_info;
790 struct btrfs_space_info *found;
791
792 rcu_read_lock();
793 list_for_each_entry_rcu(found, head, list)
794 found->full = 0;
795 rcu_read_unlock();
796}

/* Simple helper to search for an existing data extent at a given offset. */
799int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
800{
801 int ret;
802 struct btrfs_key key;
803 struct btrfs_path *path;
804
805 path = btrfs_alloc_path();
806 if (!path)
807 return -ENOMEM;
808
809 key.objectid = start;
810 key.offset = len;
811 key.type = BTRFS_EXTENT_ITEM_KEY;
812 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
813 btrfs_free_path(path);
814 return ret;
815}

/*
 * Helper function to look up the reference count and flags of an extent.
 *
 * The head node for a delayed ref stores the sum of all the reference count
 * modifications queued up in the rbtree, and may also store the extent flags
 * to set.  This way you can check what the reference count and extent flags
 * would be if all of the delayed refs were processed.
 */
826int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
827 struct btrfs_fs_info *fs_info, u64 bytenr,
828 u64 offset, int metadata, u64 *refs, u64 *flags)
829{
830 struct btrfs_delayed_ref_head *head;
831 struct btrfs_delayed_ref_root *delayed_refs;
832 struct btrfs_path *path;
833 struct btrfs_extent_item *ei;
834 struct extent_buffer *leaf;
835 struct btrfs_key key;
836 u32 item_size;
837 u64 num_refs;
838 u64 extent_flags;
839 int ret;
840

 /*
  * If we don't have skinny metadata, don't bother doing anything
  * different.
  */
845 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
846 offset = fs_info->nodesize;
847 metadata = 0;
848 }
849
850 path = btrfs_alloc_path();
851 if (!path)
852 return -ENOMEM;
853
854 if (!trans) {
855 path->skip_locking = 1;
856 path->search_commit_root = 1;
857 }
858
859search_again:
860 key.objectid = bytenr;
861 key.offset = offset;
862 if (metadata)
863 key.type = BTRFS_METADATA_ITEM_KEY;
864 else
865 key.type = BTRFS_EXTENT_ITEM_KEY;
866
867 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
868 if (ret < 0)
869 goto out_free;
870
871 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
872 if (path->slots[0]) {
873 path->slots[0]--;
874 btrfs_item_key_to_cpu(path->nodes[0], &key,
875 path->slots[0]);
876 if (key.objectid == bytenr &&
877 key.type == BTRFS_EXTENT_ITEM_KEY &&
878 key.offset == fs_info->nodesize)
879 ret = 0;
880 }
881 }
882
883 if (ret == 0) {
884 leaf = path->nodes[0];
885 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
886 if (item_size >= sizeof(*ei)) {
887 ei = btrfs_item_ptr(leaf, path->slots[0],
888 struct btrfs_extent_item);
889 num_refs = btrfs_extent_refs(leaf, ei);
890 extent_flags = btrfs_extent_flags(leaf, ei);
891 } else {
892 ret = -EINVAL;
893 btrfs_print_v0_err(fs_info);
894 if (trans)
895 btrfs_abort_transaction(trans, ret);
896 else
897 btrfs_handle_fs_error(fs_info, ret, NULL);
898
899 goto out_free;
900 }
901
902 BUG_ON(num_refs == 0);
903 } else {
904 num_refs = 0;
905 extent_flags = 0;
906 ret = 0;
907 }
908
909 if (!trans)
910 goto out;
911
912 delayed_refs = &trans->transaction->delayed_refs;
913 spin_lock(&delayed_refs->lock);
914 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
915 if (head) {
916 if (!mutex_trylock(&head->mutex)) {
917 refcount_inc(&head->refs);
918 spin_unlock(&delayed_refs->lock);
919
920 btrfs_release_path(path);

 /*
  * Mutex was contended, block until it's released and try again.
  */
926 mutex_lock(&head->mutex);
927 mutex_unlock(&head->mutex);
928 btrfs_put_delayed_ref_head(head);
929 goto search_again;
930 }
931 spin_lock(&head->lock);
932 if (head->extent_op && head->extent_op->update_flags)
933 extent_flags |= head->extent_op->flags_to_set;
934 else
935 BUG_ON(num_refs == 0);
936
937 num_refs += head->ref_mod;
938 spin_unlock(&head->lock);
939 mutex_unlock(&head->mutex);
940 }
941 spin_unlock(&delayed_refs->lock);
942out:
943 WARN_ON(num_refs == 0);
944 if (refs)
945 *refs = num_refs;
946 if (flags)
947 *flags = extent_flags;
948out_free:
949 btrfs_free_path(path);
950 return ret;
951}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.
 *
 * There are two kinds of back refs:
 *
 * - implicit back refs identify the holder by who logically owns the
 *   pointer: for file data that is (root objectid, inode objectid, file
 *   offset) stored in a BTRFS_EXTENT_DATA_REF, for tree blocks it is the
 *   owning root stored in a BTRFS_TREE_BLOCK_REF.
 *
 * - full back refs identify the holder by the bytenr of the tree block that
 *   contains the pointer (BTRFS_SHARED_DATA_REF / BTRFS_SHARED_BLOCK_REF).
 *   They are used for pointers that live in blocks no longer referenced
 *   directly by their owner tree, e.g. after a shared block has been COWed;
 *   in that case BTRFS_BLOCK_FLAG_FULL_BACKREF is set in the extent item
 *   flags.
 *
 * Back refs are stored inline in the extent item when they fit, otherwise as
 * separate items keyed by the extent bytenr.  Both inline and keyed back
 * refs are kept sorted by type and then by offset.
 */
1064int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
1065 struct btrfs_extent_inline_ref *iref,
1066 enum btrfs_inline_ref_type is_data)
1067{
1068 int type = btrfs_extent_inline_ref_type(eb, iref);
1069 u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
1070
1071 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1072 type == BTRFS_SHARED_BLOCK_REF_KEY ||
1073 type == BTRFS_SHARED_DATA_REF_KEY ||
1074 type == BTRFS_EXTENT_DATA_REF_KEY) {
1075 if (is_data == BTRFS_REF_TYPE_BLOCK) {
1076 if (type == BTRFS_TREE_BLOCK_REF_KEY)
1077 return type;
1078 if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1079 ASSERT(eb->fs_info);
 /*
  * Every shared ref has a parent tree block, which must be aligned
  * to the nodesize.
  */
1085 if (offset &&
1086 IS_ALIGNED(offset, eb->fs_info->nodesize))
1087 return type;
1088 }
1089 } else if (is_data == BTRFS_REF_TYPE_DATA) {
1090 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1091 return type;
1092 if (type == BTRFS_SHARED_DATA_REF_KEY) {
1093 ASSERT(eb->fs_info);
 /*
  * Every shared ref has a parent tree block, which must be aligned
  * to the nodesize.
  */
1099 if (offset &&
1100 IS_ALIGNED(offset, eb->fs_info->nodesize))
1101 return type;
1102 }
1103 } else {
1104 ASSERT(is_data == BTRFS_REF_TYPE_ANY);
1105 return type;
1106 }
1107 }
1108
1109 btrfs_print_leaf((struct extent_buffer *)eb);
1110 btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
1111 eb->start, type);
1112 WARN_ON(1);
1113
1114 return BTRFS_REF_TYPE_INVALID;
1115}
1116
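/*
 * Hash a (root, inode objectid, file offset) tuple with crc32c.  The result
 * is used as the key offset for BTRFS_EXTENT_DATA_REF items.
 */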
1117static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1118{
1119 u32 high_crc = ~(u32)0;
1120 u32 low_crc = ~(u32)0;
1121 __le64 lenum;
1122
1123 lenum = cpu_to_le64(root_objectid);
1124 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
1125 lenum = cpu_to_le64(owner);
1126 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
1127 lenum = cpu_to_le64(offset);
1128 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
1129
1130 return ((u64)high_crc << 31) ^ (u64)low_crc;
1131}
1132
1133static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1134 struct btrfs_extent_data_ref *ref)
1135{
1136 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1137 btrfs_extent_data_ref_objectid(leaf, ref),
1138 btrfs_extent_data_ref_offset(leaf, ref));
1139}
1140
1141static int match_extent_data_ref(struct extent_buffer *leaf,
1142 struct btrfs_extent_data_ref *ref,
1143 u64 root_objectid, u64 owner, u64 offset)
1144{
1145 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1146 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1147 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1148 return 0;
1149 return 1;
1150}
1151
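/*
 * Look up an existing data back ref for the given extent, either a shared
 * ref keyed by the parent block or an extent data ref matching
 * (root, owner, offset).  Returns 0 if found, -ENOENT otherwise.
 */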
1152static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
1153 struct btrfs_path *path,
1154 u64 bytenr, u64 parent,
1155 u64 root_objectid,
1156 u64 owner, u64 offset)
1157{
1158 struct btrfs_root *root = trans->fs_info->extent_root;
1159 struct btrfs_key key;
1160 struct btrfs_extent_data_ref *ref;
1161 struct extent_buffer *leaf;
1162 u32 nritems;
1163 int ret;
1164 int recow;
1165 int err = -ENOENT;
1166
1167 key.objectid = bytenr;
1168 if (parent) {
1169 key.type = BTRFS_SHARED_DATA_REF_KEY;
1170 key.offset = parent;
1171 } else {
1172 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1173 key.offset = hash_extent_data_ref(root_objectid,
1174 owner, offset);
1175 }
1176again:
1177 recow = 0;
1178 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1179 if (ret < 0) {
1180 err = ret;
1181 goto fail;
1182 }
1183
1184 if (parent) {
1185 if (!ret)
1186 return 0;
1187 goto fail;
1188 }
1189
1190 leaf = path->nodes[0];
1191 nritems = btrfs_header_nritems(leaf);
1192 while (1) {
1193 if (path->slots[0] >= nritems) {
1194 ret = btrfs_next_leaf(root, path);
1195 if (ret < 0)
1196 err = ret;
1197 if (ret)
1198 goto fail;
1199
1200 leaf = path->nodes[0];
1201 nritems = btrfs_header_nritems(leaf);
1202 recow = 1;
1203 }
1204
1205 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1206 if (key.objectid != bytenr ||
1207 key.type != BTRFS_EXTENT_DATA_REF_KEY)
1208 goto fail;
1209
1210 ref = btrfs_item_ptr(leaf, path->slots[0],
1211 struct btrfs_extent_data_ref);
1212
1213 if (match_extent_data_ref(leaf, ref, root_objectid,
1214 owner, offset)) {
1215 if (recow) {
1216 btrfs_release_path(path);
1217 goto again;
1218 }
1219 err = 0;
1220 break;
1221 }
1222 path->slots[0]++;
1223 }
1224fail:
1225 return err;
1226}
1227
1228static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1229 struct btrfs_path *path,
1230 u64 bytenr, u64 parent,
1231 u64 root_objectid, u64 owner,
1232 u64 offset, int refs_to_add)
1233{
1234 struct btrfs_root *root = trans->fs_info->extent_root;
1235 struct btrfs_key key;
1236 struct extent_buffer *leaf;
1237 u32 size;
1238 u32 num_refs;
1239 int ret;
1240
1241 key.objectid = bytenr;
1242 if (parent) {
1243 key.type = BTRFS_SHARED_DATA_REF_KEY;
1244 key.offset = parent;
1245 size = sizeof(struct btrfs_shared_data_ref);
1246 } else {
1247 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1248 key.offset = hash_extent_data_ref(root_objectid,
1249 owner, offset);
1250 size = sizeof(struct btrfs_extent_data_ref);
1251 }
1252
1253 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
1254 if (ret && ret != -EEXIST)
1255 goto fail;
1256
1257 leaf = path->nodes[0];
1258 if (parent) {
1259 struct btrfs_shared_data_ref *ref;
1260 ref = btrfs_item_ptr(leaf, path->slots[0],
1261 struct btrfs_shared_data_ref);
1262 if (ret == 0) {
1263 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
1264 } else {
1265 num_refs = btrfs_shared_data_ref_count(leaf, ref);
1266 num_refs += refs_to_add;
1267 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
1268 }
1269 } else {
1270 struct btrfs_extent_data_ref *ref;
1271 while (ret == -EEXIST) {
1272 ref = btrfs_item_ptr(leaf, path->slots[0],
1273 struct btrfs_extent_data_ref);
1274 if (match_extent_data_ref(leaf, ref, root_objectid,
1275 owner, offset))
1276 break;
1277 btrfs_release_path(path);
1278 key.offset++;
1279 ret = btrfs_insert_empty_item(trans, root, path, &key,
1280 size);
1281 if (ret && ret != -EEXIST)
1282 goto fail;
1283
1284 leaf = path->nodes[0];
1285 }
1286 ref = btrfs_item_ptr(leaf, path->slots[0],
1287 struct btrfs_extent_data_ref);
1288 if (ret == 0) {
1289 btrfs_set_extent_data_ref_root(leaf, ref,
1290 root_objectid);
1291 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
1292 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
1293 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
1294 } else {
1295 num_refs = btrfs_extent_data_ref_count(leaf, ref);
1296 num_refs += refs_to_add;
1297 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
1298 }
1299 }
1300 btrfs_mark_buffer_dirty(leaf);
1301 ret = 0;
1302fail:
1303 btrfs_release_path(path);
1304 return ret;
1305}
1306
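/*
 * Drop refs_to_drop references from the data back ref item at the current
 * path position, deleting the item when its count reaches zero.
 */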
1307static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1308 struct btrfs_path *path,
1309 int refs_to_drop, int *last_ref)
1310{
1311 struct btrfs_key key;
1312 struct btrfs_extent_data_ref *ref1 = NULL;
1313 struct btrfs_shared_data_ref *ref2 = NULL;
1314 struct extent_buffer *leaf;
1315 u32 num_refs = 0;
1316 int ret = 0;
1317
1318 leaf = path->nodes[0];
1319 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1320
1321 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1322 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1323 struct btrfs_extent_data_ref);
1324 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1325 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1326 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1327 struct btrfs_shared_data_ref);
1328 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1329 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
1330 btrfs_print_v0_err(trans->fs_info);
1331 btrfs_abort_transaction(trans, -EINVAL);
1332 return -EINVAL;
1333 } else {
1334 BUG();
1335 }
1336
1337 BUG_ON(num_refs < refs_to_drop);
1338 num_refs -= refs_to_drop;
1339
1340 if (num_refs == 0) {
1341 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1342 *last_ref = 1;
1343 } else {
1344 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1345 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1346 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1347 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
1348 btrfs_mark_buffer_dirty(leaf);
1349 }
1350 return ret;
1351}
1352
1353static noinline u32 extent_data_ref_count(struct btrfs_path *path,
1354 struct btrfs_extent_inline_ref *iref)
1355{
1356 struct btrfs_key key;
1357 struct extent_buffer *leaf;
1358 struct btrfs_extent_data_ref *ref1;
1359 struct btrfs_shared_data_ref *ref2;
1360 u32 num_refs = 0;
1361 int type;
1362
1363 leaf = path->nodes[0];
1364 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1365
1366 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
1367 if (iref) {
 /*
  * If the type were invalid, we should have bailed out earlier than
  * this call.
  */
1372 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
1373 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1374 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1375 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
1376 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1377 } else {
1378 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
1379 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1380 }
1381 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1382 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1383 struct btrfs_extent_data_ref);
1384 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1385 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1386 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1387 struct btrfs_shared_data_ref);
1388 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1389 } else {
1390 WARN_ON(1);
1391 }
1392 return num_refs;
1393}
1394
1395static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1396 struct btrfs_path *path,
1397 u64 bytenr, u64 parent,
1398 u64 root_objectid)
1399{
1400 struct btrfs_root *root = trans->fs_info->extent_root;
1401 struct btrfs_key key;
1402 int ret;
1403
1404 key.objectid = bytenr;
1405 if (parent) {
1406 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1407 key.offset = parent;
1408 } else {
1409 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1410 key.offset = root_objectid;
1411 }
1412
1413 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1414 if (ret > 0)
1415 ret = -ENOENT;
1416 return ret;
1417}
1418
1419static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1420 struct btrfs_path *path,
1421 u64 bytenr, u64 parent,
1422 u64 root_objectid)
1423{
1424 struct btrfs_key key;
1425 int ret;
1426
1427 key.objectid = bytenr;
1428 if (parent) {
1429 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1430 key.offset = parent;
1431 } else {
1432 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1433 key.offset = root_objectid;
1434 }
1435
1436 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
1437 path, &key, 0);
1438 btrfs_release_path(path);
1439 return ret;
1440}
1441
1442static inline int extent_ref_type(u64 parent, u64 owner)
1443{
1444 int type;
1445 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1446 if (parent > 0)
1447 type = BTRFS_SHARED_BLOCK_REF_KEY;
1448 else
1449 type = BTRFS_TREE_BLOCK_REF_KEY;
1450 } else {
1451 if (parent > 0)
1452 type = BTRFS_SHARED_DATA_REF_KEY;
1453 else
1454 type = BTRFS_EXTENT_DATA_REF_KEY;
1455 }
1456 return type;
1457}
1458
1459static int find_next_key(struct btrfs_path *path, int level,
1460 struct btrfs_key *key)
1461
1462{
1463 for (; level < BTRFS_MAX_LEVEL; level++) {
1464 if (!path->nodes[level])
1465 break;
1466 if (path->slots[level] + 1 >=
1467 btrfs_header_nritems(path->nodes[level]))
1468 continue;
1469 if (level == 0)
1470 btrfs_item_key_to_cpu(path->nodes[level], key,
1471 path->slots[level] + 1);
1472 else
1473 btrfs_node_key_to_cpu(path->nodes[level], key,
1474 path->slots[level] + 1);
1475 return 0;
1476 }
1477 return 1;
1478}

/*
 * Look for an inline back ref.  If the back ref is found, *ref_ret is set
 * to the address of the inline back ref and 0 is returned.
 *
 * If the back ref isn't found, *ref_ret is set to the address where it
 * should be inserted and -ENOENT is returned.
 *
 * If insert is true and there are too many inline back refs, the path
 * points to the extent item and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref items
 * are ordered in the tree.
 */
1493static noinline_for_stack
1494int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1495 struct btrfs_path *path,
1496 struct btrfs_extent_inline_ref **ref_ret,
1497 u64 bytenr, u64 num_bytes,
1498 u64 parent, u64 root_objectid,
1499 u64 owner, u64 offset, int insert)
1500{
1501 struct btrfs_fs_info *fs_info = trans->fs_info;
1502 struct btrfs_root *root = fs_info->extent_root;
1503 struct btrfs_key key;
1504 struct extent_buffer *leaf;
1505 struct btrfs_extent_item *ei;
1506 struct btrfs_extent_inline_ref *iref;
1507 u64 flags;
1508 u64 item_size;
1509 unsigned long ptr;
1510 unsigned long end;
1511 int extra_size;
1512 int type;
1513 int want;
1514 int ret;
1515 int err = 0;
1516 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
1517 int needed;
1518
1519 key.objectid = bytenr;
1520 key.type = BTRFS_EXTENT_ITEM_KEY;
1521 key.offset = num_bytes;
1522
1523 want = extent_ref_type(parent, owner);
1524 if (insert) {
1525 extra_size = btrfs_extent_inline_ref_size(want);
1526 path->keep_locks = 1;
1527 } else
1528 extra_size = -1;

 /*
  * With skinny metadata the extent item for a tree block is keyed by
  * (bytenr, METADATA_ITEM, level); owner holds the level here.
  */
1534 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1535 key.type = BTRFS_METADATA_ITEM_KEY;
1536 key.offset = owner;
1537 }
1538
1539again:
1540 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1541 if (ret < 0) {
1542 err = ret;
1543 goto out;
1544 }

 /*
  * We may be a newly converted filesystem which still has the old fat
  * extent entries for metadata, so try and see if we have one of those.
  */
1550 if (ret > 0 && skinny_metadata) {
1551 skinny_metadata = false;
1552 if (path->slots[0]) {
1553 path->slots[0]--;
1554 btrfs_item_key_to_cpu(path->nodes[0], &key,
1555 path->slots[0]);
1556 if (key.objectid == bytenr &&
1557 key.type == BTRFS_EXTENT_ITEM_KEY &&
1558 key.offset == num_bytes)
1559 ret = 0;
1560 }
1561 if (ret) {
1562 key.objectid = bytenr;
1563 key.type = BTRFS_EXTENT_ITEM_KEY;
1564 key.offset = num_bytes;
1565 btrfs_release_path(path);
1566 goto again;
1567 }
1568 }
1569
1570 if (ret && !insert) {
1571 err = -ENOENT;
1572 goto out;
1573 } else if (WARN_ON(ret)) {
1574 err = -EIO;
1575 goto out;
1576 }
1577
1578 leaf = path->nodes[0];
1579 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1580 if (unlikely(item_size < sizeof(*ei))) {
1581 err = -EINVAL;
1582 btrfs_print_v0_err(fs_info);
1583 btrfs_abort_transaction(trans, err);
1584 goto out;
1585 }
1586
1587 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1588 flags = btrfs_extent_flags(leaf, ei);
1589
1590 ptr = (unsigned long)(ei + 1);
1591 end = (unsigned long)ei + item_size;
1592
1593 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1594 ptr += sizeof(struct btrfs_tree_block_info);
1595 BUG_ON(ptr > end);
1596 }
1597
1598 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
1599 needed = BTRFS_REF_TYPE_DATA;
1600 else
1601 needed = BTRFS_REF_TYPE_BLOCK;
1602
1603 err = -ENOENT;
1604 while (1) {
1605 if (ptr >= end) {
1606 WARN_ON(ptr > end);
1607 break;
1608 }
1609 iref = (struct btrfs_extent_inline_ref *)ptr;
1610 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
1611 if (type == BTRFS_REF_TYPE_INVALID) {
1612 err = -EUCLEAN;
1613 goto out;
1614 }
1615
1616 if (want < type)
1617 break;
1618 if (want > type) {
1619 ptr += btrfs_extent_inline_ref_size(type);
1620 continue;
1621 }
1622
1623 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1624 struct btrfs_extent_data_ref *dref;
1625 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1626 if (match_extent_data_ref(leaf, dref, root_objectid,
1627 owner, offset)) {
1628 err = 0;
1629 break;
1630 }
1631 if (hash_extent_data_ref_item(leaf, dref) <
1632 hash_extent_data_ref(root_objectid, owner, offset))
1633 break;
1634 } else {
1635 u64 ref_offset;
1636 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1637 if (parent > 0) {
1638 if (parent == ref_offset) {
1639 err = 0;
1640 break;
1641 }
1642 if (ref_offset < parent)
1643 break;
1644 } else {
1645 if (root_objectid == ref_offset) {
1646 err = 0;
1647 break;
1648 }
1649 if (ref_offset < root_objectid)
1650 break;
1651 }
1652 }
1653 ptr += btrfs_extent_inline_ref_size(type);
1654 }
1655 if (err == -ENOENT && insert) {
1656 if (item_size + extra_size >=
1657 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1658 err = -EAGAIN;
1659 goto out;
1660 }

 /*
  * To add a new inline back ref, we have to make sure there is no
  * corresponding back ref item.  For simplicity, we just do not add a
  * new inline back ref if there is any kind of item for this block.
  */
1667 if (find_next_key(path, 0, &key) == 0 &&
1668 key.objectid == bytenr &&
1669 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1670 err = -EAGAIN;
1671 goto out;
1672 }
1673 }
1674 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1675out:
1676 if (insert) {
1677 path->keep_locks = 0;
1678 btrfs_unlock_up_safe(path, 1);
1679 }
1680 return err;
1681}

/*
 * Helper to add a new inline back ref.
 */
1686static noinline_for_stack
1687void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
1688 struct btrfs_path *path,
1689 struct btrfs_extent_inline_ref *iref,
1690 u64 parent, u64 root_objectid,
1691 u64 owner, u64 offset, int refs_to_add,
1692 struct btrfs_delayed_extent_op *extent_op)
1693{
1694 struct extent_buffer *leaf;
1695 struct btrfs_extent_item *ei;
1696 unsigned long ptr;
1697 unsigned long end;
1698 unsigned long item_offset;
1699 u64 refs;
1700 int size;
1701 int type;
1702
1703 leaf = path->nodes[0];
1704 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1705 item_offset = (unsigned long)iref - (unsigned long)ei;
1706
1707 type = extent_ref_type(parent, owner);
1708 size = btrfs_extent_inline_ref_size(type);
1709
1710 btrfs_extend_item(path, size);
1711
1712 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1713 refs = btrfs_extent_refs(leaf, ei);
1714 refs += refs_to_add;
1715 btrfs_set_extent_refs(leaf, ei, refs);
1716 if (extent_op)
1717 __run_delayed_extent_op(extent_op, leaf, ei);
1718
1719 ptr = (unsigned long)ei + item_offset;
1720 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1721 if (ptr < end - size)
1722 memmove_extent_buffer(leaf, ptr + size, ptr,
1723 end - size - ptr);
1724
1725 iref = (struct btrfs_extent_inline_ref *)ptr;
1726 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1727 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1728 struct btrfs_extent_data_ref *dref;
1729 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1730 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1731 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1732 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1733 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1734 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1735 struct btrfs_shared_data_ref *sref;
1736 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1737 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1738 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1739 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1740 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1741 } else {
1742 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1743 }
1744 btrfs_mark_buffer_dirty(leaf);
1745}
1746
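/*
 * Look up a back ref for the given extent, first as an inline ref inside the
 * extent item and, failing that, as a separate keyed back ref item.
 */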
1747static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1748 struct btrfs_path *path,
1749 struct btrfs_extent_inline_ref **ref_ret,
1750 u64 bytenr, u64 num_bytes, u64 parent,
1751 u64 root_objectid, u64 owner, u64 offset)
1752{
1753 int ret;
1754
1755 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
1756 num_bytes, parent, root_objectid,
1757 owner, offset, 0);
1758 if (ret != -ENOENT)
1759 return ret;
1760
1761 btrfs_release_path(path);
1762 *ref_ret = NULL;
1763
1764 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1765 ret = lookup_tree_block_ref(trans, path, bytenr, parent,
1766 root_objectid);
1767 } else {
1768 ret = lookup_extent_data_ref(trans, path, bytenr, parent,
1769 root_objectid, owner, offset);
1770 }
1771 return ret;
1772}

/*
 * Helper to update or remove an inline back ref.
 */
1777static noinline_for_stack
1778void update_inline_extent_backref(struct btrfs_path *path,
1779 struct btrfs_extent_inline_ref *iref,
1780 int refs_to_mod,
1781 struct btrfs_delayed_extent_op *extent_op,
1782 int *last_ref)
1783{
1784 struct extent_buffer *leaf = path->nodes[0];
1785 struct btrfs_extent_item *ei;
1786 struct btrfs_extent_data_ref *dref = NULL;
1787 struct btrfs_shared_data_ref *sref = NULL;
1788 unsigned long ptr;
1789 unsigned long end;
1790 u32 item_size;
1791 int size;
1792 int type;
1793 u64 refs;
1794
1795 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1796 refs = btrfs_extent_refs(leaf, ei);
1797 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1798 refs += refs_to_mod;
1799 btrfs_set_extent_refs(leaf, ei, refs);
1800 if (extent_op)
1801 __run_delayed_extent_op(extent_op, leaf, ei);

 /*
  * If the type were invalid, we should have bailed out after
  * lookup_inline_extent_backref().
  */
1807 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
1808 ASSERT(type != BTRFS_REF_TYPE_INVALID);
1809
1810 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1811 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1812 refs = btrfs_extent_data_ref_count(leaf, dref);
1813 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1814 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1815 refs = btrfs_shared_data_ref_count(leaf, sref);
1816 } else {
1817 refs = 1;
1818 BUG_ON(refs_to_mod != -1);
1819 }
1820
1821 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1822 refs += refs_to_mod;
1823
1824 if (refs > 0) {
1825 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1826 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1827 else
1828 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1829 } else {
1830 *last_ref = 1;
1831 size = btrfs_extent_inline_ref_size(type);
1832 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1833 ptr = (unsigned long)iref;
1834 end = (unsigned long)ei + item_size;
1835 if (ptr + size < end)
1836 memmove_extent_buffer(leaf, ptr, ptr + size,
1837 end - ptr - size);
1838 item_size -= size;
1839 btrfs_truncate_item(path, item_size, 1);
1840 }
1841 btrfs_mark_buffer_dirty(leaf);
1842}
1843
1844static noinline_for_stack
1845int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1846 struct btrfs_path *path,
1847 u64 bytenr, u64 num_bytes, u64 parent,
1848 u64 root_objectid, u64 owner,
1849 u64 offset, int refs_to_add,
1850 struct btrfs_delayed_extent_op *extent_op)
1851{
1852 struct btrfs_extent_inline_ref *iref;
1853 int ret;
1854
1855 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1856 num_bytes, parent, root_objectid,
1857 owner, offset, 1);
1858 if (ret == 0) {
1859 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1860 update_inline_extent_backref(path, iref, refs_to_add,
1861 extent_op, NULL);
1862 } else if (ret == -ENOENT) {
1863 setup_inline_extent_backref(trans->fs_info, path, iref, parent,
1864 root_objectid, owner, offset,
1865 refs_to_add, extent_op);
1866 ret = 0;
1867 }
1868 return ret;
1869}
1870
1871static int insert_extent_backref(struct btrfs_trans_handle *trans,
1872 struct btrfs_path *path,
1873 u64 bytenr, u64 parent, u64 root_objectid,
1874 u64 owner, u64 offset, int refs_to_add)
1875{
1876 int ret;
1877 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1878 BUG_ON(refs_to_add != 1);
1879 ret = insert_tree_block_ref(trans, path, bytenr, parent,
1880 root_objectid);
1881 } else {
1882 ret = insert_extent_data_ref(trans, path, bytenr, parent,
1883 root_objectid, owner, offset,
1884 refs_to_add);
1885 }
1886 return ret;
1887}
1888
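/*
 * Remove refs_to_drop references from a back ref, handling inline refs,
 * keyed data refs and keyed tree block refs.  *last_ref is set when the last
 * reference held by this back ref is gone.
 */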
1889static int remove_extent_backref(struct btrfs_trans_handle *trans,
1890 struct btrfs_path *path,
1891 struct btrfs_extent_inline_ref *iref,
1892 int refs_to_drop, int is_data, int *last_ref)
1893{
1894 int ret = 0;
1895
1896 BUG_ON(!is_data && refs_to_drop != 1);
1897 if (iref) {
1898 update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
1899 last_ref);
1900 } else if (is_data) {
1901 ret = remove_extent_data_ref(trans, path, refs_to_drop,
1902 last_ref);
1903 } else {
1904 *last_ref = 1;
1905 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
1906 }
1907 return ret;
1908}
1909
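/*
 * Issue a discard for [start, start + len) on the given block device,
 * skipping any ranges that overlap superblock mirrors.  The number of bytes
 * actually discarded is returned in *discarded_bytes.
 */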
1910static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1911 u64 *discarded_bytes)
1912{
1913 int j, ret = 0;
1914 u64 bytes_left, end;
1915 u64 aligned_start = ALIGN(start, 1 << 9);
1916
1917 if (WARN_ON(start != aligned_start)) {
1918 len -= aligned_start - start;
1919 len = round_down(len, 1 << 9);
1920 start = aligned_start;
1921 }
1922
1923 *discarded_bytes = 0;
1924
1925 if (!len)
1926 return 0;
1927
1928 end = start + len;
1929 bytes_left = len;

 /* Skip any superblocks on this device. */
1932 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1933 u64 sb_start = btrfs_sb_offset(j);
1934 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1935 u64 size = sb_start - start;
1936
1937 if (!in_range(sb_start, start, bytes_left) &&
1938 !in_range(sb_end, start, bytes_left) &&
1939 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1940 continue;

 /*
  * Superblock spans the beginning of the range.  Adjust the start and
  * try again.
  */
1946 if (sb_start <= start) {
1947 start += sb_end - start;
1948 if (start > end) {
1949 bytes_left = 0;
1950 break;
1951 }
1952 bytes_left = end - start;
1953 continue;
1954 }
1955
1956 if (size) {
1957 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
1958 GFP_NOFS, 0);
1959 if (!ret)
1960 *discarded_bytes += size;
1961 else if (ret != -EOPNOTSUPP)
1962 return ret;
1963 }
1964
1965 start = sb_end;
1966 if (start > end) {
1967 bytes_left = 0;
1968 break;
1969 }
1970 bytes_left = end - start;
1971 }
1972
1973 if (bytes_left) {
1974 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
1975 GFP_NOFS, 0);
1976 if (!ret)
1977 *discarded_bytes += bytes_left;
1978 }
1979 return ret;
1980}
1981
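/*
 * Discard the given logical range by mapping it to the underlying devices
 * and issuing a discard on each stripe, returning the total number of bytes
 * actually discarded in *actual_bytes.
 */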
1982int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
1983 u64 num_bytes, u64 *actual_bytes)
1984{
1985 int ret;
1986 u64 discarded_bytes = 0;
1987 struct btrfs_bio *bbio = NULL;

 /*
  * Avoid races with device replace and make sure our bbio has devices
  * associated to its stripes that don't go away while we are
  * discarding.
  */
1994 btrfs_bio_counter_inc_blocked(fs_info);
1995
1996 ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
1997 &bbio, 0);
1998
1999 if (!ret) {
2000 struct btrfs_bio_stripe *stripe = bbio->stripes;
2001 int i;
2002
2003
2004 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
2005 u64 bytes;
2006 struct request_queue *req_q;
2007
2008 if (!stripe->dev->bdev) {
2009 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
2010 continue;
2011 }
2012 req_q = bdev_get_queue(stripe->dev->bdev);
2013 if (!blk_queue_discard(req_q))
2014 continue;
2015
2016 ret = btrfs_issue_discard(stripe->dev->bdev,
2017 stripe->physical,
2018 stripe->length,
2019 &bytes);
2020 if (!ret)
2021 discarded_bytes += bytes;
2022 else if (ret != -EOPNOTSUPP)
2023 break;

 /*
  * Just in case we get back EOPNOTSUPP for some reason, just ignore the
  * return value so we don't screw up people calling discard_extent.
  */
2030 ret = 0;
2031 }
2032 btrfs_put_bbio(bbio);
2033 }
2034 btrfs_bio_counter_dec(fs_info);
2035
2036 if (actual_bytes)
2037 *actual_bytes = discarded_bytes;
2038
2039
2040 if (ret == -EOPNOTSUPP)
2041 ret = 0;
2042 return ret;
2043}

/* Can return -ENOMEM. */
2046int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2047 struct btrfs_ref *generic_ref)
2048{
2049 struct btrfs_fs_info *fs_info = trans->fs_info;
2050 int old_ref_mod, new_ref_mod;
2051 int ret;
2052
2053 ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
2054 generic_ref->action);
2055 BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
2056 generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
2057
2058 if (generic_ref->type == BTRFS_REF_METADATA)
2059 ret = btrfs_add_delayed_tree_ref(trans, generic_ref,
2060 NULL, &old_ref_mod, &new_ref_mod);
2061 else
2062 ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0,
2063 &old_ref_mod, &new_ref_mod);
2064
2065 btrfs_ref_tree_mod(fs_info, generic_ref);
2066
2067 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
2068 add_pinned_bytes(fs_info, generic_ref, -1);
2069
2070 return ret;
2071}

/*
 * __btrfs_inc_extent_ref - insert a backreference for a given extent
 *
 * @trans:          Handle of the transaction.
 *
 * @node:           The delayed ref node used to get the bytenr/length of the
 *                  extent whose references are incremented.
 *
 * @parent:         If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY /
 *                  BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
 *                  bytenr of the parent block.  Otherwise parent must be 0.
 *
 * @root_objectid:  The id of the root where this modification originated,
 *                  either one of the well-known metadata trees or the
 *                  subvolume id which references this extent.
 *
 * @owner:          For data extents it is the inode number of the owning
 *                  file.  For metadata extents it holds the level of the
 *                  extent in the tree.
 *
 * @offset:         For metadata extents the offset is ignored and is
 *                  currently always passed as 0.  For data extents it is the
 *                  file offset this extent belongs to.
 *
 * @refs_to_add:    Number of references to add.
 *
 * @extent_op:      Pointer to a structure holding the information needed
 *                  when updating a tree block's flags.
 */
2107static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2108 struct btrfs_delayed_ref_node *node,
2109 u64 parent, u64 root_objectid,
2110 u64 owner, u64 offset, int refs_to_add,
2111 struct btrfs_delayed_extent_op *extent_op)
2112{
2113 struct btrfs_path *path;
2114 struct extent_buffer *leaf;
2115 struct btrfs_extent_item *item;
2116 struct btrfs_key key;
2117 u64 bytenr = node->bytenr;
2118 u64 num_bytes = node->num_bytes;
2119 u64 refs;
2120 int ret;
2121
2122 path = btrfs_alloc_path();
2123 if (!path)
2124 return -ENOMEM;
2125
2126 path->reada = READA_FORWARD;
2127 path->leave_spinning = 1;
2128
2129 ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
2130 parent, root_objectid, owner,
2131 offset, refs_to_add, extent_op);
2132 if ((ret < 0 && ret != -EAGAIN) || !ret)
2133 goto out;

 /*
  * Ok, we had -EAGAIN which means we didn't have space to insert an
  * inline extent ref, so just update the reference count and add a
  * normal backref item.
  */
2140 leaf = path->nodes[0];
2141 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2142 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2143 refs = btrfs_extent_refs(leaf, item);
2144 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2145 if (extent_op)
2146 __run_delayed_extent_op(extent_op, leaf, item);
2147
2148 btrfs_mark_buffer_dirty(leaf);
2149 btrfs_release_path(path);
2150
2151 path->reada = READA_FORWARD;
2152 path->leave_spinning = 1;
2153
2154 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
2155 owner, offset, refs_to_add);
2156 if (ret)
2157 btrfs_abort_transaction(trans, ret);
2158out:
2159 btrfs_free_path(path);
2160 return ret;
2161}
2162
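/*
 * Process a single delayed ref for a data extent: allocate the reserved
 * extent, add a reference or drop a reference depending on the action.
 */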
2163static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2164 struct btrfs_delayed_ref_node *node,
2165 struct btrfs_delayed_extent_op *extent_op,
2166 int insert_reserved)
2167{
2168 int ret = 0;
2169 struct btrfs_delayed_data_ref *ref;
2170 struct btrfs_key ins;
2171 u64 parent = 0;
2172 u64 ref_root = 0;
2173 u64 flags = 0;
2174
2175 ins.objectid = node->bytenr;
2176 ins.offset = node->num_bytes;
2177 ins.type = BTRFS_EXTENT_ITEM_KEY;
2178
2179 ref = btrfs_delayed_node_to_data_ref(node);
2180 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
2181
2182 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2183 parent = ref->parent;
2184 ref_root = ref->root;
2185
2186 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2187 if (extent_op)
2188 flags |= extent_op->flags_to_set;
2189 ret = alloc_reserved_file_extent(trans, parent, ref_root,
2190 flags, ref->objectid,
2191 ref->offset, &ins,
2192 node->ref_mod);
2193 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2194 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2195 ref->objectid, ref->offset,
2196 node->ref_mod, extent_op);
2197 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2198 ret = __btrfs_free_extent(trans, node, parent,
2199 ref_root, ref->objectid,
2200 ref->offset, node->ref_mod,
2201 extent_op);
2202 } else {
2203 BUG();
2204 }
2205 return ret;
2206}
2207
2208static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2209 struct extent_buffer *leaf,
2210 struct btrfs_extent_item *ei)
2211{
2212 u64 flags = btrfs_extent_flags(leaf, ei);
2213 if (extent_op->update_flags) {
2214 flags |= extent_op->flags_to_set;
2215 btrfs_set_extent_flags(leaf, ei, flags);
2216 }
2217
2218 if (extent_op->update_key) {
2219 struct btrfs_tree_block_info *bi;
2220 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2221 bi = (struct btrfs_tree_block_info *)(ei + 1);
2222 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2223 }
2224}
2225
2226static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2227 struct btrfs_delayed_ref_head *head,
2228 struct btrfs_delayed_extent_op *extent_op)
2229{
2230 struct btrfs_fs_info *fs_info = trans->fs_info;
2231 struct btrfs_key key;
2232 struct btrfs_path *path;
2233 struct btrfs_extent_item *ei;
2234 struct extent_buffer *leaf;
2235 u32 item_size;
2236 int ret;
2237 int err = 0;
2238 int metadata = !extent_op->is_data;
2239
2240 if (trans->aborted)
2241 return 0;
2242
2243 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2244 metadata = 0;
2245
2246 path = btrfs_alloc_path();
2247 if (!path)
2248 return -ENOMEM;
2249
2250 key.objectid = head->bytenr;
2251
2252 if (metadata) {
2253 key.type = BTRFS_METADATA_ITEM_KEY;
2254 key.offset = extent_op->level;
2255 } else {
2256 key.type = BTRFS_EXTENT_ITEM_KEY;
2257 key.offset = head->num_bytes;
2258 }
2259
2260again:
2261 path->reada = READA_FORWARD;
2262 path->leave_spinning = 1;
2263 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
2264 if (ret < 0) {
2265 err = ret;
2266 goto out;
2267 }
2268 if (ret > 0) {
2269 if (metadata) {
2270 if (path->slots[0] > 0) {
2271 path->slots[0]--;
2272 btrfs_item_key_to_cpu(path->nodes[0], &key,
2273 path->slots[0]);
2274 if (key.objectid == head->bytenr &&
2275 key.type == BTRFS_EXTENT_ITEM_KEY &&
2276 key.offset == head->num_bytes)
2277 ret = 0;
2278 }
2279 if (ret > 0) {
2280 btrfs_release_path(path);
2281 metadata = 0;
2282
2283 key.objectid = head->bytenr;
2284 key.offset = head->num_bytes;
2285 key.type = BTRFS_EXTENT_ITEM_KEY;
2286 goto again;
2287 }
2288 } else {
2289 err = -EIO;
2290 goto out;
2291 }
2292 }
2293
2294 leaf = path->nodes[0];
2295 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2296
2297 if (unlikely(item_size < sizeof(*ei))) {
2298 err = -EINVAL;
2299 btrfs_print_v0_err(fs_info);
2300 btrfs_abort_transaction(trans, err);
2301 goto out;
2302 }
2303
2304 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2305 __run_delayed_extent_op(extent_op, leaf, ei);
2306
2307 btrfs_mark_buffer_dirty(leaf);
2308out:
2309 btrfs_free_path(path);
2310 return err;
2311}
2312
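/*
 * Process a single delayed ref for a tree block.  Tree blocks always carry
 * exactly one reference per delayed ref node.
 */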
2313static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2314 struct btrfs_delayed_ref_node *node,
2315 struct btrfs_delayed_extent_op *extent_op,
2316 int insert_reserved)
2317{
2318 int ret = 0;
2319 struct btrfs_delayed_tree_ref *ref;
2320 u64 parent = 0;
2321 u64 ref_root = 0;
2322
2323 ref = btrfs_delayed_node_to_tree_ref(node);
2324 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
2325
2326 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2327 parent = ref->parent;
2328 ref_root = ref->root;
2329
2330 if (node->ref_mod != 1) {
2331 btrfs_err(trans->fs_info,
2332 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2333 node->bytenr, node->ref_mod, node->action, ref_root,
2334 parent);
2335 return -EIO;
2336 }
2337 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2338 BUG_ON(!extent_op || !extent_op->update_flags);
2339 ret = alloc_reserved_tree_block(trans, node, extent_op);
2340 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2341 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2342 ref->level, 0, 1, extent_op);
2343 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2344 ret = __btrfs_free_extent(trans, node, parent, ref_root,
2345 ref->level, 0, 1, extent_op);
2346 } else {
2347 BUG();
2348 }
2349 return ret;
2350}
2351
2352/* Helper to actually process a single delayed ref entry. */
2353static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2354 struct btrfs_delayed_ref_node *node,
2355 struct btrfs_delayed_extent_op *extent_op,
2356 int insert_reserved)
2357{
2358 int ret = 0;
2359
2360 if (trans->aborted) {
2361 if (insert_reserved)
2362 btrfs_pin_extent(trans->fs_info, node->bytenr,
2363 node->num_bytes, 1);
2364 return 0;
2365 }
2366
2367 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2368 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2369 ret = run_delayed_tree_ref(trans, node, extent_op,
2370 insert_reserved);
2371 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2372 node->type == BTRFS_SHARED_DATA_REF_KEY)
2373 ret = run_delayed_data_ref(trans, node, extent_op,
2374 insert_reserved);
2375 else
2376 BUG();
2377 if (ret && insert_reserved)
2378 btrfs_pin_extent(trans->fs_info, node->bytenr,
2379 node->num_bytes, 1);
2380 return ret;
2381}
2382
2383static inline struct btrfs_delayed_ref_node *
2384select_delayed_ref(struct btrfs_delayed_ref_head *head)
2385{
2386 struct btrfs_delayed_ref_node *ref;
2387
2388 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
2389 return NULL;
2390
2391	/*
2392	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first, to prevent
2393	 * a ref count from going down to zero and deleting the extent item from
2394	 * the extent tree while there are still references to add, which would
2395	 * fail because they would not find the extent item.
2396	 */
2397 if (!list_empty(&head->ref_add_list))
2398 return list_first_entry(&head->ref_add_list,
2399 struct btrfs_delayed_ref_node, add_list);
2400
2401 ref = rb_entry(rb_first_cached(&head->ref_tree),
2402 struct btrfs_delayed_ref_node, ref_node);
2403 ASSERT(list_empty(&ref->add_list));
2404 return ref;
2405}
2406
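/*
 * Return a locked and selected head back to the delayed ref tree so another
 * task (or a later retry) can pick it up again.
 */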
2407static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2408 struct btrfs_delayed_ref_head *head)
2409{
2410 spin_lock(&delayed_refs->lock);
2411 head->processing = 0;
2412 delayed_refs->num_heads_ready++;
2413 spin_unlock(&delayed_refs->lock);
2414 btrfs_delayed_ref_unlock(head);
2415}
2416
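/*
 * Detach and return the head's extent op so it can be run.  If the head
 * still has a reserved extent to insert, the op is not run separately; it is
 * simply dropped and NULL is returned.
 */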
2417static struct btrfs_delayed_extent_op *cleanup_extent_op(
2418 struct btrfs_delayed_ref_head *head)
2419{
2420 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
2421
2422 if (!extent_op)
2423 return NULL;
2424
2425 if (head->must_insert_reserved) {
2426 head->extent_op = NULL;
2427 btrfs_free_delayed_extent_op(extent_op);
2428 return NULL;
2429 }
2430 return extent_op;
2431}
2432
2433static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
2434 struct btrfs_delayed_ref_head *head)
2435{
2436 struct btrfs_delayed_extent_op *extent_op;
2437 int ret;
2438
2439 extent_op = cleanup_extent_op(head);
2440 if (!extent_op)
2441 return 0;
2442 head->extent_op = NULL;
2443 spin_unlock(&head->lock);
2444 ret = run_delayed_extent_op(trans, head, extent_op);
2445 btrfs_free_delayed_extent_op(extent_op);
2446 return ret ? ret : 1;
2447}
2448
2449void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2450 struct btrfs_delayed_ref_root *delayed_refs,
2451 struct btrfs_delayed_ref_head *head)
2452{
2453 int nr_items = 1;
2454
2455 if (head->total_ref_mod < 0) {
2456 struct btrfs_space_info *space_info;
2457 u64 flags;
2458
2459 if (head->is_data)
2460 flags = BTRFS_BLOCK_GROUP_DATA;
2461 else if (head->is_system)
2462 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2463 else
2464 flags = BTRFS_BLOCK_GROUP_METADATA;
2465 space_info = __find_space_info(fs_info, flags);
2466 ASSERT(space_info);
2467 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2468 -head->num_bytes,
2469 BTRFS_TOTAL_BYTES_PINNED_BATCH);
2470
2471		/*
2472		 * We had csum deletions accounted for in our delayed refs rsv,
2473		 * we need to drop the csum leaves for this update from our
2474		 * delayed_refs_rsv.
2475		 */
2476 if (head->is_data) {
2477 spin_lock(&delayed_refs->lock);
2478 delayed_refs->pending_csums -= head->num_bytes;
2479 spin_unlock(&delayed_refs->lock);
2480 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
2481 head->num_bytes);
2482 }
2483 }
2484
2485 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
2486}
2487
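/*
 * Final processing of a delayed ref head once all of its individual refs
 * have been run: run any remaining extent op, remove the head from the
 * rbtree, pin the extent if it was never inserted, and release the head's
 * reservation accounting.
 */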
2488static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2489 struct btrfs_delayed_ref_head *head)
2490{
2491
2492 struct btrfs_fs_info *fs_info = trans->fs_info;
2493 struct btrfs_delayed_ref_root *delayed_refs;
2494 int ret;
2495
2496 delayed_refs = &trans->transaction->delayed_refs;
2497
2498 ret = run_and_cleanup_extent_op(trans, head);
2499 if (ret < 0) {
2500 unselect_delayed_ref_head(delayed_refs, head);
2501 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2502 return ret;
2503 } else if (ret) {
2504 return ret;
2505 }
2506
2507	/*
2508	 * Need to drop our head ref lock and re-acquire the delayed ref lock
2509	 * and then re-check to make sure nobody got added.
2510	 */
2511 spin_unlock(&head->lock);
2512 spin_lock(&delayed_refs->lock);
2513 spin_lock(&head->lock);
2514 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
2515 spin_unlock(&head->lock);
2516 spin_unlock(&delayed_refs->lock);
2517 return 1;
2518 }
2519 btrfs_delete_ref_head(delayed_refs, head);
2520 spin_unlock(&head->lock);
2521 spin_unlock(&delayed_refs->lock);
2522
2523 if (head->must_insert_reserved) {
2524 btrfs_pin_extent(fs_info, head->bytenr,
2525 head->num_bytes, 1);
2526 if (head->is_data) {
2527 ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2528 head->num_bytes);
2529 }
2530 }
2531
2532 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
2533
2534 trace_run_delayed_ref_head(fs_info, head, 0);
2535 btrfs_delayed_ref_unlock(head);
2536 btrfs_put_delayed_ref_head(head);
2537 return 0;
2538}
2539
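/*
 * Pick the next delayed ref head to process and take its mutex.  Returns
 * NULL when there is nothing to do, or ERR_PTR(-EAGAIN) if we raced and lost
 * the head while acquiring its mutex and the caller should try again.
 */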
2540static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
2541 struct btrfs_trans_handle *trans)
2542{
2543 struct btrfs_delayed_ref_root *delayed_refs =
2544 &trans->transaction->delayed_refs;
2545 struct btrfs_delayed_ref_head *head = NULL;
2546 int ret;
2547
2548 spin_lock(&delayed_refs->lock);
2549 head = btrfs_select_ref_head(delayed_refs);
2550 if (!head) {
2551 spin_unlock(&delayed_refs->lock);
2552 return head;
2553 }
2554
2555	/*
2556	 * Grab the lock that says we are going to process all the refs for
2557	 * this head.
2558	 */
2559 ret = btrfs_delayed_ref_lock(delayed_refs, head);
2560 spin_unlock(&delayed_refs->lock);
2561
2562	/*
2563	 * We may have dropped the spin lock to get the head mutex lock, and
2564	 * that might have given someone else time to free the head.  If that
2565	 * is true it has been removed from our list and we can move on.
2566	 */
2567 if (ret == -EAGAIN)
2568 head = ERR_PTR(-EAGAIN);
2569
2570 return head;
2571}
2572
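/*
 * Run the currently queued refs for a single locked head, one at a time,
 * re-merging the remaining add/drop refs after each one.  *run_refs counts
 * how many refs were actually processed.  Returns -EAGAIN if a ref is still
 * blocked on a tree mod log sequence number.
 */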
2573static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
2574 struct btrfs_delayed_ref_head *locked_ref,
2575 unsigned long *run_refs)
2576{
2577 struct btrfs_fs_info *fs_info = trans->fs_info;
2578 struct btrfs_delayed_ref_root *delayed_refs;
2579 struct btrfs_delayed_extent_op *extent_op;
2580 struct btrfs_delayed_ref_node *ref;
2581 int must_insert_reserved = 0;
2582 int ret;
2583
2584 delayed_refs = &trans->transaction->delayed_refs;
2585
2586 lockdep_assert_held(&locked_ref->mutex);
2587 lockdep_assert_held(&locked_ref->lock);
2588
2589 while ((ref = select_delayed_ref(locked_ref))) {
2590 if (ref->seq &&
2591 btrfs_check_delayed_seq(fs_info, ref->seq)) {
2592 spin_unlock(&locked_ref->lock);
2593 unselect_delayed_ref_head(delayed_refs, locked_ref);
2594 return -EAGAIN;
2595 }
2596
2597 (*run_refs)++;
2598 ref->in_tree = 0;
2599 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
2600 RB_CLEAR_NODE(&ref->ref_node);
2601 if (!list_empty(&ref->add_list))
2602 list_del(&ref->add_list);
2603
2604		/*
2605		 * When we play the delayed ref, also correct the ref_mod on head.
2606		 */
2607 switch (ref->action) {
2608 case BTRFS_ADD_DELAYED_REF:
2609 case BTRFS_ADD_DELAYED_EXTENT:
2610 locked_ref->ref_mod -= ref->ref_mod;
2611 break;
2612 case BTRFS_DROP_DELAYED_REF:
2613 locked_ref->ref_mod += ref->ref_mod;
2614 break;
2615 default:
2616 WARN_ON(1);
2617 }
2618 atomic_dec(&delayed_refs->num_entries);
2619
2620		/*
2621		 * Record the must_insert_reserved flag before we drop the
2622		 * spin lock.
2623		 */
2624 must_insert_reserved = locked_ref->must_insert_reserved;
2625 locked_ref->must_insert_reserved = 0;
2626
2627 extent_op = locked_ref->extent_op;
2628 locked_ref->extent_op = NULL;
2629 spin_unlock(&locked_ref->lock);
2630
2631 ret = run_one_delayed_ref(trans, ref, extent_op,
2632 must_insert_reserved);
2633
2634 btrfs_free_delayed_extent_op(extent_op);
2635 if (ret) {
2636 unselect_delayed_ref_head(delayed_refs, locked_ref);
2637 btrfs_put_delayed_ref(ref);
2638 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2639 ret);
2640 return ret;
2641 }
2642
2643 btrfs_put_delayed_ref(ref);
2644 cond_resched();
2645
2646 spin_lock(&locked_ref->lock);
2647 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2648 }
2649
2650 return 0;
2651}
2652
2653/*
2654 * Returns 0 on success or if called with an already aborted transaction.
2655 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2656 */
2657static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2658 unsigned long nr)
2659{
2660 struct btrfs_fs_info *fs_info = trans->fs_info;
2661 struct btrfs_delayed_ref_root *delayed_refs;
2662 struct btrfs_delayed_ref_head *locked_ref = NULL;
2663 ktime_t start = ktime_get();
2664 int ret;
2665 unsigned long count = 0;
2666 unsigned long actual_count = 0;
2667
2668 delayed_refs = &trans->transaction->delayed_refs;
2669 do {
2670 if (!locked_ref) {
2671 locked_ref = btrfs_obtain_ref_head(trans);
2672 if (IS_ERR_OR_NULL(locked_ref)) {
2673 if (PTR_ERR(locked_ref) == -EAGAIN) {
2674 continue;
2675 } else {
2676 break;
2677 }
2678 }
2679 count++;
2680 }
2681
2682		/*
2683		 * We need to try and merge add/drops of the same ref since we
2684		 * can run into issues with relocate dropping the implicit ref
2685		 * and then it being added back again before the drop can
2686		 * finish.  If we merged anything we need to re-loop so we can
2687		 * get a good ref.  Or we can get node references of the same
2688		 * type that weren't merged when created due to bumps in the
2689		 * tree mod seq, and we need to merge them to prevent adding an
2690		 * inline extent backref before dropping it (triggering a
2691		 * BUG_ON at insert_inline_extent_backref()).
2692		 */
2693 spin_lock(&locked_ref->lock);
2694 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2695
2696 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2697 &actual_count);
2698 if (ret < 0 && ret != -EAGAIN) {
2699			/*
2700			 * Error, btrfs_run_delayed_refs_for_head already
2701			 * unlocked everything so just bail out.
2702			 */
2703 return ret;
2704 } else if (!ret) {
2705			/*
2706			 * Success, perform the usual cleanup of a processed
2707			 * head.
2708			 */
2709 ret = cleanup_ref_head(trans, locked_ref);
2710			if (ret > 0) {
2711				/* We dropped our lock, we need to loop. */
2712 ret = 0;
2713 continue;
2714 } else if (ret) {
2715 return ret;
2716 }
2717 }
2718
2719		/*
2720		 * Either the success case or btrfs_run_delayed_refs_for_head
2721		 * returned -EAGAIN; in both cases clear locked_ref so that we
2722		 * select another head on the next iteration.
2723		 */
2724 locked_ref = NULL;
2725 cond_resched();
2726 } while ((nr != -1 && count < nr) || locked_ref);
2727
2728	/*
2729	 * We don't want to include ref heads since we can have empty ref heads
2730	 * and those will drastically skew our runtime down since we just do
2731	 * accounting, no actual extent tree updates.
2732	 */
2733 if (actual_count > 0) {
2734 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2735 u64 avg;
2736
2737		/*
2738		 * We weigh the current average higher than our current runtime
2739		 * to avoid large swings in the average.
2740		 */
2741 spin_lock(&delayed_refs->lock);
2742 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2743 fs_info->avg_delayed_ref_runtime = avg >> 2;
2744 spin_unlock(&delayed_refs->lock);
2745 }
2746 return 0;
2747}
2748
2749#ifdef SCRAMBLE_DELAYED_REFS
2750/*
2751 * Normally delayed refs get processed in ascending bytenr order.  This
2752 * correlates in most cases to the order added.  To expose dependencies on
2753 * this order, we start to process the tree in the middle instead.
2754 */
2755static u64 find_middle(struct rb_root *root)
2756{
2757 struct rb_node *n = root->rb_node;
2758 struct btrfs_delayed_ref_node *entry;
2759 int alt = 1;
2760 u64 middle;
2761 u64 first = 0, last = 0;
2762
2763 n = rb_first(root);
2764 if (n) {
2765 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2766 first = entry->bytenr;
2767 }
2768 n = rb_last(root);
2769 if (n) {
2770 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2771 last = entry->bytenr;
2772 }
2773 n = root->rb_node;
2774
2775 while (n) {
2776 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2777 WARN_ON(!entry->in_tree);
2778
2779 middle = entry->bytenr;
2780
2781 if (alt)
2782 n = n->rb_left;
2783 else
2784 n = n->rb_right;
2785
2786 alt = 1 - alt;
2787 }
2788 return middle;
2789}
2790#endif
2791
2792static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2793{
2794 u64 num_bytes;
2795
2796 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2797 sizeof(struct btrfs_extent_inline_ref));
2798 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2799 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2800
2801
2802
2803
2804
2805 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2806}
2807
2808/*
2809 * Takes the number of bytes to be csummed and figures out how many leaves
2810 * it would require to store the csums for that many bytes.
2811 */
2812u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2813{
2814 u64 csum_size;
2815 u64 num_csums_per_leaf;
2816 u64 num_csums;
2817
2818 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2819 num_csums_per_leaf = div64_u64(csum_size,
2820 (u64)btrfs_super_csum_size(fs_info->super_copy));
2821 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2822 num_csums += num_csums_per_leaf - 1;
2823 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2824 return num_csums;
2825}
2826
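/*
 * Returns true when the size needed by the delayed refs rsv exceeds what is
 * actually reserved in it plus the global rsv, i.e. when we have accumulated
 * more delayed ref work than we have metadata space reserved for and the
 * caller should start running delayed refs.
 */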
2827bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
2828{
2829 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
2830 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2831 bool ret = false;
2832 u64 reserved;
2833
2834 spin_lock(&global_rsv->lock);
2835 reserved = global_rsv->reserved;
2836 spin_unlock(&global_rsv->lock);
2837
2838	/*
2839	 * Since the global reserve is just kind of magic we don't really want
2840	 * to rely on it to save our bacon, so if our size is more than the
2841	 * delayed_refs_rsv and the global rsv then it's time to think about
2842	 * bailing.
2843	 */
2844 spin_lock(&delayed_refs_rsv->lock);
2845 reserved += delayed_refs_rsv->reserved;
2846 if (delayed_refs_rsv->size >= reserved)
2847 ret = true;
2848 spin_unlock(&delayed_refs_rsv->lock);
2849 return ret;
2850}
2851
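/*
 * Throttling heuristic: estimate how long running the queued delayed refs
 * would take from the running average runtime.  Returns 1 if the estimate is
 * over a second, 2 if it is over half a second, and otherwise falls back to
 * whether the delayed refs rsv is running short.
 */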
2852int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
2853{
2854 u64 num_entries =
2855 atomic_read(&trans->transaction->delayed_refs.num_entries);
2856 u64 avg_runtime;
2857 u64 val;
2858
2859 smp_mb();
2860 avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
2861 val = num_entries * avg_runtime;
2862 if (val >= NSEC_PER_SEC)
2863 return 1;
2864 if (val >= NSEC_PER_SEC / 2)
2865 return 2;
2866
2867 return btrfs_check_space_for_delayed_refs(trans->fs_info);
2868}
2869
2870/*
2871 * This starts processing the delayed reference count updates and extent
2872 * insertions we have queued up so far.  count can be 0, which means to
2873 * process everything in the tree at the start of the run (but not newly
2874 * added entries), or it can be some target number you'd like to process.
2875 * Passing (unsigned long)-1 keeps running until the tree is empty.
2876 *
2877 * Returns 0 on success or if called with an aborted transaction.
2878 * Returns <0 on error and aborts the transaction.
2879 */
2880int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2881 unsigned long count)
2882{
2883 struct btrfs_fs_info *fs_info = trans->fs_info;
2884 struct rb_node *node;
2885 struct btrfs_delayed_ref_root *delayed_refs;
2886 struct btrfs_delayed_ref_head *head;
2887 int ret;
2888 int run_all = count == (unsigned long)-1;
2889
2890
2891 if (trans->aborted)
2892 return 0;
2893
2894 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2895 return 0;
2896
2897 delayed_refs = &trans->transaction->delayed_refs;
2898 if (count == 0)
2899 count = atomic_read(&delayed_refs->num_entries) * 2;
2900
2901again:
2902#ifdef SCRAMBLE_DELAYED_REFS
2903 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2904#endif
2905 ret = __btrfs_run_delayed_refs(trans, count);
2906 if (ret < 0) {
2907 btrfs_abort_transaction(trans, ret);
2908 return ret;
2909 }
2910
2911 if (run_all) {
2912 btrfs_create_pending_block_groups(trans);
2913
2914 spin_lock(&delayed_refs->lock);
2915 node = rb_first_cached(&delayed_refs->href_root);
2916 if (!node) {
2917 spin_unlock(&delayed_refs->lock);
2918 goto out;
2919 }
2920 head = rb_entry(node, struct btrfs_delayed_ref_head,
2921 href_node);
2922 refcount_inc(&head->refs);
2923 spin_unlock(&delayed_refs->lock);
2924
2925
2926 mutex_lock(&head->mutex);
2927 mutex_unlock(&head->mutex);
2928
2929 btrfs_put_delayed_ref_head(head);
2930 cond_resched();
2931 goto again;
2932 }
2933out:
2934 return 0;
2935}
2936
2937int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2938 u64 bytenr, u64 num_bytes, u64 flags,
2939 int level, int is_data)
2940{
2941 struct btrfs_delayed_extent_op *extent_op;
2942 int ret;
2943
2944 extent_op = btrfs_alloc_delayed_extent_op();
2945 if (!extent_op)
2946 return -ENOMEM;
2947
2948 extent_op->flags_to_set = flags;
2949 extent_op->update_flags = true;
2950 extent_op->update_key = false;
2951 extent_op->is_data = is_data ? true : false;
2952 extent_op->level = level;
2953
2954 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2955 if (ret)
2956 btrfs_free_delayed_extent_op(extent_op);
2957 return ret;
2958}
2959
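/*
 * Check the current transaction's delayed refs for any reference to @bytenr
 * that does not belong to the given root/inode/offset.  Returns 1 if such a
 * cross reference exists, 0 if not, and -EAGAIN if the head was contended
 * and the caller must redo its search.
 */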
2960static noinline int check_delayed_ref(struct btrfs_root *root,
2961 struct btrfs_path *path,
2962 u64 objectid, u64 offset, u64 bytenr)
2963{
2964 struct btrfs_delayed_ref_head *head;
2965 struct btrfs_delayed_ref_node *ref;
2966 struct btrfs_delayed_data_ref *data_ref;
2967 struct btrfs_delayed_ref_root *delayed_refs;
2968 struct btrfs_transaction *cur_trans;
2969 struct rb_node *node;
2970 int ret = 0;
2971
2972 spin_lock(&root->fs_info->trans_lock);
2973 cur_trans = root->fs_info->running_transaction;
2974 if (cur_trans)
2975 refcount_inc(&cur_trans->use_count);
2976 spin_unlock(&root->fs_info->trans_lock);
2977 if (!cur_trans)
2978 return 0;
2979
2980 delayed_refs = &cur_trans->delayed_refs;
2981 spin_lock(&delayed_refs->lock);
2982 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2983 if (!head) {
2984 spin_unlock(&delayed_refs->lock);
2985 btrfs_put_transaction(cur_trans);
2986 return 0;
2987 }
2988
2989 if (!mutex_trylock(&head->mutex)) {
2990 refcount_inc(&head->refs);
2991 spin_unlock(&delayed_refs->lock);
2992
2993 btrfs_release_path(path);
2994
2995
2996
2997
2998
2999 mutex_lock(&head->mutex);
3000 mutex_unlock(&head->mutex);
3001 btrfs_put_delayed_ref_head(head);
3002 btrfs_put_transaction(cur_trans);
3003 return -EAGAIN;
3004 }
3005 spin_unlock(&delayed_refs->lock);
3006
3007 spin_lock(&head->lock);
3008	/*
3009	 * XXX: We should replace this with a proper search function in the
3010	 * future.
3011	 */
3012 for (node = rb_first_cached(&head->ref_tree); node;
3013 node = rb_next(node)) {
3014 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
3015
3016 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3017 ret = 1;
3018 break;
3019 }
3020
3021 data_ref = btrfs_delayed_node_to_data_ref(ref);
3022
3023		/*
3024		 * If our ref doesn't match the one we're currently looking at
3025		 * then we have a cross reference.
3026		 */
3027 if (data_ref->root != root->root_key.objectid ||
3028 data_ref->objectid != objectid ||
3029 data_ref->offset != offset) {
3030 ret = 1;
3031 break;
3032 }
3033 }
3034 spin_unlock(&head->lock);
3035 mutex_unlock(&head->mutex);
3036 btrfs_put_transaction(cur_trans);
3037 return ret;
3038}
3039
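/*
 * Check the committed extent tree for references to @bytenr other than a
 * single inline data ref owned by this root/inode/offset.  Returns 0 when
 * the extent is exclusively ours, 1 when it may be shared, and -ENOENT when
 * no extent item is found where expected.
 */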
3040static noinline int check_committed_ref(struct btrfs_root *root,
3041 struct btrfs_path *path,
3042 u64 objectid, u64 offset, u64 bytenr)
3043{
3044 struct btrfs_fs_info *fs_info = root->fs_info;
3045 struct btrfs_root *extent_root = fs_info->extent_root;
3046 struct extent_buffer *leaf;
3047 struct btrfs_extent_data_ref *ref;
3048 struct btrfs_extent_inline_ref *iref;
3049 struct btrfs_extent_item *ei;
3050 struct btrfs_key key;
3051 u32 item_size;
3052 int type;
3053 int ret;
3054
3055 key.objectid = bytenr;
3056 key.offset = (u64)-1;
3057 key.type = BTRFS_EXTENT_ITEM_KEY;
3058
3059 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3060 if (ret < 0)
3061 goto out;
3062 BUG_ON(ret == 0);
3063
3064 ret = -ENOENT;
3065 if (path->slots[0] == 0)
3066 goto out;
3067
3068 path->slots[0]--;
3069 leaf = path->nodes[0];
3070 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3071
3072 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
3073 goto out;
3074
3075 ret = 1;
3076 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3077 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3078
3079 if (item_size != sizeof(*ei) +
3080 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3081 goto out;
3082
3083 if (btrfs_extent_generation(leaf, ei) <=
3084 btrfs_root_last_snapshot(&root->root_item))
3085 goto out;
3086
3087 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3088
3089 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
3090 if (type != BTRFS_EXTENT_DATA_REF_KEY)
3091 goto out;
3092
3093 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3094 if (btrfs_extent_refs(leaf, ei) !=
3095 btrfs_extent_data_ref_count(leaf, ref) ||
3096 btrfs_extent_data_ref_root(leaf, ref) !=
3097 root->root_key.objectid ||
3098 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3099 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3100 goto out;
3101
3102 ret = 0;
3103out:
3104 return ret;
3105}
3106
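/*
 * Returns 0 if no other root, inode or offset references the given data
 * extent, a positive value if a cross reference may exist, or a negative
 * errno.  Both the committed extent tree and the current delayed refs are
 * checked.
 */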
3107int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
3108 u64 bytenr)
3109{
3110 struct btrfs_path *path;
3111 int ret;
3112
3113 path = btrfs_alloc_path();
3114 if (!path)
3115 return -ENOMEM;
3116
3117 do {
3118 ret = check_committed_ref(root, path, objectid,
3119 offset, bytenr);
3120 if (ret && ret != -ENOENT)
3121 goto out;
3122
3123 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
3124 } while (ret == -EAGAIN);
3125
3126out:
3127 btrfs_free_path(path);
3128 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3129 WARN_ON(ret > 0);
3130 return ret;
3131}
3132
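/*
 * Walk all items in @buf and, for every extent it references (file extent
 * disk bytenrs in a leaf, child block pointers in a node), queue a delayed
 * ref add or drop depending on @inc.  @full_backref selects shared (parent
 * based) backrefs instead of ones keyed on the owning root.
 */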
3133static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3134 struct btrfs_root *root,
3135 struct extent_buffer *buf,
3136 int full_backref, int inc)
3137{
3138 struct btrfs_fs_info *fs_info = root->fs_info;
3139 u64 bytenr;
3140 u64 num_bytes;
3141 u64 parent;
3142 u64 ref_root;
3143 u32 nritems;
3144 struct btrfs_key key;
3145 struct btrfs_file_extent_item *fi;
3146 struct btrfs_ref generic_ref = { 0 };
3147 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
3148 int i;
3149 int action;
3150 int level;
3151 int ret = 0;
3152
3153 if (btrfs_is_testing(fs_info))
3154 return 0;
3155
3156 ref_root = btrfs_header_owner(buf);
3157 nritems = btrfs_header_nritems(buf);
3158 level = btrfs_header_level(buf);
3159
3160 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3161 return 0;
3162
3163 if (full_backref)
3164 parent = buf->start;
3165 else
3166 parent = 0;
3167 if (inc)
3168 action = BTRFS_ADD_DELAYED_REF;
3169 else
3170 action = BTRFS_DROP_DELAYED_REF;
3171
3172 for (i = 0; i < nritems; i++) {
3173 if (level == 0) {
3174 btrfs_item_key_to_cpu(buf, &key, i);
3175 if (key.type != BTRFS_EXTENT_DATA_KEY)
3176 continue;
3177 fi = btrfs_item_ptr(buf, i,
3178 struct btrfs_file_extent_item);
3179 if (btrfs_file_extent_type(buf, fi) ==
3180 BTRFS_FILE_EXTENT_INLINE)
3181 continue;
3182 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3183 if (bytenr == 0)
3184 continue;
3185
3186 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3187 key.offset -= btrfs_file_extent_offset(buf, fi);
3188 btrfs_init_generic_ref(&generic_ref, action, bytenr,
3189 num_bytes, parent);
3190 generic_ref.real_root = root->root_key.objectid;
3191 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
3192 key.offset);
3193 generic_ref.skip_qgroup = for_reloc;
3194 if (inc)
3195 ret = btrfs_inc_extent_ref(trans, &generic_ref);
3196 else
3197 ret = btrfs_free_extent(trans, &generic_ref);
3198 if (ret)
3199 goto fail;
3200 } else {
3201 bytenr = btrfs_node_blockptr(buf, i);
3202 num_bytes = fs_info->nodesize;
3203 btrfs_init_generic_ref(&generic_ref, action, bytenr,
3204 num_bytes, parent);
3205 generic_ref.real_root = root->root_key.objectid;
3206 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
3207 generic_ref.skip_qgroup = for_reloc;
3208 if (inc)
3209 ret = btrfs_inc_extent_ref(trans, &generic_ref);
3210 else
3211 ret = btrfs_free_extent(trans, &generic_ref);
3212 if (ret)
3213 goto fail;
3214 }
3215 }
3216 return 0;
3217fail:
3218 return ret;
3219}
3220
3221int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3222 struct extent_buffer *buf, int full_backref)
3223{
3224 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3225}
3226
3227int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3228 struct extent_buffer *buf, int full_backref)
3229{
3230 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3231}
3232
3233static int write_one_cache_group(struct btrfs_trans_handle *trans,
3234 struct btrfs_path *path,
3235 struct btrfs_block_group_cache *cache)
3236{
3237 struct btrfs_fs_info *fs_info = trans->fs_info;
3238 int ret;
3239 struct btrfs_root *extent_root = fs_info->extent_root;
3240 unsigned long bi;
3241 struct extent_buffer *leaf;
3242
3243 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3244 if (ret) {
3245 if (ret > 0)
3246 ret = -ENOENT;
3247 goto fail;
3248 }
3249
3250 leaf = path->nodes[0];
3251 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3252 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3253 btrfs_mark_buffer_dirty(leaf);
3254fail:
3255 btrfs_release_path(path);
3256 return ret;
3257
3258}
3259
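/*
 * Advance to the block group that follows @cache in logical address order,
 * dropping our reference on @cache.  If @cache was removed from the rbtree
 * in the meantime, fall back to a fresh lookup starting at the address right
 * after it.
 */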
3260static struct btrfs_block_group_cache *next_block_group(
3261 struct btrfs_block_group_cache *cache)
3262{
3263 struct btrfs_fs_info *fs_info = cache->fs_info;
3264 struct rb_node *node;
3265
3266 spin_lock(&fs_info->block_group_cache_lock);
3267
3268
3269 if (RB_EMPTY_NODE(&cache->cache_node)) {
3270 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3271
3272 spin_unlock(&fs_info->block_group_cache_lock);
3273 btrfs_put_block_group(cache);
3274		cache = btrfs_lookup_first_block_group(fs_info, next_bytenr);
		return cache;
3275 }
3276 node = rb_next(&cache->cache_node);
3277 btrfs_put_block_group(cache);
3278 if (node) {
3279 cache = rb_entry(node, struct btrfs_block_group_cache,
3280 cache_node);
3281 btrfs_get_block_group(cache);
3282 } else
3283 cache = NULL;
3284 spin_unlock(&fs_info->block_group_cache_lock);
3285 return cache;
3286}
3287
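/*
 * Prepare the free space cache inode for a block group so its free space can
 * be written out later: create or truncate the inode and preallocate room
 * for the cache, recording the resulting state in disk_cache_state.
 */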
3288static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3289 struct btrfs_trans_handle *trans,
3290 struct btrfs_path *path)
3291{
3292 struct btrfs_fs_info *fs_info = block_group->fs_info;
3293 struct btrfs_root *root = fs_info->tree_root;
3294 struct inode *inode = NULL;
3295 struct extent_changeset *data_reserved = NULL;
3296 u64 alloc_hint = 0;
3297 int dcs = BTRFS_DC_ERROR;
3298 u64 num_pages = 0;
3299 int retries = 0;
3300 int ret = 0;
3301
3302	/*
3303	 * If this block group is smaller than 100 megs don't bother caching
3304	 * the block group.
3305	 */
3306 if (block_group->key.offset < (100 * SZ_1M)) {
3307 spin_lock(&block_group->lock);
3308 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3309 spin_unlock(&block_group->lock);
3310 return 0;
3311 }
3312
3313 if (trans->aborted)
3314 return 0;
3315again:
3316 inode = lookup_free_space_inode(block_group, path);
3317 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3318 ret = PTR_ERR(inode);
3319 btrfs_release_path(path);
3320 goto out;
3321 }
3322
3323 if (IS_ERR(inode)) {
3324 BUG_ON(retries);
3325 retries++;
3326
3327 if (block_group->ro)
3328 goto out_free;
3329
3330 ret = create_free_space_inode(trans, block_group, path);
3331 if (ret)
3332 goto out_free;
3333 goto again;
3334 }
3335
3336
3337
3338
3339
3340
3341 BTRFS_I(inode)->generation = 0;
3342 ret = btrfs_update_inode(trans, root, inode);
3343 if (ret) {
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354 btrfs_abort_transaction(trans, ret);
3355 goto out_put;
3356 }
3357 WARN_ON(ret);
3358
3359
3360 if (block_group->cache_generation == trans->transid &&
3361 i_size_read(inode)) {
3362 dcs = BTRFS_DC_SETUP;
3363 goto out_put;
3364 }
3365
3366 if (i_size_read(inode) > 0) {
3367 ret = btrfs_check_trunc_cache_free_space(fs_info,
3368 &fs_info->global_block_rsv);
3369 if (ret)
3370 goto out_put;
3371
3372 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
3373 if (ret)
3374 goto out_put;
3375 }
3376
3377 spin_lock(&block_group->lock);
3378 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3379 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
3380		/*
3381		 * Don't bother trying to write the space cache out if
3382		 * a) we're not cached,
3383		 * b) we're with nospace_cache mount option,
3384		 * c) we're with v2 space_cache (FREE_SPACE_TREE).
3385		 */
3386 dcs = BTRFS_DC_WRITTEN;
3387 spin_unlock(&block_group->lock);
3388 goto out_put;
3389 }
3390 spin_unlock(&block_group->lock);
3391
3392
3393
3394
3395
3396 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3397 ret = -ENOSPC;
3398 goto out_put;
3399 }
3400
3401
3402
3403
3404
3405
3406
3407 num_pages = div_u64(block_group->key.offset, SZ_256M);
3408 if (!num_pages)
3409 num_pages = 1;
3410
3411 num_pages *= 16;
3412 num_pages *= PAGE_SIZE;
3413
3414 ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
3415 if (ret)
3416 goto out_put;
3417
3418 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3419 num_pages, num_pages,
3420 &alloc_hint);
3421	/*
3422	 * Our cache requires contiguous chunks so that we don't modify a bunch
3423	 * of metadata or split extents when writing the cache out, which means
3424	 * we can enospc if we are heavily fragmented in addition to just normal
3425	 * out of space conditions.  So if we hit this just skip setting up any
3426	 * other block groups for this transaction, maybe we'll unpin enough
3427	 * space the next time around.
3428	 */
3429 if (!ret)
3430 dcs = BTRFS_DC_SETUP;
3431 else if (ret == -ENOSPC)
3432 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3433
3434out_put:
3435 iput(inode);
3436out_free:
3437 btrfs_release_path(path);
3438out:
3439 spin_lock(&block_group->lock);
3440 if (!ret && dcs == BTRFS_DC_SETUP)
3441 block_group->cache_generation = trans->transid;
3442 block_group->disk_cache_state = dcs;
3443 spin_unlock(&block_group->lock);
3444
3445 extent_changeset_free(data_reserved);
3446 return ret;
3447}
3448
3449int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
3450{
3451 struct btrfs_fs_info *fs_info = trans->fs_info;
3452 struct btrfs_block_group_cache *cache, *tmp;
3453 struct btrfs_transaction *cur_trans = trans->transaction;
3454 struct btrfs_path *path;
3455
3456 if (list_empty(&cur_trans->dirty_bgs) ||
3457 !btrfs_test_opt(fs_info, SPACE_CACHE))
3458 return 0;
3459
3460 path = btrfs_alloc_path();
3461 if (!path)
3462 return -ENOMEM;
3463
3464
3465 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3466 dirty_list) {
3467 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3468 cache_save_setup(cache, trans, path);
3469 }
3470
3471 btrfs_free_path(path);
3472 return 0;
3473}
3474
3475/*
3476 * Transaction commit does final block group cache writeback during a
3477 * critical section where nothing is allowed to change the FS.  This is
3478 * required in order for the cache to actually match the block group, but
3479 * can introduce a lot of latency into the commit.
3480 *
3481 * So, btrfs_start_dirty_block_groups is here to kick off block group cache
3482 * IO.  There's a chance we'll have to redo some of it if the block group
3483 * changes again during the commit, but it greatly reduces the commit
3484 * latency by getting rid of the easy block groups while we're still
3485 * allowing others to join the commit.
3486 */
3487int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
3488{
3489 struct btrfs_fs_info *fs_info = trans->fs_info;
3490 struct btrfs_block_group_cache *cache;
3491 struct btrfs_transaction *cur_trans = trans->transaction;
3492 int ret = 0;
3493 int should_put;
3494 struct btrfs_path *path = NULL;
3495 LIST_HEAD(dirty);
3496 struct list_head *io = &cur_trans->io_bgs;
3497 int num_started = 0;
3498 int loops = 0;
3499
3500 spin_lock(&cur_trans->dirty_bgs_lock);
3501 if (list_empty(&cur_trans->dirty_bgs)) {
3502 spin_unlock(&cur_trans->dirty_bgs_lock);
3503 return 0;
3504 }
3505 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3506 spin_unlock(&cur_trans->dirty_bgs_lock);
3507
3508again:
3509
3510
3511
3512
3513 btrfs_create_pending_block_groups(trans);
3514
3515 if (!path) {
3516 path = btrfs_alloc_path();
3517 if (!path)
3518 return -ENOMEM;
3519 }
3520
3521
3522
3523
3524
3525
3526 mutex_lock(&trans->transaction->cache_write_mutex);
3527 while (!list_empty(&dirty)) {
3528 bool drop_reserve = true;
3529
3530 cache = list_first_entry(&dirty,
3531 struct btrfs_block_group_cache,
3532 dirty_list);
3533
3534
3535
3536
3537
3538 if (!list_empty(&cache->io_list)) {
3539 list_del_init(&cache->io_list);
3540 btrfs_wait_cache_io(trans, cache, path);
3541 btrfs_put_block_group(cache);
3542 }
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553 spin_lock(&cur_trans->dirty_bgs_lock);
3554 list_del_init(&cache->dirty_list);
3555 spin_unlock(&cur_trans->dirty_bgs_lock);
3556
3557 should_put = 1;
3558
3559 cache_save_setup(cache, trans, path);
3560
3561 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3562 cache->io_ctl.inode = NULL;
3563 ret = btrfs_write_out_cache(trans, cache, path);
3564 if (ret == 0 && cache->io_ctl.inode) {
3565 num_started++;
3566 should_put = 0;
3567
3568
3569
3570
3571
3572
3573 list_add_tail(&cache->io_list, io);
3574 } else {
3575
3576
3577
3578
3579 ret = 0;
3580 }
3581 }
3582 if (!ret) {
3583 ret = write_one_cache_group(trans, path, cache);
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593 if (ret == -ENOENT) {
3594 ret = 0;
3595 spin_lock(&cur_trans->dirty_bgs_lock);
3596 if (list_empty(&cache->dirty_list)) {
3597 list_add_tail(&cache->dirty_list,
3598 &cur_trans->dirty_bgs);
3599 btrfs_get_block_group(cache);
3600 drop_reserve = false;
3601 }
3602 spin_unlock(&cur_trans->dirty_bgs_lock);
3603 } else if (ret) {
3604 btrfs_abort_transaction(trans, ret);
3605 }
3606 }
3607
3608
3609 if (should_put)
3610 btrfs_put_block_group(cache);
3611 if (drop_reserve)
3612 btrfs_delayed_refs_rsv_release(fs_info, 1);
3613
3614 if (ret)
3615 break;
3616
3617
3618
3619
3620
3621
3622 mutex_unlock(&trans->transaction->cache_write_mutex);
3623 mutex_lock(&trans->transaction->cache_write_mutex);
3624 }
3625 mutex_unlock(&trans->transaction->cache_write_mutex);
3626
3627
3628
3629
3630
3631 ret = btrfs_run_delayed_refs(trans, 0);
3632 if (!ret && loops == 0) {
3633 loops++;
3634 spin_lock(&cur_trans->dirty_bgs_lock);
3635 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3636
3637
3638
3639
3640 if (!list_empty(&dirty)) {
3641 spin_unlock(&cur_trans->dirty_bgs_lock);
3642 goto again;
3643 }
3644 spin_unlock(&cur_trans->dirty_bgs_lock);
3645 } else if (ret < 0) {
3646 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
3647 }
3648
3649 btrfs_free_path(path);
3650 return ret;
3651}
3652
3653int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
3654{
3655 struct btrfs_fs_info *fs_info = trans->fs_info;
3656 struct btrfs_block_group_cache *cache;
3657 struct btrfs_transaction *cur_trans = trans->transaction;
3658 int ret = 0;
3659 int should_put;
3660 struct btrfs_path *path;
3661 struct list_head *io = &cur_trans->io_bgs;
3662 int num_started = 0;
3663
3664 path = btrfs_alloc_path();
3665 if (!path)
3666 return -ENOMEM;
3667
3668	/*
3669	 * Even though we are in the critical section of the transaction
3670	 * commit, we can still have concurrent tasks adding elements to this
3671	 * transaction's list of dirty block groups.  These tasks correspond to
3672	 * endio free space workers started when writeback finishes for a
3673	 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
3674	 * allocate new block groups as a result of COWing nodes of the root
3675	 * tree when updating the free space inode.  The writeback for the
3676	 * space caches is triggered by an earlier call to
3677	 * btrfs_start_dirty_block_groups() and iterations of the following
3678	 * loop.
3679	 * Also we want to do the cache_save_setup first and then run the
3680	 * delayed refs to make sure we have the best chance at doing this all
3681	 * in one shot.
3682	 */
3683 spin_lock(&cur_trans->dirty_bgs_lock);
3684 while (!list_empty(&cur_trans->dirty_bgs)) {
3685 cache = list_first_entry(&cur_trans->dirty_bgs,
3686 struct btrfs_block_group_cache,
3687 dirty_list);
3688
3689
3690
3691
3692
3693
3694 if (!list_empty(&cache->io_list)) {
3695 spin_unlock(&cur_trans->dirty_bgs_lock);
3696 list_del_init(&cache->io_list);
3697 btrfs_wait_cache_io(trans, cache, path);
3698 btrfs_put_block_group(cache);
3699 spin_lock(&cur_trans->dirty_bgs_lock);
3700 }
3701
3702
3703
3704
3705
3706 list_del_init(&cache->dirty_list);
3707 spin_unlock(&cur_trans->dirty_bgs_lock);
3708 should_put = 1;
3709
3710 cache_save_setup(cache, trans, path);
3711
3712 if (!ret)
3713 ret = btrfs_run_delayed_refs(trans,
3714 (unsigned long) -1);
3715
3716 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3717 cache->io_ctl.inode = NULL;
3718 ret = btrfs_write_out_cache(trans, cache, path);
3719 if (ret == 0 && cache->io_ctl.inode) {
3720 num_started++;
3721 should_put = 0;
3722 list_add_tail(&cache->io_list, io);
3723 } else {
3724
3725
3726
3727
3728 ret = 0;
3729 }
3730 }
3731 if (!ret) {
3732 ret = write_one_cache_group(trans, path, cache);
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746 if (ret == -ENOENT) {
3747 wait_event(cur_trans->writer_wait,
3748 atomic_read(&cur_trans->num_writers) == 1);
3749 ret = write_one_cache_group(trans, path, cache);
3750 }
3751 if (ret)
3752 btrfs_abort_transaction(trans, ret);
3753 }
3754
3755
3756 if (should_put)
3757 btrfs_put_block_group(cache);
3758 btrfs_delayed_refs_rsv_release(fs_info, 1);
3759 spin_lock(&cur_trans->dirty_bgs_lock);
3760 }
3761 spin_unlock(&cur_trans->dirty_bgs_lock);
3762
3763
3764
3765
3766
3767 while (!list_empty(io)) {
3768 cache = list_first_entry(io, struct btrfs_block_group_cache,
3769 io_list);
3770 list_del_init(&cache->io_list);
3771 btrfs_wait_cache_io(trans, cache, path);
3772 btrfs_put_block_group(cache);
3773 }
3774
3775 btrfs_free_path(path);
3776 return ret;
3777}
3778
3779int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
3780{
3781 struct btrfs_block_group_cache *block_group;
3782 int readonly = 0;
3783
3784 block_group = btrfs_lookup_block_group(fs_info, bytenr);
3785 if (!block_group || block_group->ro)
3786 readonly = 1;
3787 if (block_group)
3788 btrfs_put_block_group(block_group);
3789 return readonly;
3790}
3791
3792bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3793{
3794 struct btrfs_block_group_cache *bg;
3795 bool ret = true;
3796
3797 bg = btrfs_lookup_block_group(fs_info, bytenr);
3798 if (!bg)
3799 return false;
3800
3801 spin_lock(&bg->lock);
3802 if (bg->ro)
3803 ret = false;
3804 else
3805 atomic_inc(&bg->nocow_writers);
3806 spin_unlock(&bg->lock);
3807
3808
3809 if (!ret)
3810 btrfs_put_block_group(bg);
3811
3812 return ret;
3813
3814}
3815
3816void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3817{
3818 struct btrfs_block_group_cache *bg;
3819
3820 bg = btrfs_lookup_block_group(fs_info, bytenr);
3821 ASSERT(bg);
3822 if (atomic_dec_and_test(&bg->nocow_writers))
3823 wake_up_var(&bg->nocow_writers);
3824
3825
3826
3827
3828 btrfs_put_block_group(bg);
3829 btrfs_put_block_group(bg);
3830}
3831
3832void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3833{
3834 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
3835}
3836
3837static const char *alloc_name(u64 flags)
3838{
3839 switch (flags) {
3840 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3841 return "mixed";
3842 case BTRFS_BLOCK_GROUP_METADATA:
3843 return "metadata";
3844 case BTRFS_BLOCK_GROUP_DATA:
3845 return "data";
3846 case BTRFS_BLOCK_GROUP_SYSTEM:
3847 return "system";
3848 default:
3849 WARN_ON(1);
3850 return "invalid-combination";
3851	}
3852}
3853
3854static int create_space_info(struct btrfs_fs_info *info, u64 flags)
3855{
3856
3857 struct btrfs_space_info *space_info;
3858 int i;
3859 int ret;
3860
3861 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
3862 if (!space_info)
3863 return -ENOMEM;
3864
3865 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
3866 GFP_KERNEL);
3867 if (ret) {
3868 kfree(space_info);
3869 return ret;
3870 }
3871
3872 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3873 INIT_LIST_HEAD(&space_info->block_groups[i]);
3874 init_rwsem(&space_info->groups_sem);
3875 spin_lock_init(&space_info->lock);
3876 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3877 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3878 init_waitqueue_head(&space_info->wait);
3879 INIT_LIST_HEAD(&space_info->ro_bgs);
3880 INIT_LIST_HEAD(&space_info->tickets);
3881 INIT_LIST_HEAD(&space_info->priority_tickets);
3882
3883 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
3884 info->space_info_kobj, "%s",
3885 alloc_name(space_info->flags));
3886 if (ret) {
3887 kobject_put(&space_info->kobj);
3888 return ret;
3889 }
3890
3891 list_add_rcu(&space_info->list, &info->space_info);
3892 if (flags & BTRFS_BLOCK_GROUP_DATA)
3893 info->data_sinfo = space_info;
3894
3895 return ret;
3896}
3897
3898static void update_space_info(struct btrfs_fs_info *info, u64 flags,
3899 u64 total_bytes, u64 bytes_used,
3900 u64 bytes_readonly,
3901 struct btrfs_space_info **space_info)
3902{
3903 struct btrfs_space_info *found;
3904 int factor;
3905
3906 factor = btrfs_bg_type_to_factor(flags);
3907
3908 found = __find_space_info(info, flags);
3909 ASSERT(found);
3910 spin_lock(&found->lock);
3911 found->total_bytes += total_bytes;
3912 found->disk_total += total_bytes * factor;
3913 found->bytes_used += bytes_used;
3914 found->disk_used += bytes_used * factor;
3915 found->bytes_readonly += bytes_readonly;
3916 if (total_bytes > 0)
3917 found->full = 0;
3918 space_info_add_new_bytes(info, found, total_bytes -
3919 bytes_used - bytes_readonly);
3920 spin_unlock(&found->lock);
3921 *space_info = found;
3922}
3923
3924static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3925{
3926 u64 extra_flags = chunk_to_extended(flags) &
3927 BTRFS_EXTENDED_PROFILE_MASK;
3928
3929 write_seqlock(&fs_info->profiles_lock);
3930 if (flags & BTRFS_BLOCK_GROUP_DATA)
3931 fs_info->avail_data_alloc_bits |= extra_flags;
3932 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3933 fs_info->avail_metadata_alloc_bits |= extra_flags;
3934 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3935 fs_info->avail_system_alloc_bits |= extra_flags;
3936 write_sequnlock(&fs_info->profiles_lock);
3937}
3938
3939/*
3940 * Returns target flags in extended format or 0 if restripe for this
3941 * chunk_type is not in progress.
3942 *
3943 * Should be called with balance_lock held.
3944 */
3945static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3946{
3947 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3948 u64 target = 0;
3949
3950 if (!bctl)
3951 return 0;
3952
3953 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3954 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3955 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3956 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3957 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3958 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3959 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3960 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3961 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3962 }
3963
3964 return target;
3965}
3966
3967/*
3968 * @flags: available profiles in extended format (see ctree.h)
3969 *
3970 * Returns reduced profile in chunk format.  If profile changing is in
3971 * progress (either running or paused) picks the target profile (if it's
3972 * already available), otherwise falls back to plain reducing.
3973 */
3974static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
3975{
3976 u64 num_devices = fs_info->fs_devices->rw_devices;
3977 u64 target;
3978 u64 raid_type;
3979 u64 allowed = 0;
3980
3981
3982
3983
3984
3985 spin_lock(&fs_info->balance_lock);
3986 target = get_restripe_target(fs_info, flags);
3987 if (target) {
3988
3989 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3990 spin_unlock(&fs_info->balance_lock);
3991 return extended_to_chunk(target);
3992 }
3993 }
3994 spin_unlock(&fs_info->balance_lock);
3995
3996
3997 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
3998 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
3999 allowed |= btrfs_raid_array[raid_type].bg_flag;
4000 }
4001 allowed &= flags;
4002
4003 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
4004 allowed = BTRFS_BLOCK_GROUP_RAID6;
4005 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4006 allowed = BTRFS_BLOCK_GROUP_RAID5;
4007 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4008 allowed = BTRFS_BLOCK_GROUP_RAID10;
4009 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4010 allowed = BTRFS_BLOCK_GROUP_RAID1;
4011 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4012 allowed = BTRFS_BLOCK_GROUP_RAID0;
4013
4014 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
4015
4016 return extended_to_chunk(flags | allowed);
4017}
4018
4019static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
4020{
4021 unsigned seq;
4022 u64 flags;
4023
4024 do {
4025 flags = orig_flags;
4026 seq = read_seqbegin(&fs_info->profiles_lock);
4027
4028 if (flags & BTRFS_BLOCK_GROUP_DATA)
4029 flags |= fs_info->avail_data_alloc_bits;
4030 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4031 flags |= fs_info->avail_system_alloc_bits;
4032 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
4033 flags |= fs_info->avail_metadata_alloc_bits;
4034 } while (read_seqretry(&fs_info->profiles_lock, seq));
4035
4036 return btrfs_reduce_alloc_profile(fs_info, flags);
4037}
4038
4039static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
4040{
4041 struct btrfs_fs_info *fs_info = root->fs_info;
4042 u64 flags;
4043 u64 ret;
4044
4045 if (data)
4046 flags = BTRFS_BLOCK_GROUP_DATA;
4047 else if (root == fs_info->chunk_root)
4048 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4049 else
4050 flags = BTRFS_BLOCK_GROUP_METADATA;
4051
4052 ret = get_alloc_profile(fs_info, flags);
4053 return ret;
4054}
4055
4056u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
4057{
4058 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
4059}
4060
4061u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
4062{
4063 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4064}
4065
4066u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4067{
4068 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4069}
4070
4071static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4072 bool may_use_included)
4073{
4074 ASSERT(s_info);
4075 return s_info->bytes_used + s_info->bytes_reserved +
4076 s_info->bytes_pinned + s_info->bytes_readonly +
4077 (may_use_included ? s_info->bytes_may_use : 0);
4078}
4079
4080int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
4081{
4082 struct btrfs_root *root = inode->root;
4083 struct btrfs_fs_info *fs_info = root->fs_info;
4084 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
4085 u64 used;
4086 int ret = 0;
4087 int need_commit = 2;
4088 int have_pinned_space;
4089
4090
4091 bytes = ALIGN(bytes, fs_info->sectorsize);
4092
4093 if (btrfs_is_free_space_inode(inode)) {
4094 need_commit = 0;
4095 ASSERT(current->journal_info);
4096 }
4097
4098again:
4099
4100 spin_lock(&data_sinfo->lock);
4101 used = btrfs_space_info_used(data_sinfo, true);
4102
4103 if (used + bytes > data_sinfo->total_bytes) {
4104 struct btrfs_trans_handle *trans;
4105
4106
4107
4108
4109
4110 if (!data_sinfo->full) {
4111 u64 alloc_target;
4112
4113 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4114 spin_unlock(&data_sinfo->lock);
4115
4116 alloc_target = btrfs_data_alloc_profile(fs_info);
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127 trans = btrfs_join_transaction(root);
4128 if (IS_ERR(trans))
4129 return PTR_ERR(trans);
4130
4131 ret = do_chunk_alloc(trans, alloc_target,
4132 CHUNK_ALLOC_NO_FORCE);
4133 btrfs_end_transaction(trans);
4134 if (ret < 0) {
4135 if (ret != -ENOSPC)
4136 return ret;
4137 else {
4138 have_pinned_space = 1;
4139 goto commit_trans;
4140 }
4141 }
4142
4143 goto again;
4144 }
4145
4146		/*
4147		 * If we don't have enough pinned space to deal with this
4148		 * allocation, and no removed chunk in current transaction,
4149		 * don't bother committing the transaction.
4150		 */
4151 have_pinned_space = __percpu_counter_compare(
4152 &data_sinfo->total_bytes_pinned,
4153 used + bytes - data_sinfo->total_bytes,
4154 BTRFS_TOTAL_BYTES_PINNED_BATCH);
4155 spin_unlock(&data_sinfo->lock);
4156
4157
4158commit_trans:
4159 if (need_commit) {
4160 need_commit--;
4161
4162 if (need_commit > 0) {
4163 btrfs_start_delalloc_roots(fs_info, -1);
4164 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
4165 (u64)-1);
4166 }
4167
4168 trans = btrfs_join_transaction(root);
4169 if (IS_ERR(trans))
4170 return PTR_ERR(trans);
4171 if (have_pinned_space >= 0 ||
4172 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4173 &trans->transaction->flags) ||
4174 need_commit > 0) {
4175 ret = btrfs_commit_transaction(trans);
4176 if (ret)
4177 return ret;
4178
4179
4180
4181
4182
4183
4184
4185
4186 ret = btrfs_wait_on_delayed_iputs(fs_info);
4187 if (ret)
4188 return ret;
4189 goto again;
4190 } else {
4191 btrfs_end_transaction(trans);
4192 }
4193 }
4194
4195 trace_btrfs_space_reservation(fs_info,
4196 "space_info:enospc",
4197 data_sinfo->flags, bytes, 1);
4198 return -ENOSPC;
4199 }
4200 update_bytes_may_use(data_sinfo, bytes);
4201 trace_btrfs_space_reservation(fs_info, "space_info",
4202 data_sinfo->flags, bytes, 1);
4203 spin_unlock(&data_sinfo->lock);
4204
4205 return 0;
4206}
4207
4208int btrfs_check_data_free_space(struct inode *inode,
4209 struct extent_changeset **reserved, u64 start, u64 len)
4210{
4211 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4212 int ret;
4213
4214
4215 len = round_up(start + len, fs_info->sectorsize) -
4216 round_down(start, fs_info->sectorsize);
4217 start = round_down(start, fs_info->sectorsize);
4218
4219 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
4220 if (ret < 0)
4221 return ret;
4222
4223
4224 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
4225 if (ret < 0)
4226 btrfs_free_reserved_data_space_noquota(inode, start, len);
4227 else
4228 ret = 0;
4229 return ret;
4230}
4231
4232/*
4233 * Called if we need to clear a data reservation for this inode, normally in
4234 * an error case.
4235 *
4236 * This one will *NOT* use the accurate qgroup reserved space API, just for
4237 * the case in which we can't sleep and are sure it won't affect qgroup
4238 * reserved space, like clear_bit_hook().
4239 */
4240void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4241 u64 len)
4242{
4243 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4244 struct btrfs_space_info *data_sinfo;
4245
4246
4247 len = round_up(start + len, fs_info->sectorsize) -
4248 round_down(start, fs_info->sectorsize);
4249 start = round_down(start, fs_info->sectorsize);
4250
4251 data_sinfo = fs_info->data_sinfo;
4252 spin_lock(&data_sinfo->lock);
4253 update_bytes_may_use(data_sinfo, -len);
4254 trace_btrfs_space_reservation(fs_info, "space_info",
4255 data_sinfo->flags, len, 0);
4256 spin_unlock(&data_sinfo->lock);
4257}
4258
4259/*
4260 * Called if we need to clear a data reservation for this inode, normally in
4261 * an error case.
4262 *
4263 * This one will handle the per-inode data rsv map for accurate reserved
4264 * space framework.
4265 */
4266void btrfs_free_reserved_data_space(struct inode *inode,
4267 struct extent_changeset *reserved, u64 start, u64 len)
4268{
4269 struct btrfs_root *root = BTRFS_I(inode)->root;
4270
4271
4272 len = round_up(start + len, root->fs_info->sectorsize) -
4273 round_down(start, root->fs_info->sectorsize);
4274 start = round_down(start, root->fs_info->sectorsize);
4275
4276 btrfs_free_reserved_data_space_noquota(inode, start, len);
4277 btrfs_qgroup_free_data(inode, reserved, start, len);
4278}
4279
4280static void force_metadata_allocation(struct btrfs_fs_info *info)
4281{
4282 struct list_head *head = &info->space_info;
4283 struct btrfs_space_info *found;
4284
4285 rcu_read_lock();
4286 list_for_each_entry_rcu(found, head, list) {
4287 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4288 found->force_alloc = CHUNK_ALLOC_FORCE;
4289 }
4290 rcu_read_unlock();
4291}
4292
4293static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4294{
4295 return (global->size << 1);
4296}
4297
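/*
 * Decide whether a new chunk should be allocated for this space_info given
 * the force level: always for CHUNK_ALLOC_FORCE, when less than ~1% of the
 * filesystem (or 64M) is left for CHUNK_ALLOC_LIMITED, and otherwise only
 * once roughly 80% of the existing space is used.
 */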
4298static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
4299 struct btrfs_space_info *sinfo, int force)
4300{
4301 u64 bytes_used = btrfs_space_info_used(sinfo, false);
4302 u64 thresh;
4303
4304 if (force == CHUNK_ALLOC_FORCE)
4305 return 1;
4306
4307
4308
4309
4310
4311 if (force == CHUNK_ALLOC_LIMITED) {
4312 thresh = btrfs_super_total_bytes(fs_info->super_copy);
4313 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
4314
4315 if (sinfo->total_bytes - bytes_used < thresh)
4316 return 1;
4317 }
4318
4319 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
4320 return 0;
4321 return 1;
4322}
4323
4324static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4325{
4326 u64 num_dev;
4327
4328 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4329 BTRFS_BLOCK_GROUP_RAID0 |
4330 BTRFS_BLOCK_GROUP_RAID5 |
4331 BTRFS_BLOCK_GROUP_RAID6))
4332 num_dev = fs_info->fs_devices->rw_devices;
4333 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4334 num_dev = 2;
4335 else
4336 num_dev = 1;
4337
4338 return num_dev;
4339}
4340
4341/*
4342 * Reserve space in the system space_info, allocating a new system chunk if
4343 * necessary, so that the caller can safely update the chunk tree (device
4344 * items plus the chunk item) for an upcoming chunk allocation or removal.
4345 */
4346void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4347{
4348 struct btrfs_fs_info *fs_info = trans->fs_info;
4349 struct btrfs_space_info *info;
4350 u64 left;
4351 u64 thresh;
4352 int ret = 0;
4353 u64 num_devs;
4354
4355
4356
4357
4358
4359 lockdep_assert_held(&fs_info->chunk_mutex);
4360
4361 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4362 spin_lock(&info->lock);
4363 left = info->total_bytes - btrfs_space_info_used(info, true);
4364 spin_unlock(&info->lock);
4365
4366 num_devs = get_profile_num_devs(fs_info, type);
4367
4368
4369 thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
4370 btrfs_calc_trans_metadata_size(fs_info, 1);
4371
4372 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4373 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4374 left, thresh, type);
4375 dump_space_info(fs_info, info, 0, 0);
4376 }
4377
4378 if (left < thresh) {
4379 u64 flags = btrfs_system_alloc_profile(fs_info);
4380
4381
4382
4383
4384
4385
4386
4387 ret = btrfs_alloc_chunk(trans, flags);
4388 }
4389
4390 if (!ret) {
4391 ret = btrfs_block_rsv_add(fs_info->chunk_root,
4392 &fs_info->chunk_block_rsv,
4393 thresh, BTRFS_RESERVE_NO_FLUSH);
4394 if (!ret)
4395 trans->chunk_bytes_reserved += thresh;
4396 }
4397}
4398
4399/*
4400 * If force is CHUNK_ALLOC_FORCE:
4401 *    - return 1 if it successfully allocates a chunk,
4402 *    - return errors including -ENOSPC otherwise.
4403 * If force is NOT CHUNK_ALLOC_FORCE:
4404 *    - return 0 if it doesn't need to allocate a new chunk,
4405 *    - return 1 if it successfully allocates a chunk,
4406 *    - return errors including -ENOSPC otherwise.
4407 */
4408static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4409 int force)
4410{
4411 struct btrfs_fs_info *fs_info = trans->fs_info;
4412 struct btrfs_space_info *space_info;
4413 bool wait_for_alloc = false;
4414 bool should_alloc = false;
4415 int ret = 0;
4416
4417
4418 if (trans->allocating_chunk)
4419 return -ENOSPC;
4420
4421 space_info = __find_space_info(fs_info, flags);
4422 ASSERT(space_info);
4423
4424 do {
4425 spin_lock(&space_info->lock);
4426 if (force < space_info->force_alloc)
4427 force = space_info->force_alloc;
4428 should_alloc = should_alloc_chunk(fs_info, space_info, force);
4429 if (space_info->full) {
4430
4431 if (should_alloc)
4432 ret = -ENOSPC;
4433 else
4434 ret = 0;
4435 spin_unlock(&space_info->lock);
4436 return ret;
4437 } else if (!should_alloc) {
4438 spin_unlock(&space_info->lock);
4439 return 0;
4440 } else if (space_info->chunk_alloc) {
4441
4442
4443
4444
4445
4446
4447 wait_for_alloc = true;
4448 spin_unlock(&space_info->lock);
4449 mutex_lock(&fs_info->chunk_mutex);
4450 mutex_unlock(&fs_info->chunk_mutex);
4451 } else {
4452
4453 space_info->chunk_alloc = 1;
4454 wait_for_alloc = false;
4455 spin_unlock(&space_info->lock);
4456 }
4457
4458 cond_resched();
4459 } while (wait_for_alloc);
4460
4461 mutex_lock(&fs_info->chunk_mutex);
4462 trans->allocating_chunk = true;
4463
4464
4465
4466
4467
4468 if (btrfs_mixed_space_info(space_info))
4469 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4470
4471	/*
4472	 * If we're doing a data chunk, go ahead and make sure that we keep a
4473	 * reasonable number of metadata chunks allocated in the FS as well,
4474	 * based on the metadata_ratio mount option.
4475	 */
4476 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
4477 fs_info->data_chunk_allocations++;
4478 if (!(fs_info->data_chunk_allocations %
4479 fs_info->metadata_ratio))
4480 force_metadata_allocation(fs_info);
4481 }
4482
4483	/*
4484	 * Check if we have enough space in the SYSTEM chunk because we may
4485	 * need to update devices.
4486	 */
4487 check_system_chunk(trans, flags);
4488
4489 ret = btrfs_alloc_chunk(trans, flags);
4490 trans->allocating_chunk = false;
4491
4492 spin_lock(&space_info->lock);
4493 if (ret < 0) {
4494 if (ret == -ENOSPC)
4495 space_info->full = 1;
4496 else
4497 goto out;
4498 } else {
4499 ret = 1;
4500 space_info->max_extent_size = 0;
4501 }
4502
4503 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4504out:
4505 space_info->chunk_alloc = 0;
4506 spin_unlock(&space_info->lock);
4507 mutex_unlock(&fs_info->chunk_mutex);
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522 if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
4523 btrfs_create_pending_block_groups(trans);
4524
4525 return ret;
4526}
4527
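/*
 * Decide whether a metadata reservation of @bytes may overcommit the
 * currently allocated space, based on the unallocated device space that is
 * still available for new chunks, scaled down by the raid factor and by how
 * aggressively we are willing to flush.
 */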
4528static int can_overcommit(struct btrfs_fs_info *fs_info,
4529 struct btrfs_space_info *space_info, u64 bytes,
4530 enum btrfs_reserve_flush_enum flush,
4531 bool system_chunk)
4532{
4533 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4534 u64 profile;
4535 u64 space_size;
4536 u64 avail;
4537 u64 used;
4538 int factor;
4539
4540
4541 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4542 return 0;
4543
4544 if (system_chunk)
4545 profile = btrfs_system_alloc_profile(fs_info);
4546 else
4547 profile = btrfs_metadata_alloc_profile(fs_info);
4548
4549 used = btrfs_space_info_used(space_info, false);
4550
4551	/*
4552	 * We only want to allow over committing if we have lots of actual
4553	 * space free, but if we don't have enough space to handle the global
4554	 * reserve space then we could end up having a real enospc problem when
4555	 * trying to allocate a chunk or some other important allocation.
4556	 */
4557 spin_lock(&global_rsv->lock);
4558 space_size = calc_global_rsv_need_space(global_rsv);
4559 spin_unlock(&global_rsv->lock);
4560 if (used + space_size >= space_info->total_bytes)
4561 return 0;
4562
4563 used += space_info->bytes_may_use;
4564
4565 avail = atomic64_read(&fs_info->free_chunk_space);
4566
4567
4568
4569
4570
4571
4572
4573 factor = btrfs_bg_type_to_factor(profile);
4574 avail = div_u64(avail, factor);
4575
4576	/*
4577	 * If we aren't flushing all things, let us overcommit up to 1/2 of the
4578	 * space.  If we can flush, don't let us overcommit too much, let it
4579	 * overcommit up to 1/8 of the space.
4580	 */
4581 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4582 avail >>= 3;
4583 else
4584 avail >>= 1;
4585
4586 if (used + bytes < space_info->total_bytes + avail)
4587 return 1;
4588 return 0;
4589}
4590
4591static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
4592 unsigned long nr_pages, int nr_items)
4593{
4594 struct super_block *sb = fs_info->sb;
4595
4596 if (down_read_trylock(&sb->s_umount)) {
4597 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4598 up_read(&sb->s_umount);
4599 } else {
4600
4601
4602
4603
4604
4605
4606
4607 btrfs_start_delalloc_roots(fs_info, nr_items);
4608 if (!current->journal_info)
4609 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
4610 }
4611}
4612
4613static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
4614 u64 to_reclaim)
4615{
4616 u64 bytes;
4617 u64 nr;
4618
4619 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
4620 nr = div64_u64(to_reclaim, bytes);
4621 if (!nr)
4622 nr = 1;
4623 return nr;
4624}
4625
4626#define EXTENT_SIZE_PER_ITEM SZ_256K
4627
4628/*
4629 * Shrink metadata reservation for delalloc by flushing dirty data.
4630 */
4631static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4632 u64 orig, bool wait_ordered)
4633{
4634 struct btrfs_space_info *space_info;
4635 struct btrfs_trans_handle *trans;
4636 u64 delalloc_bytes;
4637 u64 dio_bytes;
4638 u64 async_pages;
4639 u64 items;
4640 long time_left;
4641 unsigned long nr_pages;
4642 int loops;
4643
4644
4645 items = calc_reclaim_items_nr(fs_info, to_reclaim);
4646 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4647
4648 trans = (struct btrfs_trans_handle *)current->journal_info;
4649 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4650
4651 delalloc_bytes = percpu_counter_sum_positive(
4652 &fs_info->delalloc_bytes);
4653 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4654 if (delalloc_bytes == 0 && dio_bytes == 0) {
4655 if (trans)
4656 return;
4657 if (wait_ordered)
4658 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4659 return;
4660 }
4661
4662	/*
4663	 * If we are doing more ordered than delalloc we need to just wait on
4664	 * ordered extents, otherwise we'll waste time trying to flush delalloc
4665	 * that likely won't give us the space back we need.
4666	 */
4667 if (dio_bytes > delalloc_bytes)
4668 wait_ordered = true;
4669
4670 loops = 0;
4671 while ((delalloc_bytes || dio_bytes) && loops < 3) {
4672 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
4673
4674
4675
4676
4677
4678
4679 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
4680
4681 /*
4682 * We need to wait for the compressed pages to start before we
4683 * continue.
4684 */
4685 async_pages = atomic_read(&fs_info->async_delalloc_pages);
4686 if (!async_pages)
4687 goto skip_async;
4688
4689 /*
4690 * Calculate how many compressed pages we want written out before we
4691 * continue, i.e. if there are more async pages than we require then
4692 * wait_event will wait until nr_pages have been written out.
4693 */
4694 if (async_pages <= nr_pages)
4695 async_pages = 0;
4696 else
4697 async_pages -= nr_pages;
4698
4699 wait_event(fs_info->async_submit_wait,
4700 atomic_read(&fs_info->async_delalloc_pages) <=
4701 (int)async_pages);
4702skip_async:
4703 spin_lock(&space_info->lock);
4704 if (list_empty(&space_info->tickets) &&
4705 list_empty(&space_info->priority_tickets)) {
4706 spin_unlock(&space_info->lock);
4707 break;
4708 }
4709 spin_unlock(&space_info->lock);
4710
4711 loops++;
4712 if (wait_ordered && !trans) {
4713 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4714 } else {
4715 time_left = schedule_timeout_killable(1);
4716 if (time_left)
4717 break;
4718 }
4719 delalloc_bytes = percpu_counter_sum_positive(
4720 &fs_info->delalloc_bytes);
4721 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4722 }
4723}
4724
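/*
 * A reserve_ticket represents one pending metadata reservation that could
 * not be satisfied immediately.  It sits on space_info->tickets (async
 * flushing) or ->priority_tickets, and flushers reduce ->bytes as space
 * becomes available; the waiter is woken once ->bytes hits zero or
 * ->error is set.
 */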
4725struct reserve_ticket {
4726 u64 orig_bytes;
4727 u64 bytes;
4728 int error;
4729 struct list_head list;
4730 wait_queue_head_t wait;
4731};
4732
4733/*
4734 * may_commit_transaction - commit the transaction if it can satisfy
4735 * the first pending reservation ticket
4736 *
4737 * This checks whether the pinned space, plus what the delayed and
4738 * delayed-refs reserves would give back on commit, covers the head
4739 * ticket on @space_info and commits a joined transaction if so.
4740 * Otherwise it returns -ENOSPC, or -EAGAIN when the caller already
4741 * holds a transaction handle and therefore must not commit here.
4742 */
4743static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4744 struct btrfs_space_info *space_info)
4745{
4746 struct reserve_ticket *ticket = NULL;
4747 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
4748 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
4749 struct btrfs_trans_handle *trans;
4750 u64 bytes_needed;
4751 u64 reclaim_bytes = 0;
4752
4753 trans = (struct btrfs_trans_handle *)current->journal_info;
4754 if (trans)
4755 return -EAGAIN;
4756
4757 spin_lock(&space_info->lock);
4758 if (!list_empty(&space_info->priority_tickets))
4759 ticket = list_first_entry(&space_info->priority_tickets,
4760 struct reserve_ticket, list);
4761 else if (!list_empty(&space_info->tickets))
4762 ticket = list_first_entry(&space_info->tickets,
4763 struct reserve_ticket, list);
4764 bytes_needed = (ticket) ? ticket->bytes : 0;
4765 spin_unlock(&space_info->lock);
4766
4767 if (!bytes_needed)
4768 return 0;
4769
4770 trans = btrfs_join_transaction(fs_info->extent_root);
4771 if (IS_ERR(trans))
4772 return PTR_ERR(trans);
4773
4774 /*
4775 * See if there is enough pinned space to make this reservation, or if
4776 * we have block groups that are going to be freed, allowing us to
4777 * possibly do a chunk allocation the next loop through.
4778 */
4779 if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
4780 __percpu_counter_compare(&space_info->total_bytes_pinned,
4781 bytes_needed,
4782 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
4783 goto commit;
4784
4785
4786
4787
4788
4789 if (space_info != delayed_rsv->space_info)
4790 goto enospc;
4791
4792 spin_lock(&delayed_rsv->lock);
4793 reclaim_bytes += delayed_rsv->reserved;
4794 spin_unlock(&delayed_rsv->lock);
4795
4796 spin_lock(&delayed_refs_rsv->lock);
4797 reclaim_bytes += delayed_refs_rsv->reserved;
4798 spin_unlock(&delayed_refs_rsv->lock);
4799 if (reclaim_bytes >= bytes_needed)
4800 goto commit;
4801 bytes_needed -= reclaim_bytes;
4802
4803 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4804 bytes_needed,
4805 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
4806 goto enospc;
4807
4808commit:
4809 return btrfs_commit_transaction(trans);
4810enospc:
4811 btrfs_end_transaction(trans);
4812 return -ENOSPC;
4813}
4814
4815/*
4816 * Try to flush some data based on policy set by @state.  This is only
4817 * advisory and may fail for various reasons.  The caller is supposed
4818 * to examine the state of @space_info to detect the outcome.
4819 */
4820static void flush_space(struct btrfs_fs_info *fs_info,
4821 struct btrfs_space_info *space_info, u64 num_bytes,
4822 int state)
4823{
4824 struct btrfs_root *root = fs_info->extent_root;
4825 struct btrfs_trans_handle *trans;
4826 int nr;
4827 int ret = 0;
4828
4829 switch (state) {
4830 case FLUSH_DELAYED_ITEMS_NR:
4831 case FLUSH_DELAYED_ITEMS:
4832 if (state == FLUSH_DELAYED_ITEMS_NR)
4833 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
4834 else
4835 nr = -1;
4836
4837 trans = btrfs_join_transaction(root);
4838 if (IS_ERR(trans)) {
4839 ret = PTR_ERR(trans);
4840 break;
4841 }
4842 ret = btrfs_run_delayed_items_nr(trans, nr);
4843 btrfs_end_transaction(trans);
4844 break;
4845 case FLUSH_DELALLOC:
4846 case FLUSH_DELALLOC_WAIT:
4847 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
4848 state == FLUSH_DELALLOC_WAIT);
4849 break;
4850 case FLUSH_DELAYED_REFS_NR:
4851 case FLUSH_DELAYED_REFS:
4852 trans = btrfs_join_transaction(root);
4853 if (IS_ERR(trans)) {
4854 ret = PTR_ERR(trans);
4855 break;
4856 }
4857 if (state == FLUSH_DELAYED_REFS_NR)
4858 nr = calc_reclaim_items_nr(fs_info, num_bytes);
4859 else
4860 nr = 0;
4861 btrfs_run_delayed_refs(trans, nr);
4862 btrfs_end_transaction(trans);
4863 break;
4864 case ALLOC_CHUNK:
4865 case ALLOC_CHUNK_FORCE:
4866 trans = btrfs_join_transaction(root);
4867 if (IS_ERR(trans)) {
4868 ret = PTR_ERR(trans);
4869 break;
4870 }
4871 ret = do_chunk_alloc(trans,
4872 btrfs_metadata_alloc_profile(fs_info),
4873 (state == ALLOC_CHUNK) ?
4874 CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
4875 btrfs_end_transaction(trans);
4876 if (ret > 0 || ret == -ENOSPC)
4877 ret = 0;
4878 break;
4879 case COMMIT_TRANS:
4880 /*
4881 * If we have pending delayed iputs then we could free up a bunch of
4882 * pinned space, so make sure we run the iputs before we do our pinned
4883 * bytes check below.
4884 */
4885 btrfs_run_delayed_iputs(fs_info);
4886 btrfs_wait_on_delayed_iputs(fs_info);
4887
4888 ret = may_commit_transaction(fs_info, space_info);
4889 break;
4890 default:
4891 ret = -ENOSPC;
4892 break;
4893 }
4894
4895 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
4896 ret);
4897 return;
4898}
4899
4900static inline u64
4901btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
4902 struct btrfs_space_info *space_info,
4903 bool system_chunk)
4904{
4905 struct reserve_ticket *ticket;
4906 u64 used;
4907 u64 expected;
4908 u64 to_reclaim = 0;
4909
4910 list_for_each_entry(ticket, &space_info->tickets, list)
4911 to_reclaim += ticket->bytes;
4912 list_for_each_entry(ticket, &space_info->priority_tickets, list)
4913 to_reclaim += ticket->bytes;
4914 if (to_reclaim)
4915 return to_reclaim;
4916
4917 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4918 if (can_overcommit(fs_info, space_info, to_reclaim,
4919 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4920 return 0;
4921
4922 used = btrfs_space_info_used(space_info, true);
4923
4924 if (can_overcommit(fs_info, space_info, SZ_1M,
4925 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4926 expected = div_factor_fine(space_info->total_bytes, 95);
4927 else
4928 expected = div_factor_fine(space_info->total_bytes, 90);
4929
4930 if (used > expected)
4931 to_reclaim = used - expected;
4932 else
4933 to_reclaim = 0;
4934 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4935 space_info->bytes_reserved);
4936 return to_reclaim;
4937}
4938
4939static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
4940 struct btrfs_space_info *space_info,
4941 u64 used, bool system_chunk)
4942{
4943 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4944
4945 /* If we're just plain full then async reclaim just slows us down */
4946 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4947 return 0;
4948
4949 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
4950 system_chunk))
4951 return 0;
4952
4953 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4954 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4955}
4956
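/*
 * Fail every ticket still queued on @head with -ENOSPC.  Returns true if
 * the ticket being woken had already been partially filled (bytes !=
 * orig_bytes), in which case the caller restarts the flush state machine
 * one more time instead of giving up.
 */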
4957static bool wake_all_tickets(struct list_head *head)
4958{
4959 struct reserve_ticket *ticket;
4960
4961 while (!list_empty(head)) {
4962 ticket = list_first_entry(head, struct reserve_ticket, list);
4963 list_del_init(&ticket->list);
4964 ticket->error = -ENOSPC;
4965 wake_up(&ticket->wait);
4966 if (ticket->bytes != ticket->orig_bytes)
4967 return true;
4968 }
4969 return false;
4970}
4971
4972
4973
4974
4975
4976
4977static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4978{
4979 struct btrfs_fs_info *fs_info;
4980 struct btrfs_space_info *space_info;
4981 u64 to_reclaim;
4982 int flush_state;
4983 int commit_cycles = 0;
4984 u64 last_tickets_id;
4985
4986 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4987 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4988
4989 spin_lock(&space_info->lock);
4990 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
4991 false);
4992 if (!to_reclaim) {
4993 space_info->flush = 0;
4994 spin_unlock(&space_info->lock);
4995 return;
4996 }
4997 last_tickets_id = space_info->tickets_id;
4998 spin_unlock(&space_info->lock);
4999
5000 flush_state = FLUSH_DELAYED_ITEMS_NR;
5001 do {
5002 flush_space(fs_info, space_info, to_reclaim, flush_state);
5003 spin_lock(&space_info->lock);
5004 if (list_empty(&space_info->tickets)) {
5005 space_info->flush = 0;
5006 spin_unlock(&space_info->lock);
5007 return;
5008 }
5009 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
5010 space_info,
5011 false);
5012 if (last_tickets_id == space_info->tickets_id) {
5013 flush_state++;
5014 } else {
5015 last_tickets_id = space_info->tickets_id;
5016 flush_state = FLUSH_DELAYED_ITEMS_NR;
5017 if (commit_cycles)
5018 commit_cycles--;
5019 }
5020
5021 /*
5022 * We don't want to force a chunk allocation until we've tried pretty
5023 * hard to reclaim space.  Think of the case where we freed up a bunch
5024 * of space and so have a lot of pinned space to reclaim.  We would
5025 * rather use that than possibly create an underutilized metadata
5026 * chunk.  So if this is our first run through the flushing state
5027 * machine, skip ALLOC_CHUNK_FORCE and commit the transaction first.
5028 * If nothing has changed the next time around then we can force a
5029 * chunk allocation.
5030 */
5031 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
5032 flush_state++;
5033
5034 if (flush_state > COMMIT_TRANS) {
5035 commit_cycles++;
5036 if (commit_cycles > 2) {
5037 if (wake_all_tickets(&space_info->tickets)) {
5038 flush_state = FLUSH_DELAYED_ITEMS_NR;
5039 commit_cycles--;
5040 } else {
5041 space_info->flush = 0;
5042 }
5043 } else {
5044 flush_state = FLUSH_DELAYED_ITEMS_NR;
5045 }
5046 }
5047 spin_unlock(&space_info->lock);
5048 } while (flush_state <= COMMIT_TRANS);
5049}
5050
5051void btrfs_init_async_reclaim_work(struct work_struct *work)
5052{
5053 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5054}
5055
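/*
 * Priority (BTRFS_RESERVE_FLUSH_LIMIT) reservations flush inline with a
 * reduced set of states: only delayed items and chunk allocation, never
 * delalloc writeback or a transaction commit, since the caller typically
 * already holds a transaction handle.
 */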
5056static const enum btrfs_flush_state priority_flush_states[] = {
5057 FLUSH_DELAYED_ITEMS_NR,
5058 FLUSH_DELAYED_ITEMS,
5059 ALLOC_CHUNK,
5060};
5061
5062static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5063 struct btrfs_space_info *space_info,
5064 struct reserve_ticket *ticket)
5065{
5066 u64 to_reclaim;
5067 int flush_state;
5068
5069 spin_lock(&space_info->lock);
5070 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5071 false);
5072 if (!to_reclaim) {
5073 spin_unlock(&space_info->lock);
5074 return;
5075 }
5076 spin_unlock(&space_info->lock);
5077
5078 flush_state = 0;
5079 do {
5080 flush_space(fs_info, space_info, to_reclaim,
5081 priority_flush_states[flush_state]);
5082 flush_state++;
5083 spin_lock(&space_info->lock);
5084 if (ticket->bytes == 0) {
5085 spin_unlock(&space_info->lock);
5086 return;
5087 }
5088 spin_unlock(&space_info->lock);
5089 } while (flush_state < ARRAY_SIZE(priority_flush_states));
5090}
5091
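/*
 * Sleep until our ticket is satisfied by the async flusher or fails.
 * Returns 0 when the full reservation was granted, the ticket's error
 * (-ENOSPC) when flushing gave up, or -EINTR if we were killed while
 * waiting; any partially granted bytes are handed back to the space_info.
 */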
5092static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5093 struct btrfs_space_info *space_info,
5094 struct reserve_ticket *ticket)
5095
5096{
5097 DEFINE_WAIT(wait);
5098 u64 reclaim_bytes = 0;
5099 int ret = 0;
5100
5101 spin_lock(&space_info->lock);
5102 while (ticket->bytes > 0 && ticket->error == 0) {
5103 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5104 if (ret) {
5105 ret = -EINTR;
5106 break;
5107 }
5108 spin_unlock(&space_info->lock);
5109
5110 schedule();
5111
5112 finish_wait(&ticket->wait, &wait);
5113 spin_lock(&space_info->lock);
5114 }
5115 if (!ret)
5116 ret = ticket->error;
5117 if (!list_empty(&ticket->list))
5118 list_del_init(&ticket->list);
5119 if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
5120 reclaim_bytes = ticket->orig_bytes - ticket->bytes;
5121 spin_unlock(&space_info->lock);
5122
5123 if (reclaim_bytes)
5124 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
5125 return ret;
5126}
5127
5128/*
5129 * __reserve_metadata_bytes - try to reserve bytes from a space_info
5130 * @fs_info - the filesystem
5131 * @space_info - the space_info we want to allocate from
5132 * @orig_bytes - the number of bytes we want
5133 * @flush - whether or not we can flush to make our reservation
5134 * @system_chunk - whether this is for the system chunk space
5135 *
5136 * This will reserve @orig_bytes from @space_info.  If there is not
5137 * enough space it queues a reservation ticket, kicks off flushing
5138 * according to @flush, waits for the async flusher (or flushes inline
5139 * for priority tickets) and returns -ENOSPC only if the space still
5140 * could not be found.
5141 */
5142static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
5143 struct btrfs_space_info *space_info,
5144 u64 orig_bytes,
5145 enum btrfs_reserve_flush_enum flush,
5146 bool system_chunk)
5147{
5148 struct reserve_ticket ticket;
5149 u64 used;
5150 u64 reclaim_bytes = 0;
5151 int ret = 0;
5152
5153 ASSERT(orig_bytes);
5154 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5155
5156 spin_lock(&space_info->lock);
5157 ret = -ENOSPC;
5158 used = btrfs_space_info_used(space_info, true);
5159
5160 /*
5161 * If we have enough space then hooray, make our reservation and carry
5162 * on.  If not, see if we can overcommit, and if we can, hooray, carry
5163 * on.  If not, things get more complicated.
5164 */
5165 if (used + orig_bytes <= space_info->total_bytes) {
5166 update_bytes_may_use(space_info, orig_bytes);
5167 trace_btrfs_space_reservation(fs_info, "space_info",
5168 space_info->flags, orig_bytes, 1);
5169 ret = 0;
5170 } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
5171 system_chunk)) {
5172 update_bytes_may_use(space_info, orig_bytes);
5173 trace_btrfs_space_reservation(fs_info, "space_info",
5174 space_info->flags, orig_bytes, 1);
5175 ret = 0;
5176 }
5177
5178 /*
5179 * If we couldn't make our reservation then set up our reservation
5180 * ticket and kick off the async worker if it's not already running.
5181 *
5182 * If we are a priority flusher then we just need to add our ticket to
5183 * the list and we will do our own flushing further down.
5184 */
5185 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5186 ticket.orig_bytes = orig_bytes;
5187 ticket.bytes = orig_bytes;
5188 ticket.error = 0;
5189 init_waitqueue_head(&ticket.wait);
5190 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5191 list_add_tail(&ticket.list, &space_info->tickets);
5192 if (!space_info->flush) {
5193 space_info->flush = 1;
5194 trace_btrfs_trigger_flush(fs_info,
5195 space_info->flags,
5196 orig_bytes, flush,
5197 "enospc");
5198 queue_work(system_unbound_wq,
5199 &fs_info->async_reclaim_work);
5200 }
5201 } else {
5202 list_add_tail(&ticket.list,
5203 &space_info->priority_tickets);
5204 }
5205 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5206 used += orig_bytes;
5207
5208
5209
5210
5211
5212 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
5213 need_do_async_reclaim(fs_info, space_info,
5214 used, system_chunk) &&
5215 !work_busy(&fs_info->async_reclaim_work)) {
5216 trace_btrfs_trigger_flush(fs_info, space_info->flags,
5217 orig_bytes, flush, "preempt");
5218 queue_work(system_unbound_wq,
5219 &fs_info->async_reclaim_work);
5220 }
5221 }
5222 spin_unlock(&space_info->lock);
5223 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5224 return ret;
5225
5226 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5227 return wait_reserve_ticket(fs_info, space_info, &ticket);
5228
5229 ret = 0;
5230 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
5231 spin_lock(&space_info->lock);
5232 if (ticket.bytes) {
5233 if (ticket.bytes < orig_bytes)
5234 reclaim_bytes = orig_bytes - ticket.bytes;
5235 list_del_init(&ticket.list);
5236 ret = -ENOSPC;
5237 }
5238 spin_unlock(&space_info->lock);
5239
5240 if (reclaim_bytes)
5241 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
5242 ASSERT(list_empty(&ticket.list));
5243 return ret;
5244}
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260static int reserve_metadata_bytes(struct btrfs_root *root,
5261 struct btrfs_block_rsv *block_rsv,
5262 u64 orig_bytes,
5263 enum btrfs_reserve_flush_enum flush)
5264{
5265 struct btrfs_fs_info *fs_info = root->fs_info;
5266 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5267 int ret;
5268 bool system_chunk = (root == fs_info->chunk_root);
5269
5270 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
5271 orig_bytes, flush, system_chunk);
5272 if (ret == -ENOSPC &&
5273 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5274 if (block_rsv != global_rsv &&
5275 !block_rsv_use_bytes(global_rsv, orig_bytes))
5276 ret = 0;
5277 }
5278 if (ret == -ENOSPC) {
5279 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
5280 block_rsv->space_info->flags,
5281 orig_bytes, 1);
5282
5283 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
5284 dump_space_info(fs_info, block_rsv->space_info,
5285 orig_bytes, 0);
5286 }
5287 return ret;
5288}
5289
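/*
 * Pick the block reserve to charge a tree block allocation to: the
 * transaction's rsv for reference-counted (COW-able) roots, for the csum
 * tree while adding csums and for the uuid tree; otherwise fall back to
 * the root's own rsv, and as a last resort the (always empty) empty rsv.
 */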
5290static struct btrfs_block_rsv *get_block_rsv(
5291 const struct btrfs_trans_handle *trans,
5292 const struct btrfs_root *root)
5293{
5294 struct btrfs_fs_info *fs_info = root->fs_info;
5295 struct btrfs_block_rsv *block_rsv = NULL;
5296
5297 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5298 (root == fs_info->csum_root && trans->adding_csums) ||
5299 (root == fs_info->uuid_root))
5300 block_rsv = trans->block_rsv;
5301
5302 if (!block_rsv)
5303 block_rsv = root->block_rsv;
5304
5305 if (!block_rsv)
5306 block_rsv = &fs_info->empty_block_rsv;
5307
5308 return block_rsv;
5309}
5310
5311static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5312 u64 num_bytes)
5313{
5314 int ret = -ENOSPC;
5315 spin_lock(&block_rsv->lock);
5316 if (block_rsv->reserved >= num_bytes) {
5317 block_rsv->reserved -= num_bytes;
5318 if (block_rsv->reserved < block_rsv->size)
5319 block_rsv->full = 0;
5320 ret = 0;
5321 }
5322 spin_unlock(&block_rsv->lock);
5323 return ret;
5324}
5325
5326static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5327 u64 num_bytes, bool update_size)
5328{
5329 spin_lock(&block_rsv->lock);
5330 block_rsv->reserved += num_bytes;
5331 if (update_size)
5332 block_rsv->size += num_bytes;
5333 else if (block_rsv->reserved >= block_rsv->size)
5334 block_rsv->full = 1;
5335 spin_unlock(&block_rsv->lock);
5336}
5337
5338int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5339 struct btrfs_block_rsv *dest, u64 num_bytes,
5340 int min_factor)
5341{
5342 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5343 u64 min_bytes;
5344
5345 if (global_rsv->space_info != dest->space_info)
5346 return -ENOSPC;
5347
5348 spin_lock(&global_rsv->lock);
5349 min_bytes = div_factor(global_rsv->size, min_factor);
5350 if (global_rsv->reserved < min_bytes + num_bytes) {
5351 spin_unlock(&global_rsv->lock);
5352 return -ENOSPC;
5353 }
5354 global_rsv->reserved -= num_bytes;
5355 if (global_rsv->reserved < global_rsv->size)
5356 global_rsv->full = 0;
5357 spin_unlock(&global_rsv->lock);
5358
5359 block_rsv_add_bytes(dest, num_bytes, true);
5360 return 0;
5361}
5362
5363/*
5364 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv
5365 * @fs_info - the fs info for our fs
5366 * @src - the source block rsv to transfer from
5367 * @num_bytes - the number of bytes to transfer
5368 *
5369 * This transfers up to @num_bytes from the src rsv to the
5370 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
5371 */
5372void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
5373 struct btrfs_block_rsv *src,
5374 u64 num_bytes)
5375{
5376 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
5377 u64 to_free = 0;
5378
5379 spin_lock(&src->lock);
5380 src->reserved -= num_bytes;
5381 src->size -= num_bytes;
5382 spin_unlock(&src->lock);
5383
5384 spin_lock(&delayed_refs_rsv->lock);
5385 if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
5386 u64 delta = delayed_refs_rsv->size -
5387 delayed_refs_rsv->reserved;
5388 if (num_bytes > delta) {
5389 to_free = num_bytes - delta;
5390 num_bytes = delta;
5391 }
5392 } else {
5393 to_free = num_bytes;
5394 num_bytes = 0;
5395 }
5396
5397 if (num_bytes)
5398 delayed_refs_rsv->reserved += num_bytes;
5399 if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
5400 delayed_refs_rsv->full = 1;
5401 spin_unlock(&delayed_refs_rsv->lock);
5402
5403 if (num_bytes)
5404 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5405 0, num_bytes, 1);
5406 if (to_free)
5407 space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
5408 to_free);
5409}
5410
5411/*
5412 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage
5413 * @fs_info - the fs_info for our fs
5414 * @flush - control how we can flush for this reservation
5415 *
5416 * This will refill the delayed block_rsv up to 1 item's worth of space
5417 * and will return -ENOSPC if we can't make the reservation.
5418 */
5419int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
5420 enum btrfs_reserve_flush_enum flush)
5421{
5422 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5423 u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
5424 u64 num_bytes = 0;
5425 int ret = -ENOSPC;
5426
5427 spin_lock(&block_rsv->lock);
5428 if (block_rsv->reserved < block_rsv->size) {
5429 num_bytes = block_rsv->size - block_rsv->reserved;
5430 num_bytes = min(num_bytes, limit);
5431 }
5432 spin_unlock(&block_rsv->lock);
5433
5434 if (!num_bytes)
5435 return 0;
5436
5437 ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
5438 num_bytes, flush);
5439 if (ret)
5440 return ret;
5441 block_rsv_add_bytes(block_rsv, num_bytes, false);
5442 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5443 0, num_bytes, 1);
5444 return 0;
5445}
5446
5447
5448
5449
5450
5451static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5452 struct btrfs_space_info *space_info,
5453 u64 num_bytes)
5454{
5455 struct reserve_ticket *ticket;
5456 struct list_head *head;
5457 u64 used;
5458 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5459 bool check_overcommit = false;
5460
5461 spin_lock(&space_info->lock);
5462 head = &space_info->priority_tickets;
5463
5464 /*
5465 * If we are over our limit then we need to check if we can overcommit;
5466 * if we can't then we just need to free up our space and not satisfy
5467 * any requests.
5468 */
5469 used = btrfs_space_info_used(space_info, true);
5470 if (used - num_bytes >= space_info->total_bytes)
5471 check_overcommit = true;
5472again:
5473 while (!list_empty(head) && num_bytes) {
5474 ticket = list_first_entry(head, struct reserve_ticket,
5475 list);
5476 /*
5477 * We use 0 bytes because this space is already reserved, so adding
5478 * the ticket space would be a double count.
5479 */
5480 if (check_overcommit &&
5481 !can_overcommit(fs_info, space_info, 0, flush, false))
5482 break;
5483 if (num_bytes >= ticket->bytes) {
5484 list_del_init(&ticket->list);
5485 num_bytes -= ticket->bytes;
5486 ticket->bytes = 0;
5487 space_info->tickets_id++;
5488 wake_up(&ticket->wait);
5489 } else {
5490 ticket->bytes -= num_bytes;
5491 num_bytes = 0;
5492 }
5493 }
5494
5495 if (num_bytes && head == &space_info->priority_tickets) {
5496 head = &space_info->tickets;
5497 flush = BTRFS_RESERVE_FLUSH_ALL;
5498 goto again;
5499 }
5500 update_bytes_may_use(space_info, -num_bytes);
5501 trace_btrfs_space_reservation(fs_info, "space_info",
5502 space_info->flags, num_bytes, 0);
5503 spin_unlock(&space_info->lock);
5504}
5505
5506/*
5507 * This is for newly allocated space that isn't accounted in
5508 * bytes_may_use yet, e.g. when we allocate a chunk or unpin an extent;
5509 * it hands the new space directly to any pending tickets.
5510 */
5511static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5512 struct btrfs_space_info *space_info,
5513 u64 num_bytes)
5514{
5515 struct reserve_ticket *ticket;
5516 struct list_head *head = &space_info->priority_tickets;
5517
5518again:
5519 while (!list_empty(head) && num_bytes) {
5520 ticket = list_first_entry(head, struct reserve_ticket,
5521 list);
5522 if (num_bytes >= ticket->bytes) {
5523 trace_btrfs_space_reservation(fs_info, "space_info",
5524 space_info->flags,
5525 ticket->bytes, 1);
5526 list_del_init(&ticket->list);
5527 num_bytes -= ticket->bytes;
5528 update_bytes_may_use(space_info, ticket->bytes);
5529 ticket->bytes = 0;
5530 space_info->tickets_id++;
5531 wake_up(&ticket->wait);
5532 } else {
5533 trace_btrfs_space_reservation(fs_info, "space_info",
5534 space_info->flags,
5535 num_bytes, 1);
5536 update_bytes_may_use(space_info, num_bytes);
5537 ticket->bytes -= num_bytes;
5538 num_bytes = 0;
5539 }
5540 }
5541
5542 if (num_bytes && head == &space_info->priority_tickets) {
5543 head = &space_info->tickets;
5544 goto again;
5545 }
5546}
5547
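/*
 * Shrink @block_rsv by @num_bytes of its size ((u64)-1 means all of it)
 * and hand any reserved bytes above the new size to @dest if it still has
 * room, returning whatever is left over to the space_info so pending
 * tickets can be satisfied.  The number of bytes given up is returned,
 * along with how much qgroup reservation the caller may now release.
 */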
5548static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5549 struct btrfs_block_rsv *block_rsv,
5550 struct btrfs_block_rsv *dest, u64 num_bytes,
5551 u64 *qgroup_to_release_ret)
5552{
5553 struct btrfs_space_info *space_info = block_rsv->space_info;
5554 u64 qgroup_to_release = 0;
5555 u64 ret;
5556
5557 spin_lock(&block_rsv->lock);
5558 if (num_bytes == (u64)-1) {
5559 num_bytes = block_rsv->size;
5560 qgroup_to_release = block_rsv->qgroup_rsv_size;
5561 }
5562 block_rsv->size -= num_bytes;
5563 if (block_rsv->reserved >= block_rsv->size) {
5564 num_bytes = block_rsv->reserved - block_rsv->size;
5565 block_rsv->reserved = block_rsv->size;
5566 block_rsv->full = 1;
5567 } else {
5568 num_bytes = 0;
5569 }
5570 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5571 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5572 block_rsv->qgroup_rsv_size;
5573 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5574 } else {
5575 qgroup_to_release = 0;
5576 }
5577 spin_unlock(&block_rsv->lock);
5578
5579 ret = num_bytes;
5580 if (num_bytes > 0) {
5581 if (dest) {
5582 spin_lock(&dest->lock);
5583 if (!dest->full) {
5584 u64 bytes_to_add;
5585
5586 bytes_to_add = dest->size - dest->reserved;
5587 bytes_to_add = min(num_bytes, bytes_to_add);
5588 dest->reserved += bytes_to_add;
5589 if (dest->reserved >= dest->size)
5590 dest->full = 1;
5591 num_bytes -= bytes_to_add;
5592 }
5593 spin_unlock(&dest->lock);
5594 }
5595 if (num_bytes)
5596 space_info_add_old_bytes(fs_info, space_info,
5597 num_bytes);
5598 }
5599 if (qgroup_to_release_ret)
5600 *qgroup_to_release_ret = qgroup_to_release;
5601 return ret;
5602}
5603
5604int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5605 struct btrfs_block_rsv *dst, u64 num_bytes,
5606 bool update_size)
5607{
5608 int ret;
5609
5610 ret = block_rsv_use_bytes(src, num_bytes);
5611 if (ret)
5612 return ret;
5613
5614 block_rsv_add_bytes(dst, num_bytes, update_size);
5615 return 0;
5616}
5617
5618void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5619{
5620 memset(rsv, 0, sizeof(*rsv));
5621 spin_lock_init(&rsv->lock);
5622 rsv->type = type;
5623}
5624
5625void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
5626 struct btrfs_block_rsv *rsv,
5627 unsigned short type)
5628{
5629 btrfs_init_block_rsv(rsv, type);
5630 rsv->space_info = __find_space_info(fs_info,
5631 BTRFS_BLOCK_GROUP_METADATA);
5632}
5633
5634struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
5635 unsigned short type)
5636{
5637 struct btrfs_block_rsv *block_rsv;
5638
5639 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5640 if (!block_rsv)
5641 return NULL;
5642
5643 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
5644 return block_rsv;
5645}
5646
5647void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
5648 struct btrfs_block_rsv *rsv)
5649{
5650 if (!rsv)
5651 return;
5652 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5653 kfree(rsv);
5654}
5655
5656int btrfs_block_rsv_add(struct btrfs_root *root,
5657 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5658 enum btrfs_reserve_flush_enum flush)
5659{
5660 int ret;
5661
5662 if (num_bytes == 0)
5663 return 0;
5664
5665 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5666 if (!ret)
5667 block_rsv_add_bytes(block_rsv, num_bytes, true);
5668
5669 return ret;
5670}
5671
5672int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
5673{
5674 u64 num_bytes = 0;
5675 int ret = -ENOSPC;
5676
5677 if (!block_rsv)
5678 return 0;
5679
5680 spin_lock(&block_rsv->lock);
5681 num_bytes = div_factor(block_rsv->size, min_factor);
5682 if (block_rsv->reserved >= num_bytes)
5683 ret = 0;
5684 spin_unlock(&block_rsv->lock);
5685
5686 return ret;
5687}
5688
5689int btrfs_block_rsv_refill(struct btrfs_root *root,
5690 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5691 enum btrfs_reserve_flush_enum flush)
5692{
5693 u64 num_bytes = 0;
5694 int ret = -ENOSPC;
5695
5696 if (!block_rsv)
5697 return 0;
5698
5699 spin_lock(&block_rsv->lock);
5700 num_bytes = min_reserved;
5701 if (block_rsv->reserved >= num_bytes)
5702 ret = 0;
5703 else
5704 num_bytes -= block_rsv->reserved;
5705 spin_unlock(&block_rsv->lock);
5706
5707 if (!ret)
5708 return 0;
5709
5710 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5711 if (!ret) {
5712 block_rsv_add_bytes(block_rsv, num_bytes, false);
5713 return 0;
5714 }
5715
5716 return ret;
5717}
5718
5719static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5720 struct btrfs_block_rsv *block_rsv,
5721 u64 num_bytes, u64 *qgroup_to_release)
5722{
5723 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5724 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5725 struct btrfs_block_rsv *target = delayed_rsv;
5726
5727 if (target->full || target == block_rsv)
5728 target = global_rsv;
5729
5730 if (block_rsv->space_info != target->space_info)
5731 target = NULL;
5732
5733 return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
5734 qgroup_to_release);
5735}
5736
5737void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5738 struct btrfs_block_rsv *block_rsv,
5739 u64 num_bytes)
5740{
5741 __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
5742}
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
5756{
5757 struct btrfs_fs_info *fs_info = inode->root->fs_info;
5758 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5759 u64 released = 0;
5760 u64 qgroup_to_release = 0;
5761
5762 /*
5763 * Since we statically set the block_rsv->size we just want to say we
5764 * are releasing 0 bytes, and then we'll just get the reservation over
5765 * the size freed.
5766 */
5767 released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
5768 &qgroup_to_release);
5769 if (released > 0)
5770 trace_btrfs_space_reservation(fs_info, "delalloc",
5771 btrfs_ino(inode), released, 0);
5772 if (qgroup_free)
5773 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
5774 else
5775 btrfs_qgroup_convert_reserved_meta(inode->root,
5776 qgroup_to_release);
5777}
5778
5779/*
5780 * btrfs_delayed_refs_rsv_release - release a ref head's reservation
5781 * @fs_info - the fs_info for our fs
5782 * @nr - the number of items to drop
5783 *
5784 * This drops the delayed ref head's count from the delayed refs rsv
5785 * and frees any excess reservation we had.
5786 */
5787void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
5788{
5789 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5790 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5791 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
5792 u64 released = 0;
5793
5794 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
5795 num_bytes, NULL);
5796 if (released)
5797 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5798 0, released, 0);
5799}
5800
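/*
 * Recalculate the target size of the global block reserve from the sizes
 * of the extent, csum and root trees, then top it up from (or return the
 * excess to) the metadata space_info accordingly.
 */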
5801static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5802{
5803 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5804 struct btrfs_space_info *sinfo = block_rsv->space_info;
5805 u64 num_bytes;
5806
5807
5808
5809
5810
5811
5812 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5813 btrfs_root_used(&fs_info->csum_root->root_item) +
5814 btrfs_root_used(&fs_info->tree_root->root_item);
5815 num_bytes = max_t(u64, num_bytes, SZ_16M);
5816
5817 spin_lock(&sinfo->lock);
5818 spin_lock(&block_rsv->lock);
5819
5820 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5821
5822 if (block_rsv->reserved < block_rsv->size) {
5823 num_bytes = btrfs_space_info_used(sinfo, true);
5824 if (sinfo->total_bytes > num_bytes) {
5825 num_bytes = sinfo->total_bytes - num_bytes;
5826 num_bytes = min(num_bytes,
5827 block_rsv->size - block_rsv->reserved);
5828 block_rsv->reserved += num_bytes;
5829 update_bytes_may_use(sinfo, num_bytes);
5830 trace_btrfs_space_reservation(fs_info, "space_info",
5831 sinfo->flags, num_bytes,
5832 1);
5833 }
5834 } else if (block_rsv->reserved > block_rsv->size) {
5835 num_bytes = block_rsv->reserved - block_rsv->size;
5836 update_bytes_may_use(sinfo, -num_bytes);
5837 trace_btrfs_space_reservation(fs_info, "space_info",
5838 sinfo->flags, num_bytes, 0);
5839 block_rsv->reserved = block_rsv->size;
5840 }
5841
5842 if (block_rsv->reserved == block_rsv->size)
5843 block_rsv->full = 1;
5844 else
5845 block_rsv->full = 0;
5846
5847 spin_unlock(&block_rsv->lock);
5848 spin_unlock(&sinfo->lock);
5849}
5850
5851static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5852{
5853 struct btrfs_space_info *space_info;
5854
5855 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5856 fs_info->chunk_block_rsv.space_info = space_info;
5857
5858 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5859 fs_info->global_block_rsv.space_info = space_info;
5860 fs_info->trans_block_rsv.space_info = space_info;
5861 fs_info->empty_block_rsv.space_info = space_info;
5862 fs_info->delayed_block_rsv.space_info = space_info;
5863 fs_info->delayed_refs_rsv.space_info = space_info;
5864
5865 fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
5866 fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
5867 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5868 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5869 if (fs_info->quota_root)
5870 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5871 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5872
5873 update_global_block_rsv(fs_info);
5874}
5875
5876static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5877{
5878 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5879 (u64)-1, NULL);
5880 WARN_ON(fs_info->trans_block_rsv.size > 0);
5881 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5882 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5883 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5884 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5885 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5886 WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
5887 WARN_ON(fs_info->delayed_refs_rsv.size > 0);
5888}
5889
5890/*
5891 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
5892 * @trans - the trans that may have generated delayed refs
5893 *
5894 * To be called anytime we may have adjusted trans->delayed_ref_updates;
5895 * it calculates the additional size and adds it to the delayed_refs_rsv.
5896 */
5897void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
5898{
5899 struct btrfs_fs_info *fs_info = trans->fs_info;
5900 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5901 u64 num_bytes;
5902
5903 if (!trans->delayed_ref_updates)
5904 return;
5905
5906 num_bytes = btrfs_calc_trans_metadata_size(fs_info,
5907 trans->delayed_ref_updates);
5908 spin_lock(&delayed_rsv->lock);
5909 delayed_rsv->size += num_bytes;
5910 delayed_rsv->full = 0;
5911 spin_unlock(&delayed_rsv->lock);
5912 trans->delayed_ref_updates = 0;
5913}
5914
5915/*
5916 * To be called after all the new block groups attached to the
5917 * transaction handle have been created (btrfs_create_pending_block_groups()).
5918 */
5919void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5920{
5921 struct btrfs_fs_info *fs_info = trans->fs_info;
5922
5923 if (!trans->chunk_bytes_reserved)
5924 return;
5925
5926 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5927
5928 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5929 trans->chunk_bytes_reserved, NULL);
5930 trans->chunk_bytes_reserved = 0;
5931}
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5948 struct btrfs_block_rsv *rsv, int items,
5949 bool use_global_rsv)
5950{
5951 u64 qgroup_num_bytes = 0;
5952 u64 num_bytes;
5953 int ret;
5954 struct btrfs_fs_info *fs_info = root->fs_info;
5955 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5956
5957 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
5958 /* One for parent inode, two for dir entries */
5959 qgroup_num_bytes = 3 * fs_info->nodesize;
5960 ret = btrfs_qgroup_reserve_meta_prealloc(root,
5961 qgroup_num_bytes, true);
5962 if (ret)
5963 return ret;
5964 }
5965
5966 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
5967 rsv->space_info = __find_space_info(fs_info,
5968 BTRFS_BLOCK_GROUP_METADATA);
5969 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5970 BTRFS_RESERVE_FLUSH_ALL);
5971
5972 if (ret == -ENOSPC && use_global_rsv)
5973 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
5974
5975 if (ret && qgroup_num_bytes)
5976 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5977
5978 return ret;
5979}
5980
5981void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
5982 struct btrfs_block_rsv *rsv)
5983{
5984 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5985}
5986
5987static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
5988 struct btrfs_inode *inode)
5989{
5990 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5991 u64 reserve_size = 0;
5992 u64 qgroup_rsv_size = 0;
5993 u64 csum_leaves;
5994 unsigned outstanding_extents;
5995
5996 lockdep_assert_held(&inode->lock);
5997 outstanding_extents = inode->outstanding_extents;
5998 if (outstanding_extents)
5999 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
6000 outstanding_extents + 1);
6001 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
6002 inode->csum_bytes);
6003 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6004 csum_leaves);
6005 /*
6006 * For the qgroup rsv the calculation is very simple: account one
6007 * nodesize for each outstanding extent.
6008 *
6009 * This is an overestimate in most cases.
6010 */
6011 qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
6012
6013 spin_lock(&block_rsv->lock);
6014 block_rsv->size = reserve_size;
6015 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
6016 spin_unlock(&block_rsv->lock);
6017}
6018
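/*
 * Worst-case metadata cost of a delalloc reservation: one item per
 * maximally sized extent the range could split into, the leaves needed to
 * hold its checksums, plus one more item for the inode update, all costed
 * via btrfs_calc_trans_metadata_size().  Illustrative example: a 1MiB
 * buffered write counts as a single extent plus its csum leaves, so only
 * a handful of tree items are reserved up front.
 */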
6019static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
6020 u64 num_bytes, u64 *meta_reserve,
6021 u64 *qgroup_reserve)
6022{
6023 u64 nr_extents = count_max_extents(num_bytes);
6024 u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
6025
6026 /* We add one for the inode update at finish-ordered time */
6027 *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
6028 nr_extents + csum_leaves + 1);
6029 *qgroup_reserve = nr_extents * fs_info->nodesize;
6030}
6031
6032int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6033{
6034 struct btrfs_root *root = inode->root;
6035 struct btrfs_fs_info *fs_info = root->fs_info;
6036 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6037 u64 meta_reserve, qgroup_reserve;
6038 unsigned nr_extents;
6039 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6040 int ret = 0;
6041 bool delalloc_lock = true;
6042
6043 /*
6044 * If we are a free space inode we need to not flush since we will be
6045 * in the middle of a transaction commit.  We also don't need the
6046 * delalloc mutex since we won't race with anybody there.
6047 *
6048 * If we have a transaction open (can happen if we call truncate_block
6049 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
6050 */
6051 if (btrfs_is_free_space_inode(inode)) {
6052 flush = BTRFS_RESERVE_NO_FLUSH;
6053 delalloc_lock = false;
6054 } else {
6055 if (current->journal_info)
6056 flush = BTRFS_RESERVE_FLUSH_LIMIT;
6057
6058 if (btrfs_transaction_in_commit(fs_info))
6059 schedule_timeout(1);
6060 }
6061
6062 if (delalloc_lock)
6063 mutex_lock(&inode->delalloc_mutex);
6064
6065 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6066 /*
6067 * We always want to reserve up front; every other way ends in tears.
6068 * Pre-reserving the amount we are going to add will always be the
6069 * right way, because otherwise, with enough parallelism, we could end
6070 * up with thousands of inodes all holding little bits of reservations
6071 * they were able to make previously, and the only way to reclaim that
6072 * space would be to ENOSPC out the operations and clear everything
6073 * out and try again, which is bad.  This way we just over-reserve
6074 * slightly and clean up the mess when we are done.  The qgroup bytes
6075 * are reserved first so a metadata failure can undo both cleanly.
6076 */
6077 calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
6078 &qgroup_reserve);
6079 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
6080 if (ret)
6081 goto out_fail;
6082 ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
6083 if (ret)
6084 goto out_qgroup;
6085
6086 /*
6087 * Now we need to update our outstanding extents and csum bytes _first_
6088 * and then add the reservation to the block_rsv.  This keeps us from
6089 * racing with an ordered completion or some such that would think it
6090 * needs to free the reservation we just made.
6091 */
6092 spin_lock(&inode->lock);
6093 nr_extents = count_max_extents(num_bytes);
6094 btrfs_mod_outstanding_extents(inode, nr_extents);
6095 inode->csum_bytes += num_bytes;
6096 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6097 spin_unlock(&inode->lock);
6098
6099 /* Now we can safely add our space to our block rsv */
6100 block_rsv_add_bytes(block_rsv, meta_reserve, false);
6101 trace_btrfs_space_reservation(root->fs_info, "delalloc",
6102 btrfs_ino(inode), meta_reserve, 1);
6103
6104 spin_lock(&block_rsv->lock);
6105 block_rsv->qgroup_rsv_reserved += qgroup_reserve;
6106 spin_unlock(&block_rsv->lock);
6107
6108 if (delalloc_lock)
6109 mutex_unlock(&inode->delalloc_mutex);
6110 return 0;
6111out_qgroup:
6112 btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
6113out_fail:
6114 btrfs_inode_rsv_release(inode, true);
6115 if (delalloc_lock)
6116 mutex_unlock(&inode->delalloc_mutex);
6117 return ret;
6118}
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6131 bool qgroup_free)
6132{
6133 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6134
6135 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6136 spin_lock(&inode->lock);
6137 inode->csum_bytes -= num_bytes;
6138 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6139 spin_unlock(&inode->lock);
6140
6141 if (btrfs_is_testing(fs_info))
6142 return;
6143
6144 btrfs_inode_rsv_release(inode, qgroup_free);
6145}
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158
6159void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6160 bool qgroup_free)
6161{
6162 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6163 unsigned num_extents;
6164
6165 spin_lock(&inode->lock);
6166 num_extents = count_max_extents(num_bytes);
6167 btrfs_mod_outstanding_extents(inode, -num_extents);
6168 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6169 spin_unlock(&inode->lock);
6170
6171 if (btrfs_is_testing(fs_info))
6172 return;
6173
6174 btrfs_inode_rsv_release(inode, qgroup_free);
6175}
6176
6177
6178
6179
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196
6197
6198
6199
6200
6201
6202int btrfs_delalloc_reserve_space(struct inode *inode,
6203 struct extent_changeset **reserved, u64 start, u64 len)
6204{
6205 int ret;
6206
6207 ret = btrfs_check_data_free_space(inode, reserved, start, len);
6208 if (ret < 0)
6209 return ret;
6210 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
6211 if (ret < 0)
6212 btrfs_free_reserved_data_space(inode, *reserved, start, len);
6213 return ret;
6214}
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228void btrfs_delalloc_release_space(struct inode *inode,
6229 struct extent_changeset *reserved,
6230 u64 start, u64 len, bool qgroup_free)
6231{
6232 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
6233 btrfs_free_reserved_data_space(inode, reserved, start, len);
6234}
6235
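/*
 * Account an allocation (alloc != 0) or a free against the block group(s)
 * covering [bytenr, bytenr + num_bytes): adjust the super block's
 * bytes_used, the per block group used counter and the space_info
 * counters, pin freed ranges until the transaction commits, and make sure
 * each touched block group is on the transaction's dirty list so its item
 * gets written back.
 */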
6236static int update_block_group(struct btrfs_trans_handle *trans,
6237 u64 bytenr, u64 num_bytes, int alloc)
6238{
6239 struct btrfs_fs_info *info = trans->fs_info;
6240 struct btrfs_block_group_cache *cache = NULL;
6241 u64 total = num_bytes;
6242 u64 old_val;
6243 u64 byte_in_group;
6244 int factor;
6245 int ret = 0;
6246
6247 /* Block accounting for the super block */
6248 spin_lock(&info->delalloc_root_lock);
6249 old_val = btrfs_super_bytes_used(info->super_copy);
6250 if (alloc)
6251 old_val += num_bytes;
6252 else
6253 old_val -= num_bytes;
6254 btrfs_set_super_bytes_used(info->super_copy, old_val);
6255 spin_unlock(&info->delalloc_root_lock);
6256
6257 while (total) {
6258 cache = btrfs_lookup_block_group(info, bytenr);
6259 if (!cache) {
6260 ret = -ENOENT;
6261 break;
6262 }
6263 factor = btrfs_bg_type_to_factor(cache->flags);
6264
6265 /*
6266 * If this block group has free space cache written out, we need to
6267 * make sure to load it if we are removing space.  This is because we
6268 * need the unpinning stage to actually add the space back to the block
6269 * group, otherwise we will leak space.
6270 */
6271 if (!alloc && cache->cached == BTRFS_CACHE_NO)
6272 cache_block_group(cache, 1);
6273
6274 byte_in_group = bytenr - cache->key.objectid;
6275 WARN_ON(byte_in_group > cache->key.offset);
6276
6277 spin_lock(&cache->space_info->lock);
6278 spin_lock(&cache->lock);
6279
6280 if (btrfs_test_opt(info, SPACE_CACHE) &&
6281 cache->disk_cache_state < BTRFS_DC_CLEAR)
6282 cache->disk_cache_state = BTRFS_DC_CLEAR;
6283
6284 old_val = btrfs_block_group_used(&cache->item);
6285 num_bytes = min(total, cache->key.offset - byte_in_group);
6286 if (alloc) {
6287 old_val += num_bytes;
6288 btrfs_set_block_group_used(&cache->item, old_val);
6289 cache->reserved -= num_bytes;
6290 cache->space_info->bytes_reserved -= num_bytes;
6291 cache->space_info->bytes_used += num_bytes;
6292 cache->space_info->disk_used += num_bytes * factor;
6293 spin_unlock(&cache->lock);
6294 spin_unlock(&cache->space_info->lock);
6295 } else {
6296 old_val -= num_bytes;
6297 btrfs_set_block_group_used(&cache->item, old_val);
6298 cache->pinned += num_bytes;
6299 update_bytes_pinned(cache->space_info, num_bytes);
6300 cache->space_info->bytes_used -= num_bytes;
6301 cache->space_info->disk_used -= num_bytes * factor;
6302 spin_unlock(&cache->lock);
6303 spin_unlock(&cache->space_info->lock);
6304
6305 trace_btrfs_space_reservation(info, "pinned",
6306 cache->space_info->flags,
6307 num_bytes, 1);
6308 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6309 num_bytes,
6310 BTRFS_TOTAL_BYTES_PINNED_BATCH);
6311 set_extent_dirty(info->pinned_extents,
6312 bytenr, bytenr + num_bytes - 1,
6313 GFP_NOFS | __GFP_NOFAIL);
6314 }
6315
6316 spin_lock(&trans->transaction->dirty_bgs_lock);
6317 if (list_empty(&cache->dirty_list)) {
6318 list_add_tail(&cache->dirty_list,
6319 &trans->transaction->dirty_bgs);
6320 trans->delayed_ref_updates++;
6321 btrfs_get_block_group(cache);
6322 }
6323 spin_unlock(&trans->transaction->dirty_bgs_lock);
6324
6325 /*
6326 * No longer having used bytes in this block group, we queue it for
6327 * deletion.  We do this after adding the block group to the dirty
6328 * list to avoid races between the cleaner kthread and the space
6329 * cache writeout.
6330 */
6331 if (!alloc && old_val == 0)
6332 btrfs_mark_bg_unused(cache);
6333
6334 btrfs_put_block_group(cache);
6335 total -= num_bytes;
6336 bytenr += num_bytes;
6337 }
6338
6339 /* Modified block groups are accounted for in the delayed_refs_rsv */
6340 btrfs_update_delayed_refs_rsv(trans);
6341 return ret;
6342}
6343
6344static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
6345{
6346 struct btrfs_block_group_cache *cache;
6347 u64 bytenr;
6348
6349 spin_lock(&fs_info->block_group_cache_lock);
6350 bytenr = fs_info->first_logical_byte;
6351 spin_unlock(&fs_info->block_group_cache_lock);
6352
6353 if (bytenr < (u64)-1)
6354 return bytenr;
6355
6356 cache = btrfs_lookup_first_block_group(fs_info, search_start);
6357 if (!cache)
6358 return 0;
6359
6360 bytenr = cache->key.objectid;
6361 btrfs_put_block_group(cache);
6362
6363 return bytenr;
6364}
6365
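/*
 * Move @num_bytes in @cache from reserved (if @reserved) to pinned and
 * mark the range dirty in the pinned_extents tree; the space only becomes
 * usable again once the transaction commits and the range is unpinned.
 */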
6366static int pin_down_extent(struct btrfs_block_group_cache *cache,
6367 u64 bytenr, u64 num_bytes, int reserved)
6368{
6369 struct btrfs_fs_info *fs_info = cache->fs_info;
6370
6371 spin_lock(&cache->space_info->lock);
6372 spin_lock(&cache->lock);
6373 cache->pinned += num_bytes;
6374 update_bytes_pinned(cache->space_info, num_bytes);
6375 if (reserved) {
6376 cache->reserved -= num_bytes;
6377 cache->space_info->bytes_reserved -= num_bytes;
6378 }
6379 spin_unlock(&cache->lock);
6380 spin_unlock(&cache->space_info->lock);
6381
6382 trace_btrfs_space_reservation(fs_info, "pinned",
6383 cache->space_info->flags, num_bytes, 1);
6384 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6385 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
6386 set_extent_dirty(fs_info->pinned_extents, bytenr,
6387 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6388 return 0;
6389}
6390
6391/*
6392 * This function must be called within a transaction
6393 */
6394int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
6395 u64 bytenr, u64 num_bytes, int reserved)
6396{
6397 struct btrfs_block_group_cache *cache;
6398
6399 cache = btrfs_lookup_block_group(fs_info, bytenr);
6400 BUG_ON(!cache);
6401
6402 pin_down_extent(cache, bytenr, num_bytes, reserved);
6403
6404 btrfs_put_block_group(cache);
6405 return 0;
6406}
6407
6408/*
6409 * This function must be called within a transaction
6410 */
6411int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
6412 u64 bytenr, u64 num_bytes)
6413{
6414 struct btrfs_block_group_cache *cache;
6415 int ret;
6416
6417 cache = btrfs_lookup_block_group(fs_info, bytenr);
6418 if (!cache)
6419 return -EINVAL;
6420
6421 /*
6422 * Pull in the free space cache (if any) so that our pin removes the
6423 * free space from the cache.  We have load_only set to one because
6424 * the slow code that reads in the free extents does check the pinned
6425 * extents.
6426 */
6427 cache_block_group(cache, 1);
6428
6429 pin_down_extent(cache, bytenr, num_bytes, 0);
6430
6431 /* Remove us from the free space cache (if we're there at all) */
6432 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
6433 btrfs_put_block_group(cache);
6434 return ret;
6435}
6436
6437static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
6438 u64 start, u64 num_bytes)
6439{
6440 int ret;
6441 struct btrfs_block_group_cache *block_group;
6442 struct btrfs_caching_control *caching_ctl;
6443
6444 block_group = btrfs_lookup_block_group(fs_info, start);
6445 if (!block_group)
6446 return -EINVAL;
6447
6448 cache_block_group(block_group, 0);
6449 caching_ctl = get_caching_control(block_group);
6450
6451 if (!caching_ctl) {
6452
6453 BUG_ON(!block_group_cache_done(block_group));
6454 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6455 } else {
6456 mutex_lock(&caching_ctl->mutex);
6457
6458 if (start >= caching_ctl->progress) {
6459 ret = add_excluded_extent(fs_info, start, num_bytes);
6460 } else if (start + num_bytes <= caching_ctl->progress) {
6461 ret = btrfs_remove_free_space(block_group,
6462 start, num_bytes);
6463 } else {
6464 num_bytes = caching_ctl->progress - start;
6465 ret = btrfs_remove_free_space(block_group,
6466 start, num_bytes);
6467 if (ret)
6468 goto out_lock;
6469
6470 num_bytes = (start + num_bytes) -
6471 caching_ctl->progress;
6472 start = caching_ctl->progress;
6473 ret = add_excluded_extent(fs_info, start, num_bytes);
6474 }
6475out_lock:
6476 mutex_unlock(&caching_ctl->mutex);
6477 put_caching_control(caching_ctl);
6478 }
6479 btrfs_put_block_group(block_group);
6480 return ret;
6481}
6482
6483int btrfs_exclude_logged_extents(struct extent_buffer *eb)
6484{
6485 struct btrfs_fs_info *fs_info = eb->fs_info;
6486 struct btrfs_file_extent_item *item;
6487 struct btrfs_key key;
6488 int found_type;
6489 int i;
6490 int ret = 0;
6491
6492 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
6493 return 0;
6494
6495 for (i = 0; i < btrfs_header_nritems(eb); i++) {
6496 btrfs_item_key_to_cpu(eb, &key, i);
6497 if (key.type != BTRFS_EXTENT_DATA_KEY)
6498 continue;
6499 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
6500 found_type = btrfs_file_extent_type(eb, item);
6501 if (found_type == BTRFS_FILE_EXTENT_INLINE)
6502 continue;
6503 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
6504 continue;
6505 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
6506 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
6507 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
6508 if (ret)
6509 break;
6510 }
6511
6512 return ret;
6513}
6514
6515static void
6516btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
6517{
6518 atomic_inc(&bg->reservations);
6519}
6520
6521void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
6522 const u64 start)
6523{
6524 struct btrfs_block_group_cache *bg;
6525
6526 bg = btrfs_lookup_block_group(fs_info, start);
6527 ASSERT(bg);
6528 if (atomic_dec_and_test(&bg->reservations))
6529 wake_up_var(&bg->reservations);
6530 btrfs_put_block_group(bg);
6531}
6532
6533void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6534{
6535 struct btrfs_space_info *space_info = bg->space_info;
6536
6537 ASSERT(bg->ro);
6538
6539 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
6540 return;
6541
6542
6543
6544
6545
6546
6547
6548
6549
6550
6551
6552 down_write(&space_info->groups_sem);
6553 up_write(&space_info->groups_sem);
6554
6555 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
6556}
6557
6558/*
6559 * btrfs_add_reserved_bytes - update the block_group and space info counters
6560 * @cache:	the cache we are manipulating
6561 * @ram_bytes:  the number of bytes of file content, the same as
6562 *              @num_bytes except on the compression path
6563 * @num_bytes:	the number of bytes in question
6564 * @delalloc:   the blocks are allocated for a delalloc write
6565 *
6566 * This is called by the allocator when it reserves space.  If the
6567 * block group has become read only we cannot make the reservation and
6568 * return -EAGAIN; otherwise this function always succeeds.
6569 */
6570static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6571 u64 ram_bytes, u64 num_bytes, int delalloc)
6572{
6573 struct btrfs_space_info *space_info = cache->space_info;
6574 int ret = 0;
6575
6576 spin_lock(&space_info->lock);
6577 spin_lock(&cache->lock);
6578 if (cache->ro) {
6579 ret = -EAGAIN;
6580 } else {
6581 cache->reserved += num_bytes;
6582 space_info->bytes_reserved += num_bytes;
6583 update_bytes_may_use(space_info, -ram_bytes);
6584 if (delalloc)
6585 cache->delalloc_bytes += num_bytes;
6586 }
6587 spin_unlock(&cache->lock);
6588 spin_unlock(&space_info->lock);
6589 return ret;
6590}
6591
6592/*
6593 * btrfs_free_reserved_bytes - update the block_group and space info counters
6594 * @cache:      the cache we are manipulating
6595 * @num_bytes:  the number of bytes in question
6596 * @delalloc:   the blocks are allocated for a delalloc write
6597 *
6598 * This is called by somebody who is freeing space that was never
6599 * actually used on disk.  For example if you reserve some space for a
6600 * new leaf in transaction A and before transaction A commits you free
6601 * that leaf, you call this with reserved set to 0 in order to clear
6602 * the reservation.
6603 */
6604static void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
6605 u64 num_bytes, int delalloc)
6606{
6607 struct btrfs_space_info *space_info = cache->space_info;
6608
6609 spin_lock(&space_info->lock);
6610 spin_lock(&cache->lock);
6611 if (cache->ro)
6612 space_info->bytes_readonly += num_bytes;
6613 cache->reserved -= num_bytes;
6614 space_info->bytes_reserved -= num_bytes;
6615 space_info->max_extent_size = 0;
6616
6617 if (delalloc)
6618 cache->delalloc_bytes -= num_bytes;
6619 spin_unlock(&cache->lock);
6620 spin_unlock(&space_info->lock);
6621}

6622void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
6623{
6624 struct btrfs_caching_control *next;
6625 struct btrfs_caching_control *caching_ctl;
6626 struct btrfs_block_group_cache *cache;
6627
6628 down_write(&fs_info->commit_root_sem);
6629
6630 list_for_each_entry_safe(caching_ctl, next,
6631 &fs_info->caching_block_groups, list) {
6632 cache = caching_ctl->block_group;
6633 if (block_group_cache_done(cache)) {
6634 cache->last_byte_to_unpin = (u64)-1;
6635 list_del_init(&caching_ctl->list);
6636 put_caching_control(caching_ctl);
6637 } else {
6638 cache->last_byte_to_unpin = caching_ctl->progress;
6639 }
6640 }
6641
6642 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6643 fs_info->pinned_extents = &fs_info->freed_extents[1];
6644 else
6645 fs_info->pinned_extents = &fs_info->freed_extents[0];
6646
6647 up_write(&fs_info->commit_root_sem);
6648
6649 update_global_block_rsv(fs_info);
6650}
6651
6652/*
6653 * Returns the free cluster for the given space info and sets
6654 * empty_cluster to what it should be based on the mount options.
6655 */
6656static struct btrfs_free_cluster *
6657fetch_cluster_info(struct btrfs_fs_info *fs_info,
6658 struct btrfs_space_info *space_info, u64 *empty_cluster)
6659{
6660 struct btrfs_free_cluster *ret = NULL;
6661
6662 *empty_cluster = 0;
6663 if (btrfs_mixed_space_info(space_info))
6664 return ret;
6665
6666 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
6667 ret = &fs_info->meta_alloc_cluster;
6668 if (btrfs_test_opt(fs_info, SSD))
6669 *empty_cluster = SZ_2M;
6670 else
6671 *empty_cluster = SZ_64K;
6672 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
6673 btrfs_test_opt(fs_info, SSD_SPREAD)) {
6674 *empty_cluster = SZ_2M;
6675 ret = &fs_info->data_alloc_cluster;
6676 }
6677
6678 return ret;
6679}
6680
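/*
 * Walk [start, end] block group by block group and return the previously
 * pinned space: give it back to the free space cache (when
 * return_free_space is set and the range is below last_byte_to_unpin),
 * drop the pinned counters, and let the freed bytes refill the global
 * reserve before satisfying any pending reservation tickets.
 */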
6681static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6682 u64 start, u64 end,
6683 const bool return_free_space)
6684{
6685 struct btrfs_block_group_cache *cache = NULL;
6686 struct btrfs_space_info *space_info;
6687 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6688 struct btrfs_free_cluster *cluster = NULL;
6689 u64 len;
6690 u64 total_unpinned = 0;
6691 u64 empty_cluster = 0;
6692 bool readonly;
6693
6694 while (start <= end) {
6695 readonly = false;
6696 if (!cache ||
6697 start >= cache->key.objectid + cache->key.offset) {
6698 if (cache)
6699 btrfs_put_block_group(cache);
6700 total_unpinned = 0;
6701 cache = btrfs_lookup_block_group(fs_info, start);
6702 BUG_ON(!cache);
6703
6704 cluster = fetch_cluster_info(fs_info,
6705 cache->space_info,
6706 &empty_cluster);
6707 empty_cluster <<= 1;
6708 }
6709
6710 len = cache->key.objectid + cache->key.offset - start;
6711 len = min(len, end + 1 - start);
6712
6713 if (start < cache->last_byte_to_unpin) {
6714 len = min(len, cache->last_byte_to_unpin - start);
6715 if (return_free_space)
6716 btrfs_add_free_space(cache, start, len);
6717 }
6718
6719 start += len;
6720 total_unpinned += len;
6721 space_info = cache->space_info;
6722
6723 /*
6724 * If this space cluster has been marked as fragmented and we've
6725 * unpinned enough in this block group to potentially allow a
6726 * cluster to be created inside of it, go ahead and clear the
6727 * fragmented flag.
6728 */
6729 if (cluster && cluster->fragmented &&
6730 total_unpinned > empty_cluster) {
6731 spin_lock(&cluster->lock);
6732 cluster->fragmented = 0;
6733 spin_unlock(&cluster->lock);
6734 }
6735
6736 spin_lock(&space_info->lock);
6737 spin_lock(&cache->lock);
6738 cache->pinned -= len;
6739 update_bytes_pinned(space_info, -len);
6740
6741 trace_btrfs_space_reservation(fs_info, "pinned",
6742 space_info->flags, len, 0);
6743 space_info->max_extent_size = 0;
6744 percpu_counter_add_batch(&space_info->total_bytes_pinned,
6745 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
6746 if (cache->ro) {
6747 space_info->bytes_readonly += len;
6748 readonly = true;
6749 }
6750 spin_unlock(&cache->lock);
6751 if (!readonly && return_free_space &&
6752 global_rsv->space_info == space_info) {
6753 u64 to_add = len;
6754
6755 spin_lock(&global_rsv->lock);
6756 if (!global_rsv->full) {
6757 to_add = min(len, global_rsv->size -
6758 global_rsv->reserved);
6759 global_rsv->reserved += to_add;
6760 update_bytes_may_use(space_info, to_add);
6761 if (global_rsv->reserved >= global_rsv->size)
6762 global_rsv->full = 1;
6763 trace_btrfs_space_reservation(fs_info,
6764 "space_info",
6765 space_info->flags,
6766 to_add, 1);
6767 len -= to_add;
6768 }
6769 spin_unlock(&global_rsv->lock);
6770
6771 if (len)
6772 space_info_add_new_bytes(fs_info, space_info,
6773 len);
6774 }
6775 spin_unlock(&space_info->lock);
6776 }
6777
6778 if (cache)
6779 btrfs_put_block_group(cache);
6780 return 0;
6781}
6782
6783int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
6784{
6785 struct btrfs_fs_info *fs_info = trans->fs_info;
6786 struct btrfs_block_group_cache *block_group, *tmp;
6787 struct list_head *deleted_bgs;
6788 struct extent_io_tree *unpin;
6789 u64 start;
6790 u64 end;
6791 int ret;
6792
6793 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6794 unpin = &fs_info->freed_extents[1];
6795 else
6796 unpin = &fs_info->freed_extents[0];
6797
6798 while (!trans->aborted) {
6799 struct extent_state *cached_state = NULL;
6800
6801 mutex_lock(&fs_info->unused_bg_unpin_mutex);
6802 ret = find_first_extent_bit(unpin, 0, &start, &end,
6803 EXTENT_DIRTY, &cached_state);
6804 if (ret) {
6805 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6806 break;
6807 }
6808
6809 if (btrfs_test_opt(fs_info, DISCARD))
6810 ret = btrfs_discard_extent(fs_info, start,
6811 end + 1 - start, NULL);
6812
6813 clear_extent_dirty(unpin, start, end, &cached_state);
6814 unpin_extent_range(fs_info, start, end, true);
6815 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6816 free_extent_state(cached_state);
6817 cond_resched();
6818 }
6819
6820
6821 /*
6822 * Transaction is finished.  We don't need the lock anymore.  We do
6823 * need to clean up the block groups in case of a transaction abort.
6824 */
6825 deleted_bgs = &trans->transaction->deleted_bgs;
6826 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6827 u64 trimmed = 0;
6828
6829 ret = -EROFS;
6830 if (!trans->aborted)
6831 ret = btrfs_discard_extent(fs_info,
6832 block_group->key.objectid,
6833 block_group->key.offset,
6834 &trimmed);
6835
6836 list_del_init(&block_group->bg_list);
6837 btrfs_put_block_group_trimming(block_group);
6838 btrfs_put_block_group(block_group);
6839
6840 if (ret) {
6841 const char *errstr = btrfs_decode_error(ret);
6842 btrfs_warn(fs_info,
6843 "discard failed while removing blockgroup: errno=%d %s",
6844 ret, errstr);
6845 }
6846 }
6847
6848 return 0;
6849}
6850
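/*
 * Drop @refs_to_drop references held by (@root_objectid, @owner_objectid,
 * @owner_offset) or @parent on the extent described by @node.  The inline
 * or keyed backref is removed (or its count decreased), and once the
 * extent item's reference count reaches zero the item itself is deleted
 * and the extent is pinned until the transaction commits.
 */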
6851static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6852 struct btrfs_delayed_ref_node *node, u64 parent,
6853 u64 root_objectid, u64 owner_objectid,
6854 u64 owner_offset, int refs_to_drop,
6855 struct btrfs_delayed_extent_op *extent_op)
6856{
6857 struct btrfs_fs_info *info = trans->fs_info;
6858 struct btrfs_key key;
6859 struct btrfs_path *path;
6860 struct btrfs_root *extent_root = info->extent_root;
6861 struct extent_buffer *leaf;
6862 struct btrfs_extent_item *ei;
6863 struct btrfs_extent_inline_ref *iref;
6864 int ret;
6865 int is_data;
6866 int extent_slot = 0;
6867 int found_extent = 0;
6868 int num_to_del = 1;
6869 u32 item_size;
6870 u64 refs;
6871 u64 bytenr = node->bytenr;
6872 u64 num_bytes = node->num_bytes;
6873 int last_ref = 0;
6874 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
6875
6876 path = btrfs_alloc_path();
6877 if (!path)
6878 return -ENOMEM;
6879
6880 path->reada = READA_FORWARD;
6881 path->leave_spinning = 1;
6882
6883 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6884 BUG_ON(!is_data && refs_to_drop != 1);
6885
6886 if (is_data)
6887 skinny_metadata = false;
6888
6889 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
6890 parent, root_objectid, owner_objectid,
6891 owner_offset);
6892 if (ret == 0) {
6893 extent_slot = path->slots[0];
6894 while (extent_slot >= 0) {
6895 btrfs_item_key_to_cpu(path->nodes[0], &key,
6896 extent_slot);
6897 if (key.objectid != bytenr)
6898 break;
6899 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6900 key.offset == num_bytes) {
6901 found_extent = 1;
6902 break;
6903 }
6904 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6905 key.offset == owner_objectid) {
6906 found_extent = 1;
6907 break;
6908 }
6909 if (path->slots[0] - extent_slot > 5)
6910 break;
6911 extent_slot--;
6912 }
6913
6914 if (!found_extent) {
6915 BUG_ON(iref);
6916 ret = remove_extent_backref(trans, path, NULL,
6917 refs_to_drop,
6918 is_data, &last_ref);
6919 if (ret) {
6920 btrfs_abort_transaction(trans, ret);
6921 goto out;
6922 }
6923 btrfs_release_path(path);
6924 path->leave_spinning = 1;
6925
6926 key.objectid = bytenr;
6927 key.type = BTRFS_EXTENT_ITEM_KEY;
6928 key.offset = num_bytes;
6929
6930 if (!is_data && skinny_metadata) {
6931 key.type = BTRFS_METADATA_ITEM_KEY;
6932 key.offset = owner_objectid;
6933 }
6934
6935 ret = btrfs_search_slot(trans, extent_root,
6936 &key, path, -1, 1);
6937 if (ret > 0 && skinny_metadata && path->slots[0]) {
6938
6939
6940
6941
6942 path->slots[0]--;
6943 btrfs_item_key_to_cpu(path->nodes[0], &key,
6944 path->slots[0]);
6945 if (key.objectid == bytenr &&
6946 key.type == BTRFS_EXTENT_ITEM_KEY &&
6947 key.offset == num_bytes)
6948 ret = 0;
6949 }
6950
6951 if (ret > 0 && skinny_metadata) {
6952 skinny_metadata = false;
6953 key.objectid = bytenr;
6954 key.type = BTRFS_EXTENT_ITEM_KEY;
6955 key.offset = num_bytes;
6956 btrfs_release_path(path);
6957 ret = btrfs_search_slot(trans, extent_root,
6958 &key, path, -1, 1);
6959 }
6960
6961 if (ret) {
6962 btrfs_err(info,
6963 "umm, got %d back from search, was looking for %llu",
6964 ret, bytenr);
6965 if (ret > 0)
6966 btrfs_print_leaf(path->nodes[0]);
6967 }
6968 if (ret < 0) {
6969 btrfs_abort_transaction(trans, ret);
6970 goto out;
6971 }
6972 extent_slot = path->slots[0];
6973 }
6974 } else if (WARN_ON(ret == -ENOENT)) {
6975 btrfs_print_leaf(path->nodes[0]);
6976 btrfs_err(info,
6977 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
6978 bytenr, parent, root_objectid, owner_objectid,
6979 owner_offset);
6980 btrfs_abort_transaction(trans, ret);
6981 goto out;
6982 } else {
6983 btrfs_abort_transaction(trans, ret);
6984 goto out;
6985 }
6986
6987 leaf = path->nodes[0];
6988 item_size = btrfs_item_size_nr(leaf, extent_slot);
6989 if (unlikely(item_size < sizeof(*ei))) {
6990 ret = -EINVAL;
6991 btrfs_print_v0_err(info);
6992 btrfs_abort_transaction(trans, ret);
6993 goto out;
6994 }
6995 ei = btrfs_item_ptr(leaf, extent_slot,
6996 struct btrfs_extent_item);
6997 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
6998 key.type == BTRFS_EXTENT_ITEM_KEY) {
6999 struct btrfs_tree_block_info *bi;
7000 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
7001 bi = (struct btrfs_tree_block_info *)(ei + 1);
7002 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
7003 }
7004
7005 refs = btrfs_extent_refs(leaf, ei);
7006 if (refs < refs_to_drop) {
7007 btrfs_err(info,
7008 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
7009 refs_to_drop, refs, bytenr);
7010 ret = -EINVAL;
7011 btrfs_abort_transaction(trans, ret);
7012 goto out;
7013 }
7014 refs -= refs_to_drop;
7015
7016 if (refs > 0) {
7017 if (extent_op)
7018 __run_delayed_extent_op(extent_op, leaf, ei);
7019
7020
7021
7022
7023 if (iref) {
7024 BUG_ON(!found_extent);
7025 } else {
7026 btrfs_set_extent_refs(leaf, ei, refs);
7027 btrfs_mark_buffer_dirty(leaf);
7028 }
7029 if (found_extent) {
7030 ret = remove_extent_backref(trans, path, iref,
7031 refs_to_drop, is_data,
7032 &last_ref);
7033 if (ret) {
7034 btrfs_abort_transaction(trans, ret);
7035 goto out;
7036 }
7037 }
7038 } else {
7039 if (found_extent) {
7040 BUG_ON(is_data && refs_to_drop !=
7041 extent_data_ref_count(path, iref));
7042 if (iref) {
7043 BUG_ON(path->slots[0] != extent_slot);
7044 } else {
7045 BUG_ON(path->slots[0] != extent_slot + 1);
7046 path->slots[0] = extent_slot;
7047 num_to_del = 2;
7048 }
7049 }
7050
7051 last_ref = 1;
7052 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
7053 num_to_del);
7054 if (ret) {
7055 btrfs_abort_transaction(trans, ret);
7056 goto out;
7057 }
7058 btrfs_release_path(path);
7059
7060 if (is_data) {
7061 ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
7062 if (ret) {
7063 btrfs_abort_transaction(trans, ret);
7064 goto out;
7065 }
7066 }
7067
7068 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
7069 if (ret) {
7070 btrfs_abort_transaction(trans, ret);
7071 goto out;
7072 }
7073
7074 ret = update_block_group(trans, bytenr, num_bytes, 0);
7075 if (ret) {
7076 btrfs_abort_transaction(trans, ret);
7077 goto out;
7078 }
7079 }
7080 btrfs_release_path(path);
7081
7082out:
7083 btrfs_free_path(path);
7084 return ret;
7085}
7086
7087
7088
7089
7090
7091
7092
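/*
 * When we free a block it is likely that we are also dropping the last
 * delayed ref for that extent.  Look the extent up in the delayed ref
 * tree and, if its head has no other refs or extent op pending, remove
 * the head here instead of running it.  Returns 1 if the removed head had
 * must_insert_reserved set (the caller still owns the reserved extent),
 * 0 otherwise.
 */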
7093static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
7094 u64 bytenr)
7095{
7096 struct btrfs_delayed_ref_head *head;
7097 struct btrfs_delayed_ref_root *delayed_refs;
7098 int ret = 0;
7099
7100 delayed_refs = &trans->transaction->delayed_refs;
7101 spin_lock(&delayed_refs->lock);
7102 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
7103 if (!head)
7104 goto out_delayed_unlock;
7105
7106 spin_lock(&head->lock);
7107 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
7108 goto out;
7109
7110 if (cleanup_extent_op(head) != NULL)
7111 goto out;
7112
7113
7114
7115
7116
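/*
 * Blocking on the head mutex here could deadlock with the thread that is
 * currently running this ref head, so only trylock; if somebody else
 * holds it they are already processing (and will drop) the head anyway.
 */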
7117 if (!mutex_trylock(&head->mutex))
7118 goto out;
7119
7120 btrfs_delete_ref_head(delayed_refs, head);
7121 head->processing = 0;
7122
7123 spin_unlock(&head->lock);
7124 spin_unlock(&delayed_refs->lock);
7125
7126 BUG_ON(head->extent_op);
7127 if (head->must_insert_reserved)
7128 ret = 1;
7129
7130 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
7131 mutex_unlock(&head->mutex);
7132 btrfs_put_delayed_ref_head(head);
7133 return ret;
7134out:
7135 spin_unlock(&head->lock);
7136
7137out_delayed_unlock:
7138 spin_unlock(&delayed_refs->lock);
7139 return 0;
7140}
7141
7142void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7143 struct btrfs_root *root,
7144 struct extent_buffer *buf,
7145 u64 parent, int last_ref)
7146{
7147 struct btrfs_fs_info *fs_info = root->fs_info;
7148 struct btrfs_ref generic_ref = { 0 };
7149 int pin = 1;
7150 int ret;
7151
7152 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
7153 buf->start, buf->len, parent);
7154 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
7155 root->root_key.objectid);
7156
7157 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7158 int old_ref_mod, new_ref_mod;
7159
7160 btrfs_ref_tree_mod(fs_info, &generic_ref);
7161 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
7162 &old_ref_mod, &new_ref_mod);
7163 BUG_ON(ret);
7164 pin = old_ref_mod >= 0 && new_ref_mod < 0;
7165 }
7166
7167 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
7168 struct btrfs_block_group_cache *cache;
7169
7170 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7171 ret = check_ref_cleanup(trans, buf->start);
7172 if (!ret)
7173 goto out;
7174 }
7175
7176 pin = 0;
7177 cache = btrfs_lookup_block_group(fs_info, buf->start);
7178
7179 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
7180 pin_down_extent(cache, buf->start, buf->len, 1);
7181 btrfs_put_block_group(cache);
7182 goto out;
7183 }
7184
7185 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7186
7187 btrfs_add_free_space(cache, buf->start, buf->len);
7188 btrfs_free_reserved_bytes(cache, buf->len, 0);
7189 btrfs_put_block_group(cache);
7190 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
7191 }
7192out:
7193 if (pin)
7194 add_pinned_bytes(fs_info, &generic_ref, 1);
7195
7196 if (last_ref) {
7197
7198
7199
7200
7201 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
7202 }
7203}
7204
7205
7206int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
7207{
7208 struct btrfs_fs_info *fs_info = trans->fs_info;
7209 int old_ref_mod, new_ref_mod;
7210 int ret;
7211
7212 if (btrfs_is_testing(fs_info))
7213 return 0;
7214
7215
7216
7217
7218
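/*
 * Tree log extents are never added to the extent allocation tree, so
 * instead of queueing a delayed ref to drop them we just pin the range
 * down directly.
 */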
7219 if ((ref->type == BTRFS_REF_METADATA &&
7220 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
7221 (ref->type == BTRFS_REF_DATA &&
7222 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
7223
7224 btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
7225 old_ref_mod = new_ref_mod = 0;
7226 ret = 0;
7227 } else if (ref->type == BTRFS_REF_METADATA) {
7228 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL,
7229 &old_ref_mod, &new_ref_mod);
7230 } else {
7231 ret = btrfs_add_delayed_data_ref(trans, ref, 0,
7232 &old_ref_mod, &new_ref_mod);
7233 }
7234
7235 if (!((ref->type == BTRFS_REF_METADATA &&
7236 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
7237 (ref->type == BTRFS_REF_DATA &&
7238 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
7239 btrfs_ref_tree_mod(fs_info, ref);
7240
7241 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
7242 add_pinned_bytes(fs_info, ref, 1);
7243
7244 return ret;
7245}
7246
7247
7248
7249
7250
7251
7252
7253
7254
7255
7256
7257
7258
7259
7260
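/*
 * Wait for the caching kthread of this block group to either finish or to
 * make at least num_bytes of free space visible, so a failed allocation
 * attempt can be retried with some hope of progress.
 */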
7261static noinline void
7262wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
7263 u64 num_bytes)
7264{
7265 struct btrfs_caching_control *caching_ctl;
7266
7267 caching_ctl = get_caching_control(cache);
7268 if (!caching_ctl)
7269 return;
7270
7271 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
7272 (cache->free_space_ctl->free_space >= num_bytes));
7273
7274 put_caching_control(caching_ctl);
7275}
7276
7277static noinline int
7278wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7279{
7280 struct btrfs_caching_control *caching_ctl;
7281 int ret = 0;
7282
7283 caching_ctl = get_caching_control(cache);
7284 if (!caching_ctl)
7285 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
7286
7287 wait_event(caching_ctl->wait, block_group_cache_done(cache));
7288 if (cache->cached == BTRFS_CACHE_ERROR)
7289 ret = -EIO;
7290 put_caching_control(caching_ctl);
7291 return ret;
7292}
7293
7294enum btrfs_loop_type {
7295 LOOP_CACHING_NOWAIT = 0,
7296 LOOP_CACHING_WAIT = 1,
7297 LOOP_ALLOC_CHUNK = 2,
7298 LOOP_NO_EMPTY_SIZE = 3,
7299};
7300
7301static inline void
7302btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
7303 int delalloc)
7304{
7305 if (delalloc)
7306 down_read(&cache->data_rwsem);
7307}
7308
7309static inline void
7310btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
7311 int delalloc)
7312{
7313 btrfs_get_block_group(cache);
7314 if (delalloc)
7315 down_read(&cache->data_rwsem);
7316}
7317
7318static struct btrfs_block_group_cache *
7319btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7320 struct btrfs_free_cluster *cluster,
7321 int delalloc)
7322{
7323 struct btrfs_block_group_cache *used_bg = NULL;
7324
7325 spin_lock(&cluster->refill_lock);
7326 while (1) {
7327 used_bg = cluster->block_group;
7328 if (!used_bg)
7329 return NULL;
7330
7331 if (used_bg == block_group)
7332 return used_bg;
7333
7334 btrfs_get_block_group(used_bg);
7335
7336 if (!delalloc)
7337 return used_bg;
7338
7339 if (down_read_trylock(&used_bg->data_rwsem))
7340 return used_bg;
7341
7342 spin_unlock(&cluster->refill_lock);
7343
7344
7345 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
7346
7347 spin_lock(&cluster->refill_lock);
7348 if (used_bg == cluster->block_group)
7349 return used_bg;
7350
7351 up_read(&used_bg->data_rwsem);
7352 btrfs_put_block_group(used_bg);
7353 }
7354}
7355
7356static inline void
7357btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7358 int delalloc)
7359{
7360 if (delalloc)
7361 up_read(&cache->data_rwsem);
7362 btrfs_put_block_group(cache);
7363}
7364
7365
7366
7367
7368
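/*
 * State shared between find_free_extent() and its clustered/unclustered
 * helpers: the allocation request (ram_bytes, num_bytes, empty_size,
 * flags, delalloc), the current search position, RAID index and loop
 * stage, per-block-group retry flags, and the results (found_offset,
 * max_extent_size, total_free_space).
 */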
7369struct find_free_extent_ctl {
7370
7371 u64 ram_bytes;
7372 u64 num_bytes;
7373 u64 empty_size;
7374 u64 flags;
7375 int delalloc;
7376
7377
7378 u64 search_start;
7379
7380
7381 u64 empty_cluster;
7382
7383 bool have_caching_bg;
7384 bool orig_have_caching_bg;
7385
7386
7387 int index;
7388
7389
7390
7391
7392 int loop;
7393
7394
7395
7396
7397
7398 bool retry_clustered;
7399
7400
7401
7402
7403
7404 bool retry_unclustered;
7405
7406
7407 int cached;
7408
7409
7410 u64 max_extent_size;
7411
7412
7413 u64 total_free_space;
7414
7415
7416 u64 found_offset;
7417};
7418
7419
7420
7421
7422
7423
7424
7425
7426
7427
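/*
 * Try to allocate from the block group's free space cluster, refilling it
 * if necessary.  Returns 0 with ffe_ctl->found_offset set on success
 * (*cluster_bg_ret may then point at a different block group than @bg),
 * -EAGAIN if the caller should wait for caching progress and retry,
 * -ENOENT when clustered allocation is no longer allowed
 * (LOOP_NO_EMPTY_SIZE), or >0 to fall back to an unclustered search.
 */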
7428static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
7429 struct btrfs_free_cluster *last_ptr,
7430 struct find_free_extent_ctl *ffe_ctl,
7431 struct btrfs_block_group_cache **cluster_bg_ret)
7432{
7433 struct btrfs_block_group_cache *cluster_bg;
7434 u64 aligned_cluster;
7435 u64 offset;
7436 int ret;
7437
7438 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
7439 if (!cluster_bg)
7440 goto refill_cluster;
7441 if (cluster_bg != bg && (cluster_bg->ro ||
7442 !block_group_bits(cluster_bg, ffe_ctl->flags)))
7443 goto release_cluster;
7444
7445 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
7446 ffe_ctl->num_bytes, cluster_bg->key.objectid,
7447 &ffe_ctl->max_extent_size);
7448 if (offset) {
7449
7450 spin_unlock(&last_ptr->refill_lock);
7451 trace_btrfs_reserve_extent_cluster(cluster_bg,
7452 ffe_ctl->search_start, ffe_ctl->num_bytes);
7453 *cluster_bg_ret = cluster_bg;
7454 ffe_ctl->found_offset = offset;
7455 return 0;
7456 }
7457 WARN_ON(last_ptr->block_group != cluster_bg);
7458
7459release_cluster:
7460
7461
7462
7463
7464
7465
7466
7467
7468
7469
7470
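/*
 * At LOOP_NO_EMPTY_SIZE we are not allowed to set up a new cluster, so if
 * the cluster we found belongs to some other block group just drop it and
 * let the unclustered allocator pick whatever it can find in @bg.
 */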
7471 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
7472 spin_unlock(&last_ptr->refill_lock);
7473 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
7474 return -ENOENT;
7475 }
7476
7477
7478 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7479
7480 if (cluster_bg != bg)
7481 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
7482
7483refill_cluster:
7484 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
7485 spin_unlock(&last_ptr->refill_lock);
7486 return -ENOENT;
7487 }
7488
7489 aligned_cluster = max_t(u64,
7490 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
7491 bg->full_stripe_len);
7492 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
7493 ffe_ctl->num_bytes, aligned_cluster);
7494 if (ret == 0) {
7495
7496 offset = btrfs_alloc_from_cluster(bg, last_ptr,
7497 ffe_ctl->num_bytes, ffe_ctl->search_start,
7498 &ffe_ctl->max_extent_size);
7499 if (offset) {
7500
7501 spin_unlock(&last_ptr->refill_lock);
7502 trace_btrfs_reserve_extent_cluster(bg,
7503 ffe_ctl->search_start,
7504 ffe_ctl->num_bytes);
7505 ffe_ctl->found_offset = offset;
7506 return 0;
7507 }
7508 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
7509 !ffe_ctl->retry_clustered) {
7510 spin_unlock(&last_ptr->refill_lock);
7511
7512 ffe_ctl->retry_clustered = true;
7513 wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
7514 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
7515 return -EAGAIN;
7516 }
7517
7518
7519
7520
7521
7522 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7523 spin_unlock(&last_ptr->refill_lock);
7524 return 1;
7525}
7526
7527
7528
7529
7530
7531
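/*
 * Unclustered allocation straight from the block group's free space
 * cache.  Returns 0 with ffe_ctl->found_offset set on success, -EAGAIN if
 * the caller should wait for caching progress and retry, or >0 when this
 * block group cannot satisfy the request.
 */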
7532static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
7533 struct btrfs_free_cluster *last_ptr,
7534 struct find_free_extent_ctl *ffe_ctl)
7535{
7536 u64 offset;
7537
7538
7539
7540
7541
7542
7543 if (unlikely(last_ptr)) {
7544 spin_lock(&last_ptr->lock);
7545 last_ptr->fragmented = 1;
7546 spin_unlock(&last_ptr->lock);
7547 }
7548 if (ffe_ctl->cached) {
7549 struct btrfs_free_space_ctl *free_space_ctl;
7550
7551 free_space_ctl = bg->free_space_ctl;
7552 spin_lock(&free_space_ctl->tree_lock);
7553 if (free_space_ctl->free_space <
7554 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
7555 ffe_ctl->empty_size) {
7556 ffe_ctl->total_free_space = max_t(u64,
7557 ffe_ctl->total_free_space,
7558 free_space_ctl->free_space);
7559 spin_unlock(&free_space_ctl->tree_lock);
7560 return 1;
7561 }
7562 spin_unlock(&free_space_ctl->tree_lock);
7563 }
7564
7565 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
7566 ffe_ctl->num_bytes, ffe_ctl->empty_size,
7567 &ffe_ctl->max_extent_size);
7568
7569
7570
7571
7572
7573
7574
7575
7576
7577
7578 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
7579 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
7580 wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
7581 ffe_ctl->empty_size);
7582 ffe_ctl->retry_unclustered = true;
7583 return -EAGAIN;
7584 } else if (!offset) {
7585 return 1;
7586 }
7587 ffe_ctl->found_offset = offset;
7588 return 0;
7589}
7590
7591
7592
7593
7594
7595
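/*
 * Called once every block group of the current RAID index has been tried.
 * Returns 0 when @ins has been filled in, >0 when the caller should
 * restart the search (next RAID index or next loop stage, possibly after
 * force-allocating a chunk), or -ENOSPC when all stages are exhausted.
 */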
7596static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
7597 struct btrfs_free_cluster *last_ptr,
7598 struct btrfs_key *ins,
7599 struct find_free_extent_ctl *ffe_ctl,
7600 int full_search, bool use_cluster)
7601{
7602 struct btrfs_root *root = fs_info->extent_root;
7603 int ret;
7604
7605 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
7606 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
7607 ffe_ctl->orig_have_caching_bg = true;
7608
7609 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
7610 ffe_ctl->have_caching_bg)
7611 return 1;
7612
7613 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
7614 return 1;
7615
7616 if (ins->objectid) {
7617 if (!use_cluster && last_ptr) {
7618 spin_lock(&last_ptr->lock);
7619 last_ptr->window_start = ins->objectid;
7620 spin_unlock(&last_ptr->lock);
7621 }
7622 return 0;
7623 }
7624
7625
7626
7627
7628
7629
7630
7631
7632
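/*
 * Advance to the next loop stage:
 *  LOOP_CACHING_NOWAIT - search partially cached block groups, kicking
 *                        off caching as we go
 *  LOOP_CACHING_WAIT   - search everything, waiting on block groups that
 *                        are still caching
 *  LOOP_ALLOC_CHUNK    - force a chunk allocation and try again
 *  LOOP_NO_EMPTY_SIZE  - retry with empty_size and empty_cluster set to 0
 */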
7633 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
7634 ffe_ctl->index = 0;
7635 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
7636
7637
7638
7639
7640
7641 if (ffe_ctl->orig_have_caching_bg || !full_search)
7642 ffe_ctl->loop = LOOP_CACHING_WAIT;
7643 else
7644 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
7645 } else {
7646 ffe_ctl->loop++;
7647 }
7648
7649 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
7650 struct btrfs_trans_handle *trans;
7651 int exist = 0;
7652
7653 trans = current->journal_info;
7654 if (trans)
7655 exist = 1;
7656 else
7657 trans = btrfs_join_transaction(root);
7658
7659 if (IS_ERR(trans)) {
7660 ret = PTR_ERR(trans);
7661 return ret;
7662 }
7663
7664 ret = do_chunk_alloc(trans, ffe_ctl->flags,
7665 CHUNK_ALLOC_FORCE);
7666
7667
7668
7669
7670
7671
7672 if (ret == -ENOSPC)
7673 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
7674
7675
7676 if (ret < 0 && ret != -ENOSPC)
7677 btrfs_abort_transaction(trans, ret);
7678 else
7679 ret = 0;
7680 if (!exist)
7681 btrfs_end_transaction(trans);
7682 if (ret)
7683 return ret;
7684 }
7685
7686 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
7687
7688
7689
7690
7691 if (ffe_ctl->empty_size == 0 &&
7692 ffe_ctl->empty_cluster == 0)
7693 return -ENOSPC;
7694 ffe_ctl->empty_size = 0;
7695 ffe_ctl->empty_cluster = 0;
7696 }
7697 return 1;
7698 }
7699 return -ENOSPC;
7700}
7701
7726
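/*
 * Walk the block groups looking for a free extent of @num_bytes.  On
 * success @ins records the reservation: ins->objectid is the start of the
 * extent and ins->offset its size.  Blocks before the computed
 * search_start are skipped.  On -ENOSPC, ins->offset is set to the
 * largest free extent seen so the caller can retry with a smaller size.
 */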
7727static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
7728 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7729 u64 hint_byte, struct btrfs_key *ins,
7730 u64 flags, int delalloc)
7731{
7732 int ret = 0;
7733 struct btrfs_free_cluster *last_ptr = NULL;
7734 struct btrfs_block_group_cache *block_group = NULL;
7735 struct find_free_extent_ctl ffe_ctl = {0};
7736 struct btrfs_space_info *space_info;
7737 bool use_cluster = true;
7738 bool full_search = false;
7739
7740 WARN_ON(num_bytes < fs_info->sectorsize);
7741
7742 ffe_ctl.ram_bytes = ram_bytes;
7743 ffe_ctl.num_bytes = num_bytes;
7744 ffe_ctl.empty_size = empty_size;
7745 ffe_ctl.flags = flags;
7746 ffe_ctl.search_start = 0;
7747 ffe_ctl.retry_clustered = false;
7748 ffe_ctl.retry_unclustered = false;
7749 ffe_ctl.delalloc = delalloc;
7750 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
7751 ffe_ctl.have_caching_bg = false;
7752 ffe_ctl.orig_have_caching_bg = false;
7753 ffe_ctl.found_offset = 0;
7754
7755 ins->type = BTRFS_EXTENT_ITEM_KEY;
7756 ins->objectid = 0;
7757 ins->offset = 0;
7758
7759 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
7760
7761 space_info = __find_space_info(fs_info, flags);
7762 if (!space_info) {
7763 btrfs_err(fs_info, "No space info for %llu", flags);
7764 return -ENOSPC;
7765 }
7766
7767
7768
7769
7770
7771
7772
7773
7774
7775
7776
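/*
 * If a previous search already recorded the largest contiguous extent in
 * this space_info, fail immediately when the request is bigger than that,
 * and otherwise skip clustering: with space this fragmented a cluster is
 * unlikely to be set up and would only slow the search down.
 */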
7777 if (unlikely(space_info->max_extent_size)) {
7778 spin_lock(&space_info->lock);
7779 if (space_info->max_extent_size &&
7780 num_bytes > space_info->max_extent_size) {
7781 ins->offset = space_info->max_extent_size;
7782 spin_unlock(&space_info->lock);
7783 return -ENOSPC;
7784 } else if (space_info->max_extent_size) {
7785 use_cluster = false;
7786 }
7787 spin_unlock(&space_info->lock);
7788 }
7789
7790 last_ptr = fetch_cluster_info(fs_info, space_info,
7791 &ffe_ctl.empty_cluster);
7792 if (last_ptr) {
7793 spin_lock(&last_ptr->lock);
7794 if (last_ptr->block_group)
7795 hint_byte = last_ptr->window_start;
7796 if (last_ptr->fragmented) {
7797
7798
7799
7800
7801
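/*
 * The cluster is marked fragmented, so skip clustered allocation but keep
 * using the last window as the search hint to save some CPU.
 */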
7802 hint_byte = last_ptr->window_start;
7803 use_cluster = false;
7804 }
7805 spin_unlock(&last_ptr->lock);
7806 }
7807
7808 ffe_ctl.search_start = max(ffe_ctl.search_start,
7809 first_logical_byte(fs_info, 0));
7810 ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
7811 if (ffe_ctl.search_start == hint_byte) {
7812 block_group = btrfs_lookup_block_group(fs_info,
7813 ffe_ctl.search_start);
7814
7815
7816
7817
7818
7819
7820
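/*
 * Only jump straight to the hinted block group if it matches our
 * allocation bits and has at least started caching; otherwise fall back
 * to scanning the full group lists.
 */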
7821 if (block_group && block_group_bits(block_group, flags) &&
7822 block_group->cached != BTRFS_CACHE_NO) {
7823 down_read(&space_info->groups_sem);
7824 if (list_empty(&block_group->list) ||
7825 block_group->ro) {
7826
7827
7828
7829
7830
7831
7832 btrfs_put_block_group(block_group);
7833 up_read(&space_info->groups_sem);
7834 } else {
7835 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
7836 block_group->flags);
7837 btrfs_lock_block_group(block_group, delalloc);
7838 goto have_block_group;
7839 }
7840 } else if (block_group) {
7841 btrfs_put_block_group(block_group);
7842 }
7843 }
7844search:
7845 ffe_ctl.have_caching_bg = false;
7846 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
7847 ffe_ctl.index == 0)
7848 full_search = true;
7849 down_read(&space_info->groups_sem);
7850 list_for_each_entry(block_group,
7851 &space_info->block_groups[ffe_ctl.index], list) {
7852
7853 if (unlikely(block_group->ro))
7854 continue;
7855
7856 btrfs_grab_block_group(block_group, delalloc);
7857 ffe_ctl.search_start = block_group->key.objectid;
7858
7859
7860
7861
7862
7863
7864 if (!block_group_bits(block_group, flags)) {
7865 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7866 BTRFS_BLOCK_GROUP_RAID1 |
7867 BTRFS_BLOCK_GROUP_RAID5 |
7868 BTRFS_BLOCK_GROUP_RAID6 |
7869 BTRFS_BLOCK_GROUP_RAID10;
7870
7871
7872
7873
7874
7875
7876 if ((flags & extra) && !(block_group->flags & extra))
7877 goto loop;
7878 }
7879
7880have_block_group:
7881 ffe_ctl.cached = block_group_cache_done(block_group);
7882 if (unlikely(!ffe_ctl.cached)) {
7883 ffe_ctl.have_caching_bg = true;
7884 ret = cache_block_group(block_group, 0);
7885 BUG_ON(ret < 0);
7886 ret = 0;
7887 }
7888
7889 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
7890 goto loop;
7891
7892
7893
7894
7895
7896 if (last_ptr && use_cluster) {
7897 struct btrfs_block_group_cache *cluster_bg = NULL;
7898
7899 ret = find_free_extent_clustered(block_group, last_ptr,
7900 &ffe_ctl, &cluster_bg);
7901
7902 if (ret == 0) {
7903 if (cluster_bg && cluster_bg != block_group) {
7904 btrfs_release_block_group(block_group,
7905 delalloc);
7906 block_group = cluster_bg;
7907 }
7908 goto checks;
7909 } else if (ret == -EAGAIN) {
7910 goto have_block_group;
7911 } else if (ret > 0) {
7912 goto loop;
7913 }
7914
7915 }
7916
7917 ret = find_free_extent_unclustered(block_group, last_ptr,
7918 &ffe_ctl);
7919 if (ret == -EAGAIN)
7920 goto have_block_group;
7921 else if (ret > 0)
7922 goto loop;
7923
7924checks:
7925 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
7926 fs_info->stripesize);
7927
7928
7929 if (ffe_ctl.search_start + num_bytes >
7930 block_group->key.objectid + block_group->key.offset) {
7931 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
7932 num_bytes);
7933 goto loop;
7934 }
7935
7936 if (ffe_ctl.found_offset < ffe_ctl.search_start)
7937 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
7938 ffe_ctl.search_start - ffe_ctl.found_offset);
7939
7940 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7941 num_bytes, delalloc);
7942 if (ret == -EAGAIN) {
7943 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
7944 num_bytes);
7945 goto loop;
7946 }
7947 btrfs_inc_block_group_reservations(block_group);
7948
7949
7950 ins->objectid = ffe_ctl.search_start;
7951 ins->offset = num_bytes;
7952
7953 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
7954 num_bytes);
7955 btrfs_release_block_group(block_group, delalloc);
7956 break;
7957loop:
7958 ffe_ctl.retry_clustered = false;
7959 ffe_ctl.retry_unclustered = false;
7960 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
7961 ffe_ctl.index);
7962 btrfs_release_block_group(block_group, delalloc);
7963 cond_resched();
7964 }
7965 up_read(&space_info->groups_sem);
7966
7967 ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
7968 full_search, use_cluster);
7969 if (ret > 0)
7970 goto search;
7971
7972 if (ret == -ENOSPC) {
7973
7974
7975
7976
7977 if (!ffe_ctl.max_extent_size)
7978 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
7979 spin_lock(&space_info->lock);
7980 space_info->max_extent_size = ffe_ctl.max_extent_size;
7981 spin_unlock(&space_info->lock);
7982 ins->offset = ffe_ctl.max_extent_size;
7983 }
7984 return ret;
7985}
7986
7987#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
7988do { \
7989 struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
7990 spin_lock(&__rsv->lock); \
7991 btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
7992 __rsv->size, __rsv->reserved); \
7993 spin_unlock(&__rsv->lock); \
7994} while (0)
7995
7996static void dump_space_info(struct btrfs_fs_info *fs_info,
7997 struct btrfs_space_info *info, u64 bytes,
7998 int dump_block_groups)
7999{
8000 struct btrfs_block_group_cache *cache;
8001 int index = 0;
8002
8003 spin_lock(&info->lock);
8004 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
8005 info->flags,
8006 info->total_bytes - btrfs_space_info_used(info, true),
8007 info->full ? "" : "not ");
8008 btrfs_info(fs_info,
8009 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
8010 info->total_bytes, info->bytes_used, info->bytes_pinned,
8011 info->bytes_reserved, info->bytes_may_use,
8012 info->bytes_readonly);
8013 spin_unlock(&info->lock);
8014
8015 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
8016 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
8017 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
8018 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
8019 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
8020
8021 if (!dump_block_groups)
8022 return;
8023
8024 down_read(&info->groups_sem);
8025again:
8026 list_for_each_entry(cache, &info->block_groups[index], list) {
8027 spin_lock(&cache->lock);
8028 btrfs_info(fs_info,
8029 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
8030 cache->key.objectid, cache->key.offset,
8031 btrfs_block_group_used(&cache->item), cache->pinned,
8032 cache->reserved, cache->ro ? "[readonly]" : "");
8033 btrfs_dump_free_space(cache, bytes);
8034 spin_unlock(&cache->lock);
8035 }
8036 if (++index < BTRFS_NR_RAID_TYPES)
8037 goto again;
8038 up_read(&info->groups_sem);
8039}
8040
8085
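/*
 * Entry point of the extent allocator.  Tries to reserve an extent of
 * @num_bytes, retrying with smaller sizes on -ENOSPC until
 * @min_alloc_size is reached.  @ram_bytes is the uncompressed size the
 * extent will cover and only differs from @num_bytes for compressed
 * extents; @empty_size and @hint_byte are allocator hints and may be
 * ignored.  On success @ins describes the reservation; on -ENOSPC
 * ins->offset holds the largest hole that was found.
 */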
8086int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
8087 u64 num_bytes, u64 min_alloc_size,
8088 u64 empty_size, u64 hint_byte,
8089 struct btrfs_key *ins, int is_data, int delalloc)
8090{
8091 struct btrfs_fs_info *fs_info = root->fs_info;
8092 bool final_tried = num_bytes == min_alloc_size;
8093 u64 flags;
8094 int ret;
8095
8096 flags = get_alloc_profile_by_root(root, is_data);
8097again:
8098 WARN_ON(num_bytes < fs_info->sectorsize);
8099 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
8100 hint_byte, ins, flags, delalloc);
8101 if (!ret && !is_data) {
8102 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
8103 } else if (ret == -ENOSPC) {
8104 if (!final_tried && ins->offset) {
8105 num_bytes = min(num_bytes >> 1, ins->offset);
8106 num_bytes = round_down(num_bytes,
8107 fs_info->sectorsize);
8108 num_bytes = max(num_bytes, min_alloc_size);
8109 ram_bytes = num_bytes;
8110 if (num_bytes == min_alloc_size)
8111 final_tried = true;
8112 goto again;
8113 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8114 struct btrfs_space_info *sinfo;
8115
8116 sinfo = __find_space_info(fs_info, flags);
8117 btrfs_err(fs_info,
8118 "allocation failed flags %llu, wanted %llu",
8119 flags, num_bytes);
8120 if (sinfo)
8121 dump_space_info(fs_info, sinfo, num_bytes, 1);
8122 }
8123 }
8124
8125 return ret;
8126}
8127
8128static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8129 u64 start, u64 len,
8130 int pin, int delalloc)
8131{
8132 struct btrfs_block_group_cache *cache;
8133 int ret = 0;
8134
8135 cache = btrfs_lookup_block_group(fs_info, start);
8136 if (!cache) {
8137 btrfs_err(fs_info, "Unable to find block group for %llu",
8138 start);
8139 return -ENOSPC;
8140 }
8141
8142 if (pin)
8143 pin_down_extent(cache, start, len, 1);
8144 else {
8145 if (btrfs_test_opt(fs_info, DISCARD))
8146 ret = btrfs_discard_extent(fs_info, start, len, NULL);
8147 btrfs_add_free_space(cache, start, len);
8148 btrfs_free_reserved_bytes(cache, len, delalloc);
8149 trace_btrfs_reserved_extent_free(fs_info, start, len);
8150 }
8151
8152 btrfs_put_block_group(cache);
8153 return ret;
8154}
8155
8156int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8157 u64 start, u64 len, int delalloc)
8158{
8159 return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
8160}
8161
8162int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
8163 u64 start, u64 len)
8164{
8165 return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
8166}
8167
8168static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8169 u64 parent, u64 root_objectid,
8170 u64 flags, u64 owner, u64 offset,
8171 struct btrfs_key *ins, int ref_mod)
8172{
8173 struct btrfs_fs_info *fs_info = trans->fs_info;
8174 int ret;
8175 struct btrfs_extent_item *extent_item;
8176 struct btrfs_extent_inline_ref *iref;
8177 struct btrfs_path *path;
8178 struct extent_buffer *leaf;
8179 int type;
8180 u32 size;
8181
8182 if (parent > 0)
8183 type = BTRFS_SHARED_DATA_REF_KEY;
8184 else
8185 type = BTRFS_EXTENT_DATA_REF_KEY;
8186
8187 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
8188
8189 path = btrfs_alloc_path();
8190 if (!path)
8191 return -ENOMEM;
8192
8193 path->leave_spinning = 1;
8194 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8195 ins, size);
8196 if (ret) {
8197 btrfs_free_path(path);
8198 return ret;
8199 }
8200
8201 leaf = path->nodes[0];
8202 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8203 struct btrfs_extent_item);
8204 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
8205 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8206 btrfs_set_extent_flags(leaf, extent_item,
8207 flags | BTRFS_EXTENT_FLAG_DATA);
8208
8209 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8210 btrfs_set_extent_inline_ref_type(leaf, iref, type);
8211 if (parent > 0) {
8212 struct btrfs_shared_data_ref *ref;
8213 ref = (struct btrfs_shared_data_ref *)(iref + 1);
8214 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8215 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
8216 } else {
8217 struct btrfs_extent_data_ref *ref;
8218 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
8219 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
8220 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
8221 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
8222 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
8223 }
8224
8225 btrfs_mark_buffer_dirty(path->nodes[0]);
8226 btrfs_free_path(path);
8227
8228 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
8229 if (ret)
8230 return ret;
8231
8232 ret = update_block_group(trans, ins->objectid, ins->offset, 1);
8233 if (ret) {
8234 btrfs_err(fs_info, "update block group failed for %llu %llu",
8235 ins->objectid, ins->offset);
8236 BUG();
8237 }
8238 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
8239 return ret;
8240}
8241
8242static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
8243 struct btrfs_delayed_ref_node *node,
8244 struct btrfs_delayed_extent_op *extent_op)
8245{
8246 struct btrfs_fs_info *fs_info = trans->fs_info;
8247 int ret;
8248 struct btrfs_extent_item *extent_item;
8249 struct btrfs_key extent_key;
8250 struct btrfs_tree_block_info *block_info;
8251 struct btrfs_extent_inline_ref *iref;
8252 struct btrfs_path *path;
8253 struct extent_buffer *leaf;
8254 struct btrfs_delayed_tree_ref *ref;
8255 u32 size = sizeof(*extent_item) + sizeof(*iref);
8256 u64 num_bytes;
8257 u64 flags = extent_op->flags_to_set;
8258 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
8259
8260 ref = btrfs_delayed_node_to_tree_ref(node);
8261
8262 extent_key.objectid = node->bytenr;
8263 if (skinny_metadata) {
8264 extent_key.offset = ref->level;
8265 extent_key.type = BTRFS_METADATA_ITEM_KEY;
8266 num_bytes = fs_info->nodesize;
8267 } else {
8268 extent_key.offset = node->num_bytes;
8269 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
8270 size += sizeof(*block_info);
8271 num_bytes = node->num_bytes;
8272 }
8273
8274 path = btrfs_alloc_path();
8275 if (!path)
8276 return -ENOMEM;
8277
8278 path->leave_spinning = 1;
8279 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8280 &extent_key, size);
8281 if (ret) {
8282 btrfs_free_path(path);
8283 return ret;
8284 }
8285
8286 leaf = path->nodes[0];
8287 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8288 struct btrfs_extent_item);
8289 btrfs_set_extent_refs(leaf, extent_item, 1);
8290 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8291 btrfs_set_extent_flags(leaf, extent_item,
8292 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
8293
8294 if (skinny_metadata) {
8295 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8296 } else {
8297 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
8298 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
8299 btrfs_set_tree_block_level(leaf, block_info, ref->level);
8300 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
8301 }
8302
8303 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
8304 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
8305 btrfs_set_extent_inline_ref_type(leaf, iref,
8306 BTRFS_SHARED_BLOCK_REF_KEY);
8307 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
8308 } else {
8309 btrfs_set_extent_inline_ref_type(leaf, iref,
8310 BTRFS_TREE_BLOCK_REF_KEY);
8311 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
8312 }
8313
8314 btrfs_mark_buffer_dirty(leaf);
8315 btrfs_free_path(path);
8316
8317 ret = remove_from_free_space_tree(trans, extent_key.objectid,
8318 num_bytes);
8319 if (ret)
8320 return ret;
8321
8322 ret = update_block_group(trans, extent_key.objectid,
8323 fs_info->nodesize, 1);
8324 if (ret) {
8325 btrfs_err(fs_info, "update block group failed for %llu %llu",
8326 extent_key.objectid, extent_key.offset);
8327 BUG();
8328 }
8329
8330 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
8331 fs_info->nodesize);
8332 return ret;
8333}
8334
8335int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8336 struct btrfs_root *root, u64 owner,
8337 u64 offset, u64 ram_bytes,
8338 struct btrfs_key *ins)
8339{
8340 struct btrfs_ref generic_ref = { 0 };
8341 int ret;
8342
8343 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
8344
8345 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
8346 ins->objectid, ins->offset, 0);
8347 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
8348 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
8349 ret = btrfs_add_delayed_data_ref(trans, &generic_ref,
8350 ram_bytes, NULL, NULL);
8351 return ret;
8352}
8353
8354
8355
8356
8357
8358
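/*
 * Used by tree-log replay: re-insert the extent item for a file extent
 * that was allocated before the crash.  The range is excluded from the
 * in-progress caching, accounted as reserved in its block group, and the
 * extent item plus backref are inserted directly rather than through a
 * delayed ref.
 */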
8359int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8360 u64 root_objectid, u64 owner, u64 offset,
8361 struct btrfs_key *ins)
8362{
8363 struct btrfs_fs_info *fs_info = trans->fs_info;
8364 int ret;
8365 struct btrfs_block_group_cache *block_group;
8366 struct btrfs_space_info *space_info;
8367
8368
8369
8370
8371
8372 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8373 ret = __exclude_logged_extent(fs_info, ins->objectid,
8374 ins->offset);
8375 if (ret)
8376 return ret;
8377 }
8378
8379 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
8380 if (!block_group)
8381 return -EINVAL;
8382
8383 space_info = block_group->space_info;
8384 spin_lock(&space_info->lock);
8385 spin_lock(&block_group->lock);
8386 space_info->bytes_reserved += ins->offset;
8387 block_group->reserved += ins->offset;
8388 spin_unlock(&block_group->lock);
8389 spin_unlock(&space_info->lock);
8390
8391 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
8392 offset, ins, 1);
8393 btrfs_put_block_group(block_group);
8394 return ret;
8395}
8396
8397static struct extent_buffer *
8398btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8399 u64 bytenr, int level, u64 owner)
8400{
8401 struct btrfs_fs_info *fs_info = root->fs_info;
8402 struct extent_buffer *buf;
8403
8404 buf = btrfs_find_create_tree_block(fs_info, bytenr);
8405 if (IS_ERR(buf))
8406 return buf;
8407
8408
8409
8410
8411
8412
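/*
 * Extra sanity check in case the extent tree is corrupted and the
 * allocator hands back a tree block we already hold locked; taking the
 * lock again below would deadlock, so bail out with -EUCLEAN instead.
 */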
8413 if (buf->lock_owner == current->pid) {
8414 btrfs_err_rl(fs_info,
8415"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
8416 buf->start, btrfs_header_owner(buf), current->pid);
8417 free_extent_buffer(buf);
8418 return ERR_PTR(-EUCLEAN);
8419 }
8420
8421 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
8422 btrfs_tree_lock(buf);
8423 btrfs_clean_tree_block(buf);
8424 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
8425
8426 btrfs_set_lock_blocking_write(buf);
8427 set_extent_buffer_uptodate(buf);
8428
8429 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
8430 btrfs_set_header_level(buf, level);
8431 btrfs_set_header_bytenr(buf, buf->start);
8432 btrfs_set_header_generation(buf, trans->transid);
8433 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
8434 btrfs_set_header_owner(buf, owner);
8435 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
8436 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
8437 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
8438 buf->log_index = root->log_transid % 2;
8439
8440
8441
8442
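/*
 * Two log transactions can be in flight at once; use a different extent
 * bit (DIRTY vs NEW) per log index so their dirty pages can be told
 * apart.
 */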
8443 if (buf->log_index == 0)
8444 set_extent_dirty(&root->dirty_log_pages, buf->start,
8445 buf->start + buf->len - 1, GFP_NOFS);
8446 else
8447 set_extent_new(&root->dirty_log_pages, buf->start,
8448 buf->start + buf->len - 1);
8449 } else {
8450 buf->log_index = -1;
8451 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
8452 buf->start + buf->len - 1, GFP_NOFS);
8453 }
8454 trans->dirty = true;
8455
8456 return buf;
8457}
8458
8459static struct btrfs_block_rsv *
8460use_block_rsv(struct btrfs_trans_handle *trans,
8461 struct btrfs_root *root, u32 blocksize)
8462{
8463 struct btrfs_fs_info *fs_info = root->fs_info;
8464 struct btrfs_block_rsv *block_rsv;
8465 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
8466 int ret;
8467 bool global_updated = false;
8468
8469 block_rsv = get_block_rsv(trans, root);
8470
8471 if (unlikely(block_rsv->size == 0))
8472 goto try_reserve;
8473again:
8474 ret = block_rsv_use_bytes(block_rsv, blocksize);
8475 if (!ret)
8476 return block_rsv;
8477
8478 if (block_rsv->failfast)
8479 return ERR_PTR(ret);
8480
8481 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8482 global_updated = true;
8483 update_global_block_rsv(fs_info);
8484 goto again;
8485 }
8486
8487
8488
8489
8490
8491 if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
8492 btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8493 static DEFINE_RATELIMIT_STATE(_rs,
8494 DEFAULT_RATELIMIT_INTERVAL * 10,
8495 1);
8496 if (__ratelimit(&_rs))
8497 WARN(1, KERN_DEBUG
8498 "BTRFS: block rsv returned %d\n", ret);
8499 }
8500try_reserve:
8501 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8502 BTRFS_RESERVE_NO_FLUSH);
8503 if (!ret)
8504 return block_rsv;
8505
8506
8507
8508
8509
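/*
 * As a last resort steal the space from the global reserve, but only if
 * the reserve we were asked to use is not the global one itself and lives
 * in the same space_info.
 */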
8510 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8511 block_rsv->space_info == global_rsv->space_info) {
8512 ret = block_rsv_use_bytes(global_rsv, blocksize);
8513 if (!ret)
8514 return global_rsv;
8515 }
8516 return ERR_PTR(ret);
8517}
8518
8519static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8520 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8521{
8522 block_rsv_add_bytes(block_rsv, blocksize, false);
8523 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
8524}
8525
8526
8527
8528
8529
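/*
 * Allocate, lock and initialize a new tree block: take the metadata space
 * from the appropriate block reservation, reserve an extent for it and
 * queue the delayed ref that will insert its extent item (log tree blocks
 * never enter the extent tree, so they skip the delayed ref).
 */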
8530struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8531 struct btrfs_root *root,
8532 u64 parent, u64 root_objectid,
8533 const struct btrfs_disk_key *key,
8534 int level, u64 hint,
8535 u64 empty_size)
8536{
8537 struct btrfs_fs_info *fs_info = root->fs_info;
8538 struct btrfs_key ins;
8539 struct btrfs_block_rsv *block_rsv;
8540 struct extent_buffer *buf;
8541 struct btrfs_delayed_extent_op *extent_op;
8542 struct btrfs_ref generic_ref = { 0 };
8543 u64 flags = 0;
8544 int ret;
8545 u32 blocksize = fs_info->nodesize;
8546 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
8547
8548#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
8549 if (btrfs_is_testing(fs_info)) {
8550 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
8551 level, root_objectid);
8552 if (!IS_ERR(buf))
8553 root->alloc_bytenr += blocksize;
8554 return buf;
8555 }
8556#endif
8557
8558 block_rsv = use_block_rsv(trans, root, blocksize);
8559 if (IS_ERR(block_rsv))
8560 return ERR_CAST(block_rsv);
8561
8562 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
8563 empty_size, hint, &ins, 0, 0);
8564 if (ret)
8565 goto out_unuse;
8566
8567 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
8568 root_objectid);
8569 if (IS_ERR(buf)) {
8570 ret = PTR_ERR(buf);
8571 goto out_free_reserved;
8572 }
8573
8574 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
8575 if (parent == 0)
8576 parent = ins.objectid;
8577 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8578 } else
8579 BUG_ON(parent > 0);
8580
8581 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
8582 extent_op = btrfs_alloc_delayed_extent_op();
8583 if (!extent_op) {
8584 ret = -ENOMEM;
8585 goto out_free_buf;
8586 }
8587 if (key)
8588 memcpy(&extent_op->key, key, sizeof(extent_op->key));
8589 else
8590 memset(&extent_op->key, 0, sizeof(extent_op->key));
8591 extent_op->flags_to_set = flags;
8592 extent_op->update_key = skinny_metadata ? false : true;
8593 extent_op->update_flags = true;
8594 extent_op->is_data = false;
8595 extent_op->level = level;
8596
8597 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
8598 ins.objectid, ins.offset, parent);
8599 generic_ref.real_root = root->root_key.objectid;
8600 btrfs_init_tree_ref(&generic_ref, level, root_objectid);
8601 btrfs_ref_tree_mod(fs_info, &generic_ref);
8602 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref,
8603 extent_op, NULL, NULL);
8604 if (ret)
8605 goto out_free_delayed;
8606 }
8607 return buf;
8608
8609out_free_delayed:
8610 btrfs_free_delayed_extent_op(extent_op);
8611out_free_buf:
8612 free_extent_buffer(buf);
8613out_free_reserved:
8614 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
8615out_unuse:
8616 unuse_block_rsv(fs_info, block_rsv, blocksize);
8617 return ERR_PTR(ret);
8618}
8619
8620struct walk_control {
8621 u64 refs[BTRFS_MAX_LEVEL];
8622 u64 flags[BTRFS_MAX_LEVEL];
8623 struct btrfs_key update_progress;
8624 struct btrfs_key drop_progress;
8625 int drop_level;
8626 int stage;
8627 int level;
8628 int shared_level;
8629 int update_ref;
8630 int keep_locks;
8631 int reada_slot;
8632 int reada_count;
8633 int restarted;
8634};
8635
8636#define DROP_REFERENCE 1
8637#define UPDATE_BACKREF 2
8638
8639static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
8640 struct btrfs_root *root,
8641 struct walk_control *wc,
8642 struct btrfs_path *path)
8643{
8644 struct btrfs_fs_info *fs_info = root->fs_info;
8645 u64 bytenr;
8646 u64 generation;
8647 u64 refs;
8648 u64 flags;
8649 u32 nritems;
8650 struct btrfs_key key;
8651 struct extent_buffer *eb;
8652 int ret;
8653 int slot;
8654 int nread = 0;
8655
8656 if (path->slots[wc->level] < wc->reada_slot) {
8657 wc->reada_count = wc->reada_count * 2 / 3;
8658 wc->reada_count = max(wc->reada_count, 2);
8659 } else {
8660 wc->reada_count = wc->reada_count * 3 / 2;
8661 wc->reada_count = min_t(int, wc->reada_count,
8662 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
8663 }
8664
8665 eb = path->nodes[wc->level];
8666 nritems = btrfs_header_nritems(eb);
8667
8668 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
8669 if (nread >= wc->reada_count)
8670 break;
8671
8672 cond_resched();
8673 bytenr = btrfs_node_blockptr(eb, slot);
8674 generation = btrfs_node_ptr_generation(eb, slot);
8675
8676 if (slot == path->slots[wc->level])
8677 goto reada;
8678
8679 if (wc->stage == UPDATE_BACKREF &&
8680 generation <= root->root_key.offset)
8681 continue;
8682
8683
8684 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
8685 wc->level - 1, 1, &refs,
8686 &flags);
8687
8688 if (ret < 0)
8689 continue;
8690 BUG_ON(refs == 0);
8691
8692 if (wc->stage == DROP_REFERENCE) {
8693 if (refs == 1)
8694 goto reada;
8695
8696 if (wc->level == 1 &&
8697 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8698 continue;
8699 if (!wc->update_ref ||
8700 generation <= root->root_key.offset)
8701 continue;
8702 btrfs_node_key_to_cpu(eb, &key, slot);
8703 ret = btrfs_comp_cpu_keys(&key,
8704 &wc->update_progress);
8705 if (ret < 0)
8706 continue;
8707 } else {
8708 if (wc->level == 1 &&
8709 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8710 continue;
8711 }
8712reada:
8713 readahead_tree_block(fs_info, bytenr);
8714 nread++;
8715 }
8716 wc->reada_slot = slot;
8717}
8718
8719
8720
8721
8722
8723
8724
8725
8726
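/*
 * Helper to process a tree block while walking down the tree.  When
 * wc->stage == UPDATE_BACKREF it updates back refs for the pointers in
 * the block.  Return value 1 means stop walking down at this level.
 */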
8727static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8728 struct btrfs_root *root,
8729 struct btrfs_path *path,
8730 struct walk_control *wc, int lookup_info)
8731{
8732 struct btrfs_fs_info *fs_info = root->fs_info;
8733 int level = wc->level;
8734 struct extent_buffer *eb = path->nodes[level];
8735 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8736 int ret;
8737
8738 if (wc->stage == UPDATE_BACKREF &&
8739 btrfs_header_owner(eb) != root->root_key.objectid)
8740 return 1;
8741
8742
8743
8744
8745
8746 if (lookup_info &&
8747 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
8748 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
8749 BUG_ON(!path->locks[level]);
8750 ret = btrfs_lookup_extent_info(trans, fs_info,
8751 eb->start, level, 1,
8752 &wc->refs[level],
8753 &wc->flags[level]);
8754 BUG_ON(ret == -ENOMEM);
8755 if (ret)
8756 return ret;
8757 BUG_ON(wc->refs[level] == 0);
8758 }
8759
8760 if (wc->stage == DROP_REFERENCE) {
8761 if (wc->refs[level] > 1)
8762 return 1;
8763
8764 if (path->locks[level] && !wc->keep_locks) {
8765 btrfs_tree_unlock_rw(eb, path->locks[level]);
8766 path->locks[level] = 0;
8767 }
8768 return 0;
8769 }
8770
8771
8772 if (!(wc->flags[level] & flag)) {
8773 BUG_ON(!path->locks[level]);
8774 ret = btrfs_inc_ref(trans, root, eb, 1);
8775 BUG_ON(ret);
8776 ret = btrfs_dec_ref(trans, root, eb, 0);
8777 BUG_ON(ret);
8778 ret = btrfs_set_disk_extent_flags(trans, eb->start,
8779 eb->len, flag,
8780 btrfs_header_level(eb), 0);
8781 BUG_ON(ret);
8782 wc->flags[level] |= flag;
8783 }
8784
8785
8786
8787
8788
8789 if (path->locks[level] && level > 0) {
8790 btrfs_tree_unlock_rw(eb, path->locks[level]);
8791 path->locks[level] = 0;
8792 }
8793 return 0;
8794}
8795
8796
8797
8798
8799
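/*
 * Check whether a backref for @bytenr with the given @parent and @level
 * still exists for this root.  Used when resuming an interrupted snapshot
 * drop to decide whether a ref still has to be dropped.
 */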
8800static int check_ref_exists(struct btrfs_trans_handle *trans,
8801 struct btrfs_root *root, u64 bytenr, u64 parent,
8802 int level)
8803{
8804 struct btrfs_path *path;
8805 struct btrfs_extent_inline_ref *iref;
8806 int ret;
8807
8808 path = btrfs_alloc_path();
8809 if (!path)
8810 return -ENOMEM;
8811
8812 ret = lookup_extent_backref(trans, path, &iref, bytenr,
8813 root->fs_info->nodesize, parent,
8814 root->root_key.objectid, level, 0);
8815 btrfs_free_path(path);
8816 if (ret == -ENOENT)
8817 return 0;
8818 if (ret < 0)
8819 return ret;
8820 return 1;
8821}
8822
8823
8824
8825
8826
8827
8828
8829
8830
8831
8832
8833
8834
8835
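/*
 * Helper to process a tree block pointer while walking down.  When
 * wc->stage == DROP_REFERENCE it checks the reference count of the child
 * block: if the block is shared and its back refs need updating, the
 * stage is switched to UPDATE_BACKREF; if it is shared but nothing needs
 * updating, the child is skipped and only this root's ref is dropped.
 * Return value 1 means do not descend into the block, 0 means descend,
 * < 0 is an error.
 */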
8836static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8837 struct btrfs_root *root,
8838 struct btrfs_path *path,
8839 struct walk_control *wc, int *lookup_info)
8840{
8841 struct btrfs_fs_info *fs_info = root->fs_info;
8842 u64 bytenr;
8843 u64 generation;
8844 u64 parent;
8845 struct btrfs_key key;
8846 struct btrfs_key first_key;
8847 struct btrfs_ref ref = { 0 };
8848 struct extent_buffer *next;
8849 int level = wc->level;
8850 int reada = 0;
8851 int ret = 0;
8852 bool need_account = false;
8853
8854 generation = btrfs_node_ptr_generation(path->nodes[level],
8855 path->slots[level]);
8856
8857
8858
8859
8860
8861 if (wc->stage == UPDATE_BACKREF &&
8862 generation <= root->root_key.offset) {
8863 *lookup_info = 1;
8864 return 1;
8865 }
8866
8867 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8868 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
8869 path->slots[level]);
8870
8871 next = find_extent_buffer(fs_info, bytenr);
8872 if (!next) {
8873 next = btrfs_find_create_tree_block(fs_info, bytenr);
8874 if (IS_ERR(next))
8875 return PTR_ERR(next);
8876
8877 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
8878 level - 1);
8879 reada = 1;
8880 }
8881 btrfs_tree_lock(next);
8882 btrfs_set_lock_blocking_write(next);
8883
8884 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
8885 &wc->refs[level - 1],
8886 &wc->flags[level - 1]);
8887 if (ret < 0)
8888 goto out_unlock;
8889
8890 if (unlikely(wc->refs[level - 1] == 0)) {
8891 btrfs_err(fs_info, "Missing references.");
8892 ret = -EIO;
8893 goto out_unlock;
8894 }
8895 *lookup_info = 0;
8896
8897 if (wc->stage == DROP_REFERENCE) {
8898 if (wc->refs[level - 1] > 1) {
8899 need_account = true;
8900 if (level == 1 &&
8901 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8902 goto skip;
8903
8904 if (!wc->update_ref ||
8905 generation <= root->root_key.offset)
8906 goto skip;
8907
8908 btrfs_node_key_to_cpu(path->nodes[level], &key,
8909 path->slots[level]);
8910 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
8911 if (ret < 0)
8912 goto skip;
8913
8914 wc->stage = UPDATE_BACKREF;
8915 wc->shared_level = level - 1;
8916 }
8917 } else {
8918 if (level == 1 &&
8919 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8920 goto skip;
8921 }
8922
8923 if (!btrfs_buffer_uptodate(next, generation, 0)) {
8924 btrfs_tree_unlock(next);
8925 free_extent_buffer(next);
8926 next = NULL;
8927 *lookup_info = 1;
8928 }
8929
8930 if (!next) {
8931 if (reada && level == 1)
8932 reada_walk_down(trans, root, wc, path);
8933 next = read_tree_block(fs_info, bytenr, generation, level - 1,
8934 &first_key);
8935 if (IS_ERR(next)) {
8936 return PTR_ERR(next);
8937 } else if (!extent_buffer_uptodate(next)) {
8938 free_extent_buffer(next);
8939 return -EIO;
8940 }
8941 btrfs_tree_lock(next);
8942 btrfs_set_lock_blocking_write(next);
8943 }
8944
8945 level--;
8946 ASSERT(level == btrfs_header_level(next));
8947 if (level != btrfs_header_level(next)) {
8948 btrfs_err(root->fs_info, "mismatched level");
8949 ret = -EIO;
8950 goto out_unlock;
8951 }
8952 path->nodes[level] = next;
8953 path->slots[level] = 0;
8954 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8955 wc->level = level;
8956 if (wc->level == 1)
8957 wc->reada_slot = 0;
8958 return 0;
8959skip:
8960 wc->refs[level - 1] = 0;
8961 wc->flags[level - 1] = 0;
8962 if (wc->stage == DROP_REFERENCE) {
8963 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8964 parent = path->nodes[level]->start;
8965 } else {
8966 ASSERT(root->root_key.objectid ==
8967 btrfs_header_owner(path->nodes[level]));
8968 if (root->root_key.objectid !=
8969 btrfs_header_owner(path->nodes[level])) {
8970 btrfs_err(root->fs_info,
8971 "mismatched block owner");
8972 ret = -EIO;
8973 goto out_unlock;
8974 }
8975 parent = 0;
8976 }
8977
8978
8979
8980
8981
8982
8983
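/*
 * If this drop was restarted after an interrupted run, the saved progress
 * key may point past refs that were already deleted; verify the ref still
 * exists before queueing another free for it.
 */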
8984 if (wc->restarted) {
8985 ret = check_ref_exists(trans, root, bytenr, parent,
8986 level - 1);
8987 if (ret < 0)
8988 goto out_unlock;
8989 if (ret == 0)
8990 goto no_delete;
8991 ret = 0;
8992 wc->restarted = 0;
8993 }
8994
8995
8996
8997
8998
8999
9000 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
9001 need_account) {
9002 ret = btrfs_qgroup_trace_subtree(trans, next,
9003 generation, level - 1);
9004 if (ret) {
9005 btrfs_err_rl(fs_info,
9006 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
9007 ret);
9008 }
9009 }
9010
9011
9012
9013
9014
9015
9016
9017 wc->drop_level = level;
9018 find_next_key(path, level, &wc->drop_progress);
9019
9020 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
9021 fs_info->nodesize, parent);
9022 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
9023 ret = btrfs_free_extent(trans, &ref);
9024 if (ret)
9025 goto out_unlock;
9026 }
9027no_delete:
9028 *lookup_info = 1;
9029 ret = 1;
9030
9031out_unlock:
9032 btrfs_tree_unlock(next);
9033 free_extent_buffer(next);
9034
9035 return ret;
9036}
9037
9038
9039
9040
9041
9042
9043
9044
9045
9046
9047
9048
9049
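/*
 * Helper to process a tree block while walking back up the tree.  When
 * wc->stage == DROP_REFERENCE it drops the reference for the block (and
 * frees it when that was the last ref); when wc->stage == UPDATE_BACKREF
 * it switches back to DROP_REFERENCE once the shared subtree has been
 * handled.  Return value 1 means stop walking up at this level.
 */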
9050static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
9051 struct btrfs_root *root,
9052 struct btrfs_path *path,
9053 struct walk_control *wc)
9054{
9055 struct btrfs_fs_info *fs_info = root->fs_info;
9056 int ret;
9057 int level = wc->level;
9058 struct extent_buffer *eb = path->nodes[level];
9059 u64 parent = 0;
9060
9061 if (wc->stage == UPDATE_BACKREF) {
9062 BUG_ON(wc->shared_level < level);
9063 if (level < wc->shared_level)
9064 goto out;
9065
9066 ret = find_next_key(path, level + 1, &wc->update_progress);
9067 if (ret > 0)
9068 wc->update_ref = 0;
9069
9070 wc->stage = DROP_REFERENCE;
9071 wc->shared_level = -1;
9072 path->slots[level] = 0;
9073
9074
9075
9076
9077
9078
9079 if (!path->locks[level]) {
9080 BUG_ON(level == 0);
9081 btrfs_tree_lock(eb);
9082 btrfs_set_lock_blocking_write(eb);
9083 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9084
9085 ret = btrfs_lookup_extent_info(trans, fs_info,
9086 eb->start, level, 1,
9087 &wc->refs[level],
9088 &wc->flags[level]);
9089 if (ret < 0) {
9090 btrfs_tree_unlock_rw(eb, path->locks[level]);
9091 path->locks[level] = 0;
9092 return ret;
9093 }
9094 BUG_ON(wc->refs[level] == 0);
9095 if (wc->refs[level] == 1) {
9096 btrfs_tree_unlock_rw(eb, path->locks[level]);
9097 path->locks[level] = 0;
9098 return 1;
9099 }
9100 }
9101 }
9102
9103
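 /* wc->stage == DROP_REFERENCE */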
9104 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
9105
9106 if (wc->refs[level] == 1) {
9107 if (level == 0) {
9108 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9109 ret = btrfs_dec_ref(trans, root, eb, 1);
9110 else
9111 ret = btrfs_dec_ref(trans, root, eb, 0);
9112 BUG_ON(ret);
9113 if (is_fstree(root->root_key.objectid)) {
9114 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
9115 if (ret) {
9116 btrfs_err_rl(fs_info,
9117 "error %d accounting leaf items, quota is out of sync, rescan required",
9118 ret);
9119 }
9120 }
9121 }
9122
9123 if (!path->locks[level] &&
9124 btrfs_header_generation(eb) == trans->transid) {
9125 btrfs_tree_lock(eb);
9126 btrfs_set_lock_blocking_write(eb);
9127 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9128 }
9129 btrfs_clean_tree_block(eb);
9130 }
9131
9132 if (eb == root->node) {
9133 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9134 parent = eb->start;
9135 else if (root->root_key.objectid != btrfs_header_owner(eb))
9136 goto owner_mismatch;
9137 } else {
9138 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9139 parent = path->nodes[level + 1]->start;
9140 else if (root->root_key.objectid !=
9141 btrfs_header_owner(path->nodes[level + 1]))
9142 goto owner_mismatch;
9143 }
9144
9145 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
9146out:
9147 wc->refs[level] = 0;
9148 wc->flags[level] = 0;
9149 return 0;
9150
9151owner_mismatch:
9152 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
9153 btrfs_header_owner(eb), root->root_key.objectid);
9154 return -EUCLEAN;
9155}
9156
9157static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
9158 struct btrfs_root *root,
9159 struct btrfs_path *path,
9160 struct walk_control *wc)
9161{
9162 int level = wc->level;
9163 int lookup_info = 1;
9164 int ret;
9165
9166 while (level >= 0) {
9167 ret = walk_down_proc(trans, root, path, wc, lookup_info);
9168 if (ret > 0)
9169 break;
9170
9171 if (level == 0)
9172 break;
9173
9174 if (path->slots[level] >=
9175 btrfs_header_nritems(path->nodes[level]))
9176 break;
9177
9178 ret = do_walk_down(trans, root, path, wc, &lookup_info);
9179 if (ret > 0) {
9180 path->slots[level]++;
9181 continue;
9182 } else if (ret < 0)
9183 return ret;
9184 level = wc->level;
9185 }
9186 return 0;
9187}
9188
9189static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
9190 struct btrfs_root *root,
9191 struct btrfs_path *path,
9192 struct walk_control *wc, int max_level)
9193{
9194 int level = wc->level;
9195 int ret;
9196
9197 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
9198 while (level < max_level && path->nodes[level]) {
9199 wc->level = level;
9200 if (path->slots[level] + 1 <
9201 btrfs_header_nritems(path->nodes[level])) {
9202 path->slots[level]++;
9203 return 0;
9204 } else {
9205 ret = walk_up_proc(trans, root, path, wc);
9206 if (ret > 0)
9207 return 0;
9208 if (ret < 0)
9209 return ret;
9210
9211 if (path->locks[level]) {
9212 btrfs_tree_unlock_rw(path->nodes[level],
9213 path->locks[level]);
9214 path->locks[level] = 0;
9215 }
9216 free_extent_buffer(path->nodes[level]);
9217 path->nodes[level] = NULL;
9218 level++;
9219 }
9220 }
9221 return 1;
9222}
9236
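/*
 * drop a subvolume tree.
 *
 * this function traverses the tree freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one.  if update_ref is true, this function also
 * makes sure backrefs for the shared block and all lower level blocks
 * are properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN.
 */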
9237int btrfs_drop_snapshot(struct btrfs_root *root,
9238 struct btrfs_block_rsv *block_rsv, int update_ref,
9239 int for_reloc)
9240{
9241 struct btrfs_fs_info *fs_info = root->fs_info;
9242 struct btrfs_path *path;
9243 struct btrfs_trans_handle *trans;
9244 struct btrfs_root *tree_root = fs_info->tree_root;
9245 struct btrfs_root_item *root_item = &root->root_item;
9246 struct walk_control *wc;
9247 struct btrfs_key key;
9248 int err = 0;
9249 int ret;
9250 int level;
9251 bool root_dropped = false;
9252
9253 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
9254
9255 path = btrfs_alloc_path();
9256 if (!path) {
9257 err = -ENOMEM;
9258 goto out;
9259 }
9260
9261 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9262 if (!wc) {
9263 btrfs_free_path(path);
9264 err = -ENOMEM;
9265 goto out;
9266 }
9267
9268 trans = btrfs_start_transaction(tree_root, 0);
9269 if (IS_ERR(trans)) {
9270 err = PTR_ERR(trans);
9271 goto out_free;
9272 }
9273
9274 err = btrfs_run_delayed_items(trans);
9275 if (err)
9276 goto out_end_trans;
9277
9278 if (block_rsv)
9279 trans->block_rsv = block_rsv;
9288
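 /*
  * Mark the root as being deleted.  Anyone who looks this root up from
  * now on can check this bit and avoid messing with a tree that is in
  * the middle of being dropped.
  */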
9289 set_bit(BTRFS_ROOT_DELETING, &root->state);
9290 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
9291 level = btrfs_header_level(root->node);
9292 path->nodes[level] = btrfs_lock_root_node(root);
9293 btrfs_set_lock_blocking_write(path->nodes[level]);
9294 path->slots[level] = 0;
9295 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9296 memset(&wc->update_progress, 0,
9297 sizeof(wc->update_progress));
9298 } else {
9299 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
9300 memcpy(&wc->update_progress, &key,
9301 sizeof(wc->update_progress));
9302
9303 level = root_item->drop_level;
9304 BUG_ON(level == 0);
9305 path->lowest_level = level;
9306 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9307 path->lowest_level = 0;
9308 if (ret < 0) {
9309 err = ret;
9310 goto out_end_trans;
9311 }
9312 WARN_ON(ret > 0);
9313
9314
9315
9316
9317
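 /*
  * unlock our path, this is safe because only this
  * function is allowed to delete this snapshot
  */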
9318 btrfs_unlock_up_safe(path, 0);
9319
9320 level = btrfs_header_level(root->node);
9321 while (1) {
9322 btrfs_tree_lock(path->nodes[level]);
9323 btrfs_set_lock_blocking_write(path->nodes[level]);
9324 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9325
9326 ret = btrfs_lookup_extent_info(trans, fs_info,
9327 path->nodes[level]->start,
9328 level, 1, &wc->refs[level],
9329 &wc->flags[level]);
9330 if (ret < 0) {
9331 err = ret;
9332 goto out_end_trans;
9333 }
9334 BUG_ON(wc->refs[level] == 0);
9335
9336 if (level == root_item->drop_level)
9337 break;
9338
9339 btrfs_tree_unlock(path->nodes[level]);
9340 path->locks[level] = 0;
9341 WARN_ON(wc->refs[level] != 1);
9342 level--;
9343 }
9344 }
9345
9346 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
9347 wc->level = level;
9348 wc->shared_level = -1;
9349 wc->stage = DROP_REFERENCE;
9350 wc->update_ref = update_ref;
9351 wc->keep_locks = 0;
9352 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9353
9354 while (1) {
9355
9356 ret = walk_down_tree(trans, root, path, wc);
9357 if (ret < 0) {
9358 err = ret;
9359 break;
9360 }
9361
9362 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9363 if (ret < 0) {
9364 err = ret;
9365 break;
9366 }
9367
9368 if (ret > 0) {
9369 BUG_ON(wc->stage != DROP_REFERENCE);
9370 break;
9371 }
9372
9373 if (wc->stage == DROP_REFERENCE) {
9374 wc->drop_level = wc->level;
9375 btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
9376 &wc->drop_progress,
9377 path->slots[wc->drop_level]);
9378 }
9379 btrfs_cpu_key_to_disk(&root_item->drop_progress,
9380 &wc->drop_progress);
9381 root_item->drop_level = wc->drop_level;
9382
9383 BUG_ON(wc->level == 0);
9384 if (btrfs_should_end_transaction(trans) ||
9385 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
9386 ret = btrfs_update_root(trans, tree_root,
9387 &root->root_key,
9388 root_item);
9389 if (ret) {
9390 btrfs_abort_transaction(trans, ret);
9391 err = ret;
9392 goto out_end_trans;
9393 }
9394
9395 btrfs_end_transaction_throttle(trans);
9396 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
9397 btrfs_debug(fs_info,
9398 "drop snapshot early exit");
9399 err = -EAGAIN;
9400 goto out_free;
9401 }
9402
9403 trans = btrfs_start_transaction(tree_root, 0);
9404 if (IS_ERR(trans)) {
9405 err = PTR_ERR(trans);
9406 goto out_free;
9407 }
9408 if (block_rsv)
9409 trans->block_rsv = block_rsv;
9410 }
9411 }
9412 btrfs_release_path(path);
9413 if (err)
9414 goto out_end_trans;
9415
9416 ret = btrfs_del_root(trans, &root->root_key);
9417 if (ret) {
9418 btrfs_abort_transaction(trans, ret);
9419 err = ret;
9420 goto out_end_trans;
9421 }
9422
9423 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9424 ret = btrfs_find_root(tree_root, &root->root_key, path,
9425 NULL, NULL);
9426 if (ret < 0) {
9427 btrfs_abort_transaction(trans, ret);
9428 err = ret;
9429 goto out_end_trans;
9430 } else if (ret > 0) {
9431
9432
9433
9434
9435
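 /*
  * The root item is already gone, just remove the orphan item that was
  * protecting it; the return value is deliberately ignored here since
  * there is nothing more we can do about a failure.
  */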
9436 btrfs_del_orphan_item(trans, tree_root,
9437 root->root_key.objectid);
9438 }
9439 }
9440
9441 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9442 btrfs_add_dropped_root(trans, root);
9443 } else {
9444 free_extent_buffer(root->node);
9445 free_extent_buffer(root->commit_root);
9446 btrfs_put_fs_root(root);
9447 }
9448 root_dropped = true;
9449out_end_trans:
9450 btrfs_end_transaction_throttle(trans);
9451out_free:
9452 kfree(wc);
9453 btrfs_free_path(path);
9454out:
9455
9456
9457
9458
9459
9460
9461
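 /*
  * If we had to stop dropping the snapshot for whatever reason we need
  * to make sure to add it back to the dead root list so that we keep
  * trying to do the work later.
  */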
9462 if (!for_reloc && !root_dropped)
9463 btrfs_add_dead_root(root);
9464 if (err && err != -EAGAIN)
9465 btrfs_handle_fs_error(fs_info, err, NULL);
9466 return err;
9467}
9474
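/*
 * drop subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'.
 * Only used by the relocation code.
 */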
9475int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9476 struct btrfs_root *root,
9477 struct extent_buffer *node,
9478 struct extent_buffer *parent)
9479{
9480 struct btrfs_fs_info *fs_info = root->fs_info;
9481 struct btrfs_path *path;
9482 struct walk_control *wc;
9483 int level;
9484 int parent_level;
9485 int ret = 0;
9486 int wret;
9487
9488 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9489
9490 path = btrfs_alloc_path();
9491 if (!path)
9492 return -ENOMEM;
9493
9494 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9495 if (!wc) {
9496 btrfs_free_path(path);
9497 return -ENOMEM;
9498 }
9499
9500 btrfs_assert_tree_locked(parent);
9501 parent_level = btrfs_header_level(parent);
9502 extent_buffer_get(parent);
9503 path->nodes[parent_level] = parent;
9504 path->slots[parent_level] = btrfs_header_nritems(parent);
9505
9506 btrfs_assert_tree_locked(node);
9507 level = btrfs_header_level(node);
9508 path->nodes[level] = node;
9509 path->slots[level] = 0;
9510 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9511
9512 wc->refs[parent_level] = 1;
9513 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9514 wc->level = level;
9515 wc->shared_level = -1;
9516 wc->stage = DROP_REFERENCE;
9517 wc->update_ref = 0;
9518 wc->keep_locks = 1;
9519 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9520
9521 while (1) {
9522 wret = walk_down_tree(trans, root, path, wc);
9523 if (wret < 0) {
9524 ret = wret;
9525 break;
9526 }
9527
9528 wret = walk_up_tree(trans, root, path, wc, parent_level);
9529 if (wret < 0)
9530 ret = wret;
9531 if (wret != 0)
9532 break;
9533 }
9534
9535 kfree(wc);
9536 btrfs_free_path(path);
9537 return ret;
9538}
9539
9540static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9541{
9542 u64 num_devices;
9543 u64 stripped;
9544
9545
9546
9547
9548
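 /*
  * if restripe for this chunk_type is on, pick the target profile and
  * return, otherwise do the usual balance
  */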
9549 stripped = get_restripe_target(fs_info, flags);
9550 if (stripped)
9551 return extended_to_chunk(stripped);
9552
9553 num_devices = fs_info->fs_devices->rw_devices;
9554
9555 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9556 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9557 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9558
9559 if (num_devices == 1) {
9560 stripped |= BTRFS_BLOCK_GROUP_DUP;
9561 stripped = flags & ~stripped;
9562
9563
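 /* turn raid0 into single device chunks */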
9564 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9565 return stripped;
9566
9567
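 /* turn mirroring into duplication */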
9568 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9569 BTRFS_BLOCK_GROUP_RAID10))
9570 return stripped | BTRFS_BLOCK_GROUP_DUP;
9571 } else {
9572
9573 if (flags & stripped)
9574 return flags;
9575
9576 stripped |= BTRFS_BLOCK_GROUP_DUP;
9577 stripped = flags & ~stripped;
9578
9579
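 /* switch duplicated blocks with raid1 */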
9580 if (flags & BTRFS_BLOCK_GROUP_DUP)
9581 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9582
9583
9584 }
9585
9586 return flags;
9587}
9588
9589static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9590{
9591 struct btrfs_space_info *sinfo = cache->space_info;
9592 u64 num_bytes;
9593 u64 sinfo_used;
9594 u64 min_allocable_bytes;
9595 int ret = -ENOSPC;
9596
9597
9598
9599
9600
9601
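 /*
  * We need some metadata space and system metadata space for allocating
  * chunks in some corner cases like removing a block group, so unless
  * the caller forces it, keep a minimum amount of space allocatable.
  */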
9602 if ((sinfo->flags &
9603 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9604 !force)
9605 min_allocable_bytes = SZ_1M;
9606 else
9607 min_allocable_bytes = 0;
9608
9609 spin_lock(&sinfo->lock);
9610 spin_lock(&cache->lock);
9611
9612 if (cache->ro) {
9613 cache->ro++;
9614 ret = 0;
9615 goto out;
9616 }
9617
9618 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9619 cache->bytes_super - btrfs_block_group_used(&cache->item);
9620 sinfo_used = btrfs_space_info_used(sinfo, true);
9621
9622 if (sinfo_used + num_bytes + min_allocable_bytes <=
9623 sinfo->total_bytes) {
9624 sinfo->bytes_readonly += num_bytes;
9625 cache->ro++;
9626 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9627 ret = 0;
9628 }
9629out:
9630 spin_unlock(&cache->lock);
9631 spin_unlock(&sinfo->lock);
9632 if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
9633 btrfs_info(cache->fs_info,
9634 "unable to make block group %llu ro",
9635 cache->key.objectid);
9636 btrfs_info(cache->fs_info,
9637 "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
9638 sinfo_used, num_bytes, min_allocable_bytes);
9639 dump_space_info(cache->fs_info, cache->space_info, 0, 0);
9640 }
9641 return ret;
9642}
9643
9644int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
9646{
9647 struct btrfs_fs_info *fs_info = cache->fs_info;
9648 struct btrfs_trans_handle *trans;
9649 u64 alloc_flags;
9650 int ret;
9651
9652again:
9653 trans = btrfs_join_transaction(fs_info->extent_root);
9654 if (IS_ERR(trans))
9655 return PTR_ERR(trans);
9656
9657
9658
9659
9660
9661
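 /*
  * We're not allowed to set block groups readonly after the dirty block
  * group cache has started writing.  If it already started, back off,
  * wait for the running transaction to commit and retry.
  */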
9662 mutex_lock(&fs_info->ro_block_group_mutex);
9663 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9664 u64 transid = trans->transid;
9665
9666 mutex_unlock(&fs_info->ro_block_group_mutex);
9667 btrfs_end_transaction(trans);
9668
9669 ret = btrfs_wait_for_commit(fs_info, transid);
9670 if (ret)
9671 return ret;
9672 goto again;
9673 }
9674
9675
9676
9677
9678
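 /*
  * If we are changing raid levels, try to allocate a corresponding
  * block group with the new raid level.
  */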
9679 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9680 if (alloc_flags != cache->flags) {
9681 ret = do_chunk_alloc(trans, alloc_flags,
9682 CHUNK_ALLOC_FORCE);
9683
9684
9685
9686
9687
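 /*
  * ENOSPC is allowed here, we may have enough space
  * already allocated at the new raid level to carry on
  */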
9688 if (ret == -ENOSPC)
9689 ret = 0;
9690 if (ret < 0)
9691 goto out;
9692 }
9693
9694 ret = inc_block_group_ro(cache, 0);
9695 if (!ret)
9696 goto out;
9697 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9698 ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9699 if (ret < 0)
9700 goto out;
9701 ret = inc_block_group_ro(cache, 0);
9702out:
9703 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9704 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9705 mutex_lock(&fs_info->chunk_mutex);
9706 check_system_chunk(trans, alloc_flags);
9707 mutex_unlock(&fs_info->chunk_mutex);
9708 }
9709 mutex_unlock(&fs_info->ro_block_group_mutex);
9710
9711 btrfs_end_transaction(trans);
9712 return ret;
9713}
9714
9715int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
9716{
9717 u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
9718
9719 return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9720}
9721
9722
9723
9724
9725
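/*
 * helper to account the unused space of all the readonly block groups in the
 * space_info.  takes mirrors into account.
 */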
9726u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9727{
9728 struct btrfs_block_group_cache *block_group;
9729 u64 free_bytes = 0;
9730 int factor;
9731
9732
9733 if (list_empty(&sinfo->ro_bgs))
9734 return 0;
9735
9736 spin_lock(&sinfo->lock);
9737 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9738 spin_lock(&block_group->lock);
9739
9740 if (!block_group->ro) {
9741 spin_unlock(&block_group->lock);
9742 continue;
9743 }
9744
9745 factor = btrfs_bg_type_to_factor(block_group->flags);
9746 free_bytes += (block_group->key.offset -
9747 btrfs_block_group_used(&block_group->item)) *
9748 factor;
9749
9750 spin_unlock(&block_group->lock);
9751 }
9752 spin_unlock(&sinfo->lock);
9753
9754 return free_bytes;
9755}
9756
9757void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
9758{
9759 struct btrfs_space_info *sinfo = cache->space_info;
9760 u64 num_bytes;
9761
9762 BUG_ON(!cache->ro);
9763
9764 spin_lock(&sinfo->lock);
9765 spin_lock(&cache->lock);
9766 if (!--cache->ro) {
9767 num_bytes = cache->key.offset - cache->reserved -
9768 cache->pinned - cache->bytes_super -
9769 btrfs_block_group_used(&cache->item);
9770 sinfo->bytes_readonly -= num_bytes;
9771 list_del_init(&cache->ro_list);
9772 }
9773 spin_unlock(&cache->lock);
9774 spin_unlock(&sinfo->lock);
9775}
9776
9777
9778
9779
9780
9781
9782
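/*
 * Checks to see if it's even possible to relocate this block group.
 *
 * @return - -1 if it's not a good idea to relocate this block group, 0 if it
 * is ok to go ahead and try.
 */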
9783int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
9784{
9785 struct btrfs_block_group_cache *block_group;
9786 struct btrfs_space_info *space_info;
9787 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
9788 struct btrfs_device *device;
9789 u64 min_free;
9790 u64 dev_min = 1;
9791 u64 dev_nr = 0;
9792 u64 target;
9793 int debug;
9794 int index;
9795 int full = 0;
9796 int ret = 0;
9797
9798 debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG);
9799
9800 block_group = btrfs_lookup_block_group(fs_info, bytenr);
9801
9802
9803 if (!block_group) {
9804 if (debug)
9805 btrfs_warn(fs_info,
9806 "can't find block group for bytenr %llu",
9807 bytenr);
9808 return -1;
9809 }
9810
9811 min_free = btrfs_block_group_used(&block_group->item);
9812
9813
9814 if (!min_free)
9815 goto out;
9816
9817 space_info = block_group->space_info;
9818 spin_lock(&space_info->lock);
9819
9820 full = space_info->full;
9821
9822
9823
9824
9825
9826
9827
9828
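 /*
  * If this isn't the only block group in its space_info, make sure there
  * is room elsewhere in the space for all of its used bytes.  If there
  * is, relocation can clearly proceed and we are done.
  */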
9829 if ((space_info->total_bytes != block_group->key.offset) &&
9830 (btrfs_space_info_used(space_info, false) + min_free <
9831 space_info->total_bytes)) {
9832 spin_unlock(&space_info->lock);
9833 goto out;
9834 }
9835 spin_unlock(&space_info->lock);
9836
9837
9838
9839
9840
9841
9842
9843
9844 ret = -1;
9845
9846
9847
9848
9849
9850
9851
9852
9853
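 /*
  * Figure out which raid profile a replacement chunk would use: honour a
  * restripe target if one is set for this chunk type, otherwise keep the
  * block group's current profile.
  */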
9854 target = get_restripe_target(fs_info, block_group->flags);
9855 if (target) {
9856 index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
9857 } else {
9858
9859
9860
9861
9862 if (full) {
9863 if (debug)
9864 btrfs_warn(fs_info,
9865 "no space to alloc new chunk for block group %llu",
9866 block_group->key.objectid);
9867 goto out;
9868 }
9869
9870 index = btrfs_bg_flags_to_raid_index(block_group->flags);
9871 }
9872
9873 if (index == BTRFS_RAID_RAID10) {
9874 dev_min = 4;
9875
9876 min_free >>= 1;
9877 } else if (index == BTRFS_RAID_RAID1) {
9878 dev_min = 2;
9879 } else if (index == BTRFS_RAID_DUP) {
9880
9881 min_free <<= 1;
9882 } else if (index == BTRFS_RAID_RAID0) {
9883 dev_min = fs_devices->rw_devices;
9884 min_free = div64_u64(min_free, dev_min);
9885 }
9886
9887 mutex_lock(&fs_info->chunk_mutex);
9888 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9889 u64 dev_offset;
9890
9891
9892
9893
9894
9895 if (device->total_bytes > device->bytes_used + min_free &&
9896 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
9897 ret = find_free_dev_extent(device, min_free,
9898 &dev_offset, NULL);
9899 if (!ret)
9900 dev_nr++;
9901
9902 if (dev_nr >= dev_min)
9903 break;
9904
9905 ret = -1;
9906 }
9907 }
9908 if (debug && ret == -1)
9909 btrfs_warn(fs_info,
9910 "no space to allocate a new chunk for block group %llu",
9911 block_group->key.objectid);
9912 mutex_unlock(&fs_info->chunk_mutex);
9913out:
9914 btrfs_put_block_group(block_group);
9915 return ret;
9916}
9917
9918static int find_first_block_group(struct btrfs_fs_info *fs_info,
9919 struct btrfs_path *path,
9920 struct btrfs_key *key)
9921{
9922 struct btrfs_root *root = fs_info->extent_root;
9923 int ret = 0;
9924 struct btrfs_key found_key;
9925 struct extent_buffer *leaf;
9926 struct btrfs_block_group_item bg;
9927 u64 flags;
9928 int slot;
9929
9930 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9931 if (ret < 0)
9932 goto out;
9933
9934 while (1) {
9935 slot = path->slots[0];
9936 leaf = path->nodes[0];
9937 if (slot >= btrfs_header_nritems(leaf)) {
9938 ret = btrfs_next_leaf(root, path);
9939 if (ret == 0)
9940 continue;
9941 if (ret < 0)
9942 goto out;
9943 break;
9944 }
9945 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9946
9947 if (found_key.objectid >= key->objectid &&
9948 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9949 struct extent_map_tree *em_tree;
9950 struct extent_map *em;
9951
9952 em_tree = &root->fs_info->mapping_tree.map_tree;
9953 read_lock(&em_tree->lock);
9954 em = lookup_extent_mapping(em_tree, found_key.objectid,
9955 found_key.offset);
9956 read_unlock(&em_tree->lock);
9957 if (!em) {
9958 btrfs_err(fs_info,
9959 "logical %llu len %llu found bg but no related chunk",
9960 found_key.objectid, found_key.offset);
9961 ret = -ENOENT;
9962 } else if (em->start != found_key.objectid ||
9963 em->len != found_key.offset) {
9964 btrfs_err(fs_info,
9965 "block group %llu len %llu mismatch with chunk %llu len %llu",
9966 found_key.objectid, found_key.offset,
9967 em->start, em->len);
9968 ret = -EUCLEAN;
9969 } else {
9970 read_extent_buffer(leaf, &bg,
9971 btrfs_item_ptr_offset(leaf, slot),
9972 sizeof(bg));
9973 flags = btrfs_block_group_flags(&bg) &
9974 BTRFS_BLOCK_GROUP_TYPE_MASK;
9975
9976 if (flags != (em->map_lookup->type &
9977 BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9978 btrfs_err(fs_info,
9979"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
9980 found_key.objectid,
9981 found_key.offset, flags,
9982 (BTRFS_BLOCK_GROUP_TYPE_MASK &
9983 em->map_lookup->type));
9984 ret = -EUCLEAN;
9985 } else {
9986 ret = 0;
9987 }
9988 }
9989 free_extent_map(em);
9990 goto out;
9991 }
9992 path->slots[0]++;
9993 }
9994out:
9995 return ret;
9996}
9997
9998void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9999{
10000 struct btrfs_block_group_cache *block_group;
10001 u64 last = 0;
10002
10003 while (1) {
10004 struct inode *inode;
10005
10006 block_group = btrfs_lookup_first_block_group(info, last);
10007 while (block_group) {
10008 wait_block_group_cache_done(block_group);
10009 spin_lock(&block_group->lock);
10010 if (block_group->iref)
10011 break;
10012 spin_unlock(&block_group->lock);
10013 block_group = next_block_group(block_group);
10014 }
10015 if (!block_group) {
10016 if (last == 0)
10017 break;
10018 last = 0;
10019 continue;
10020 }
10021
10022 inode = block_group->inode;
10023 block_group->iref = 0;
10024 block_group->inode = NULL;
10025 spin_unlock(&block_group->lock);
10026 ASSERT(block_group->io_ctl.inode == NULL);
10027 iput(inode);
10028 last = block_group->key.objectid + block_group->key.offset;
10029 btrfs_put_block_group(block_group);
10030 }
10031}
10032
10033
10034
10035
10036
10037
10038int btrfs_free_block_groups(struct btrfs_fs_info *info)
10039{
10040 struct btrfs_block_group_cache *block_group;
10041 struct btrfs_space_info *space_info;
10042 struct btrfs_caching_control *caching_ctl;
10043 struct rb_node *n;
10044
10045 down_write(&info->commit_root_sem);
10046 while (!list_empty(&info->caching_block_groups)) {
10047 caching_ctl = list_entry(info->caching_block_groups.next,
10048 struct btrfs_caching_control, list);
10049 list_del(&caching_ctl->list);
10050 put_caching_control(caching_ctl);
10051 }
10052 up_write(&info->commit_root_sem);
10053
10054 spin_lock(&info->unused_bgs_lock);
10055 while (!list_empty(&info->unused_bgs)) {
10056 block_group = list_first_entry(&info->unused_bgs,
10057 struct btrfs_block_group_cache,
10058 bg_list);
10059 list_del_init(&block_group->bg_list);
10060 btrfs_put_block_group(block_group);
10061 }
10062 spin_unlock(&info->unused_bgs_lock);
10063
10064 spin_lock(&info->block_group_cache_lock);
10065 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
10066 block_group = rb_entry(n, struct btrfs_block_group_cache,
10067 cache_node);
10068 rb_erase(&block_group->cache_node,
10069 &info->block_group_cache_tree);
10070 RB_CLEAR_NODE(&block_group->cache_node);
10071 spin_unlock(&info->block_group_cache_lock);
10072
10073 down_write(&block_group->space_info->groups_sem);
10074 list_del(&block_group->list);
10075 up_write(&block_group->space_info->groups_sem);
10076
10077
10078
10079
10080
10081 if (block_group->cached == BTRFS_CACHE_NO ||
10082 block_group->cached == BTRFS_CACHE_ERROR)
10083 free_excluded_extents(block_group);
10084
10085 btrfs_remove_free_space_cache(block_group);
10086 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
10087 ASSERT(list_empty(&block_group->dirty_list));
10088 ASSERT(list_empty(&block_group->io_list));
10089 ASSERT(list_empty(&block_group->bg_list));
10090 ASSERT(atomic_read(&block_group->count) == 1);
10091 btrfs_put_block_group(block_group);
10092
10093 spin_lock(&info->block_group_cache_lock);
10094 }
10095 spin_unlock(&info->block_group_cache_lock);
10096
10097
10098
10099
10100
10101
10102
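 /*
  * Now that all the block groups are freed, go through and free all the
  * space_info structs.  This is only called during the final stages of
  * unmount, so we know nobody else is using them.
  */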
10103 synchronize_rcu();
10104
10105 release_global_block_rsv(info);
10106
10107 while (!list_empty(&info->space_info)) {
10108 int i;
10109
10110 space_info = list_entry(info->space_info.next,
10111 struct btrfs_space_info,
10112 list);
10113
10114
10115
10116
10117
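 /*
  * Do not hide this behind enospc_debug, this is actually
  * important and indicates a real bug if this happens.
  */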
10118 if (WARN_ON(space_info->bytes_pinned > 0 ||
10119 space_info->bytes_reserved > 0 ||
10120 space_info->bytes_may_use > 0))
10121 dump_space_info(info, space_info, 0, 0);
10122 list_del(&space_info->list);
10123 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
10124 struct kobject *kobj;
10125 kobj = space_info->block_group_kobjs[i];
10126 space_info->block_group_kobjs[i] = NULL;
10127 if (kobj) {
10128 kobject_del(kobj);
10129 kobject_put(kobj);
10130 }
10131 }
10132 kobject_del(&space_info->kobj);
10133 kobject_put(&space_info->kobj);
10134 }
10135 return 0;
10136}
10137
10138
10139void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10140{
10141 struct btrfs_space_info *space_info;
10142 struct raid_kobject *rkobj;
10143 LIST_HEAD(list);
10144 int index;
10145 int ret = 0;
10146
10147 spin_lock(&fs_info->pending_raid_kobjs_lock);
10148 list_splice_init(&fs_info->pending_raid_kobjs, &list);
10149 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10150
10151 list_for_each_entry(rkobj, &list, list) {
10152 space_info = __find_space_info(fs_info, rkobj->flags);
10153 index = btrfs_bg_flags_to_raid_index(rkobj->flags);
10154
10155 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
10156 "%s", get_raid_name(index));
10157 if (ret) {
10158 kobject_put(&rkobj->kobj);
10159 break;
10160 }
10161 }
10162 if (ret)
10163 btrfs_warn(fs_info,
10164 "failed to add kobject for block cache, ignoring");
10165}
10166
10167static void link_block_group(struct btrfs_block_group_cache *cache)
10168{
10169 struct btrfs_space_info *space_info = cache->space_info;
10170 struct btrfs_fs_info *fs_info = cache->fs_info;
10171 int index = btrfs_bg_flags_to_raid_index(cache->flags);
10172 bool first = false;
10173
10174 down_write(&space_info->groups_sem);
10175 if (list_empty(&space_info->block_groups[index]))
10176 first = true;
10177 list_add_tail(&cache->list, &space_info->block_groups[index]);
10178 up_write(&space_info->groups_sem);
10179
10180 if (first) {
10181 struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
10182 if (!rkobj) {
10183 btrfs_warn(cache->fs_info,
10184 "couldn't alloc memory for raid level kobject");
10185 return;
10186 }
10187 rkobj->flags = cache->flags;
10188 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
10189
10190 spin_lock(&fs_info->pending_raid_kobjs_lock);
10191 list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
10192 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10193 space_info->block_group_kobjs[index] = &rkobj->kobj;
10194 }
10195}
10196
10197static struct btrfs_block_group_cache *
10198btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10199 u64 start, u64 size)
10200{
10201 struct btrfs_block_group_cache *cache;
10202
10203 cache = kzalloc(sizeof(*cache), GFP_NOFS);
10204 if (!cache)
10205 return NULL;
10206
10207 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
10208 GFP_NOFS);
10209 if (!cache->free_space_ctl) {
10210 kfree(cache);
10211 return NULL;
10212 }
10213
10214 cache->key.objectid = start;
10215 cache->key.offset = size;
10216 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10217
10218 cache->fs_info = fs_info;
10219 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
10220 set_free_space_tree_thresholds(cache);
10221
10222 atomic_set(&cache->count, 1);
10223 spin_lock_init(&cache->lock);
10224 init_rwsem(&cache->data_rwsem);
10225 INIT_LIST_HEAD(&cache->list);
10226 INIT_LIST_HEAD(&cache->cluster_list);
10227 INIT_LIST_HEAD(&cache->bg_list);
10228 INIT_LIST_HEAD(&cache->ro_list);
10229 INIT_LIST_HEAD(&cache->dirty_list);
10230 INIT_LIST_HEAD(&cache->io_list);
10231 btrfs_init_free_space_ctl(cache);
10232 atomic_set(&cache->trimming, 0);
10233 mutex_init(&cache->free_space_lock);
10234 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
10235
10236 return cache;
10237}
10238
10239
10240
10241
10242
10243
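/*
 * Iterate all chunks and verify that each of them has a corresponding block
 * group with matching start, length and type flags.
 */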
10244static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
10245{
10246 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
10247 struct extent_map *em;
10248 struct btrfs_block_group_cache *bg;
10249 u64 start = 0;
10250 int ret = 0;
10251
10252 while (1) {
10253 read_lock(&map_tree->map_tree.lock);
10254
10255
10256
10257
10258
10259 em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
10260 read_unlock(&map_tree->map_tree.lock);
10261 if (!em)
10262 break;
10263
10264 bg = btrfs_lookup_block_group(fs_info, em->start);
10265 if (!bg) {
10266 btrfs_err(fs_info,
10267 "chunk start=%llu len=%llu doesn't have corresponding block group",
10268 em->start, em->len);
10269 ret = -EUCLEAN;
10270 free_extent_map(em);
10271 break;
10272 }
10273 if (bg->key.objectid != em->start ||
10274 bg->key.offset != em->len ||
10275 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
10276 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10277 btrfs_err(fs_info,
10278"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
10279 em->start, em->len,
10280 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
10281 bg->key.objectid, bg->key.offset,
10282 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
10283 ret = -EUCLEAN;
10284 free_extent_map(em);
10285 btrfs_put_block_group(bg);
10286 break;
10287 }
10288 start = em->start + em->len;
10289 free_extent_map(em);
10290 btrfs_put_block_group(bg);
10291 }
10292 return ret;
10293}
10294
10295int btrfs_read_block_groups(struct btrfs_fs_info *info)
10296{
10297 struct btrfs_path *path;
10298 int ret;
10299 struct btrfs_block_group_cache *cache;
10300 struct btrfs_space_info *space_info;
10301 struct btrfs_key key;
10302 struct btrfs_key found_key;
10303 struct extent_buffer *leaf;
10304 int need_clear = 0;
10305 u64 cache_gen;
10306 u64 feature;
10307 int mixed;
10308
10309 feature = btrfs_super_incompat_flags(info->super_copy);
10310 mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
10311
10312 key.objectid = 0;
10313 key.offset = 0;
10314 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10315 path = btrfs_alloc_path();
10316 if (!path)
10317 return -ENOMEM;
10318 path->reada = READA_FORWARD;
10319
10320 cache_gen = btrfs_super_cache_generation(info->super_copy);
10321 if (btrfs_test_opt(info, SPACE_CACHE) &&
10322 btrfs_super_generation(info->super_copy) != cache_gen)
10323 need_clear = 1;
10324 if (btrfs_test_opt(info, CLEAR_CACHE))
10325 need_clear = 1;
10326
10327 while (1) {
10328 ret = find_first_block_group(info, path, &key);
10329 if (ret > 0)
10330 break;
10331 if (ret != 0)
10332 goto error;
10333
10334 leaf = path->nodes[0];
10335 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10336
10337 cache = btrfs_create_block_group_cache(info, found_key.objectid,
10338 found_key.offset);
10339 if (!cache) {
10340 ret = -ENOMEM;
10341 goto error;
10342 }
10343
10344 if (need_clear) {
10345
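 /*
  * The cache generation does not match (or the user asked to clear the
  * cache), so mark the free space cache for this block group to be
  * rebuilt: BTRFS_DC_CLEAR makes us truncate the old cache inode and
  * set up a new one.
  */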
10355 if (btrfs_test_opt(info, SPACE_CACHE))
10356 cache->disk_cache_state = BTRFS_DC_CLEAR;
10357 }
10358
10359 read_extent_buffer(leaf, &cache->item,
10360 btrfs_item_ptr_offset(leaf, path->slots[0]),
10361 sizeof(cache->item));
10362 cache->flags = btrfs_block_group_flags(&cache->item);
10363 if (!mixed &&
10364 ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
10365 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
10366 btrfs_err(info,
10367"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
10368 cache->key.objectid);
10369 ret = -EINVAL;
10370 goto error;
10371 }
10372
10373 key.objectid = found_key.objectid + found_key.offset;
10374 btrfs_release_path(path);
10375
10376
10377
10378
10379
10380
10381 ret = exclude_super_stripes(cache);
10382 if (ret) {
10383
10384
10385
10386
10387 free_excluded_extents(cache);
10388 btrfs_put_block_group(cache);
10389 goto error;
10390 }
10391
10392
10393
10394
10395
10396
10397
10398
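 /*
  * Check for two cases: either the block group is completely full, in
  * which case caching it would find no free space, or it is completely
  * unused, in which case all of its space is free.  Both can be marked
  * cached right away, which saves a lot of time, particularly in the
  * full case.
  */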
10399 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
10400 cache->last_byte_to_unpin = (u64)-1;
10401 cache->cached = BTRFS_CACHE_FINISHED;
10402 free_excluded_extents(cache);
10403 } else if (btrfs_block_group_used(&cache->item) == 0) {
10404 cache->last_byte_to_unpin = (u64)-1;
10405 cache->cached = BTRFS_CACHE_FINISHED;
10406 add_new_free_space(cache, found_key.objectid,
10407 found_key.objectid +
10408 found_key.offset);
10409 free_excluded_extents(cache);
10410 }
10411
10412 ret = btrfs_add_block_group_cache(info, cache);
10413 if (ret) {
10414 btrfs_remove_free_space_cache(cache);
10415 btrfs_put_block_group(cache);
10416 goto error;
10417 }
10418
10419 trace_btrfs_add_block_group(info, cache, 0);
10420 update_space_info(info, cache->flags, found_key.offset,
10421 btrfs_block_group_used(&cache->item),
10422 cache->bytes_super, &space_info);
10423
10424 cache->space_info = space_info;
10425
10426 link_block_group(cache);
10427
10428 set_avail_alloc_bits(info, cache->flags);
10429 if (btrfs_chunk_readonly(info, cache->key.objectid)) {
10430 inc_block_group_ro(cache, 1);
10431 } else if (btrfs_block_group_used(&cache->item) == 0) {
10432 ASSERT(list_empty(&cache->bg_list));
10433 btrfs_mark_bg_unused(cache);
10434 }
10435 }
10436
10437 list_for_each_entry_rcu(space_info, &info->space_info, list) {
10438 if (!(get_alloc_profile(info, space_info->flags) &
10439 (BTRFS_BLOCK_GROUP_RAID10 |
10440 BTRFS_BLOCK_GROUP_RAID1 |
10441 BTRFS_BLOCK_GROUP_RAID5 |
10442 BTRFS_BLOCK_GROUP_RAID6 |
10443 BTRFS_BLOCK_GROUP_DUP)))
10444 continue;
10445
10446
10447
10448
10449 list_for_each_entry(cache,
10450 &space_info->block_groups[BTRFS_RAID_RAID0],
10451 list)
10452 inc_block_group_ro(cache, 1);
10453 list_for_each_entry(cache,
10454 &space_info->block_groups[BTRFS_RAID_SINGLE],
10455 list)
10456 inc_block_group_ro(cache, 1);
10457 }
10458
10459 btrfs_add_raid_kobjects(info);
10460 init_global_block_rsv(info);
10461 ret = check_chunk_block_group_mappings(info);
10462error:
10463 btrfs_free_path(path);
10464 return ret;
10465}
10466
10467void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
10468{
10469 struct btrfs_fs_info *fs_info = trans->fs_info;
10470 struct btrfs_block_group_cache *block_group;
10471 struct btrfs_root *extent_root = fs_info->extent_root;
10472 struct btrfs_block_group_item item;
10473 struct btrfs_key key;
10474 int ret = 0;
10475
10476 if (!trans->can_flush_pending_bgs)
10477 return;
10478
10479 while (!list_empty(&trans->new_bgs)) {
10480 block_group = list_first_entry(&trans->new_bgs,
10481 struct btrfs_block_group_cache,
10482 bg_list);
10483 if (ret)
10484 goto next;
10485
10486 spin_lock(&block_group->lock);
10487 memcpy(&item, &block_group->item, sizeof(item));
10488 memcpy(&key, &block_group->key, sizeof(key));
10489 spin_unlock(&block_group->lock);
10490
10491 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10492 sizeof(item));
10493 if (ret)
10494 btrfs_abort_transaction(trans, ret);
10495 ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
10496 if (ret)
10497 btrfs_abort_transaction(trans, ret);
10498 add_block_group_free_space(trans, block_group);
10499
10500next:
10501 btrfs_delayed_refs_rsv_release(fs_info, 1);
10502 list_del_init(&block_group->bg_list);
10503 }
10504 btrfs_trans_release_chunk_metadata(trans);
10505}
10506
10507int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10508 u64 type, u64 chunk_offset, u64 size)
10509{
10510 struct btrfs_fs_info *fs_info = trans->fs_info;
10511 struct btrfs_block_group_cache *cache;
10512 int ret;
10513
10514 btrfs_set_log_full_commit(trans);
10515
10516 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
10517 if (!cache)
10518 return -ENOMEM;
10519
10520 btrfs_set_block_group_used(&cache->item, bytes_used);
10521 btrfs_set_block_group_chunk_objectid(&cache->item,
10522 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
10523 btrfs_set_block_group_flags(&cache->item, type);
10524
10525 cache->flags = type;
10526 cache->last_byte_to_unpin = (u64)-1;
10527 cache->cached = BTRFS_CACHE_FINISHED;
10528 cache->needs_free_space = 1;
10529 ret = exclude_super_stripes(cache);
10530 if (ret) {
10531
10532
10533
10534
10535 free_excluded_extents(cache);
10536 btrfs_put_block_group(cache);
10537 return ret;
10538 }
10539
10540 add_new_free_space(cache, chunk_offset, chunk_offset + size);
10541
10542 free_excluded_extents(cache);
10543
10544#ifdef CONFIG_BTRFS_DEBUG
10545 if (btrfs_should_fragment_free_space(cache)) {
10546 u64 new_bytes_used = size - bytes_used;
10547
10548 bytes_used += new_bytes_used >> 1;
10549 fragment_free_space(cache);
10550 }
10551#endif
10552
10553
10554
10555
10556
10557 cache->space_info = __find_space_info(fs_info, cache->flags);
10558 ASSERT(cache->space_info);
10559
10560 ret = btrfs_add_block_group_cache(fs_info, cache);
10561 if (ret) {
10562 btrfs_remove_free_space_cache(cache);
10563 btrfs_put_block_group(cache);
10564 return ret;
10565 }
10566
10567
10568
10569
10570
10571 trace_btrfs_add_block_group(fs_info, cache, 1);
10572 update_space_info(fs_info, cache->flags, size, bytes_used,
10573 cache->bytes_super, &cache->space_info);
10574 update_global_block_rsv(fs_info);
10575
10576 link_block_group(cache);
10577
10578 list_add_tail(&cache->bg_list, &trans->new_bgs);
10579 trans->delayed_ref_updates++;
10580 btrfs_update_delayed_refs_rsv(trans);
10581
10582 set_avail_alloc_bits(fs_info, type);
10583 return 0;
10584}
10585
10586static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10587{
10588 u64 extra_flags = chunk_to_extended(flags) &
10589 BTRFS_EXTENDED_PROFILE_MASK;
10590
10591 write_seqlock(&fs_info->profiles_lock);
10592 if (flags & BTRFS_BLOCK_GROUP_DATA)
10593 fs_info->avail_data_alloc_bits &= ~extra_flags;
10594 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10595 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10596 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10597 fs_info->avail_system_alloc_bits &= ~extra_flags;
10598 write_sequnlock(&fs_info->profiles_lock);
10599}
10600
10601int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10602 u64 group_start, struct extent_map *em)
10603{
10604 struct btrfs_fs_info *fs_info = trans->fs_info;
10605 struct btrfs_root *root = fs_info->extent_root;
10606 struct btrfs_path *path;
10607 struct btrfs_block_group_cache *block_group;
10608 struct btrfs_free_cluster *cluster;
10609 struct btrfs_root *tree_root = fs_info->tree_root;
10610 struct btrfs_key key;
10611 struct inode *inode;
10612 struct kobject *kobj = NULL;
10613 int ret;
10614 int index;
10615 int factor;
10616 struct btrfs_caching_control *caching_ctl = NULL;
10617 bool remove_em;
10618 bool remove_rsv = false;
10619
10620 block_group = btrfs_lookup_block_group(fs_info, group_start);
10621 BUG_ON(!block_group);
10622 BUG_ON(!block_group->ro);
10623
10624 trace_btrfs_remove_block_group(block_group);
10625
10626
10627
10628
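 /*
  * Free the reserved super bytes from this block group before
  * deleting it.
  */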
10629 free_excluded_extents(block_group);
10630 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
10631 block_group->key.offset);
10632
10633 memcpy(&key, &block_group->key, sizeof(key));
10634 index = btrfs_bg_flags_to_raid_index(block_group->flags);
10635 factor = btrfs_bg_type_to_factor(block_group->flags);
10636
10637
10638 cluster = &fs_info->data_alloc_cluster;
10639 spin_lock(&cluster->refill_lock);
10640 btrfs_return_cluster_to_free_space(block_group, cluster);
10641 spin_unlock(&cluster->refill_lock);
10642
10643
10644
10645
10646
10647 cluster = &fs_info->meta_alloc_cluster;
10648 spin_lock(&cluster->refill_lock);
10649 btrfs_return_cluster_to_free_space(block_group, cluster);
10650 spin_unlock(&cluster->refill_lock);
10651
10652 path = btrfs_alloc_path();
10653 if (!path) {
10654 ret = -ENOMEM;
10655 goto out;
10656 }
10657
10658
10659
10660
10661
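 /*
  * Grab the free space cache inode first so that any iput done for the
  * io_list below isn't the final iput.
  */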
10662 inode = lookup_free_space_inode(block_group, path);
10663
10664 mutex_lock(&trans->transaction->cache_write_mutex);
10665
10666
10667
10668
10669 spin_lock(&trans->transaction->dirty_bgs_lock);
10670 if (!list_empty(&block_group->io_list)) {
10671 list_del_init(&block_group->io_list);
10672
10673 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10674
10675 spin_unlock(&trans->transaction->dirty_bgs_lock);
10676 btrfs_wait_cache_io(trans, block_group, path);
10677 btrfs_put_block_group(block_group);
10678 spin_lock(&trans->transaction->dirty_bgs_lock);
10679 }
10680
10681 if (!list_empty(&block_group->dirty_list)) {
10682 list_del_init(&block_group->dirty_list);
10683 remove_rsv = true;
10684 btrfs_put_block_group(block_group);
10685 }
10686 spin_unlock(&trans->transaction->dirty_bgs_lock);
10687 mutex_unlock(&trans->transaction->cache_write_mutex);
10688
10689 if (!IS_ERR(inode)) {
10690 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
10691 if (ret) {
10692 btrfs_add_delayed_iput(inode);
10693 goto out;
10694 }
10695 clear_nlink(inode);
10696
10697 spin_lock(&block_group->lock);
10698 if (block_group->iref) {
10699 block_group->iref = 0;
10700 block_group->inode = NULL;
10701 spin_unlock(&block_group->lock);
10702 iput(inode);
10703 } else {
10704 spin_unlock(&block_group->lock);
10705 }
10706
10707 btrfs_add_delayed_iput(inode);
10708 }
10709
10710 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10711 key.offset = block_group->key.objectid;
10712 key.type = 0;
10713
10714 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10715 if (ret < 0)
10716 goto out;
10717 if (ret > 0)
10718 btrfs_release_path(path);
10719 if (ret == 0) {
10720 ret = btrfs_del_item(trans, tree_root, path);
10721 if (ret)
10722 goto out;
10723 btrfs_release_path(path);
10724 }
10725
10726 spin_lock(&fs_info->block_group_cache_lock);
10727 rb_erase(&block_group->cache_node,
10728 &fs_info->block_group_cache_tree);
10729 RB_CLEAR_NODE(&block_group->cache_node);
10730
10731 if (fs_info->first_logical_byte == block_group->key.objectid)
10732 fs_info->first_logical_byte = (u64)-1;
10733 spin_unlock(&fs_info->block_group_cache_lock);
10734
10735 down_write(&block_group->space_info->groups_sem);
10736
10737
10738
10739
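 /*
  * we must use list_del_init so people can check to see if they
  * are still on the list after taking the semaphore
  */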
10740 list_del_init(&block_group->list);
10741 if (list_empty(&block_group->space_info->block_groups[index])) {
10742 kobj = block_group->space_info->block_group_kobjs[index];
10743 block_group->space_info->block_group_kobjs[index] = NULL;
10744 clear_avail_alloc_bits(fs_info, block_group->flags);
10745 }
10746 up_write(&block_group->space_info->groups_sem);
10747 if (kobj) {
10748 kobject_del(kobj);
10749 kobject_put(kobj);
10750 }
10751
10752 if (block_group->has_caching_ctl)
10753 caching_ctl = get_caching_control(block_group);
10754 if (block_group->cached == BTRFS_CACHE_STARTED)
10755 wait_block_group_cache_done(block_group);
10756 if (block_group->has_caching_ctl) {
10757 down_write(&fs_info->commit_root_sem);
10758 if (!caching_ctl) {
10759 struct btrfs_caching_control *ctl;
10760
10761 list_for_each_entry(ctl,
10762 &fs_info->caching_block_groups, list)
10763 if (ctl->block_group == block_group) {
10764 caching_ctl = ctl;
10765 refcount_inc(&caching_ctl->count);
10766 break;
10767 }
10768 }
10769 if (caching_ctl)
10770 list_del_init(&caching_ctl->list);
10771 up_write(&fs_info->commit_root_sem);
10772 if (caching_ctl) {
10773
10774 put_caching_control(caching_ctl);
10775 put_caching_control(caching_ctl);
10776 }
10777 }
10778
10779 spin_lock(&trans->transaction->dirty_bgs_lock);
10780 WARN_ON(!list_empty(&block_group->dirty_list));
10781 WARN_ON(!list_empty(&block_group->io_list));
10782 spin_unlock(&trans->transaction->dirty_bgs_lock);
10783
10784 btrfs_remove_free_space_cache(block_group);
10785
10786 spin_lock(&block_group->space_info->lock);
10787 list_del_init(&block_group->ro_list);
10788
10789 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
10790 WARN_ON(block_group->space_info->total_bytes
10791 < block_group->key.offset);
10792 WARN_ON(block_group->space_info->bytes_readonly
10793 < block_group->key.offset);
10794 WARN_ON(block_group->space_info->disk_total
10795 < block_group->key.offset * factor);
10796 }
10797 block_group->space_info->total_bytes -= block_group->key.offset;
10798 block_group->space_info->bytes_readonly -= block_group->key.offset;
10799 block_group->space_info->disk_total -= block_group->key.offset * factor;
10800
10801 spin_unlock(&block_group->space_info->lock);
10802
10803 memcpy(&key, &block_group->key, sizeof(key));
10804
10805 mutex_lock(&fs_info->chunk_mutex);
10806 spin_lock(&block_group->lock);
10807 block_group->removed = 1;
10830
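 /*
  * At this point trimming can't start on this block group: it has been
  * removed from the rbtree and all of its free space entries are gone,
  * so a concurrent trim either already holds a ->trimming reference or
  * can't find the group at all.  Keeping the extent map around until
  * trimming is finished prevents the same logical and physical ranges
  * from being reused for a new block group while a discard of this one
  * may still be in progress.
  */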
10831 remove_em = (atomic_read(&block_group->trimming) == 0);
10832 spin_unlock(&block_group->lock);
10833
10834 mutex_unlock(&fs_info->chunk_mutex);
10835
10836 ret = remove_block_group_free_space(trans, block_group);
10837 if (ret)
10838 goto out;
10839
10840 btrfs_put_block_group(block_group);
10841 btrfs_put_block_group(block_group);
10842
10843 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10844 if (ret > 0)
10845 ret = -EIO;
10846 if (ret < 0)
10847 goto out;
10848
10849 ret = btrfs_del_item(trans, root, path);
10850 if (ret)
10851 goto out;
10852
10853 if (remove_em) {
10854 struct extent_map_tree *em_tree;
10855
10856 em_tree = &fs_info->mapping_tree.map_tree;
10857 write_lock(&em_tree->lock);
10858 remove_extent_mapping(em_tree, em);
10859 write_unlock(&em_tree->lock);
10860
10861 free_extent_map(em);
10862 }
10863out:
10864 if (remove_rsv)
10865 btrfs_delayed_refs_rsv_release(fs_info, 1);
10866 btrfs_free_path(path);
10867 return ret;
10868}
10869
10870struct btrfs_trans_handle *
10871btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10872 const u64 chunk_offset)
10873{
10874 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10875 struct extent_map *em;
10876 struct map_lookup *map;
10877 unsigned int num_items;
10878
10879 read_lock(&em_tree->lock);
10880 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10881 read_unlock(&em_tree->lock);
10882 ASSERT(em && em->start == chunk_offset);
10902
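 /*
  * We need to reserve 3 + N units from the metadata space info in order
  * to remove a block group (done at btrfs_remove_chunk() and at
  * btrfs_remove_block_group()), which are used for:
  *
  * 1 unit for adding the free space inode's orphan (located in the tree
  * of tree roots).
  * 1 unit for deleting the block group item (located in the extent
  * tree).
  * 1 unit for deleting the free space item (located in the tree of tree
  * roots).
  * N units for deleting N device extent items corresponding to each
  * stripe (located in the device tree).
  */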
10903 map = em->map_lookup;
10904 num_items = 3 + map->num_stripes;
10905 free_extent_map(em);
10906
10907 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10908 num_items, 1);
10909}
10910
10911
10912
10913
10914
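/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 */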
10915void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10916{
10917 struct btrfs_block_group_cache *block_group;
10918 struct btrfs_space_info *space_info;
10919 struct btrfs_trans_handle *trans;
10920 int ret = 0;
10921
10922 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
10923 return;
10924
10925 spin_lock(&fs_info->unused_bgs_lock);
10926 while (!list_empty(&fs_info->unused_bgs)) {
10927 u64 start, end;
10928 int trimming;
10929
10930 block_group = list_first_entry(&fs_info->unused_bgs,
10931 struct btrfs_block_group_cache,
10932 bg_list);
10933 list_del_init(&block_group->bg_list);
10934
10935 space_info = block_group->space_info;
10936
10937 if (ret || btrfs_mixed_space_info(space_info)) {
10938 btrfs_put_block_group(block_group);
10939 continue;
10940 }
10941 spin_unlock(&fs_info->unused_bgs_lock);
10942
10943 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10944
10945
10946 down_write(&space_info->groups_sem);
10947 spin_lock(&block_group->lock);
10948 if (block_group->reserved || block_group->pinned ||
10949 btrfs_block_group_used(&block_group->item) ||
10950 block_group->ro ||
10951 list_is_singular(&block_group->list)) {
10952
10953
10954
10955
10956
10957
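 /*
  * We want to bail if we made new allocations or have
  * outstanding allocations in this block group.  We do
  * the ro check in case balance is currently acting on
  * this block group.
  */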
10958 trace_btrfs_skip_unused_block_group(block_group);
10959 spin_unlock(&block_group->lock);
10960 up_write(&space_info->groups_sem);
10961 goto next;
10962 }
10963 spin_unlock(&block_group->lock);
10964
10965
10966 ret = inc_block_group_ro(block_group, 0);
10967 up_write(&space_info->groups_sem);
10968 if (ret < 0) {
10969 ret = 0;
10970 goto next;
10971 }
10972
10973
10974
10975
10976
10977 trans = btrfs_start_trans_remove_block_group(fs_info,
10978 block_group->key.objectid);
10979 if (IS_ERR(trans)) {
10980 btrfs_dec_block_group_ro(block_group);
10981 ret = PTR_ERR(trans);
10982 goto next;
10983 }
10984
10985
10986
10987
10988
10989 start = block_group->key.objectid;
10990 end = start + block_group->key.offset - 1;
11001
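 /*
  * Hold the unused_bg_unpin_mutex while we clear this block group's
  * range from the freed_extents trees, so we don't race with a
  * concurrent unpin of the same range during extent commit.
  */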
11002 mutex_lock(&fs_info->unused_bg_unpin_mutex);
11003 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
11004 EXTENT_DIRTY);
11005 if (ret) {
11006 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
11007 btrfs_dec_block_group_ro(block_group);
11008 goto end_trans;
11009 }
11010 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
11011 EXTENT_DIRTY);
11012 if (ret) {
11013 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
11014 btrfs_dec_block_group_ro(block_group);
11015 goto end_trans;
11016 }
11017 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
11018
11019
11020 spin_lock(&space_info->lock);
11021 spin_lock(&block_group->lock);
11022
11023 update_bytes_pinned(space_info, -block_group->pinned);
11024 space_info->bytes_readonly += block_group->pinned;
11025 percpu_counter_add_batch(&space_info->total_bytes_pinned,
11026 -block_group->pinned,
11027 BTRFS_TOTAL_BYTES_PINNED_BATCH);
11028 block_group->pinned = 0;
11029
11030 spin_unlock(&block_group->lock);
11031 spin_unlock(&space_info->lock);
11032
11033
11034 trimming = btrfs_test_opt(fs_info, DISCARD);
11035
11036
11037 if (trimming)
11038 btrfs_get_block_group_trimming(block_group);
11039
11040
11041
11042
11043
11044 ret = btrfs_remove_chunk(trans, block_group->key.objectid);
11045
11046 if (ret) {
11047 if (trimming)
11048 btrfs_put_block_group_trimming(block_group);
11049 goto end_trans;
11050 }
11051
11052
11053
11054
11055
11056
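 /*
  * If we're not mounted with -odiscard we can just forget about this
  * block group now.  Otherwise keep it around until transaction commit
  * so the actual discard can happen.
  */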
11057 if (trimming) {
11058 spin_lock(&fs_info->unused_bgs_lock);
11059
11060
11061
11062
11063
11064 list_move(&block_group->bg_list,
11065 &trans->transaction->deleted_bgs);
11066 spin_unlock(&fs_info->unused_bgs_lock);
11067 btrfs_get_block_group(block_group);
11068 }
11069end_trans:
11070 btrfs_end_transaction(trans);
11071next:
11072 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
11073 btrfs_put_block_group(block_group);
11074 spin_lock(&fs_info->unused_bgs_lock);
11075 }
11076 spin_unlock(&fs_info->unused_bgs_lock);
11077}
11078
11079int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
11080{
11081 struct btrfs_super_block *disk_super;
11082 u64 features;
11083 u64 flags;
11084 int mixed = 0;
11085 int ret;
11086
11087 disk_super = fs_info->super_copy;
11088 if (!btrfs_super_root(disk_super))
11089 return -EINVAL;
11090
11091 features = btrfs_super_incompat_flags(disk_super);
11092 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
11093 mixed = 1;
11094
11095 flags = BTRFS_BLOCK_GROUP_SYSTEM;
11096 ret = create_space_info(fs_info, flags);
11097 if (ret)
11098 goto out;
11099
11100 if (mixed) {
11101 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
11102 ret = create_space_info(fs_info, flags);
11103 } else {
11104 flags = BTRFS_BLOCK_GROUP_METADATA;
11105 ret = create_space_info(fs_info, flags);
11106 if (ret)
11107 goto out;
11108
11109 flags = BTRFS_BLOCK_GROUP_DATA;
11110 ret = create_space_info(fs_info, flags);
11111 }
11112out:
11113 return ret;
11114}
11115
11116int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
11117 u64 start, u64 end)
11118{
11119 return unpin_extent_range(fs_info, start, end, false);
11120}
11141
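/*
 * Trim the unallocated space on a device.
 *
 * We don't want a transaction for this since the discard may take a
 * substantial amount of time.  Instead we hold the chunk_mutex while finding
 * and trimming each free range so that no chunks are allocated or released
 * underneath us, and drop it between ranges so allocations aren't blocked for
 * the whole trim.
 */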
11142static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
11143{
11144 u64 start = SZ_1M, len = 0, end = 0;
11145 int ret;
11146
11147 *trimmed = 0;
11148
11149
11150 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
11151 return 0;
11152
11153
11154 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
11155 return 0;
11156
11157
11158 if (device->total_bytes <= device->bytes_used)
11159 return 0;
11160
11161 ret = 0;
11162
11163 while (1) {
11164 struct btrfs_fs_info *fs_info = device->fs_info;
11165 u64 bytes;
11166
11167 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
11168 if (ret)
11169 break;
11170
11171 find_first_clear_extent_bit(&device->alloc_state, start,
11172 &start, &end,
11173 CHUNK_TRIMMED | CHUNK_ALLOCATED);
11174
11175
11176
11177
11178
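 /*
  * If find_first_clear_extent_bit() found a range that extends beyond
  * the end of the device, clamp it to the device size.
  */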
11179 end = min(end, device->total_bytes - 1);
11180 len = end - start + 1;
11181
11182
11183 if (!len) {
11184 mutex_unlock(&fs_info->chunk_mutex);
11185 ret = 0;
11186 break;
11187 }
11188
11189 ret = btrfs_issue_discard(device->bdev, start, len,
11190 &bytes);
11191 if (!ret)
11192 set_extent_bits(&device->alloc_state, start,
11193 start + bytes - 1,
11194 CHUNK_TRIMMED);
11195 mutex_unlock(&fs_info->chunk_mutex);
11196
11197 if (ret)
11198 break;
11199
11200 start += len;
11201 *trimmed += bytes;
11202
11203 if (fatal_signal_pending(current)) {
11204 ret = -ERESTARTSYS;
11205 break;
11206 }
11207
11208 cond_resched();
11209 }
11210
11211 return ret;
11212}
11222
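/*
 * Trim the whole filesystem by:
 * 1) trimming the free space in each block group
 * 2) trimming the unallocated space on each device
 *
 * This will also continue trimming even if a block group or device encounters
 * an error.  The return value will be the last error, or 0 if nothing bad
 * happens.
 */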
11223int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
11224{
11225 struct btrfs_block_group_cache *cache = NULL;
11226 struct btrfs_device *device;
11227 struct list_head *devices;
11228 u64 group_trimmed;
11229 u64 start;
11230 u64 end;
11231 u64 trimmed = 0;
11232 u64 bg_failed = 0;
11233 u64 dev_failed = 0;
11234 int bg_ret = 0;
11235 int dev_ret = 0;
11236 int ret = 0;
11237
11238 cache = btrfs_lookup_first_block_group(fs_info, range->start);
11239 for (; cache; cache = next_block_group(cache)) {
11240 if (cache->key.objectid >= (range->start + range->len)) {
11241 btrfs_put_block_group(cache);
11242 break;
11243 }
11244
11245 start = max(range->start, cache->key.objectid);
11246 end = min(range->start + range->len,
11247 cache->key.objectid + cache->key.offset);
11248
11249 if (end - start >= range->minlen) {
11250 if (!block_group_cache_done(cache)) {
11251 ret = cache_block_group(cache, 0);
11252 if (ret) {
11253 bg_failed++;
11254 bg_ret = ret;
11255 continue;
11256 }
11257 ret = wait_block_group_cache_done(cache);
11258 if (ret) {
11259 bg_failed++;
11260 bg_ret = ret;
11261 continue;
11262 }
11263 }
11264 ret = btrfs_trim_block_group(cache,
11265 &group_trimmed,
11266 start,
11267 end,
11268 range->minlen);
11269
11270 trimmed += group_trimmed;
11271 if (ret) {
11272 bg_failed++;
11273 bg_ret = ret;
11274 continue;
11275 }
11276 }
11277 }
11278
11279 if (bg_failed)
11280 btrfs_warn(fs_info,
11281 "failed to trim %llu block group(s), last error %d",
11282 bg_failed, bg_ret);
11283 mutex_lock(&fs_info->fs_devices->device_list_mutex);
11284 devices = &fs_info->fs_devices->devices;
11285 list_for_each_entry(device, devices, dev_list) {
11286 ret = btrfs_trim_free_extents(device, &group_trimmed);
11287 if (ret) {
11288 dev_failed++;
11289 dev_ret = ret;
11290 break;
11291 }
11292
11293 trimmed += group_trimmed;
11294 }
11295 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
11296
11297 if (dev_failed)
11298 btrfs_warn(fs_info,
11299 "failed to trim %llu device(s), last error %d",
11300 dev_failed, dev_ret);
11301 range->len = trimmed;
11302 if (bg_ret)
11303 return bg_ret;
11304 return dev_ret;
11305}
11314
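/*
 * btrfs_{start,end}_write_no_snapshotting() are similar to
 * mnt_{want,drop}_write(): they are used to prevent some tasks from writing
 * data into the fs through (new) delalloc while a snapshot is being created,
 * which could make the snapshot inconsistent with what is on disk.
 */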
11315void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
11316{
11317 percpu_counter_dec(&root->subv_writers->counter);
11318 cond_wake_up(&root->subv_writers->wait);
11319}
11320
11321int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
11322{
11323 if (atomic_read(&root->will_be_snapshotted))
11324 return 0;
11325
11326 percpu_counter_inc(&root->subv_writers->counter);
11327
11328
11329
11330 smp_mb();
11331 if (atomic_read(&root->will_be_snapshotted)) {
11332 btrfs_end_write_no_snapshotting(root);
11333 return 0;
11334 }
11335 return 1;
11336}
11337
11338void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11339{
11340 while (true) {
11341 int ret;
11342
11343 ret = btrfs_start_write_no_snapshotting(root);
11344 if (ret)
11345 break;
11346 wait_var_event(&root->will_be_snapshotted,
11347 !atomic_read(&root->will_be_snapshotted));
11348 }
11349}
11350
11351void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
11352{
11353 struct btrfs_fs_info *fs_info = bg->fs_info;
11354
11355 spin_lock(&fs_info->unused_bgs_lock);
11356 if (list_empty(&bg->bg_list)) {
11357 btrfs_get_block_group(bg);
11358 trace_btrfs_add_unused_block_group(bg);
11359 list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
11360 }
11361 spin_unlock(&fs_info->unused_bgs_lock);
11362}
11363