// SPDX-License-Identifier: GPL-2.0

#include "misc.h"
#include "ctree.h"
#include "block-group.h"
#include "space-info.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "volumes.h"
#include "transaction.h"
#include "ref-verify.h"
#include "sysfs.h"
#include "tree-log.h"
#include "delalloc-space.h"
#include "discard.h"
#include "raid56.h"
#include "zoned.h"
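/*
 * Return target profile flags in extended format or 0 if restripe for this
 * chunk_type is not in progress.
 *
 * Should be called with balance_lock held.
 */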
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	u64 target = 0;

	if (!bctl)
		return 0;

	if (flags & BTRFS_BLOCK_GROUP_DATA &&
	    bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
	} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
		   bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
	} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
		   bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
	}

	return target;
}
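/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Return reduced profile in chunk format.  If profile changing is in progress
 * (either running or paused) picks the target profile (if it's already
 * available), otherwise falls back to plain reducing.
 */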
55static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
56{
57 u64 num_devices = fs_info->fs_devices->rw_devices;
58 u64 target;
59 u64 raid_type;
60 u64 allowed = 0;

	/*
	 * See if restripe for this chunk_type is in progress, if so try to
	 * reduce to the target profile
	 */
66 spin_lock(&fs_info->balance_lock);
67 target = get_restripe_target(fs_info, flags);
68 if (target) {
69 spin_unlock(&fs_info->balance_lock);
70 return extended_to_chunk(target);
71 }
72 spin_unlock(&fs_info->balance_lock);
73
	/* First, mask out the RAID levels which aren't possible */
75 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
76 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
77 allowed |= btrfs_raid_array[raid_type].bg_flag;
78 }
79 allowed &= flags;
80
81 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
82 allowed = BTRFS_BLOCK_GROUP_RAID6;
83 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
84 allowed = BTRFS_BLOCK_GROUP_RAID5;
85 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
86 allowed = BTRFS_BLOCK_GROUP_RAID10;
87 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
88 allowed = BTRFS_BLOCK_GROUP_RAID1;
89 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
90 allowed = BTRFS_BLOCK_GROUP_RAID0;
91
92 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
93
94 return extended_to_chunk(flags | allowed);
95}
96
97u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
98{
99 unsigned seq;
100 u64 flags;
101
102 do {
103 flags = orig_flags;
104 seq = read_seqbegin(&fs_info->profiles_lock);
105
106 if (flags & BTRFS_BLOCK_GROUP_DATA)
107 flags |= fs_info->avail_data_alloc_bits;
108 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
109 flags |= fs_info->avail_system_alloc_bits;
110 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
111 flags |= fs_info->avail_metadata_alloc_bits;
112 } while (read_seqretry(&fs_info->profiles_lock, seq));
113
114 return btrfs_reduce_alloc_profile(fs_info, flags);
115}
116
117void btrfs_get_block_group(struct btrfs_block_group *cache)
118{
119 refcount_inc(&cache->refs);
120}
121
122void btrfs_put_block_group(struct btrfs_block_group *cache)
123{
124 if (refcount_dec_and_test(&cache->refs)) {
125 WARN_ON(cache->pinned > 0);
126 WARN_ON(cache->reserved > 0);
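		/*
		 * A block_group shouldn't be on the discard_list anymore.
		 * Remove the block_group from the discard_list to prevent us
		 * from causing a panic due to NULL pointer dereference.
		 */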
133 if (WARN_ON(!list_empty(&cache->discard_list)))
134 btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
135 cache);
136
137
138
139
140
141
142
143
144
145 WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
146 kfree(cache->free_space_ctl);
147 kfree(cache);
148 }
149}
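/*
 * This adds the block group to the fs_info rb tree for the block group cache
 */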
154static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
155 struct btrfs_block_group *block_group)
156{
157 struct rb_node **p;
158 struct rb_node *parent = NULL;
159 struct btrfs_block_group *cache;
160
161 ASSERT(block_group->length != 0);
162
163 spin_lock(&info->block_group_cache_lock);
164 p = &info->block_group_cache_tree.rb_node;
165
166 while (*p) {
167 parent = *p;
168 cache = rb_entry(parent, struct btrfs_block_group, cache_node);
169 if (block_group->start < cache->start) {
170 p = &(*p)->rb_left;
171 } else if (block_group->start > cache->start) {
172 p = &(*p)->rb_right;
173 } else {
174 spin_unlock(&info->block_group_cache_lock);
175 return -EEXIST;
176 }
177 }
178
179 rb_link_node(&block_group->cache_node, parent, p);
180 rb_insert_color(&block_group->cache_node,
181 &info->block_group_cache_tree);
182
183 if (info->first_logical_byte > block_group->start)
184 info->first_logical_byte = block_group->start;
185
186 spin_unlock(&info->block_group_cache_lock);
187
188 return 0;
189}
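/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */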
195static struct btrfs_block_group *block_group_cache_tree_search(
196 struct btrfs_fs_info *info, u64 bytenr, int contains)
197{
198 struct btrfs_block_group *cache, *ret = NULL;
199 struct rb_node *n;
200 u64 end, start;
201
202 spin_lock(&info->block_group_cache_lock);
203 n = info->block_group_cache_tree.rb_node;
204
205 while (n) {
206 cache = rb_entry(n, struct btrfs_block_group, cache_node);
207 end = cache->start + cache->length - 1;
208 start = cache->start;
209
210 if (bytenr < start) {
211 if (!contains && (!ret || start < ret->start))
212 ret = cache;
213 n = n->rb_left;
214 } else if (bytenr > start) {
215 if (contains && bytenr <= end) {
216 ret = cache;
217 break;
218 }
219 n = n->rb_right;
220 } else {
221 ret = cache;
222 break;
223 }
224 }
225 if (ret) {
226 btrfs_get_block_group(ret);
227 if (bytenr == 0 && info->first_logical_byte > ret->start)
228 info->first_logical_byte = ret->start;
229 }
230 spin_unlock(&info->block_group_cache_lock);
231
232 return ret;
233}

/*
 * Return the block group that starts at or after bytenr
 */
238struct btrfs_block_group *btrfs_lookup_first_block_group(
239 struct btrfs_fs_info *info, u64 bytenr)
240{
241 return block_group_cache_tree_search(info, bytenr, 0);
242}

/*
 * Return the block group that contains the given bytenr
 */
247struct btrfs_block_group *btrfs_lookup_block_group(
248 struct btrfs_fs_info *info, u64 bytenr)
249{
250 return block_group_cache_tree_search(info, bytenr, 1);
251}
252
struct btrfs_block_group *btrfs_next_block_group(
		struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct rb_node *node;

	spin_lock(&fs_info->block_group_cache_lock);

	/* If our block group was removed, we need a full search. */
	if (RB_EMPTY_NODE(&cache->cache_node)) {
		const u64 next_bytenr = cache->start + cache->length;

		spin_unlock(&fs_info->block_group_cache_lock);
		btrfs_put_block_group(cache);
		cache = btrfs_lookup_first_block_group(fs_info, next_bytenr);
		return cache;
	}
	node = rb_next(&cache->cache_node);
	btrfs_put_block_group(cache);
	if (node) {
		cache = rb_entry(node, struct btrfs_block_group, cache_node);
		btrfs_get_block_group(cache);
	} else
		cache = NULL;
	spin_unlock(&fs_info->block_group_cache_lock);
	return cache;
}
279
280bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
281{
282 struct btrfs_block_group *bg;
283 bool ret = true;
284
285 bg = btrfs_lookup_block_group(fs_info, bytenr);
286 if (!bg)
287 return false;
288
289 spin_lock(&bg->lock);
290 if (bg->ro)
291 ret = false;
292 else
293 atomic_inc(&bg->nocow_writers);
294 spin_unlock(&bg->lock);
295
	/* No put on block group, done by btrfs_dec_nocow_writers */
297 if (!ret)
298 btrfs_put_block_group(bg);
299
300 return ret;
301}
302
303void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
304{
305 struct btrfs_block_group *bg;
306
307 bg = btrfs_lookup_block_group(fs_info, bytenr);
308 ASSERT(bg);
309 if (atomic_dec_and_test(&bg->nocow_writers))
310 wake_up_var(&bg->nocow_writers);
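	/*
	 * Once for our lookup and once for the lookup done by a previous call
	 * to btrfs_inc_nocow_writers().
	 */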
315 btrfs_put_block_group(bg);
316 btrfs_put_block_group(bg);
317}
318
319void btrfs_wait_nocow_writers(struct btrfs_block_group *bg)
320{
321 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
322}
323
324void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
325 const u64 start)
326{
327 struct btrfs_block_group *bg;
328
329 bg = btrfs_lookup_block_group(fs_info, start);
330 ASSERT(bg);
331 if (atomic_dec_and_test(&bg->reservations))
332 wake_up_var(&bg->reservations);
333 btrfs_put_block_group(bg);
334}
335
336void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
337{
338 struct btrfs_space_info *space_info = bg->space_info;
339
340 ASSERT(bg->ro);
341
342 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
343 return;
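	/*
	 * Our block group is read only but before we set it to read only,
	 * some task might have had allocated an extent from it already, but it
	 * has not yet created a respective ordered extent (and added it to a
	 * root's list of ordered extents).
	 * Therefore wait for any task currently allocating extents, since the
	 * block group's reservations counter is incremented while a read lock
	 * on the groups' semaphore is held and decremented after releasing
	 * the read access on that semaphore and creating the ordered extent.
	 */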
355 down_write(&space_info->groups_sem);
356 up_write(&space_info->groups_sem);
357
358 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
359}
360
361struct btrfs_caching_control *btrfs_get_caching_control(
362 struct btrfs_block_group *cache)
363{
364 struct btrfs_caching_control *ctl;
365
366 spin_lock(&cache->lock);
367 if (!cache->caching_ctl) {
368 spin_unlock(&cache->lock);
369 return NULL;
370 }
371
372 ctl = cache->caching_ctl;
373 refcount_inc(&ctl->count);
374 spin_unlock(&cache->lock);
375 return ctl;
376}
377
378void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
379{
380 if (refcount_dec_and_test(&ctl->count))
381 kfree(ctl);
382}
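/*
 * When we wait for progress in the block group caching, it's because our
 * allocation attempt failed at least once.  So, we must sleep and let some
 * progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to show
 * up, and then it will check the block group free space numbers for our min
 * num_bytes.  Another option is to have it go ahead and look in the rbtree for
 * a free extent of a given size, but this is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
 * any of the information in this block group.
 */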
397void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
398 u64 num_bytes)
399{
400 struct btrfs_caching_control *caching_ctl;
401
402 caching_ctl = btrfs_get_caching_control(cache);
403 if (!caching_ctl)
404 return;
405
406 wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
407 (cache->free_space_ctl->free_space >= num_bytes));
408
409 btrfs_put_caching_control(caching_ctl);
410}
411
412int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
413{
414 struct btrfs_caching_control *caching_ctl;
415 int ret = 0;
416
417 caching_ctl = btrfs_get_caching_control(cache);
418 if (!caching_ctl)
419 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
420
421 wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
422 if (cache->cached == BTRFS_CACHE_ERROR)
423 ret = -EIO;
424 btrfs_put_caching_control(caching_ctl);
425 return ret;
426}
427
428static bool space_cache_v1_done(struct btrfs_block_group *cache)
429{
430 bool ret;
431
432 spin_lock(&cache->lock);
433 ret = cache->cached != BTRFS_CACHE_FAST;
434 spin_unlock(&cache->lock);
435
436 return ret;
437}
438
439void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
440 struct btrfs_caching_control *caching_ctl)
441{
442 wait_event(caching_ctl->wait, space_cache_v1_done(cache));
443}
444
445#ifdef CONFIG_BTRFS_DEBUG
446static void fragment_free_space(struct btrfs_block_group *block_group)
447{
448 struct btrfs_fs_info *fs_info = block_group->fs_info;
449 u64 start = block_group->start;
450 u64 len = block_group->length;
451 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
452 fs_info->nodesize : fs_info->sectorsize;
453 u64 step = chunk << 1;
454
455 while (len > chunk) {
456 btrfs_remove_free_space(block_group, start, chunk);
457 start += step;
458 if (len < step)
459 len = 0;
460 else
461 len -= step;
462 }
463}
464#endif
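/*
 * This is only called by btrfs_cache_block_group, since we could have freed
 * extents we need to check the pinned_extents for any extents that can't be
 * used yet since their free space will be released as soon as the transaction
 * commits.
 */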
472u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
473{
474 struct btrfs_fs_info *info = block_group->fs_info;
475 u64 extent_start, extent_end, size, total_added = 0;
476 int ret;
477
478 while (start < end) {
479 ret = find_first_extent_bit(&info->excluded_extents, start,
480 &extent_start, &extent_end,
481 EXTENT_DIRTY | EXTENT_UPTODATE,
482 NULL);
483 if (ret)
484 break;
485
486 if (extent_start <= start) {
487 start = extent_end + 1;
488 } else if (extent_start > start && extent_start < end) {
489 size = extent_start - start;
490 total_added += size;
491 ret = btrfs_add_free_space_async_trimmed(block_group,
492 start, size);
493 BUG_ON(ret);
494 start = extent_end + 1;
495 } else {
496 break;
497 }
498 }
499
500 if (start < end) {
501 size = end - start;
502 total_added += size;
503 ret = btrfs_add_free_space_async_trimmed(block_group, start,
504 size);
505 BUG_ON(ret);
506 }
507
508 return total_added;
509}
510
511static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
512{
513 struct btrfs_block_group *block_group = caching_ctl->block_group;
514 struct btrfs_fs_info *fs_info = block_group->fs_info;
515 struct btrfs_root *extent_root = fs_info->extent_root;
516 struct btrfs_path *path;
517 struct extent_buffer *leaf;
518 struct btrfs_key key;
519 u64 total_found = 0;
520 u64 last = 0;
521 u32 nritems;
522 int ret;
523 bool wakeup = true;
524
525 path = btrfs_alloc_path();
526 if (!path)
527 return -ENOMEM;
528
529 last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET);
530
531#ifdef CONFIG_BTRFS_DEBUG
532
533
534
535
536
537 if (btrfs_should_fragment_free_space(block_group))
538 wakeup = false;
539#endif
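	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since its read-only.
	 */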
546 path->skip_locking = 1;
547 path->search_commit_root = 1;
548 path->reada = READA_FORWARD;
549
550 key.objectid = last;
551 key.offset = 0;
552 key.type = BTRFS_EXTENT_ITEM_KEY;
553
554next:
555 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
556 if (ret < 0)
557 goto out;
558
559 leaf = path->nodes[0];
560 nritems = btrfs_header_nritems(leaf);
561
562 while (1) {
563 if (btrfs_fs_closing(fs_info) > 1) {
564 last = (u64)-1;
565 break;
566 }
567
568 if (path->slots[0] < nritems) {
569 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
570 } else {
571 ret = btrfs_find_next_key(extent_root, path, &key, 0, 0);
572 if (ret)
573 break;
574
575 if (need_resched() ||
576 rwsem_is_contended(&fs_info->commit_root_sem)) {
577 if (wakeup)
578 caching_ctl->progress = last;
579 btrfs_release_path(path);
580 up_read(&fs_info->commit_root_sem);
581 mutex_unlock(&caching_ctl->mutex);
582 cond_resched();
583 mutex_lock(&caching_ctl->mutex);
584 down_read(&fs_info->commit_root_sem);
585 goto next;
586 }
587
588 ret = btrfs_next_leaf(extent_root, path);
589 if (ret < 0)
590 goto out;
591 if (ret)
592 break;
593 leaf = path->nodes[0];
594 nritems = btrfs_header_nritems(leaf);
595 continue;
596 }
597
598 if (key.objectid < last) {
599 key.objectid = last;
600 key.offset = 0;
601 key.type = BTRFS_EXTENT_ITEM_KEY;
602
603 if (wakeup)
604 caching_ctl->progress = last;
605 btrfs_release_path(path);
606 goto next;
607 }
608
609 if (key.objectid < block_group->start) {
610 path->slots[0]++;
611 continue;
612 }
613
614 if (key.objectid >= block_group->start + block_group->length)
615 break;
616
617 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
618 key.type == BTRFS_METADATA_ITEM_KEY) {
619 total_found += add_new_free_space(block_group, last,
620 key.objectid);
621 if (key.type == BTRFS_METADATA_ITEM_KEY)
622 last = key.objectid +
623 fs_info->nodesize;
624 else
625 last = key.objectid + key.offset;
626
627 if (total_found > CACHING_CTL_WAKE_UP) {
628 total_found = 0;
629 if (wakeup)
630 wake_up(&caching_ctl->wait);
631 }
632 }
633 path->slots[0]++;
634 }
635 ret = 0;
636
637 total_found += add_new_free_space(block_group, last,
638 block_group->start + block_group->length);
639 caching_ctl->progress = (u64)-1;
640
641out:
642 btrfs_free_path(path);
643 return ret;
644}
645
646static noinline void caching_thread(struct btrfs_work *work)
647{
648 struct btrfs_block_group *block_group;
649 struct btrfs_fs_info *fs_info;
650 struct btrfs_caching_control *caching_ctl;
651 int ret;
652
653 caching_ctl = container_of(work, struct btrfs_caching_control, work);
654 block_group = caching_ctl->block_group;
655 fs_info = block_group->fs_info;
656
657 mutex_lock(&caching_ctl->mutex);
658 down_read(&fs_info->commit_root_sem);
659
660 if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
661 ret = load_free_space_cache(block_group);
662 if (ret == 1) {
663 ret = 0;
664 goto done;
665 }
666
667
668
669
670
671 spin_lock(&block_group->lock);
672 block_group->cached = BTRFS_CACHE_STARTED;
673 spin_unlock(&block_group->lock);
674 wake_up(&caching_ctl->wait);
675 }
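	/*
	 * If we are in the transaction that populated the free space tree we
	 * can't actually cache from the free space tree as our commit root and
	 * real root are the same, so we could change the contents of the blocks
	 * before caching.
	 */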
684 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
685 !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
686 ret = load_free_space_tree(caching_ctl);
687 else
688 ret = load_extent_tree_free(caching_ctl);
689done:
690 spin_lock(&block_group->lock);
691 block_group->caching_ctl = NULL;
692 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
693 spin_unlock(&block_group->lock);
694
695#ifdef CONFIG_BTRFS_DEBUG
696 if (btrfs_should_fragment_free_space(block_group)) {
697 u64 bytes_used;
698
699 spin_lock(&block_group->space_info->lock);
700 spin_lock(&block_group->lock);
701 bytes_used = block_group->length - block_group->used;
702 block_group->space_info->bytes_used += bytes_used >> 1;
703 spin_unlock(&block_group->lock);
704 spin_unlock(&block_group->space_info->lock);
705 fragment_free_space(block_group);
706 }
707#endif
708
709 caching_ctl->progress = (u64)-1;
710
711 up_read(&fs_info->commit_root_sem);
712 btrfs_free_excluded_extents(block_group);
713 mutex_unlock(&caching_ctl->mutex);
714
715 wake_up(&caching_ctl->wait);
716
717 btrfs_put_caching_control(caching_ctl);
718 btrfs_put_block_group(block_group);
719}
720
721int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
722{
723 DEFINE_WAIT(wait);
724 struct btrfs_fs_info *fs_info = cache->fs_info;
725 struct btrfs_caching_control *caching_ctl = NULL;
726 int ret = 0;
727
	/* Allocator for zoned filesystems does not use the cache at all */
729 if (btrfs_is_zoned(fs_info))
730 return 0;
731
732 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
733 if (!caching_ctl)
734 return -ENOMEM;
735
736 INIT_LIST_HEAD(&caching_ctl->list);
737 mutex_init(&caching_ctl->mutex);
738 init_waitqueue_head(&caching_ctl->wait);
739 caching_ctl->block_group = cache;
740 caching_ctl->progress = cache->start;
741 refcount_set(&caching_ctl->count, 2);
742 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
743
744 spin_lock(&cache->lock);
745 if (cache->cached != BTRFS_CACHE_NO) {
746 kfree(caching_ctl);
747
748 caching_ctl = cache->caching_ctl;
749 if (caching_ctl)
750 refcount_inc(&caching_ctl->count);
751 spin_unlock(&cache->lock);
752 goto out;
753 }
754 WARN_ON(cache->caching_ctl);
755 cache->caching_ctl = caching_ctl;
756 if (btrfs_test_opt(fs_info, SPACE_CACHE))
757 cache->cached = BTRFS_CACHE_FAST;
758 else
759 cache->cached = BTRFS_CACHE_STARTED;
760 cache->has_caching_ctl = 1;
761 spin_unlock(&cache->lock);
762
763 spin_lock(&fs_info->block_group_cache_lock);
764 refcount_inc(&caching_ctl->count);
765 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
766 spin_unlock(&fs_info->block_group_cache_lock);
767
768 btrfs_get_block_group(cache);
769
770 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
771out:
772 if (load_cache_only && caching_ctl)
773 btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
774 if (caching_ctl)
775 btrfs_put_caching_control(caching_ctl);
776
777 return ret;
778}
779
780static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
781{
782 u64 extra_flags = chunk_to_extended(flags) &
783 BTRFS_EXTENDED_PROFILE_MASK;
784
785 write_seqlock(&fs_info->profiles_lock);
786 if (flags & BTRFS_BLOCK_GROUP_DATA)
787 fs_info->avail_data_alloc_bits &= ~extra_flags;
788 if (flags & BTRFS_BLOCK_GROUP_METADATA)
789 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
790 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
791 fs_info->avail_system_alloc_bits &= ~extra_flags;
792 write_sequnlock(&fs_info->profiles_lock);
793}
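/*
 * Clear incompat bits for the following feature(s):
 *
 * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
 *            in the whole filesystem
 *
 * - RAID1C34 - same as above for RAID1C3 and RAID1C4 block groups
 */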
803static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
804{
805 bool found_raid56 = false;
806 bool found_raid1c34 = false;
807
808 if ((flags & BTRFS_BLOCK_GROUP_RAID56_MASK) ||
809 (flags & BTRFS_BLOCK_GROUP_RAID1C3) ||
810 (flags & BTRFS_BLOCK_GROUP_RAID1C4)) {
811 struct list_head *head = &fs_info->space_info;
812 struct btrfs_space_info *sinfo;
813
814 list_for_each_entry_rcu(sinfo, head, list) {
815 down_read(&sinfo->groups_sem);
816 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
817 found_raid56 = true;
818 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
819 found_raid56 = true;
820 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C3]))
821 found_raid1c34 = true;
822 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4]))
823 found_raid1c34 = true;
824 up_read(&sinfo->groups_sem);
825 }
826 if (!found_raid56)
827 btrfs_clear_fs_incompat(fs_info, RAID56);
828 if (!found_raid1c34)
829 btrfs_clear_fs_incompat(fs_info, RAID1C34);
830 }
831}
832
833static int remove_block_group_item(struct btrfs_trans_handle *trans,
834 struct btrfs_path *path,
835 struct btrfs_block_group *block_group)
836{
837 struct btrfs_fs_info *fs_info = trans->fs_info;
838 struct btrfs_root *root;
839 struct btrfs_key key;
840 int ret;
841
842 root = fs_info->extent_root;
843 key.objectid = block_group->start;
844 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
845 key.offset = block_group->length;
846
847 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
848 if (ret > 0)
849 ret = -ENOENT;
850 if (ret < 0)
851 return ret;
852
853 ret = btrfs_del_item(trans, root, path);
854 return ret;
855}
856
857int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
858 u64 group_start, struct extent_map *em)
859{
860 struct btrfs_fs_info *fs_info = trans->fs_info;
861 struct btrfs_path *path;
862 struct btrfs_block_group *block_group;
863 struct btrfs_free_cluster *cluster;
864 struct inode *inode;
865 struct kobject *kobj = NULL;
866 int ret;
867 int index;
868 int factor;
869 struct btrfs_caching_control *caching_ctl = NULL;
870 bool remove_em;
871 bool remove_rsv = false;
872
873 block_group = btrfs_lookup_block_group(fs_info, group_start);
874 BUG_ON(!block_group);
875 BUG_ON(!block_group->ro);
876
877 trace_btrfs_remove_block_group(block_group);
878
879
880
881
882 btrfs_free_excluded_extents(block_group);
883 btrfs_free_ref_tree_range(fs_info, block_group->start,
884 block_group->length);
885
886 index = btrfs_bg_flags_to_raid_index(block_group->flags);
887 factor = btrfs_bg_type_to_factor(block_group->flags);
888
889
890 cluster = &fs_info->data_alloc_cluster;
891 spin_lock(&cluster->refill_lock);
892 btrfs_return_cluster_to_free_space(block_group, cluster);
893 spin_unlock(&cluster->refill_lock);
894
895
896
897
898
899 cluster = &fs_info->meta_alloc_cluster;
900 spin_lock(&cluster->refill_lock);
901 btrfs_return_cluster_to_free_space(block_group, cluster);
902 spin_unlock(&cluster->refill_lock);
903
904 btrfs_clear_treelog_bg(block_group);
905
906 path = btrfs_alloc_path();
907 if (!path) {
908 ret = -ENOMEM;
909 goto out;
910 }
911
912
913
914
915
916 inode = lookup_free_space_inode(block_group, path);
917
918 mutex_lock(&trans->transaction->cache_write_mutex);
919
920
921
922
923 spin_lock(&trans->transaction->dirty_bgs_lock);
924 if (!list_empty(&block_group->io_list)) {
925 list_del_init(&block_group->io_list);
926
927 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
928
929 spin_unlock(&trans->transaction->dirty_bgs_lock);
930 btrfs_wait_cache_io(trans, block_group, path);
931 btrfs_put_block_group(block_group);
932 spin_lock(&trans->transaction->dirty_bgs_lock);
933 }
934
935 if (!list_empty(&block_group->dirty_list)) {
936 list_del_init(&block_group->dirty_list);
937 remove_rsv = true;
938 btrfs_put_block_group(block_group);
939 }
940 spin_unlock(&trans->transaction->dirty_bgs_lock);
941 mutex_unlock(&trans->transaction->cache_write_mutex);
942
943 ret = btrfs_remove_free_space_inode(trans, inode, block_group);
944 if (ret)
945 goto out;
946
947 spin_lock(&fs_info->block_group_cache_lock);
948 rb_erase(&block_group->cache_node,
949 &fs_info->block_group_cache_tree);
950 RB_CLEAR_NODE(&block_group->cache_node);
951
	/* Once for the block groups rbtree */
953 btrfs_put_block_group(block_group);
954
955 if (fs_info->first_logical_byte == block_group->start)
956 fs_info->first_logical_byte = (u64)-1;
957 spin_unlock(&fs_info->block_group_cache_lock);
958
959 down_write(&block_group->space_info->groups_sem);
960
961
962
963
964 list_del_init(&block_group->list);
965 if (list_empty(&block_group->space_info->block_groups[index])) {
966 kobj = block_group->space_info->block_group_kobjs[index];
967 block_group->space_info->block_group_kobjs[index] = NULL;
968 clear_avail_alloc_bits(fs_info, block_group->flags);
969 }
970 up_write(&block_group->space_info->groups_sem);
971 clear_incompat_bg_bits(fs_info, block_group->flags);
972 if (kobj) {
973 kobject_del(kobj);
974 kobject_put(kobj);
975 }
976
977 if (block_group->has_caching_ctl)
978 caching_ctl = btrfs_get_caching_control(block_group);
979 if (block_group->cached == BTRFS_CACHE_STARTED)
980 btrfs_wait_block_group_cache_done(block_group);
981 if (block_group->has_caching_ctl) {
982 spin_lock(&fs_info->block_group_cache_lock);
983 if (!caching_ctl) {
984 struct btrfs_caching_control *ctl;
985
986 list_for_each_entry(ctl,
987 &fs_info->caching_block_groups, list)
988 if (ctl->block_group == block_group) {
989 caching_ctl = ctl;
990 refcount_inc(&caching_ctl->count);
991 break;
992 }
993 }
994 if (caching_ctl)
995 list_del_init(&caching_ctl->list);
996 spin_unlock(&fs_info->block_group_cache_lock);
997 if (caching_ctl) {
			/* Once for the caching bgs list and once for us. */
999 btrfs_put_caching_control(caching_ctl);
1000 btrfs_put_caching_control(caching_ctl);
1001 }
1002 }
1003
1004 spin_lock(&trans->transaction->dirty_bgs_lock);
1005 WARN_ON(!list_empty(&block_group->dirty_list));
1006 WARN_ON(!list_empty(&block_group->io_list));
1007 spin_unlock(&trans->transaction->dirty_bgs_lock);
1008
1009 btrfs_remove_free_space_cache(block_group);
1010
1011 spin_lock(&block_group->space_info->lock);
1012 list_del_init(&block_group->ro_list);
1013
1014 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
1015 WARN_ON(block_group->space_info->total_bytes
1016 < block_group->length);
1017 WARN_ON(block_group->space_info->bytes_readonly
1018 < block_group->length - block_group->zone_unusable);
1019 WARN_ON(block_group->space_info->bytes_zone_unusable
1020 < block_group->zone_unusable);
1021 WARN_ON(block_group->space_info->disk_total
1022 < block_group->length * factor);
1023 }
1024 block_group->space_info->total_bytes -= block_group->length;
1025 block_group->space_info->bytes_readonly -=
1026 (block_group->length - block_group->zone_unusable);
1027 block_group->space_info->bytes_zone_unusable -=
1028 block_group->zone_unusable;
1029 block_group->space_info->disk_total -= block_group->length * factor;
1030
1031 spin_unlock(&block_group->space_info->lock);
1032
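	/*
	 * Remove the free space for the block group from the free space tree
	 * and the block group's item from the extent tree before marking the
	 * block group as removed. This is to prevent races with tasks that
	 * freeze and unfreeze a block group, this task and another task
	 * allocating a new block group - the unfreeze task ends up removing
	 * the block group's extent map before the task calling this function
	 * deletes the block group item from the extent tree, allowing for
	 * another task to attempt to create another block group with the same
	 * item key (and failing with -EEXIST and a transaction abort).
	 */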
1044 ret = remove_block_group_free_space(trans, block_group);
1045 if (ret)
1046 goto out;
1047
1048 ret = remove_block_group_item(trans, path, block_group);
1049 if (ret < 0)
1050 goto out;
1051
1052 spin_lock(&block_group->lock);
1053 block_group->removed = 1;
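	/*
	 * At this point trimming or scrub can't start on this block group,
	 * because we removed the block group from the rbtree
	 * fs_info->block_group_cache_tree so no one can find it anymore and
	 * even if someone already got this block group before we removed it
	 * from the rbtree, they have already incremented block_group->frozen.
	 *
	 * We must not remove the extent map from fs_info->mapping_tree while
	 * there are such frozen users, to prevent the same logical address
	 * range and physical device space ranges from being reused for a new
	 * block group.  An fs trim operation (btrfs_trim_fs() /
	 * btrfs_ioctl_fitrim()) is completely transactionless, so while it is
	 * trimming a range the currently running transaction might finish and
	 * a new one start, allowing for new block groups to be created that
	 * can reuse the same physical device locations unless we take this
	 * special care.
	 *
	 * There may also be an implicit trim operation if the file system
	 * is mounted with -odiscard. The same protections must remain.
	 */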
1080 remove_em = (atomic_read(&block_group->frozen) == 0);
1081 spin_unlock(&block_group->lock);
1082
1083 if (remove_em) {
1084 struct extent_map_tree *em_tree;
1085
1086 em_tree = &fs_info->mapping_tree;
1087 write_lock(&em_tree->lock);
1088 remove_extent_mapping(em_tree, em);
1089 write_unlock(&em_tree->lock);
1090
1091 free_extent_map(em);
1092 }
1093
1094out:
	/* Once for the lookup reference */
1096 btrfs_put_block_group(block_group);
1097 if (remove_rsv)
1098 btrfs_delayed_refs_rsv_release(fs_info, 1);
1099 btrfs_free_path(path);
1100 return ret;
1101}
1102
1103struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
1104 struct btrfs_fs_info *fs_info, const u64 chunk_offset)
1105{
1106 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
1107 struct extent_map *em;
1108 struct map_lookup *map;
1109 unsigned int num_items;
1110
1111 read_lock(&em_tree->lock);
1112 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1113 read_unlock(&em_tree->lock);
1114 ASSERT(em && em->start == chunk_offset);
1115
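	/*
	 * We need to reserve 3 + N units from the metadata space info in order
	 * to remove a block group (done at btrfs_remove_chunk() and at
	 * btrfs_remove_block_group()), which are used for:
	 *
	 * 1 unit for adding the free space inode's orphan (located in the tree
	 * of tree roots).
	 * 1 unit for deleting the block group item (located in the extent
	 * tree).
	 * 1 unit for deleting the free space item (located in tree of tree
	 * roots).
	 * N units for deleting N device extent items corresponding to each
	 * stripe (located in the device tree).
	 *
	 * In order to remove a block group we also need to reserve units in the
	 * system space info in order to update the chunk tree (update one or
	 * more device items and remove one chunk item), but this is done at
	 * btrfs_remove_chunk() through a call to check_system_chunk().
	 */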
1135 map = em->map_lookup;
1136 num_items = 3 + map->num_stripes;
1137 free_extent_map(em);
1138
1139 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
1140 num_items);
1141}
1142
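/*
 * Mark block group @cache read-only, so later writes won't happen to block
 * group @cache.
 *
 * If @force is not set, this function will only mark the block group read-only
 * if we have enough free space in other metadata/system block groups.  If
 * @force is set, the block group is marked read-only without the free space
 * checks.
 *
 * NOTE: This function doesn't care if other block groups can contain all the
 * data in this block group. That check should be done by relocation routine,
 * not this function.
 */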
1156static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
1157{
1158 struct btrfs_space_info *sinfo = cache->space_info;
1159 u64 num_bytes;
1160 int ret = -ENOSPC;
1161
1162 spin_lock(&sinfo->lock);
1163 spin_lock(&cache->lock);
1164
1165 if (cache->swap_extents) {
1166 ret = -ETXTBSY;
1167 goto out;
1168 }
1169
1170 if (cache->ro) {
1171 cache->ro++;
1172 ret = 0;
1173 goto out;
1174 }
1175
1176 num_bytes = cache->length - cache->reserved - cache->pinned -
1177 cache->bytes_super - cache->zone_unusable - cache->used;
1178
1179
1180
1181
1182
1183 if (force) {
1184 ret = 0;
1185 } else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
1186 u64 sinfo_used = btrfs_space_info_used(sinfo, true);
1187
1188
1189
1190
1191
1192 if (sinfo_used + num_bytes <= sinfo->total_bytes)
1193 ret = 0;
1194 } else {
1195
1196
1197
1198
1199
1200
1201 if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
1202 BTRFS_RESERVE_NO_FLUSH))
1203 ret = 0;
1204 }
1205
1206 if (!ret) {
1207 sinfo->bytes_readonly += num_bytes;
1208 if (btrfs_is_zoned(cache->fs_info)) {
1209
1210 sinfo->bytes_readonly += cache->zone_unusable;
1211 sinfo->bytes_zone_unusable -= cache->zone_unusable;
1212 cache->zone_unusable = 0;
1213 }
1214 cache->ro++;
1215 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
1216 }
1217out:
1218 spin_unlock(&cache->lock);
1219 spin_unlock(&sinfo->lock);
1220 if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
1221 btrfs_info(cache->fs_info,
1222 "unable to make block group %llu ro", cache->start);
1223 btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
1224 }
1225 return ret;
1226}
1227
1228static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
1229 struct btrfs_block_group *bg)
1230{
1231 struct btrfs_fs_info *fs_info = bg->fs_info;
1232 struct btrfs_transaction *prev_trans = NULL;
1233 const u64 start = bg->start;
1234 const u64 end = start + bg->length - 1;
1235 int ret;
1236
1237 spin_lock(&fs_info->trans_lock);
1238 if (trans->transaction->list.prev != &fs_info->trans_list) {
1239 prev_trans = list_last_entry(&trans->transaction->list,
1240 struct btrfs_transaction, list);
1241 refcount_inc(&prev_trans->use_count);
1242 }
1243 spin_unlock(&fs_info->trans_lock);
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255 mutex_lock(&fs_info->unused_bg_unpin_mutex);
1256 if (prev_trans) {
1257 ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
1258 EXTENT_DIRTY);
1259 if (ret)
1260 goto out;
1261 }
1262
1263 ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
1264 EXTENT_DIRTY);
1265out:
1266 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
1267 if (prev_trans)
1268 btrfs_put_transaction(prev_trans);
1269
1270 return ret == 0;
1271}
1272
/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 */
1277void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
1278{
1279 struct btrfs_block_group *block_group;
1280 struct btrfs_space_info *space_info;
1281 struct btrfs_trans_handle *trans;
1282 const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
1283 int ret = 0;
1284
1285 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
1286 return;
1287
	/*
	 * Long running balances can keep us blocked here for eternity, so
	 * simply skip deletion if we're unable to get the mutex.
	 */
1292 if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
1293 return;
1294
1295 spin_lock(&fs_info->unused_bgs_lock);
1296 while (!list_empty(&fs_info->unused_bgs)) {
1297 int trimming;
1298
1299 block_group = list_first_entry(&fs_info->unused_bgs,
1300 struct btrfs_block_group,
1301 bg_list);
1302 list_del_init(&block_group->bg_list);
1303
1304 space_info = block_group->space_info;
1305
1306 if (ret || btrfs_mixed_space_info(space_info)) {
1307 btrfs_put_block_group(block_group);
1308 continue;
1309 }
1310 spin_unlock(&fs_info->unused_bgs_lock);
1311
1312 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
1313
		/* Don't want to race with allocators so take the groups_sem */
1315 down_write(&space_info->groups_sem);
1316
1317
1318
1319
1320
1321
1322 if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
1323 !btrfs_is_free_space_trimmed(block_group)) {
1324 trace_btrfs_skip_unused_block_group(block_group);
1325 up_write(&space_info->groups_sem);
1326
1327 btrfs_discard_queue_work(&fs_info->discard_ctl,
1328 block_group);
1329 goto next;
1330 }
1331
1332 spin_lock(&block_group->lock);
1333 if (block_group->reserved || block_group->pinned ||
1334 block_group->used || block_group->ro ||
1335 list_is_singular(&block_group->list)) {
1336
1337
1338
1339
1340
1341
1342 trace_btrfs_skip_unused_block_group(block_group);
1343 spin_unlock(&block_group->lock);
1344 up_write(&space_info->groups_sem);
1345 goto next;
1346 }
1347 spin_unlock(&block_group->lock);
1348
1349
1350 ret = inc_block_group_ro(block_group, 0);
1351 up_write(&space_info->groups_sem);
1352 if (ret < 0) {
1353 ret = 0;
1354 goto next;
1355 }
1356
1357
1358
1359
1360
1361 trans = btrfs_start_trans_remove_block_group(fs_info,
1362 block_group->start);
1363 if (IS_ERR(trans)) {
1364 btrfs_dec_block_group_ro(block_group);
1365 ret = PTR_ERR(trans);
1366 goto next;
1367 }
1368
1369
1370
1371
1372
1373 if (!clean_pinned_extents(trans, block_group)) {
1374 btrfs_dec_block_group_ro(block_group);
1375 goto end_trans;
1376 }
1377
1378
1379
1380
1381
1382
1383
1384
1385 spin_lock(&fs_info->discard_ctl.lock);
1386 if (!list_empty(&block_group->discard_list)) {
1387 spin_unlock(&fs_info->discard_ctl.lock);
1388 btrfs_dec_block_group_ro(block_group);
1389 btrfs_discard_queue_work(&fs_info->discard_ctl,
1390 block_group);
1391 goto end_trans;
1392 }
1393 spin_unlock(&fs_info->discard_ctl.lock);
1394
1395
1396 spin_lock(&space_info->lock);
1397 spin_lock(&block_group->lock);
1398
1399 btrfs_space_info_update_bytes_pinned(fs_info, space_info,
1400 -block_group->pinned);
1401 space_info->bytes_readonly += block_group->pinned;
1402 block_group->pinned = 0;
1403
1404 spin_unlock(&block_group->lock);
1405 spin_unlock(&space_info->lock);
1406
1407
1408
1409
1410
1411
1412
1413
1414 if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
1415 goto flip_async;
1416
1417
1418
1419
1420
1421 trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
1422 btrfs_is_zoned(fs_info);
1423
1424
1425 if (trimming)
1426 btrfs_freeze_block_group(block_group);
1427
1428
1429
1430
1431
1432 ret = btrfs_remove_chunk(trans, block_group->start);
1433
1434 if (ret) {
1435 if (trimming)
1436 btrfs_unfreeze_block_group(block_group);
1437 goto end_trans;
1438 }
1439
1440
1441
1442
1443
1444
1445 if (trimming) {
1446 spin_lock(&fs_info->unused_bgs_lock);
1447
1448
1449
1450
1451
1452 list_move(&block_group->bg_list,
1453 &trans->transaction->deleted_bgs);
1454 spin_unlock(&fs_info->unused_bgs_lock);
1455 btrfs_get_block_group(block_group);
1456 }
1457end_trans:
1458 btrfs_end_transaction(trans);
1459next:
1460 btrfs_put_block_group(block_group);
1461 spin_lock(&fs_info->unused_bgs_lock);
1462 }
1463 spin_unlock(&fs_info->unused_bgs_lock);
1464 mutex_unlock(&fs_info->reclaim_bgs_lock);
1465 return;
1466
1467flip_async:
1468 btrfs_end_transaction(trans);
1469 mutex_unlock(&fs_info->reclaim_bgs_lock);
1470 btrfs_put_block_group(block_group);
1471 btrfs_discard_punt_unused_bgs_list(fs_info);
1472}
1473
1474void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
1475{
1476 struct btrfs_fs_info *fs_info = bg->fs_info;
1477
1478 spin_lock(&fs_info->unused_bgs_lock);
1479 if (list_empty(&bg->bg_list)) {
1480 btrfs_get_block_group(bg);
1481 trace_btrfs_add_unused_block_group(bg);
1482 list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
1483 }
1484 spin_unlock(&fs_info->unused_bgs_lock);
1485}
1486
1487void btrfs_reclaim_bgs_work(struct work_struct *work)
1488{
1489 struct btrfs_fs_info *fs_info =
1490 container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
1491 struct btrfs_block_group *bg;
1492 struct btrfs_space_info *space_info;
1493 LIST_HEAD(again_list);
1494
1495 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
1496 return;
1497
1498 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
1499 return;
1500
	/*
	 * Long running balances can keep us blocked here for eternity, so
	 * simply skip reclaim if we're unable to get the mutex.
	 */
1505 if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
1506 btrfs_exclop_finish(fs_info);
1507 return;
1508 }
1509
1510 spin_lock(&fs_info->unused_bgs_lock);
1511 while (!list_empty(&fs_info->reclaim_bgs)) {
1512 u64 zone_unusable;
1513 int ret = 0;
1514
1515 bg = list_first_entry(&fs_info->reclaim_bgs,
1516 struct btrfs_block_group,
1517 bg_list);
1518 list_del_init(&bg->bg_list);
1519
1520 space_info = bg->space_info;
1521 spin_unlock(&fs_info->unused_bgs_lock);
1522
		/* Don't want to race with allocators so take the groups_sem */
1524 down_write(&space_info->groups_sem);
1525
1526 spin_lock(&bg->lock);
1527 if (bg->reserved || bg->pinned || bg->ro) {
1528
1529
1530
1531
1532
1533
1534 spin_unlock(&bg->lock);
1535 up_write(&space_info->groups_sem);
1536 goto next;
1537 }
1538 spin_unlock(&bg->lock);
1539
		/* Get out fast, in case we're unmounting the filesystem */
1541 if (btrfs_fs_closing(fs_info)) {
1542 up_write(&space_info->groups_sem);
1543 goto next;
1544 }
1545
1546
1547
1548
1549
1550
1551
1552 zone_unusable = bg->zone_unusable;
1553 ret = inc_block_group_ro(bg, 0);
1554 up_write(&space_info->groups_sem);
1555 if (ret < 0)
1556 goto next;
1557
1558 btrfs_info(fs_info,
1559 "reclaiming chunk %llu with %llu%% used %llu%% unusable",
1560 bg->start, div_u64(bg->used * 100, bg->length),
1561 div64_u64(zone_unusable * 100, bg->length));
1562 trace_btrfs_reclaim_block_group(bg);
1563 ret = btrfs_relocate_chunk(fs_info, bg->start);
1564 if (ret && ret != -EAGAIN)
1565 btrfs_err(fs_info, "error relocating chunk %llu",
1566 bg->start);
1567
1568next:
1569 spin_lock(&fs_info->unused_bgs_lock);
1570 if (ret == -EAGAIN && list_empty(&bg->bg_list))
1571 list_add_tail(&bg->bg_list, &again_list);
1572 else
1573 btrfs_put_block_group(bg);
1574 }
1575 list_splice_tail(&again_list, &fs_info->reclaim_bgs);
1576 spin_unlock(&fs_info->unused_bgs_lock);
1577 mutex_unlock(&fs_info->reclaim_bgs_lock);
1578 btrfs_exclop_finish(fs_info);
1579}
1580
1581void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
1582{
1583 spin_lock(&fs_info->unused_bgs_lock);
1584 if (!list_empty(&fs_info->reclaim_bgs))
1585 queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
1586 spin_unlock(&fs_info->unused_bgs_lock);
1587}
1588
1589void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
1590{
1591 struct btrfs_fs_info *fs_info = bg->fs_info;
1592
1593 spin_lock(&fs_info->unused_bgs_lock);
1594 if (list_empty(&bg->bg_list)) {
1595 btrfs_get_block_group(bg);
1596 trace_btrfs_add_reclaim_block_group(bg);
1597 list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
1598 }
1599 spin_unlock(&fs_info->unused_bgs_lock);
1600}
1601
1602static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
1603 struct btrfs_path *path)
1604{
1605 struct extent_map_tree *em_tree;
1606 struct extent_map *em;
1607 struct btrfs_block_group_item bg;
1608 struct extent_buffer *leaf;
1609 int slot;
1610 u64 flags;
1611 int ret = 0;
1612
1613 slot = path->slots[0];
1614 leaf = path->nodes[0];
1615
1616 em_tree = &fs_info->mapping_tree;
1617 read_lock(&em_tree->lock);
1618 em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
1619 read_unlock(&em_tree->lock);
1620 if (!em) {
1621 btrfs_err(fs_info,
1622 "logical %llu len %llu found bg but no related chunk",
1623 key->objectid, key->offset);
1624 return -ENOENT;
1625 }
1626
1627 if (em->start != key->objectid || em->len != key->offset) {
1628 btrfs_err(fs_info,
1629 "block group %llu len %llu mismatch with chunk %llu len %llu",
1630 key->objectid, key->offset, em->start, em->len);
1631 ret = -EUCLEAN;
1632 goto out_free_em;
1633 }
1634
1635 read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
1636 sizeof(bg));
1637 flags = btrfs_stack_block_group_flags(&bg) &
1638 BTRFS_BLOCK_GROUP_TYPE_MASK;
1639
1640 if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
1641 btrfs_err(fs_info,
1642"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
1643 key->objectid, key->offset, flags,
1644 (BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
1645 ret = -EUCLEAN;
1646 }
1647
1648out_free_em:
1649 free_extent_map(em);
1650 return ret;
1651}
1652
1653static int find_first_block_group(struct btrfs_fs_info *fs_info,
1654 struct btrfs_path *path,
1655 struct btrfs_key *key)
1656{
1657 struct btrfs_root *root = fs_info->extent_root;
1658 int ret;
1659 struct btrfs_key found_key;
1660 struct extent_buffer *leaf;
1661 int slot;
1662
1663 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
1664 if (ret < 0)
1665 return ret;
1666
1667 while (1) {
1668 slot = path->slots[0];
1669 leaf = path->nodes[0];
1670 if (slot >= btrfs_header_nritems(leaf)) {
1671 ret = btrfs_next_leaf(root, path);
1672 if (ret == 0)
1673 continue;
1674 if (ret < 0)
1675 goto out;
1676 break;
1677 }
1678 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1679
1680 if (found_key.objectid >= key->objectid &&
1681 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1682 ret = read_bg_from_eb(fs_info, &found_key, path);
1683 break;
1684 }
1685
1686 path->slots[0]++;
1687 }
1688out:
1689 return ret;
1690}
1691
1692static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1693{
1694 u64 extra_flags = chunk_to_extended(flags) &
1695 BTRFS_EXTENDED_PROFILE_MASK;
1696
1697 write_seqlock(&fs_info->profiles_lock);
1698 if (flags & BTRFS_BLOCK_GROUP_DATA)
1699 fs_info->avail_data_alloc_bits |= extra_flags;
1700 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1701 fs_info->avail_metadata_alloc_bits |= extra_flags;
1702 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1703 fs_info->avail_system_alloc_bits |= extra_flags;
1704 write_sequnlock(&fs_info->profiles_lock);
1705}
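/*
 * Map a physical disk address to a list of logical addresses
 *
 * @fs_info:       the filesystem
 * @chunk_start:   logical address of block group
 * @bdev:          physical device to resolve, can be NULL to indicate any device
 * @physical:      physical address to map to logical addresses
 * @logical:       return array of logical addresses which map to @physical
 * @naddrs:        length of @logical
 * @stripe_len:    size of IO stripe for the given block group
 *
 * Maps a particular @physical disk address to a list of @logical addresses.
 * Used primarily to exclude those portions of a block group that contain super
 * block copies.
 */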
1722int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
1723 struct block_device *bdev, u64 physical, u64 **logical,
1724 int *naddrs, int *stripe_len)
1725{
1726 struct extent_map *em;
1727 struct map_lookup *map;
1728 u64 *buf;
1729 u64 bytenr;
1730 u64 data_stripe_length;
1731 u64 io_stripe_size;
1732 int i, nr = 0;
1733 int ret = 0;
1734
1735 em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
1736 if (IS_ERR(em))
1737 return -EIO;
1738
1739 map = em->map_lookup;
1740 data_stripe_length = em->orig_block_len;
1741 io_stripe_size = map->stripe_len;
1742 chunk_start = em->start;
1743
	/* For RAID5/6 adjust to a full IO stripe length */
1745 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
1746 io_stripe_size = map->stripe_len * nr_data_stripes(map);
1747
1748 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
1749 if (!buf) {
1750 ret = -ENOMEM;
1751 goto out;
1752 }
1753
1754 for (i = 0; i < map->num_stripes; i++) {
1755 bool already_inserted = false;
1756 u64 stripe_nr;
1757 u64 offset;
1758 int j;
1759
1760 if (!in_range(physical, map->stripes[i].physical,
1761 data_stripe_length))
1762 continue;
1763
1764 if (bdev && map->stripes[i].dev->bdev != bdev)
1765 continue;
1766
1767 stripe_nr = physical - map->stripes[i].physical;
1768 stripe_nr = div64_u64_rem(stripe_nr, map->stripe_len, &offset);
1769
1770 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1771 stripe_nr = stripe_nr * map->num_stripes + i;
1772 stripe_nr = div_u64(stripe_nr, map->sub_stripes);
1773 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
1774 stripe_nr = stripe_nr * map->num_stripes + i;
1775 }
1776
1777
1778
1779
1780
1781
1782 bytenr = chunk_start + stripe_nr * io_stripe_size + offset;
1783
		/* Ensure we don't add duplicate addresses */
1785 for (j = 0; j < nr; j++) {
1786 if (buf[j] == bytenr) {
1787 already_inserted = true;
1788 break;
1789 }
1790 }
1791
1792 if (!already_inserted)
1793 buf[nr++] = bytenr;
1794 }
1795
1796 *logical = buf;
1797 *naddrs = nr;
1798 *stripe_len = io_stripe_size;
1799out:
1800 free_extent_map(em);
1801 return ret;
1802}
1803
1804static int exclude_super_stripes(struct btrfs_block_group *cache)
1805{
1806 struct btrfs_fs_info *fs_info = cache->fs_info;
1807 const bool zoned = btrfs_is_zoned(fs_info);
1808 u64 bytenr;
1809 u64 *logical;
1810 int stripe_len;
1811 int i, nr, ret;
1812
1813 if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
1814 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
1815 cache->bytes_super += stripe_len;
1816 ret = btrfs_add_excluded_extent(fs_info, cache->start,
1817 stripe_len);
1818 if (ret)
1819 return ret;
1820 }
1821
1822 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1823 bytenr = btrfs_sb_offset(i);
1824 ret = btrfs_rmap_block(fs_info, cache->start, NULL,
1825 bytenr, &logical, &nr, &stripe_len);
1826 if (ret)
1827 return ret;
1828
		/* Shouldn't have super stripes in sequential zones */
1830 if (zoned && nr) {
1831 btrfs_err(fs_info,
1832 "zoned: block group %llu must not contain super block",
1833 cache->start);
1834 return -EUCLEAN;
1835 }
1836
1837 while (nr--) {
1838 u64 len = min_t(u64, stripe_len,
1839 cache->start + cache->length - logical[nr]);
1840
1841 cache->bytes_super += len;
1842 ret = btrfs_add_excluded_extent(fs_info, logical[nr],
1843 len);
1844 if (ret) {
1845 kfree(logical);
1846 return ret;
1847 }
1848 }
1849
1850 kfree(logical);
1851 }
1852 return 0;
1853}
1854
1855static void link_block_group(struct btrfs_block_group *cache)
1856{
1857 struct btrfs_space_info *space_info = cache->space_info;
1858 int index = btrfs_bg_flags_to_raid_index(cache->flags);
1859
1860 down_write(&space_info->groups_sem);
1861 list_add_tail(&cache->list, &space_info->block_groups[index]);
1862 up_write(&space_info->groups_sem);
1863}
1864
1865static struct btrfs_block_group *btrfs_create_block_group_cache(
1866 struct btrfs_fs_info *fs_info, u64 start)
1867{
1868 struct btrfs_block_group *cache;
1869
1870 cache = kzalloc(sizeof(*cache), GFP_NOFS);
1871 if (!cache)
1872 return NULL;
1873
1874 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
1875 GFP_NOFS);
1876 if (!cache->free_space_ctl) {
1877 kfree(cache);
1878 return NULL;
1879 }
1880
1881 cache->start = start;
1882
1883 cache->fs_info = fs_info;
1884 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
1885
1886 cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
1887
1888 refcount_set(&cache->refs, 1);
1889 spin_lock_init(&cache->lock);
1890 init_rwsem(&cache->data_rwsem);
1891 INIT_LIST_HEAD(&cache->list);
1892 INIT_LIST_HEAD(&cache->cluster_list);
1893 INIT_LIST_HEAD(&cache->bg_list);
1894 INIT_LIST_HEAD(&cache->ro_list);
1895 INIT_LIST_HEAD(&cache->discard_list);
1896 INIT_LIST_HEAD(&cache->dirty_list);
1897 INIT_LIST_HEAD(&cache->io_list);
1898 btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
1899 atomic_set(&cache->frozen, 0);
1900 mutex_init(&cache->free_space_lock);
1901 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
1902
1903 return cache;
1904}
1905
/*
 * Iterate all chunks and verify that each of them has the corresponding block
 * group
 */
1910static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
1911{
1912 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
1913 struct extent_map *em;
1914 struct btrfs_block_group *bg;
1915 u64 start = 0;
1916 int ret = 0;
1917
1918 while (1) {
1919 read_lock(&map_tree->lock);
1920
1921
1922
1923
1924
1925 em = lookup_extent_mapping(map_tree, start, 1);
1926 read_unlock(&map_tree->lock);
1927 if (!em)
1928 break;
1929
1930 bg = btrfs_lookup_block_group(fs_info, em->start);
1931 if (!bg) {
1932 btrfs_err(fs_info,
1933 "chunk start=%llu len=%llu doesn't have corresponding block group",
1934 em->start, em->len);
1935 ret = -EUCLEAN;
1936 free_extent_map(em);
1937 break;
1938 }
1939 if (bg->start != em->start || bg->length != em->len ||
1940 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
1941 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
1942 btrfs_err(fs_info,
1943"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
1944 em->start, em->len,
1945 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
1946 bg->start, bg->length,
1947 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
1948 ret = -EUCLEAN;
1949 free_extent_map(em);
1950 btrfs_put_block_group(bg);
1951 break;
1952 }
1953 start = em->start + em->len;
1954 free_extent_map(em);
1955 btrfs_put_block_group(bg);
1956 }
1957 return ret;
1958}
1959
1960static int read_one_block_group(struct btrfs_fs_info *info,
1961 struct btrfs_block_group_item *bgi,
1962 const struct btrfs_key *key,
1963 int need_clear)
1964{
1965 struct btrfs_block_group *cache;
1966 struct btrfs_space_info *space_info;
1967 const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
1968 int ret;
1969
1970 ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
1971
1972 cache = btrfs_create_block_group_cache(info, key->objectid);
1973 if (!cache)
1974 return -ENOMEM;
1975
1976 cache->length = key->offset;
1977 cache->used = btrfs_stack_block_group_used(bgi);
1978 cache->flags = btrfs_stack_block_group_flags(bgi);
1979
1980 set_free_space_tree_thresholds(cache);
1981
1982 if (need_clear) {
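		/*
		 * When we mount with an old space cache, we need to
		 * set BTRFS_DC_CLEAR and set the dirty flag.
		 *
		 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
		 *    truncate the old free space cache inode and
		 *    setup a new one.
		 * b) Setting 'dirty flag' makes sure that we flush
		 *    the new space cache info onto disk.
		 */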
1993 if (btrfs_test_opt(info, SPACE_CACHE))
1994 cache->disk_cache_state = BTRFS_DC_CLEAR;
1995 }
1996 if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
1997 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
1998 btrfs_err(info,
1999"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
2000 cache->start);
2001 ret = -EINVAL;
2002 goto error;
2003 }
2004
2005 ret = btrfs_load_block_group_zone_info(cache, false);
2006 if (ret) {
2007 btrfs_err(info, "zoned: failed to load zone info of bg %llu",
2008 cache->start);
2009 goto error;
2010 }
2011
	/*
	 * We need to exclude the super stripes now so that the space info has
	 * super bytes accounted for, otherwise we'll think we have more space
	 * than we actually do.
	 */
2017 ret = exclude_super_stripes(cache);
2018 if (ret) {
2019
2020 btrfs_free_excluded_extents(cache);
2021 goto error;
2022 }
2023
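	/*
	 * For zoned filesystems, space after the allocation offset is the only
	 * free space for a block group, so we don't need any caching work.
	 * btrfs_calc_zone_unusable() will set the amount of free space and
	 * zone_unusable space.
	 *
	 * For regular filesystems, check for two cases: either we are full,
	 * and therefore don't need to bother with the caching work since we
	 * won't find any space, or we are empty, and we can just add all the
	 * space in and be done with it.  This saves us _a_lot_ of time,
	 * particularly in the full case.
	 */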
2036 if (btrfs_is_zoned(info)) {
2037 btrfs_calc_zone_unusable(cache);
2038 } else if (cache->length == cache->used) {
2039 cache->last_byte_to_unpin = (u64)-1;
2040 cache->cached = BTRFS_CACHE_FINISHED;
2041 btrfs_free_excluded_extents(cache);
2042 } else if (cache->used == 0) {
2043 cache->last_byte_to_unpin = (u64)-1;
2044 cache->cached = BTRFS_CACHE_FINISHED;
2045 add_new_free_space(cache, cache->start,
2046 cache->start + cache->length);
2047 btrfs_free_excluded_extents(cache);
2048 }
2049
2050 ret = btrfs_add_block_group_cache(info, cache);
2051 if (ret) {
2052 btrfs_remove_free_space_cache(cache);
2053 goto error;
2054 }
2055 trace_btrfs_add_block_group(info, cache, 0);
2056 btrfs_update_space_info(info, cache->flags, cache->length,
2057 cache->used, cache->bytes_super,
2058 cache->zone_unusable, &space_info);
2059
2060 cache->space_info = space_info;
2061
2062 link_block_group(cache);
2063
2064 set_avail_alloc_bits(info, cache->flags);
2065 if (btrfs_chunk_readonly(info, cache->start)) {
2066 inc_block_group_ro(cache, 1);
2067 } else if (cache->used == 0) {
2068 ASSERT(list_empty(&cache->bg_list));
2069 if (btrfs_test_opt(info, DISCARD_ASYNC))
2070 btrfs_discard_queue_work(&info->discard_ctl, cache);
2071 else
2072 btrfs_mark_bg_unused(cache);
2073 }
2074 return 0;
2075error:
2076 btrfs_put_block_group(cache);
2077 return ret;
2078}
2079
2080static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
2081{
2082 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
2083 struct btrfs_space_info *space_info;
2084 struct rb_node *node;
2085 int ret = 0;
2086
2087 for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
2088 struct extent_map *em;
2089 struct map_lookup *map;
2090 struct btrfs_block_group *bg;
2091
2092 em = rb_entry(node, struct extent_map, rb_node);
2093 map = em->map_lookup;
2094 bg = btrfs_create_block_group_cache(fs_info, em->start);
2095 if (!bg) {
2096 ret = -ENOMEM;
2097 break;
2098 }
2099
		/* Fill dummy cache as FULL */
2101 bg->length = em->len;
2102 bg->flags = map->type;
2103 bg->last_byte_to_unpin = (u64)-1;
2104 bg->cached = BTRFS_CACHE_FINISHED;
2105 bg->used = em->len;
2106 bg->flags = map->type;
2107 ret = btrfs_add_block_group_cache(fs_info, bg);
2108
2109
2110
2111
2112 if (ret == -EEXIST) {
2113 ret = 0;
2114 btrfs_put_block_group(bg);
2115 continue;
2116 }
2117
2118 if (ret) {
2119 btrfs_remove_free_space_cache(bg);
2120 btrfs_put_block_group(bg);
2121 break;
2122 }
2123
2124 btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
2125 0, 0, &space_info);
2126 bg->space_info = space_info;
2127 link_block_group(bg);
2128
2129 set_avail_alloc_bits(fs_info, bg->flags);
2130 }
2131 if (!ret)
2132 btrfs_init_global_block_rsv(fs_info);
2133 return ret;
2134}
2135
2136int btrfs_read_block_groups(struct btrfs_fs_info *info)
2137{
2138 struct btrfs_path *path;
2139 int ret;
2140 struct btrfs_block_group *cache;
2141 struct btrfs_space_info *space_info;
2142 struct btrfs_key key;
2143 int need_clear = 0;
2144 u64 cache_gen;
2145
2146 if (!info->extent_root)
2147 return fill_dummy_bgs(info);
2148
2149 key.objectid = 0;
2150 key.offset = 0;
2151 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2152 path = btrfs_alloc_path();
2153 if (!path)
2154 return -ENOMEM;
2155
2156 cache_gen = btrfs_super_cache_generation(info->super_copy);
2157 if (btrfs_test_opt(info, SPACE_CACHE) &&
2158 btrfs_super_generation(info->super_copy) != cache_gen)
2159 need_clear = 1;
2160 if (btrfs_test_opt(info, CLEAR_CACHE))
2161 need_clear = 1;
2162
2163 while (1) {
2164 struct btrfs_block_group_item bgi;
2165 struct extent_buffer *leaf;
2166 int slot;
2167
2168 ret = find_first_block_group(info, path, &key);
2169 if (ret > 0)
2170 break;
2171 if (ret != 0)
2172 goto error;
2173
2174 leaf = path->nodes[0];
2175 slot = path->slots[0];
2176
2177 read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
2178 sizeof(bgi));
2179
2180 btrfs_item_key_to_cpu(leaf, &key, slot);
2181 btrfs_release_path(path);
2182 ret = read_one_block_group(info, &bgi, &key, need_clear);
2183 if (ret < 0)
2184 goto error;
2185 key.objectid += key.offset;
2186 key.offset = 0;
2187 }
2188 btrfs_release_path(path);
2189
2190 list_for_each_entry(space_info, &info->space_info, list) {
2191 int i;
2192
2193 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2194 if (list_empty(&space_info->block_groups[i]))
2195 continue;
2196 cache = list_first_entry(&space_info->block_groups[i],
2197 struct btrfs_block_group,
2198 list);
2199 btrfs_sysfs_add_block_group_type(cache);
2200 }
2201
2202 if (!(btrfs_get_alloc_profile(info, space_info->flags) &
2203 (BTRFS_BLOCK_GROUP_RAID10 |
2204 BTRFS_BLOCK_GROUP_RAID1_MASK |
2205 BTRFS_BLOCK_GROUP_RAID56_MASK |
2206 BTRFS_BLOCK_GROUP_DUP)))
2207 continue;
2208
		/*
		 * Avoid allocating from un-mirrored block group if there are
		 * mirrored block groups.
		 */
2212 list_for_each_entry(cache,
2213 &space_info->block_groups[BTRFS_RAID_RAID0],
2214 list)
2215 inc_block_group_ro(cache, 1);
2216 list_for_each_entry(cache,
2217 &space_info->block_groups[BTRFS_RAID_SINGLE],
2218 list)
2219 inc_block_group_ro(cache, 1);
2220 }
2221
2222 btrfs_init_global_block_rsv(info);
2223 ret = check_chunk_block_group_mappings(info);
2224error:
2225 btrfs_free_path(path);
2226
	/*
	 * We've hit some error while reading the extent tree, and have
	 * rescue=ibadroots mount option.
	 * Try to fill the tree using dummy block groups so that the user can
	 * continue to mount and grab their data.
	 */
2232 if (ret && btrfs_test_opt(info, IGNOREBADROOTS))
2233 ret = fill_dummy_bgs(info);
2234 return ret;
2235}
2236
2237
/*
 * This function, insert_block_group_item(), belongs to the phase 2 of chunk
 * allocation.
 *
 * See the comment at btrfs_chunk_alloc() for details about the chunk allocation
 * phases.
 */
2244static int insert_block_group_item(struct btrfs_trans_handle *trans,
2245 struct btrfs_block_group *block_group)
2246{
2247 struct btrfs_fs_info *fs_info = trans->fs_info;
2248 struct btrfs_block_group_item bgi;
2249 struct btrfs_root *root;
2250 struct btrfs_key key;
2251
2252 spin_lock(&block_group->lock);
2253 btrfs_set_stack_block_group_used(&bgi, block_group->used);
2254 btrfs_set_stack_block_group_chunk_objectid(&bgi,
2255 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2256 btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
2257 key.objectid = block_group->start;
2258 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2259 key.offset = block_group->length;
2260 spin_unlock(&block_group->lock);
2261
2262 root = fs_info->extent_root;
2263 return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
2264}
2265
2266static int insert_dev_extent(struct btrfs_trans_handle *trans,
2267 struct btrfs_device *device, u64 chunk_offset,
2268 u64 start, u64 num_bytes)
2269{
2270 struct btrfs_fs_info *fs_info = device->fs_info;
2271 struct btrfs_root *root = fs_info->dev_root;
2272 struct btrfs_path *path;
2273 struct btrfs_dev_extent *extent;
2274 struct extent_buffer *leaf;
2275 struct btrfs_key key;
2276 int ret;
2277
2278 WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
2279 WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
2280 path = btrfs_alloc_path();
2281 if (!path)
2282 return -ENOMEM;
2283
2284 key.objectid = device->devid;
2285 key.type = BTRFS_DEV_EXTENT_KEY;
2286 key.offset = start;
2287 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
2288 if (ret)
2289 goto out;
2290
2291 leaf = path->nodes[0];
2292 extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
2293 btrfs_set_dev_extent_chunk_tree(leaf, extent, BTRFS_CHUNK_TREE_OBJECTID);
2294 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
2295 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2296 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
2297
2298 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
2299 btrfs_mark_buffer_dirty(leaf);
2300out:
2301 btrfs_free_path(path);
2302 return ret;
2303}
2304
2305
2306
2307
2308
2309
2310
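/*
 * Insert one device extent item for each stripe of the chunk starting at
 * @chunk_offset into the device tree.
 *
 * This belongs to phase 2 of chunk allocation; see the comment above
 * btrfs_chunk_alloc() for details about the two phases.
 */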
2311static int insert_dev_extents(struct btrfs_trans_handle *trans,
2312 u64 chunk_offset, u64 chunk_size)
2313{
2314 struct btrfs_fs_info *fs_info = trans->fs_info;
2315 struct btrfs_device *device;
2316 struct extent_map *em;
2317 struct map_lookup *map;
2318 u64 dev_offset;
2319 u64 stripe_size;
2320 int i;
2321 int ret = 0;
2322
2323 em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
2324 if (IS_ERR(em))
2325 return PTR_ERR(em);
2326
2327 map = em->map_lookup;
2328 stripe_size = em->orig_block_len;
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
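 /*
  * Take the device list mutex to prevent races with the final phase of a
  * device replace operation that replaces the device object of a stripe,
  * because the device object's id can change at any time during that
  * final phase of device replace.
  */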
2339 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2340 for (i = 0; i < map->num_stripes; i++) {
2341 device = map->stripes[i].dev;
2342 dev_offset = map->stripes[i].physical;
2343
2344 ret = insert_dev_extent(trans, device, chunk_offset, dev_offset,
2345 stripe_size);
2346 if (ret)
2347 break;
2348 }
2349 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2350
2351 free_extent_map(em);
2352 return ret;
2353}
2354
2355
2356
2357
2358
2359
2360
2361
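/*
 * Insert the block group items, device extent items and, if still pending, the
 * chunk items for all block groups created in the current transaction
 * (trans->new_bgs).
 *
 * This is phase 2 of chunk allocation, run when the transaction handle is
 * ended or the transaction commits; see the comment above btrfs_chunk_alloc().
 */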
2362void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
2363{
2364 struct btrfs_fs_info *fs_info = trans->fs_info;
2365 struct btrfs_block_group *block_group;
2366 int ret = 0;
2367
2368 while (!list_empty(&trans->new_bgs)) {
2369 int index;
2370
2371 block_group = list_first_entry(&trans->new_bgs,
2372 struct btrfs_block_group,
2373 bg_list);
2374 if (ret)
2375 goto next;
2376
2377 index = btrfs_bg_flags_to_raid_index(block_group->flags);
2378
2379 ret = insert_block_group_item(trans, block_group);
2380 if (ret)
2381 btrfs_abort_transaction(trans, ret);
2382 if (!block_group->chunk_item_inserted) {
2383 mutex_lock(&fs_info->chunk_mutex);
2384 ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group);
2385 mutex_unlock(&fs_info->chunk_mutex);
2386 if (ret)
2387 btrfs_abort_transaction(trans, ret);
2388 }
2389 ret = insert_dev_extents(trans, block_group->start,
2390 block_group->length);
2391 if (ret)
2392 btrfs_abort_transaction(trans, ret);
2393 add_block_group_free_space(trans, block_group);
2394
2395
2396
2397
2398
2399
2400
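 /*
  * If we restriped during balance, we may have added a new raid type, so
  * now add a sysfs entry for that type if it was not already added.
  */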
2401 if (block_group->space_info->block_group_kobjs[index] == NULL)
2402 btrfs_sysfs_add_block_group_type(block_group);
2403
2404
2405next:
2406 btrfs_delayed_refs_rsv_release(fs_info, 1);
2407 list_del_init(&block_group->bg_list);
2408 }
2409 btrfs_trans_release_chunk_metadata(trans);
2410}
2411
2412struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
2413 u64 bytes_used, u64 type,
2414 u64 chunk_offset, u64 size)
2415{
2416 struct btrfs_fs_info *fs_info = trans->fs_info;
2417 struct btrfs_block_group *cache;
2418 int ret;
2419
2420 btrfs_set_log_full_commit(trans);
2421
2422 cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
2423 if (!cache)
2424 return ERR_PTR(-ENOMEM);
2425
2426 cache->length = size;
2427 set_free_space_tree_thresholds(cache);
2428 cache->used = bytes_used;
2429 cache->flags = type;
2430 cache->last_byte_to_unpin = (u64)-1;
2431 cache->cached = BTRFS_CACHE_FINISHED;
2432 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
2433 cache->needs_free_space = 1;
2434
2435 ret = btrfs_load_block_group_zone_info(cache, true);
2436 if (ret) {
2437 btrfs_put_block_group(cache);
2438 return ERR_PTR(ret);
2439 }
2440
2441 ret = exclude_super_stripes(cache);
2442 if (ret) {
2443
2444 btrfs_free_excluded_extents(cache);
2445 btrfs_put_block_group(cache);
2446 return ERR_PTR(ret);
2447 }
2448
2449 add_new_free_space(cache, chunk_offset, chunk_offset + size);
2450
2451 btrfs_free_excluded_extents(cache);
2452
2453#ifdef CONFIG_BTRFS_DEBUG
2454 if (btrfs_should_fragment_free_space(cache)) {
2455 u64 new_bytes_used = size - bytes_used;
2456
2457 bytes_used += new_bytes_used >> 1;
2458 fragment_free_space(cache);
2459 }
2460#endif
2461
2462
2463
2464
2465
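 /*
  * Ensure the corresponding space_info object is created and assigned to
  * our block group before it is added to the block group cache rbtree
  * below, so anything that finds it there sees ->space_info set.
  */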
2466 cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
2467 ASSERT(cache->space_info);
2468
2469 ret = btrfs_add_block_group_cache(fs_info, cache);
2470 if (ret) {
2471 btrfs_remove_free_space_cache(cache);
2472 btrfs_put_block_group(cache);
2473 return ERR_PTR(ret);
2474 }
2475
2476
2477
2478
2479
2480 trace_btrfs_add_block_group(fs_info, cache, 1);
2481 btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
2482 cache->bytes_super, 0, &cache->space_info);
2483 btrfs_update_global_block_rsv(fs_info);
2484
2485 link_block_group(cache);
2486
2487 list_add_tail(&cache->bg_list, &trans->new_bgs);
2488 trans->delayed_ref_updates++;
2489 btrfs_update_delayed_refs_rsv(trans);
2490
2491 set_avail_alloc_bits(fs_info, type);
2492 return cache;
2493}
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
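/*
 * Mark one block group read-only, can be called several times for the same
 * block group.
 *
 * @cache:          the destination block group
 * @do_chunk_alloc: whether to pre-allocate a chunk, to make sure there is
 *                  still some usable free space after marking this block
 *                  group read-only
 */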
2504int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
2505 bool do_chunk_alloc)
2506{
2507 struct btrfs_fs_info *fs_info = cache->fs_info;
2508 struct btrfs_trans_handle *trans;
2509 u64 alloc_flags;
2510 int ret;
2511 bool dirty_bg_running;
2512
2513 do {
2514 trans = btrfs_join_transaction(fs_info->extent_root);
2515 if (IS_ERR(trans))
2516 return PTR_ERR(trans);
2517
2518 dirty_bg_running = false;
2519
2520
2521
2522
2523
2524
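 /*
  * Block groups cannot be made read-only after the dirty block group
  * cache writeout for this transaction has started. If it has, wait for
  * the transaction to commit and try again with the next one.
  */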
2525 mutex_lock(&fs_info->ro_block_group_mutex);
2526 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
2527 u64 transid = trans->transid;
2528
2529 mutex_unlock(&fs_info->ro_block_group_mutex);
2530 btrfs_end_transaction(trans);
2531
2532 ret = btrfs_wait_for_commit(fs_info, transid);
2533 if (ret)
2534 return ret;
2535 dirty_bg_running = true;
2536 }
2537 } while (dirty_bg_running);
2538
2539 if (do_chunk_alloc) {
2540
2541
2542
2543
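 /*
  * If this block group's flags differ from the current target profile
  * (e.g. a balance is converting profiles), force the allocation of a
  * chunk with the target profile so relocated data has somewhere to go.
  * An ENOSPC from this allocation is tolerated below, since enough space
  * may already exist at the target profile.
  */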
2544 alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
2545 if (alloc_flags != cache->flags) {
2546 ret = btrfs_chunk_alloc(trans, alloc_flags,
2547 CHUNK_ALLOC_FORCE);
2548
2549
2550
2551
2552 if (ret == -ENOSPC)
2553 ret = 0;
2554 if (ret < 0)
2555 goto out;
2556 }
2557 }
2558
2559 ret = inc_block_group_ro(cache, 0);
2560 if (!do_chunk_alloc || ret == -ETXTBSY)
2561 goto unlock_out;
2562 if (!ret)
2563 goto out;
2564 alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
2565 ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
2566 if (ret < 0)
2567 goto out;
2568 ret = inc_block_group_ro(cache, 0);
2569 if (ret == -ETXTBSY)
2570 goto unlock_out;
2571out:
2572 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2573 alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
2574 mutex_lock(&fs_info->chunk_mutex);
2575 check_system_chunk(trans, alloc_flags);
2576 mutex_unlock(&fs_info->chunk_mutex);
2577 }
2578unlock_out:
2579 mutex_unlock(&fs_info->ro_block_group_mutex);
2580
2581 btrfs_end_transaction(trans);
2582 return ret;
2583}
2584
2585void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
2586{
2587 struct btrfs_space_info *sinfo = cache->space_info;
2588 u64 num_bytes;
2589
2590 BUG_ON(!cache->ro);
2591
2592 spin_lock(&sinfo->lock);
2593 spin_lock(&cache->lock);
2594 if (!--cache->ro) {
2595 if (btrfs_is_zoned(cache->fs_info)) {
2596
2597 cache->zone_unusable = cache->alloc_offset - cache->used;
2598 sinfo->bytes_zone_unusable += cache->zone_unusable;
2599 sinfo->bytes_readonly -= cache->zone_unusable;
2600 }
2601 num_bytes = cache->length - cache->reserved -
2602 cache->pinned - cache->bytes_super -
2603 cache->zone_unusable - cache->used;
2604 sinfo->bytes_readonly -= num_bytes;
2605 list_del_init(&cache->ro_list);
2606 }
2607 spin_unlock(&cache->lock);
2608 spin_unlock(&sinfo->lock);
2609}
2610
2611static int update_block_group_item(struct btrfs_trans_handle *trans,
2612 struct btrfs_path *path,
2613 struct btrfs_block_group *cache)
2614{
2615 struct btrfs_fs_info *fs_info = trans->fs_info;
2616 int ret;
2617 struct btrfs_root *root = fs_info->extent_root;
2618 unsigned long bi;
2619 struct extent_buffer *leaf;
2620 struct btrfs_block_group_item bgi;
2621 struct btrfs_key key;
2622
2623 key.objectid = cache->start;
2624 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2625 key.offset = cache->length;
2626
2627 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2628 if (ret) {
2629 if (ret > 0)
2630 ret = -ENOENT;
2631 goto fail;
2632 }
2633
2634 leaf = path->nodes[0];
2635 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2636 btrfs_set_stack_block_group_used(&bgi, cache->used);
2637 btrfs_set_stack_block_group_chunk_objectid(&bgi,
2638 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
2639 btrfs_set_stack_block_group_flags(&bgi, cache->flags);
2640 write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
2641 btrfs_mark_buffer_dirty(leaf);
2642fail:
2643 btrfs_release_path(path);
2644 return ret;
2645
2646}
2647
2648static int cache_save_setup(struct btrfs_block_group *block_group,
2649 struct btrfs_trans_handle *trans,
2650 struct btrfs_path *path)
2651{
2652 struct btrfs_fs_info *fs_info = block_group->fs_info;
2653 struct btrfs_root *root = fs_info->tree_root;
2654 struct inode *inode = NULL;
2655 struct extent_changeset *data_reserved = NULL;
2656 u64 alloc_hint = 0;
2657 int dcs = BTRFS_DC_ERROR;
2658 u64 cache_size = 0;
2659 int retries = 0;
2660 int ret = 0;
2661
2662 if (!btrfs_test_opt(fs_info, SPACE_CACHE))
2663 return 0;
2664
2665
2666
2667
2668
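 /*
  * If this block group is smaller than 100 megs, don't bother caching it:
  * just mark the cache as written and skip the setup.
  */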
2669 if (block_group->length < (100 * SZ_1M)) {
2670 spin_lock(&block_group->lock);
2671 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2672 spin_unlock(&block_group->lock);
2673 return 0;
2674 }
2675
2676 if (TRANS_ABORTED(trans))
2677 return 0;
2678again:
2679 inode = lookup_free_space_inode(block_group, path);
2680 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2681 ret = PTR_ERR(inode);
2682 btrfs_release_path(path);
2683 goto out;
2684 }
2685
2686 if (IS_ERR(inode)) {
2687 BUG_ON(retries);
2688 retries++;
2689
2690 if (block_group->ro)
2691 goto out_free;
2692
2693 ret = create_free_space_inode(trans, block_group, path);
2694 if (ret)
2695 goto out_free;
2696 goto again;
2697 }
2698
2699
2700
2701
2702
2703
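 /*
  * Set the generation to 0, so that if anything goes wrong from here on
  * we know not to trust this cache when we load up next time.
  */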
2704 BTRFS_I(inode)->generation = 0;
2705 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
2706 if (ret) {
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717 btrfs_abort_transaction(trans, ret);
2718 goto out_put;
2719 }
2720 WARN_ON(ret);
2721
2722
2723 if (block_group->cache_generation == trans->transid &&
2724 i_size_read(inode)) {
2725 dcs = BTRFS_DC_SETUP;
2726 goto out_put;
2727 }
2728
2729 if (i_size_read(inode) > 0) {
2730 ret = btrfs_check_trunc_cache_free_space(fs_info,
2731 &fs_info->global_block_rsv);
2732 if (ret)
2733 goto out_put;
2734
2735 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
2736 if (ret)
2737 goto out_put;
2738 }
2739
2740 spin_lock(&block_group->lock);
2741 if (block_group->cached != BTRFS_CACHE_FINISHED ||
2742 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
2743
2744
2745
2746
2747
2748
2749 dcs = BTRFS_DC_WRITTEN;
2750 spin_unlock(&block_group->lock);
2751 goto out_put;
2752 }
2753 spin_unlock(&block_group->lock);
2754
2755
2756
2757
2758
2759 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
2760 ret = -ENOSPC;
2761 goto out_put;
2762 }
2763
2764
2765
2766
2767
2768
2769
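 /*
  * Try to preallocate enough space for the free space cache based on how
  * big the block group is: 16 sectors worth of cache per 256MiB of block
  * group, with a floor of 16 sectors.
  */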
2770 cache_size = div_u64(block_group->length, SZ_256M);
2771 if (!cache_size)
2772 cache_size = 1;
2773
2774 cache_size *= 16;
2775 cache_size *= fs_info->sectorsize;
2776
2777 ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
2778 cache_size);
2779 if (ret)
2780 goto out_put;
2781
2782 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, cache_size,
2783 cache_size, cache_size,
2784 &alloc_hint);
2785
2786
2787
2788
2789
2790
2791
2792
2793 if (!ret)
2794 dcs = BTRFS_DC_SETUP;
2795 else if (ret == -ENOSPC)
2796 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
2797
2798out_put:
2799 iput(inode);
2800out_free:
2801 btrfs_release_path(path);
2802out:
2803 spin_lock(&block_group->lock);
2804 if (!ret && dcs == BTRFS_DC_SETUP)
2805 block_group->cache_generation = trans->transid;
2806 block_group->disk_cache_state = dcs;
2807 spin_unlock(&block_group->lock);
2808
2809 extent_changeset_free(data_reserved);
2810 return ret;
2811}
2812
2813int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
2814{
2815 struct btrfs_fs_info *fs_info = trans->fs_info;
2816 struct btrfs_block_group *cache, *tmp;
2817 struct btrfs_transaction *cur_trans = trans->transaction;
2818 struct btrfs_path *path;
2819
2820 if (list_empty(&cur_trans->dirty_bgs) ||
2821 !btrfs_test_opt(fs_info, SPACE_CACHE))
2822 return 0;
2823
2824 path = btrfs_alloc_path();
2825 if (!path)
2826 return -ENOMEM;
2827
2828
2829 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
2830 dirty_list) {
2831 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2832 cache_save_setup(cache, trans, path);
2833 }
2834
2835 btrfs_free_path(path);
2836 return 0;
2837}
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
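/*
 * Write out the free space caches and block group items for the dirty block
 * groups of the running transaction ahead of the final writeout done in the
 * commit critical section. This reduces the amount of work, and therefore
 * latency, inside the critical section; anything that gets re-dirtied
 * afterwards is handled again by btrfs_write_dirty_block_groups().
 */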
2851int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
2852{
2853 struct btrfs_fs_info *fs_info = trans->fs_info;
2854 struct btrfs_block_group *cache;
2855 struct btrfs_transaction *cur_trans = trans->transaction;
2856 int ret = 0;
2857 int should_put;
2858 struct btrfs_path *path = NULL;
2859 LIST_HEAD(dirty);
2860 struct list_head *io = &cur_trans->io_bgs;
2861 int num_started = 0;
2862 int loops = 0;
2863
2864 spin_lock(&cur_trans->dirty_bgs_lock);
2865 if (list_empty(&cur_trans->dirty_bgs)) {
2866 spin_unlock(&cur_trans->dirty_bgs_lock);
2867 return 0;
2868 }
2869 list_splice_init(&cur_trans->dirty_bgs, &dirty);
2870 spin_unlock(&cur_trans->dirty_bgs_lock);
2871
2872again:
2873
2874 btrfs_create_pending_block_groups(trans);
2875
2876 if (!path) {
2877 path = btrfs_alloc_path();
2878 if (!path) {
2879 ret = -ENOMEM;
2880 goto out;
2881 }
2882 }
2883
2884
2885
2886
2887
2888
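 /*
  * The cache_write_mutex protects us from balance or automatic removal of
  * block groups deleting the block group item while we are writing out
  * its free space cache.
  */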
2889 mutex_lock(&trans->transaction->cache_write_mutex);
2890 while (!list_empty(&dirty)) {
2891 bool drop_reserve = true;
2892
2893 cache = list_first_entry(&dirty, struct btrfs_block_group,
2894 dirty_list);
2895
2896
2897
2898
2899
2900 if (!list_empty(&cache->io_list)) {
2901 list_del_init(&cache->io_list);
2902 btrfs_wait_cache_io(trans, cache, path);
2903 btrfs_put_block_group(cache);
2904 }
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
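 /*
  * Take dirty_bgs_lock while removing the block group from the dirty
  * list, since other tasks can re-dirty it and re-add it to
  * cur_trans->dirty_bgs concurrently.
  */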
2915 spin_lock(&cur_trans->dirty_bgs_lock);
2916 list_del_init(&cache->dirty_list);
2917 spin_unlock(&cur_trans->dirty_bgs_lock);
2918
2919 should_put = 1;
2920
2921 cache_save_setup(cache, trans, path);
2922
2923 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
2924 cache->io_ctl.inode = NULL;
2925 ret = btrfs_write_out_cache(trans, cache, path);
2926 if (ret == 0 && cache->io_ctl.inode) {
2927 num_started++;
2928 should_put = 0;
2929
2930
2931
2932
2933
2934
2935 list_add_tail(&cache->io_list, io);
2936 } else {
2937
2938
2939
2940
2941 ret = 0;
2942 }
2943 }
2944 if (!ret) {
2945 ret = update_block_group_item(trans, path, cache);
2946
2947
2948
2949
2950
2951
2952
2953
2954
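 /*
  * -ENOENT means our block group is still attached to some transaction
  * handle's list of new block groups (btrfs_trans_handle::new_bgs) and
  * its block group item is not yet in the extent tree. Ignore the error,
  * put the block group back on the dirty list and it will be handled
  * again later, ultimately at transaction commit time.
  */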
2955 if (ret == -ENOENT) {
2956 ret = 0;
2957 spin_lock(&cur_trans->dirty_bgs_lock);
2958 if (list_empty(&cache->dirty_list)) {
2959 list_add_tail(&cache->dirty_list,
2960 &cur_trans->dirty_bgs);
2961 btrfs_get_block_group(cache);
2962 drop_reserve = false;
2963 }
2964 spin_unlock(&cur_trans->dirty_bgs_lock);
2965 } else if (ret) {
2966 btrfs_abort_transaction(trans, ret);
2967 }
2968 }
2969
2970
2971 if (should_put)
2972 btrfs_put_block_group(cache);
2973 if (drop_reserve)
2974 btrfs_delayed_refs_rsv_release(fs_info, 1);
2975
2976
2977
2978
2979
2980 mutex_unlock(&trans->transaction->cache_write_mutex);
2981 if (ret)
2982 goto out;
2983 mutex_lock(&trans->transaction->cache_write_mutex);
2984 }
2985 mutex_unlock(&trans->transaction->cache_write_mutex);
2986
2987
2988
2989
2990
2991 if (!ret)
2992 ret = btrfs_run_delayed_refs(trans, 0);
2993 if (!ret && loops == 0) {
2994 loops++;
2995 spin_lock(&cur_trans->dirty_bgs_lock);
2996 list_splice_init(&cur_trans->dirty_bgs, &dirty);
2997
2998
2999
3000
3001 if (!list_empty(&dirty)) {
3002 spin_unlock(&cur_trans->dirty_bgs_lock);
3003 goto again;
3004 }
3005 spin_unlock(&cur_trans->dirty_bgs_lock);
3006 }
3007out:
3008 if (ret < 0) {
3009 spin_lock(&cur_trans->dirty_bgs_lock);
3010 list_splice_init(&dirty, &cur_trans->dirty_bgs);
3011 spin_unlock(&cur_trans->dirty_bgs_lock);
3012 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
3013 }
3014
3015 btrfs_free_path(path);
3016 return ret;
3017}
3018
3019int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
3020{
3021 struct btrfs_fs_info *fs_info = trans->fs_info;
3022 struct btrfs_block_group *cache;
3023 struct btrfs_transaction *cur_trans = trans->transaction;
3024 int ret = 0;
3025 int should_put;
3026 struct btrfs_path *path;
3027 struct list_head *io = &cur_trans->io_bgs;
3028 int num_started = 0;
3029
3030 path = btrfs_alloc_path();
3031 if (!path)
3032 return -ENOMEM;
3033
3048
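 /*
  * Even though we are in the critical section of the transaction commit,
  * other tasks can still add elements to this transaction's list of dirty
  * block groups: free space cache endio workers can join the running
  * transaction and dirty block groups while we iterate here. A block
  * group can also still be referenced by a task that is removing it. So
  * take dirty_bgs_lock around the list manipulation and drop it while
  * waiting on cache IO.
  */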
3049 spin_lock(&cur_trans->dirty_bgs_lock);
3050 while (!list_empty(&cur_trans->dirty_bgs)) {
3051 cache = list_first_entry(&cur_trans->dirty_bgs,
3052 struct btrfs_block_group,
3053 dirty_list);
3054
3055
3056
3057
3058
3059
3060 if (!list_empty(&cache->io_list)) {
3061 spin_unlock(&cur_trans->dirty_bgs_lock);
3062 list_del_init(&cache->io_list);
3063 btrfs_wait_cache_io(trans, cache, path);
3064 btrfs_put_block_group(cache);
3065 spin_lock(&cur_trans->dirty_bgs_lock);
3066 }
3067
3068
3069
3070
3071
3072 list_del_init(&cache->dirty_list);
3073 spin_unlock(&cur_trans->dirty_bgs_lock);
3074 should_put = 1;
3075
3076 cache_save_setup(cache, trans, path);
3077
3078 if (!ret)
3079 ret = btrfs_run_delayed_refs(trans,
3080 (unsigned long) -1);
3081
3082 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3083 cache->io_ctl.inode = NULL;
3084 ret = btrfs_write_out_cache(trans, cache, path);
3085 if (ret == 0 && cache->io_ctl.inode) {
3086 num_started++;
3087 should_put = 0;
3088 list_add_tail(&cache->io_list, io);
3089 } else {
3090
3091
3092
3093
3094 ret = 0;
3095 }
3096 }
3097 if (!ret) {
3098 ret = update_block_group_item(trans, path, cache);
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
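 /*
  * -ENOENT here means the block group item is not in the extent tree
  * yet: a free space cache endio worker may have created a new block
  * group and not finished its creation. This is rare, so simply wait
  * until we are the only writer left and retry the update.
  */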
3112 if (ret == -ENOENT) {
3113 wait_event(cur_trans->writer_wait,
3114 atomic_read(&cur_trans->num_writers) == 1);
3115 ret = update_block_group_item(trans, path, cache);
3116 }
3117 if (ret)
3118 btrfs_abort_transaction(trans, ret);
3119 }
3120
3121
3122 if (should_put)
3123 btrfs_put_block_group(cache);
3124 btrfs_delayed_refs_rsv_release(fs_info, 1);
3125 spin_lock(&cur_trans->dirty_bgs_lock);
3126 }
3127 spin_unlock(&cur_trans->dirty_bgs_lock);
3128
3129
3130
3131
3132
3133 while (!list_empty(io)) {
3134 cache = list_first_entry(io, struct btrfs_block_group,
3135 io_list);
3136 list_del_init(&cache->io_list);
3137 btrfs_wait_cache_io(trans, cache, path);
3138 btrfs_put_block_group(cache);
3139 }
3140
3141 btrfs_free_path(path);
3142 return ret;
3143}
3144
3145int btrfs_update_block_group(struct btrfs_trans_handle *trans,
3146 u64 bytenr, u64 num_bytes, int alloc)
3147{
3148 struct btrfs_fs_info *info = trans->fs_info;
3149 struct btrfs_block_group *cache = NULL;
3150 u64 total = num_bytes;
3151 u64 old_val;
3152 u64 byte_in_group;
3153 int factor;
3154 int ret = 0;
3155
3156
3157 spin_lock(&info->delalloc_root_lock);
3158 old_val = btrfs_super_bytes_used(info->super_copy);
3159 if (alloc)
3160 old_val += num_bytes;
3161 else
3162 old_val -= num_bytes;
3163 btrfs_set_super_bytes_used(info->super_copy, old_val);
3164 spin_unlock(&info->delalloc_root_lock);
3165
3166 while (total) {
3167 cache = btrfs_lookup_block_group(info, bytenr);
3168 if (!cache) {
3169 ret = -ENOENT;
3170 break;
3171 }
3172 factor = btrfs_bg_type_to_factor(cache->flags);
3173
3174
3175
3176
3177
3178
3179
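 /*
  * If this block group has a free space cache written out, make sure it
  * is loaded when we are freeing space, otherwise the unpinning stage
  * cannot add the space back to the block group and we would leak it.
  */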
3180 if (!alloc && !btrfs_block_group_done(cache))
3181 btrfs_cache_block_group(cache, 1);
3182
3183 byte_in_group = bytenr - cache->start;
3184 WARN_ON(byte_in_group > cache->length);
3185
3186 spin_lock(&cache->space_info->lock);
3187 spin_lock(&cache->lock);
3188
3189 if (btrfs_test_opt(info, SPACE_CACHE) &&
3190 cache->disk_cache_state < BTRFS_DC_CLEAR)
3191 cache->disk_cache_state = BTRFS_DC_CLEAR;
3192
3193 old_val = cache->used;
3194 num_bytes = min(total, cache->length - byte_in_group);
3195 if (alloc) {
3196 old_val += num_bytes;
3197 cache->used = old_val;
3198 cache->reserved -= num_bytes;
3199 cache->space_info->bytes_reserved -= num_bytes;
3200 cache->space_info->bytes_used += num_bytes;
3201 cache->space_info->disk_used += num_bytes * factor;
3202 spin_unlock(&cache->lock);
3203 spin_unlock(&cache->space_info->lock);
3204 } else {
3205 old_val -= num_bytes;
3206 cache->used = old_val;
3207 cache->pinned += num_bytes;
3208 btrfs_space_info_update_bytes_pinned(info,
3209 cache->space_info, num_bytes);
3210 cache->space_info->bytes_used -= num_bytes;
3211 cache->space_info->disk_used -= num_bytes * factor;
3212 spin_unlock(&cache->lock);
3213 spin_unlock(&cache->space_info->lock);
3214
3215 set_extent_dirty(&trans->transaction->pinned_extents,
3216 bytenr, bytenr + num_bytes - 1,
3217 GFP_NOFS | __GFP_NOFAIL);
3218 }
3219
3220 spin_lock(&trans->transaction->dirty_bgs_lock);
3221 if (list_empty(&cache->dirty_list)) {
3222 list_add_tail(&cache->dirty_list,
3223 &trans->transaction->dirty_bgs);
3224 trans->delayed_ref_updates++;
3225 btrfs_get_block_group(cache);
3226 }
3227 spin_unlock(&trans->transaction->dirty_bgs_lock);
3228
3229
3230
3231
3232
3233
3234
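 /*
  * The block group no longer has any used bytes, so queue it for deletion
  * by the cleaner thread; with async discard enabled the discard code
  * marks it unused instead. Doing this after the block group was added to
  * the dirty list avoids racing with the space cache writeout.
  */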
3235 if (!alloc && old_val == 0) {
3236 if (!btrfs_test_opt(info, DISCARD_ASYNC))
3237 btrfs_mark_bg_unused(cache);
3238 }
3239
3240 btrfs_put_block_group(cache);
3241 total -= num_bytes;
3242 bytenr += num_bytes;
3243 }
3244
3245
3246 btrfs_update_delayed_refs_rsv(trans);
3247 return ret;
3248}
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
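/*
 * Update the block group and space info counters when the allocator reserves
 * space.
 *
 * @cache:     the block group we are allocating from
 * @ram_bytes: the number of bytes of file content; the same as @num_bytes
 *             except on the compressed write path
 * @num_bytes: the number of bytes being reserved on disk
 * @delalloc:  the blocks are reserved for a delalloc write
 *
 * Returns -EAGAIN if the block group became read-only, 0 otherwise.
 */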
3262int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
3263 u64 ram_bytes, u64 num_bytes, int delalloc)
3264{
3265 struct btrfs_space_info *space_info = cache->space_info;
3266 int ret = 0;
3267
3268 spin_lock(&space_info->lock);
3269 spin_lock(&cache->lock);
3270 if (cache->ro) {
3271 ret = -EAGAIN;
3272 } else {
3273 cache->reserved += num_bytes;
3274 space_info->bytes_reserved += num_bytes;
3275 trace_btrfs_space_reservation(cache->fs_info, "space_info",
3276 space_info->flags, num_bytes, 1);
3277 btrfs_space_info_update_bytes_may_use(cache->fs_info,
3278 space_info, -ram_bytes);
3279 if (delalloc)
3280 cache->delalloc_bytes += num_bytes;
3281
3282
3283
3284
3285
3286 if (num_bytes < ram_bytes)
3287 btrfs_try_granting_tickets(cache->fs_info, space_info);
3288 }
3289 spin_unlock(&cache->lock);
3290 spin_unlock(&space_info->lock);
3291 return ret;
3292}
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
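/*
 * Update the block group and space info counters when releasing a reservation
 * that was never used on disk, for example when a reserved extent is freed
 * before the transaction commits.
 *
 * @cache:     the block group that held the reservation
 * @num_bytes: the number of bytes to release
 * @delalloc:  the blocks were reserved for a delalloc write
 */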
3305void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
3306 u64 num_bytes, int delalloc)
3307{
3308 struct btrfs_space_info *space_info = cache->space_info;
3309
3310 spin_lock(&space_info->lock);
3311 spin_lock(&cache->lock);
3312 if (cache->ro)
3313 space_info->bytes_readonly += num_bytes;
3314 cache->reserved -= num_bytes;
3315 space_info->bytes_reserved -= num_bytes;
3316 space_info->max_extent_size = 0;
3317
3318 if (delalloc)
3319 cache->delalloc_bytes -= num_bytes;
3320 spin_unlock(&cache->lock);
3321
3322 btrfs_try_granting_tickets(cache->fs_info, space_info);
3323 spin_unlock(&space_info->lock);
3324}
3325
3326static void force_metadata_allocation(struct btrfs_fs_info *info)
3327{
3328 struct list_head *head = &info->space_info;
3329 struct btrfs_space_info *found;
3330
3331 list_for_each_entry(found, head, list) {
3332 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3333 found->force_alloc = CHUNK_ALLOC_FORCE;
3334 }
3335}
3336
3337static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
3338 struct btrfs_space_info *sinfo, int force)
3339{
3340 u64 bytes_used = btrfs_space_info_used(sinfo, false);
3341 u64 thresh;
3342
3343 if (force == CHUNK_ALLOC_FORCE)
3344 return 1;
3345
3346
3347
3348
3349
3350 if (force == CHUNK_ALLOC_LIMITED) {
3351 thresh = btrfs_super_total_bytes(fs_info->super_copy);
3352 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
3353
3354 if (sinfo->total_bytes - bytes_used < thresh)
3355 return 1;
3356 }
3357
3358 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
3359 return 0;
3360 return 1;
3361}
3362
3363int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
3364{
3365 u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
3366
3367 return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
3368}
3369
3370static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
3371{
3372 struct btrfs_block_group *bg;
3373 int ret;
3374
3375
3376
3377
3378
3379
3380
3381 check_system_chunk(trans, flags);
3382
3383 bg = btrfs_alloc_chunk(trans, flags);
3384 if (IS_ERR(bg)) {
3385 ret = PTR_ERR(bg);
3386 goto out;
3387 }
3388
3404
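 /*
  * For a new system chunk, stop here and do not insert the chunk item
  * yet. A system chunk allocation can be triggered while COWing an extent
  * buffer of the chunk btree, and inserting the chunk item at that point
  * could deadlock on the chunk btree locks. The chunk item is inserted
  * later, during phase 2 (btrfs_create_pending_block_groups()).
  */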
3405 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3406 return 0;
3407
3408 ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
3439
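 /*
  * -ENOSPC here means we could not insert the chunk item even though
  * check_system_chunk() reserved space earlier: the existing system block
  * groups may no longer be usable for allocation (for example after a
  * profile conversion by balance). Allocate one more system chunk, insert
  * its chunk item, then retry inserting the chunk item for the new block
  * group. Any other error aborts the transaction.
  */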
3440 if (ret == -ENOSPC) {
3441 const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
3442 struct btrfs_block_group *sys_bg;
3443
3444 sys_bg = btrfs_alloc_chunk(trans, sys_flags);
3445 if (IS_ERR(sys_bg)) {
3446 ret = PTR_ERR(sys_bg);
3447 btrfs_abort_transaction(trans, ret);
3448 goto out;
3449 }
3450
3451 ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
3452 if (ret) {
3453 btrfs_abort_transaction(trans, ret);
3454 goto out;
3455 }
3456
3457 ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
3458 if (ret) {
3459 btrfs_abort_transaction(trans, ret);
3460 goto out;
3461 }
3462 } else if (ret) {
3463 btrfs_abort_transaction(trans, ret);
3464 goto out;
3465 }
3466out:
3467 btrfs_trans_release_chunk_metadata(trans);
3468
3469 return ret;
3470}
3471
3570
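/*
 * Chunk allocation is done in two phases.
 *
 * Phase 1 (this function): space is reserved, the chunk mapping and the block
 * group are created in memory and the block group is appended to the
 * transaction's list of new block groups (trans->new_bgs). No items are
 * inserted into the chunk, extent or device btrees yet.
 *
 * Phase 2 (btrfs_create_pending_block_groups()): run when the transaction
 * handle is ended or when the transaction commits, it inserts the chunk item,
 * block group item and device extent items for every block group on that list.
 *
 * Deferring the btree insertions avoids deadlocks when chunk allocation is
 * triggered from a context that is already modifying the chunk or extent
 * btrees; system chunks get additional special casing in do_chunk_alloc() and
 * check_system_chunk().
 */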
3571int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
3572 enum btrfs_chunk_alloc_enum force)
3573{
3574 struct btrfs_fs_info *fs_info = trans->fs_info;
3575 struct btrfs_space_info *space_info;
3576 bool wait_for_alloc = false;
3577 bool should_alloc = false;
3578 int ret = 0;
3579
3580
3581 if (trans->allocating_chunk)
3582 return -ENOSPC;
3583
3584
3585
3586
3587
3588 if (trans->removing_chunk)
3589 return -ENOSPC;
3590
3591 space_info = btrfs_find_space_info(fs_info, flags);
3592 ASSERT(space_info);
3593
3594 do {
3595 spin_lock(&space_info->lock);
3596 if (force < space_info->force_alloc)
3597 force = space_info->force_alloc;
3598 should_alloc = should_alloc_chunk(fs_info, space_info, force);
3599 if (space_info->full) {
3600
3601 if (should_alloc)
3602 ret = -ENOSPC;
3603 else
3604 ret = 0;
3605 spin_unlock(&space_info->lock);
3606 return ret;
3607 } else if (!should_alloc) {
3608 spin_unlock(&space_info->lock);
3609 return 0;
3610 } else if (space_info->chunk_alloc) {
3611
3612
3613
3614
3615
3616
3617 wait_for_alloc = true;
3618 spin_unlock(&space_info->lock);
3619 mutex_lock(&fs_info->chunk_mutex);
3620 mutex_unlock(&fs_info->chunk_mutex);
3621 } else {
3622
3623 space_info->chunk_alloc = 1;
3624 wait_for_alloc = false;
3625 spin_unlock(&space_info->lock);
3626 }
3627
3628 cond_resched();
3629 } while (wait_for_alloc);
3630
3631 mutex_lock(&fs_info->chunk_mutex);
3632 trans->allocating_chunk = true;
3633
3634
3635
3636
3637
3638 if (btrfs_mixed_space_info(space_info))
3639 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3640
3641
3642
3643
3644
3645
3646 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3647 fs_info->data_chunk_allocations++;
3648 if (!(fs_info->data_chunk_allocations %
3649 fs_info->metadata_ratio))
3650 force_metadata_allocation(fs_info);
3651 }
3652
3653 ret = do_chunk_alloc(trans, flags);
3654 trans->allocating_chunk = false;
3655
3656 spin_lock(&space_info->lock);
3657 if (ret < 0) {
3658 if (ret == -ENOSPC)
3659 space_info->full = 1;
3660 else
3661 goto out;
3662 } else {
3663 ret = 1;
3664 space_info->max_extent_size = 0;
3665 }
3666
3667 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3668out:
3669 space_info->chunk_alloc = 0;
3670 spin_unlock(&space_info->lock);
3671 mutex_unlock(&fs_info->chunk_mutex);
3672
3673 return ret;
3674}
3675
3676static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
3677{
3678 u64 num_dev;
3679
3680 num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
3681 if (!num_dev)
3682 num_dev = fs_info->fs_devices->rw_devices;
3683
3684 return num_dev;
3685}
3686
3687
3688
3689
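/*
 * Reserve space in the chunk block reserve for the chunk btree modifications
 * required to allocate or remove a chunk of type @type, allocating a new
 * system chunk first if the system space info is running low. Must be called
 * with fs_info->chunk_mutex held.
 */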
3690void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
3691{
3692 struct btrfs_fs_info *fs_info = trans->fs_info;
3693 struct btrfs_space_info *info;
3694 u64 left;
3695 u64 thresh;
3696 int ret = 0;
3697 u64 num_devs;
3698
3699
3700
3701
3702
3703 lockdep_assert_held(&fs_info->chunk_mutex);
3704
3705 info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3706 spin_lock(&info->lock);
3707 left = info->total_bytes - btrfs_space_info_used(info, true);
3708 spin_unlock(&info->lock);
3709
3710 num_devs = get_profile_num_devs(fs_info, type);
3711
3712
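 /* num_devs device item updates and one chunk item insertion or removal */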
3713 thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
3714 btrfs_calc_insert_metadata_size(fs_info, 1);
3715
3716 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
3717 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
3718 left, thresh, type);
3719 btrfs_dump_space_info(fs_info, info, 0, 0);
3720 }
3721
3722 if (left < thresh) {
3723 u64 flags = btrfs_system_alloc_profile(fs_info);
3724 struct btrfs_block_group *bg;
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737 bg = btrfs_alloc_chunk(trans, flags);
3738 if (IS_ERR(bg)) {
3739 ret = PTR_ERR(bg);
3740 } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
3741
3742
3743
3744
3745
3746
3747 btrfs_chunk_alloc_add_chunk_item(trans, bg);
3748 }
3749 }
3750
3751 if (!ret) {
3752 ret = btrfs_block_rsv_add(fs_info->chunk_root,
3753 &fs_info->chunk_block_rsv,
3754 thresh, BTRFS_RESERVE_NO_FLUSH);
3755 if (!ret)
3756 trans->chunk_bytes_reserved += thresh;
3757 }
3758}
3759
3760void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
3761{
3762 struct btrfs_block_group *block_group;
3763 u64 last = 0;
3764
3765 while (1) {
3766 struct inode *inode;
3767
3768 block_group = btrfs_lookup_first_block_group(info, last);
3769 while (block_group) {
3770 btrfs_wait_block_group_cache_done(block_group);
3771 spin_lock(&block_group->lock);
3772 if (block_group->iref)
3773 break;
3774 spin_unlock(&block_group->lock);
3775 block_group = btrfs_next_block_group(block_group);
3776 }
3777 if (!block_group) {
3778 if (last == 0)
3779 break;
3780 last = 0;
3781 continue;
3782 }
3783
3784 inode = block_group->inode;
3785 block_group->iref = 0;
3786 block_group->inode = NULL;
3787 spin_unlock(&block_group->lock);
3788 ASSERT(block_group->io_ctl.inode == NULL);
3789 iput(inode);
3790 last = block_group->start + block_group->length;
3791 btrfs_put_block_group(block_group);
3792 }
3793}
3794
3795
3796
3797
3798
3799
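/*
 * Release all block groups and their space infos at unmount time.
 *
 * Must only be called after stopping all worker threads, since block group
 * caching kthreads could otherwise still be running and race with us.
 */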
3800int btrfs_free_block_groups(struct btrfs_fs_info *info)
3801{
3802 struct btrfs_block_group *block_group;
3803 struct btrfs_space_info *space_info;
3804 struct btrfs_caching_control *caching_ctl;
3805 struct rb_node *n;
3806
3807 spin_lock(&info->block_group_cache_lock);
3808 while (!list_empty(&info->caching_block_groups)) {
3809 caching_ctl = list_entry(info->caching_block_groups.next,
3810 struct btrfs_caching_control, list);
3811 list_del(&caching_ctl->list);
3812 btrfs_put_caching_control(caching_ctl);
3813 }
3814 spin_unlock(&info->block_group_cache_lock);
3815
3816 spin_lock(&info->unused_bgs_lock);
3817 while (!list_empty(&info->unused_bgs)) {
3818 block_group = list_first_entry(&info->unused_bgs,
3819 struct btrfs_block_group,
3820 bg_list);
3821 list_del_init(&block_group->bg_list);
3822 btrfs_put_block_group(block_group);
3823 }
3824 spin_unlock(&info->unused_bgs_lock);
3825
3826 spin_lock(&info->unused_bgs_lock);
3827 while (!list_empty(&info->reclaim_bgs)) {
3828 block_group = list_first_entry(&info->reclaim_bgs,
3829 struct btrfs_block_group,
3830 bg_list);
3831 list_del_init(&block_group->bg_list);
3832 btrfs_put_block_group(block_group);
3833 }
3834 spin_unlock(&info->unused_bgs_lock);
3835
3836 spin_lock(&info->block_group_cache_lock);
3837 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
3838 block_group = rb_entry(n, struct btrfs_block_group,
3839 cache_node);
3840 rb_erase(&block_group->cache_node,
3841 &info->block_group_cache_tree);
3842 RB_CLEAR_NODE(&block_group->cache_node);
3843 spin_unlock(&info->block_group_cache_lock);
3844
3845 down_write(&block_group->space_info->groups_sem);
3846 list_del(&block_group->list);
3847 up_write(&block_group->space_info->groups_sem);
3848
3849
3850
3851
3852
3853 if (block_group->cached == BTRFS_CACHE_NO ||
3854 block_group->cached == BTRFS_CACHE_ERROR)
3855 btrfs_free_excluded_extents(block_group);
3856
3857 btrfs_remove_free_space_cache(block_group);
3858 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
3859 ASSERT(list_empty(&block_group->dirty_list));
3860 ASSERT(list_empty(&block_group->io_list));
3861 ASSERT(list_empty(&block_group->bg_list));
3862 ASSERT(refcount_read(&block_group->refs) == 1);
3863 ASSERT(block_group->swap_extents == 0);
3864 btrfs_put_block_group(block_group);
3865
3866 spin_lock(&info->block_group_cache_lock);
3867 }
3868 spin_unlock(&info->block_group_cache_lock);
3869
3870 btrfs_release_global_block_rsv(info);
3871
3872 while (!list_empty(&info->space_info)) {
3873 space_info = list_entry(info->space_info.next,
3874 struct btrfs_space_info,
3875 list);
3876
3877
3878
3879
3880
3881 if (WARN_ON(space_info->bytes_pinned > 0 ||
3882 space_info->bytes_reserved > 0 ||
3883 space_info->bytes_may_use > 0))
3884 btrfs_dump_space_info(info, space_info, 0, 0);
3885 WARN_ON(space_info->reclaim_size > 0);
3886 list_del(&space_info->list);
3887 btrfs_sysfs_remove_space_info(space_info);
3888 }
3889 return 0;
3890}
3891
3892void btrfs_freeze_block_group(struct btrfs_block_group *cache)
3893{
3894 atomic_inc(&cache->frozen);
3895}
3896
3897void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
3898{
3899 struct btrfs_fs_info *fs_info = block_group->fs_info;
3900 struct extent_map_tree *em_tree;
3901 struct extent_map *em;
3902 bool cleanup;
3903
3904 spin_lock(&block_group->lock);
3905 cleanup = (atomic_dec_and_test(&block_group->frozen) &&
3906 block_group->removed);
3907 spin_unlock(&block_group->lock);
3908
3909 if (cleanup) {
3910 em_tree = &fs_info->mapping_tree;
3911 write_lock(&em_tree->lock);
3912 em = lookup_extent_mapping(em_tree, block_group->start,
3913 1);
3914 BUG_ON(!em);
3915 remove_extent_mapping(em_tree, em);
3916 write_unlock(&em_tree->lock);
3917
3918
3919 free_extent_map(em);
3920 free_extent_map(em);
3921
3922
3923
3924
3925
3926
3927 __btrfs_remove_free_space_cache(block_group->free_space_ctl);
3928 }
3929}
3930
3931bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
3932{
3933 bool ret = true;
3934
3935 spin_lock(&bg->lock);
3936 if (bg->ro)
3937 ret = false;
3938 else
3939 bg->swap_extents++;
3940 spin_unlock(&bg->lock);
3941
3942 return ret;
3943}
3944
3945void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
3946{
3947 spin_lock(&bg->lock);
3948 ASSERT(!bg->ro);
3949 ASSERT(bg->swap_extents >= amount);
3950 bg->swap_extents -= amount;
3951 spin_unlock(&bg->lock);
3952}
3953