/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"

#undef SCRAMBLE_DELAYED_REFS

/*
 * control flags for do_chunk_alloc's force field
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_fs_info *fs_info,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_fs_info *fs_info,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_fs_info *fs_info,
			    struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);

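/*
 * Returns non-zero once the block group's free space caching has reached a
 * terminal state (finished or failed), i.e. callers no longer need to wait.
 */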
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);

		/*
		 * If not empty, someone is still holding mutex of
		 * full_stripe_lock, which can only be released by caller.
		 * And it will definitely cause use-after-free when caller
		 * tries to release full stripe lock.
		 *
		 * No better way to resolve, but only to warn.
		 */
		WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

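/*
 * Mark a range as unusable for free space caching in both freed_extents
 * trees; used below to carve the superblock mirrors out of block groups.
 */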
static int add_excluded_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE);
	set_extent_bits(&fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE);
	return 0;
}

static void free_excluded_extents(struct btrfs_fs_info *fs_info,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE);
	clear_extent_bits(&fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE);
}

static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(fs_info, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(fs_info, cache->key.objectid,
				       bytenr, 0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(fs_info, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	refcount_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (refcount_dec_and_test(&ctl->count))
		kfree(ctl);
}

#ifdef CONFIG_BTRFS_DEBUG
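/*
 * Debug helper: remove every other chunk of free space from the block group
 * so its free space is deliberately fragmented for testing.
 */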
static void fragment_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 start = block_group->key.objectid;
	u64 len = block_group->key.offset;
	u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
		fs_info->nodesize : fs_info->sectorsize;
	u64 step = chunk << 1;

	while (len > chunk) {
		btrfs_remove_free_space(block_group, start, chunk);
		start += step;
		if (len < step)
			len = 0;
		else
			len -= step;
	}
}
#endif

/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		       struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret); /* -ENOMEM or logic error */
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret); /* -ENOMEM or logic error */
	}

	return total_added;
}

static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret;
	bool wakeup = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

#ifdef CONFIG_BTRFS_DEBUG
	/*
	 * If we're fragmenting we don't want to make anybody think we can
	 * allocate from this block group until we've had a chance to fragment
	 * the free space.
	 */
	if (btrfs_should_fragment_free_space(block_group))
		wakeup = false;
#endif
	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since its read-only
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				if (wakeup)
					caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				mutex_lock(&caching_ctl->mutex);
				down_read(&fs_info->commit_root_sem);
				goto next;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			if (wakeup)
				caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->nodesize;
			else
				last = key.objectid + key.offset;

			if (total_found > CACHING_CTL_WAKE_UP) {
				total_found = 0;
				if (wakeup)
					wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

out:
	btrfs_free_path(path);
	return ret;
}

static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	int ret;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	mutex_lock(&caching_ctl->mutex);
	down_read(&fs_info->commit_root_sem);

	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		ret = load_free_space_tree(caching_ctl);
	else
		ret = load_extent_tree_free(caching_ctl);

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group)) {
		u64 bytes_used;

		spin_lock(&block_group->space_info->lock);
		spin_lock(&block_group->lock);
		bytes_used = block_group->key.offset -
			btrfs_block_group_used(&block_group->item);
		block_group->space_info->bytes_used += bytes_used >> 1;
		spin_unlock(&block_group->lock);
		spin_unlock(&block_group->space_info->lock);
		fragment_free_space(block_group);
	}
#endif

	caching_ctl->progress = (u64)-1;

	up_read(&fs_info->commit_root_sem);
	free_excluded_extents(fs_info, block_group);
	mutex_unlock(&caching_ctl->mutex);

	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

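/*
 * Start loading a block group's free space.  With SPACE_CACHE enabled we
 * first try the on-disk free space cache; otherwise (or if that misses) an
 * async caching thread is queued unless load_cache_only was requested.
 */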
static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	refcount_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);
	/*
	 * This should be a rare occasion, but this could happen I think in the
	 * case where one thread starts to load the space cache info, and then
	 * some other thread starts a transaction commit which tries to do an
	 * allocation while the other thread is still loading the space cache
	 * info.  The previous loop should have kept us from choosing this block
	 * group, but if we've moved to the state where we will wait on caching
	 * block groups we need to first check if we're doing a fast load here,
	 * so we can wait for it to finish, otherwise we could end up allocating
	 * from a block group who's cache gets evicted for one reason or
	 * another.
	 */
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		refcount_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
		mutex_lock(&caching_ctl->mutex);
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
		if (ret == 1 &&
		    btrfs_should_fragment_free_space(cache)) {
			u64 bytes_used;

			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			bytes_used = cache->key.offset -
				btrfs_block_group_used(&cache->item);
			cache->space_info->bytes_used += bytes_used >> 1;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fragment_free_space(cache);
		}
#endif
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info, cache);
			return 0;
		}
	} else {
		/*
		 * We're either using the free space tree or no caching at all.
		 * Set cached to the appropriate value and wakeup any waiters.
		 */
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	refcount_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 0);
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 1);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

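/*
 * Adjust the total_bytes_pinned counter of the space_info that matches the
 * extent's type (data, metadata or system, derived from owner/root).
 */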
static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
			     u64 owner, u64 root_objectid)
{
	struct btrfs_space_info *space_info;
	u64 flags;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
			flags = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			flags = BTRFS_BLOCK_GROUP_METADATA;
	} else {
		flags = BTRFS_BLOCK_GROUP_DATA;
	}

	space_info = __find_space_info(fs_info, flags);
	ASSERT(space_info);
	percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */
	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and try
			 * again
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref_head(head);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. The implicit back refs is optimized
 * for pointers in non-shared tree blocks. For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. These information allow us to find the block by
 * searching the tree. The full back refs is for pointers in tree blocks not
 * referenced by their owner trees. The location of tree block is recorded
 * in the back refs. Actually the full back refs is generic, and can be
 * used in all cases the implicit back refs is used. The major shortcoming
 * of the full back refs is its overhead. Every time a tree block gets
 * COWed, we have to update back refs entry for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in old transaction, the
 * only way to drop a reference to it is COW it. So we can detect the
 * event that tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 *   owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 *   block's owner tree. In this case, full back refs is used for pointers
 *   in the block. Remove these full back refs, add implicit back refs for
 *   every pointers in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 *   the block's owner tree. In this case, implicit back refs is used for
 *   pointers in the block. Add full back refs for every pointers in the
 *   block, update implicit back refs for every pointers in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 *   not the block's owner tree. Add implicit back refs for every pointer in
 *   the new block, increase lower level extents' reference counts. The
 *   original implicit back refs are entailed to the new block.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the implicit back refs is the first byte of
 * the tree leaf
 *
 * When a file extent is allocated, The implicit back refs is used.
 * the fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of key. The key offset for the implicit back refs is
 * objectid of block's owner tree. The key offset for the full back refs
 * is the first byte of parent block.
 *
 * When implicit back refs is used, information about the lowest key and
 * level of the tree block are required. These information are stored in
 * tree block info structure.
 */
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(fs_info, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

/*
 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
 * is_data == BTRFS_REF_TYPE_DATA, data type is required,
 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
 */
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has parent tree
				 * block, which must be aligned to
				 * nodesize.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has parent tree
				 * block, which must be aligned to
				 * nodesize.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else {
			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
			return type;
		}
	}

	btrfs_print_leaf((struct extent_buffer *)eb);
	btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
		  eb->start, type);
	WARN_ON(1);

	return BTRFS_REF_TYPE_INVALID;
}

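/*
 * Hash of (root, objectid, offset) used as the key offset for implicit data
 * back refs.  Note the high crc is shifted by 31 bits rather than 32; the
 * result is part of the on-disk key format, so it must not be changed.
 */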
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

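/*
 * Look up a keyed data back ref: a shared ref keyed by the parent block, or
 * an implicit ref keyed by the (root, owner, offset) hash, resolving hash
 * collisions by scanning forward through the leaf.
 */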
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_fs_info *fs_info,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

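/*
 * Insert a keyed data back ref, or bump the count of an existing one.  Hash
 * collisions between different (root, owner, offset) tuples are handled by
 * incrementing key.offset until a free slot or a matching ref is found.
 */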
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_fs_info *fs_info,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

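/*
 * Drop refs_to_drop references from a keyed data back ref item, deleting the
 * item once the count reaches zero.
 */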
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_fs_info *fs_info,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, fs_info->extent_root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;
	int type;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		/*
		 * If type is invalid, we should have bailed out earlier than
		 * here, so we are sure the type is valid.
		 */
		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
		ASSERT(type != BTRFS_REF_TYPE_INVALID);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

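/*
 * Look up a keyed tree block back ref: shared (keyed by the parent block) or
 * implicit (keyed by the owning root's objectid).
 */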
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_fs_info *fs_info,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_fs_info *fs_info,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
				      path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

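/*
 * Pick the back ref key type: shared refs are keyed by the parent block,
 * implicit refs by the owning root; owner distinguishes tree blocks from
 * file data.
 */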
static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

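/*
 * Walk up the path and return the first key greater than the one at the
 * current slot, without moving the path.
 */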
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
	int needed;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, fs_info, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
		needed = BTRFS_REF_TYPE_DATA;
	else
		needed = BTRFS_REF_TYPE_BLOCK;

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
		if (type == BTRFS_REF_TYPE_INVALID) {
			err = -EINVAL;
			goto out;
		}

		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(fs_info, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
					     parent, root_objectid, owner,
					     offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	/*
	 * If type is invalid, we should have bailed out after
	 * lookup_inline_extent_backref().
	 */
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	ASSERT(type != BTRFS_REF_TYPE_INVALID);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(fs_info, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(fs_info, path, iref,
					     refs_to_add, extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(fs_info, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

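/*
 * Drop references from a back ref of any flavor: inline, keyed data, or
 * keyed tree block.  *last_ref is set when the last reference went away.
 */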
static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(fs_info, path, iref,
					     -refs_to_drop, NULL, last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, fs_info->extent_root, path);
	}
	return ret;
}

#define in_range(b, first, len)	((b) >= (first) && (b) < (first) + (len))
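/*
 * Issue discards for [start, start + len), carefully stepping around any
 * superblock mirrors that live inside the range.
 */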
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * submit.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are
	 * discarding.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
			      &bbio, 0);
	/* Error condition is -ENOMEM */
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break;

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int old_ref_mod, new_ref_mod;
	int ret;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
			   owner, offset, BTRFS_ADD_DELAYED_REF);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
						 num_bytes, parent,
						 root_objectid, (int)owner,
						 BTRFS_ADD_DELAYED_REF, NULL,
						 &old_ref_mod, &new_ref_mod);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						 num_bytes, parent,
						 root_objectid, owner, offset,
						 0, BTRFS_ADD_DELAYED_REF,
						 &old_ref_mod, &new_ref_mod);
	}

	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
		add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);

	return ret;
}

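/*
 * Apply a delayed ref that adds references to an existing extent: try to
 * insert/update an inline back ref first and, if the extent item is too full
 * (-EAGAIN), bump the extent item's ref count and add a keyed back ref.
 */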
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert and
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
				    root_objectid, owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}

2274static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2275 struct btrfs_fs_info *fs_info,
2276 struct btrfs_delayed_ref_node *node,
2277 struct btrfs_delayed_extent_op *extent_op,
2278 int insert_reserved)
2279{
2280 int ret = 0;
2281 struct btrfs_delayed_data_ref *ref;
2282 struct btrfs_key ins;
2283 u64 parent = 0;
2284 u64 ref_root = 0;
2285 u64 flags = 0;
2286
2287 ins.objectid = node->bytenr;
2288 ins.offset = node->num_bytes;
2289 ins.type = BTRFS_EXTENT_ITEM_KEY;
2290
2291 ref = btrfs_delayed_node_to_data_ref(node);
2292 trace_run_delayed_data_ref(fs_info, node, ref, node->action);
2293
2294 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2295 parent = ref->parent;
2296 ref_root = ref->root;
2297
2298 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2299 if (extent_op)
2300 flags |= extent_op->flags_to_set;
2301 ret = alloc_reserved_file_extent(trans, fs_info,
2302 parent, ref_root, flags,
2303 ref->objectid, ref->offset,
2304 &ins, node->ref_mod);
2305 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2306 ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
2307 ref_root, ref->objectid,
2308 ref->offset, node->ref_mod,
2309 extent_op);
2310 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2311 ret = __btrfs_free_extent(trans, fs_info, node, parent,
2312 ref_root, ref->objectid,
2313 ref->offset, node->ref_mod,
2314 extent_op);
2315 } else {
2316 BUG();
2317 }
2318 return ret;
2319}
2320
2321static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2322 struct extent_buffer *leaf,
2323 struct btrfs_extent_item *ei)
2324{
2325 u64 flags = btrfs_extent_flags(leaf, ei);
2326 if (extent_op->update_flags) {
2327 flags |= extent_op->flags_to_set;
2328 btrfs_set_extent_flags(leaf, ei, flags);
2329 }
2330
2331 if (extent_op->update_key) {
2332 struct btrfs_tree_block_info *bi;
2333 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2334 bi = (struct btrfs_tree_block_info *)(ei + 1);
2335 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2336 }
2337}
2338
2339static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2340 struct btrfs_fs_info *fs_info,
2341 struct btrfs_delayed_ref_head *head,
2342 struct btrfs_delayed_extent_op *extent_op)
2343{
2344 struct btrfs_key key;
2345 struct btrfs_path *path;
2346 struct btrfs_extent_item *ei;
2347 struct extent_buffer *leaf;
2348 u32 item_size;
2349 int ret;
2350 int err = 0;
2351 int metadata = !extent_op->is_data;
2352
2353 if (trans->aborted)
2354 return 0;
2355
2356 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2357 metadata = 0;
2358
2359 path = btrfs_alloc_path();
2360 if (!path)
2361 return -ENOMEM;
2362
2363 key.objectid = head->bytenr;
2364
2365 if (metadata) {
2366 key.type = BTRFS_METADATA_ITEM_KEY;
2367 key.offset = extent_op->level;
2368 } else {
2369 key.type = BTRFS_EXTENT_ITEM_KEY;
2370 key.offset = head->num_bytes;
2371 }
2372
2373again:
2374 path->reada = READA_FORWARD;
2375 path->leave_spinning = 1;
2376 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
2377 if (ret < 0) {
2378 err = ret;
2379 goto out;
2380 }
2381 if (ret > 0) {
2382 if (metadata) {
2383 if (path->slots[0] > 0) {
2384 path->slots[0]--;
2385 btrfs_item_key_to_cpu(path->nodes[0], &key,
2386 path->slots[0]);
2387 if (key.objectid == head->bytenr &&
2388 key.type == BTRFS_EXTENT_ITEM_KEY &&
2389 key.offset == head->num_bytes)
2390 ret = 0;
2391 }
2392 if (ret > 0) {
2393 btrfs_release_path(path);
2394 metadata = 0;
2395
2396 key.objectid = head->bytenr;
2397 key.offset = head->num_bytes;
2398 key.type = BTRFS_EXTENT_ITEM_KEY;
2399 goto again;
2400 }
2401 } else {
2402 err = -EIO;
2403 goto out;
2404 }
2405 }
2406
2407 leaf = path->nodes[0];
2408 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2409#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2410 if (item_size < sizeof(*ei)) {
2411 ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
2412 if (ret < 0) {
2413 err = ret;
2414 goto out;
2415 }
2416 leaf = path->nodes[0];
2417 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2418 }
2419#endif
2420 BUG_ON(item_size < sizeof(*ei));
2421 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2422 __run_delayed_extent_op(extent_op, leaf, ei);
2423
2424 btrfs_mark_buffer_dirty(leaf);
2425out:
2426 btrfs_free_path(path);
2427 return err;
2428}
2429
2430static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2431 struct btrfs_fs_info *fs_info,
2432 struct btrfs_delayed_ref_node *node,
2433 struct btrfs_delayed_extent_op *extent_op,
2434 int insert_reserved)
2435{
2436 int ret = 0;
2437 struct btrfs_delayed_tree_ref *ref;
2438 struct btrfs_key ins;
2439 u64 parent = 0;
2440 u64 ref_root = 0;
2441 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2442
2443 ref = btrfs_delayed_node_to_tree_ref(node);
2444 trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
2445
2446 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2447 parent = ref->parent;
2448 ref_root = ref->root;
2449
2450 ins.objectid = node->bytenr;
2451 if (skinny_metadata) {
2452 ins.offset = ref->level;
2453 ins.type = BTRFS_METADATA_ITEM_KEY;
2454 } else {
2455 ins.offset = node->num_bytes;
2456 ins.type = BTRFS_EXTENT_ITEM_KEY;
2457 }
2458
2459 if (node->ref_mod != 1) {
2460 btrfs_err(fs_info,
2461 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2462 node->bytenr, node->ref_mod, node->action, ref_root,
2463 parent);
2464 return -EIO;
2465 }
2466 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2467 BUG_ON(!extent_op || !extent_op->update_flags);
2468 ret = alloc_reserved_tree_block(trans, fs_info,
2469 parent, ref_root,
2470 extent_op->flags_to_set,
2471 &extent_op->key,
2472 ref->level, &ins);
2473 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2474 ret = __btrfs_inc_extent_ref(trans, fs_info, node,
2475 parent, ref_root,
2476 ref->level, 0, 1,
2477 extent_op);
2478 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2479 ret = __btrfs_free_extent(trans, fs_info, node,
2480 parent, ref_root,
2481 ref->level, 0, 1, extent_op);
2482 } else {
2483 BUG();
2484 }
2485 return ret;
2486}
2487
/* helper function to actually process a single delayed ref entry */
2489static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2490 struct btrfs_fs_info *fs_info,
2491 struct btrfs_delayed_ref_node *node,
2492 struct btrfs_delayed_extent_op *extent_op,
2493 int insert_reserved)
2494{
2495 int ret = 0;
2496
2497 if (trans->aborted) {
2498 if (insert_reserved)
2499 btrfs_pin_extent(fs_info, node->bytenr,
2500 node->num_bytes, 1);
2501 return 0;
2502 }
2503
2504 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2505 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2506 ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
2507 insert_reserved);
2508 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2509 node->type == BTRFS_SHARED_DATA_REF_KEY)
2510 ret = run_delayed_data_ref(trans, fs_info, node, extent_op,
2511 insert_reserved);
2512 else
2513 BUG();
2514 return ret;
2515}
2516
2517static inline struct btrfs_delayed_ref_node *
2518select_delayed_ref(struct btrfs_delayed_ref_head *head)
2519{
2520 struct btrfs_delayed_ref_node *ref;
2521
2522 if (RB_EMPTY_ROOT(&head->ref_tree))
2523 return NULL;
2524
	/*
	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * This is to prevent a ref count from going down to zero, which deletes
	 * the extent item from the extent tree, when there still are references
	 * to add, which would fail because they would not find the extent item.
	 */
2531 if (!list_empty(&head->ref_add_list))
2532 return list_first_entry(&head->ref_add_list,
2533 struct btrfs_delayed_ref_node, add_list);
2534
2535 ref = rb_entry(rb_first(&head->ref_tree),
2536 struct btrfs_delayed_ref_node, ref_node);
2537 ASSERT(list_empty(&ref->add_list));
2538 return ref;
2539}
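
/*
 * Worked example for the add-first policy above (hypothetical head state):
 * with a DROP(ref_mod 1) and an ADD(ref_mod 1) queued for an extent whose
 * on-disk refcount is 1, running the drop first would delete the extent
 * item and the following add would fail to find it.  Taking the entry from
 * ref_add_list first keeps the count from touching zero while adds remain.
 */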
2540
2541static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2542 struct btrfs_delayed_ref_head *head)
2543{
2544 spin_lock(&delayed_refs->lock);
2545 head->processing = 0;
2546 delayed_refs->num_heads_ready++;
2547 spin_unlock(&delayed_refs->lock);
2548 btrfs_delayed_ref_unlock(head);
2549}
2550
2551static int cleanup_extent_op(struct btrfs_trans_handle *trans,
2552 struct btrfs_fs_info *fs_info,
2553 struct btrfs_delayed_ref_head *head)
2554{
2555 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
2556 int ret;
2557
2558 if (!extent_op)
2559 return 0;
2560 head->extent_op = NULL;
2561 if (head->must_insert_reserved) {
2562 btrfs_free_delayed_extent_op(extent_op);
2563 return 0;
2564 }
2565 spin_unlock(&head->lock);
2566 ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
2567 btrfs_free_delayed_extent_op(extent_op);
2568 return ret ? ret : 1;
2569}
2570
2571static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2572 struct btrfs_fs_info *fs_info,
2573 struct btrfs_delayed_ref_head *head)
2574{
2575 struct btrfs_delayed_ref_root *delayed_refs;
2576 int ret;
2577
2578 delayed_refs = &trans->transaction->delayed_refs;
2579
2580 ret = cleanup_extent_op(trans, fs_info, head);
2581 if (ret < 0) {
2582 unselect_delayed_ref_head(delayed_refs, head);
2583 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2584 return ret;
2585 } else if (ret) {
2586 return ret;
2587 }
2588
	/*
	 * Need to drop our head ref lock and re-acquire the delayed ref lock
	 * and then re-check to make sure nobody got added.
	 */
2593 spin_unlock(&head->lock);
2594 spin_lock(&delayed_refs->lock);
2595 spin_lock(&head->lock);
2596 if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
2597 spin_unlock(&head->lock);
2598 spin_unlock(&delayed_refs->lock);
2599 return 1;
2600 }
2601 delayed_refs->num_heads--;
2602 rb_erase(&head->href_node, &delayed_refs->href_root);
2603 RB_CLEAR_NODE(&head->href_node);
2604 spin_unlock(&delayed_refs->lock);
2605 spin_unlock(&head->lock);
2606 atomic_dec(&delayed_refs->num_entries);
2607
2608 trace_run_delayed_ref_head(fs_info, head, 0);
2609
2610 if (head->total_ref_mod < 0) {
2611 struct btrfs_block_group_cache *cache;
2612
2613 cache = btrfs_lookup_block_group(fs_info, head->bytenr);
2614 ASSERT(cache);
2615 percpu_counter_add(&cache->space_info->total_bytes_pinned,
2616 -head->num_bytes);
2617 btrfs_put_block_group(cache);
2618
2619 if (head->is_data) {
2620 spin_lock(&delayed_refs->lock);
2621 delayed_refs->pending_csums -= head->num_bytes;
2622 spin_unlock(&delayed_refs->lock);
2623 }
2624 }
2625
2626 if (head->must_insert_reserved) {
2627 btrfs_pin_extent(fs_info, head->bytenr,
2628 head->num_bytes, 1);
2629 if (head->is_data) {
2630 ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2631 head->num_bytes);
2632 }
2633 }
2634
	/* Also free its reserved qgroup space. */
2636 btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
2637 head->qgroup_reserved);
2638 btrfs_delayed_ref_unlock(head);
2639 btrfs_put_delayed_ref_head(head);
2640 return 0;
2641}
2642
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2647static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2648 struct btrfs_fs_info *fs_info,
2649 unsigned long nr)
2650{
2651 struct btrfs_delayed_ref_root *delayed_refs;
2652 struct btrfs_delayed_ref_node *ref;
2653 struct btrfs_delayed_ref_head *locked_ref = NULL;
2654 struct btrfs_delayed_extent_op *extent_op;
2655 ktime_t start = ktime_get();
2656 int ret;
2657 unsigned long count = 0;
2658 unsigned long actual_count = 0;
2659 int must_insert_reserved = 0;
2660
2661 delayed_refs = &trans->transaction->delayed_refs;
2662 while (1) {
2663 if (!locked_ref) {
2664 if (count >= nr)
2665 break;
2666
2667 spin_lock(&delayed_refs->lock);
2668 locked_ref = btrfs_select_ref_head(trans);
2669 if (!locked_ref) {
2670 spin_unlock(&delayed_refs->lock);
2671 break;
2672 }
2673
			/*
			 * Grab the lock that says we are going to process all
			 * the refs for this head.
			 */
2676 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2677 spin_unlock(&delayed_refs->lock);
2678
			/*
			 * We may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
2684 if (ret == -EAGAIN) {
2685 locked_ref = NULL;
2686 count++;
2687 continue;
2688 }
2689 }
2690
		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 * Or we can get node references of the same type that weren't
		 * merged when created due to bumps in the tree mod seq, and
		 * we need to merge them to prevent adding an inline extent
		 * backref before dropping it (triggering a BUG_ON at the time
		 * btrfs_merge_delayed_refs() sees it).
		 */
2703 spin_lock(&locked_ref->lock);
2704 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2705 locked_ref);
2706
		/*
		 * Pick the next ref to run from this head; adds are selected
		 * before drops so the extent item's ref count never hits zero
		 * while references remain to be added.
		 */
2711 ref = select_delayed_ref(locked_ref);
2712
2713 if (ref && ref->seq &&
2714 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2715 spin_unlock(&locked_ref->lock);
2716 unselect_delayed_ref_head(delayed_refs, locked_ref);
2717 locked_ref = NULL;
2718 cond_resched();
2719 count++;
2720 continue;
2721 }
2722
		/*
		 * We're done processing refs in this ref_head, clean everything
		 * up and move on to the next ref_head.
		 */
2727 if (!ref) {
2728 ret = cleanup_ref_head(trans, fs_info, locked_ref);
			if (ret > 0) {
				/* We dropped our lock, we need to loop. */
2731 ret = 0;
2732 continue;
2733 } else if (ret) {
2734 return ret;
2735 }
2736 locked_ref = NULL;
2737 count++;
2738 continue;
2739 }
2740
2741 actual_count++;
2742 ref->in_tree = 0;
2743 rb_erase(&ref->ref_node, &locked_ref->ref_tree);
2744 RB_CLEAR_NODE(&ref->ref_node);
2745 if (!list_empty(&ref->add_list))
2746 list_del(&ref->add_list);
2747
		/*
		 * When we play the delayed ref, also correct the ref_mod on
		 * the head.
		 */
2751 switch (ref->action) {
2752 case BTRFS_ADD_DELAYED_REF:
2753 case BTRFS_ADD_DELAYED_EXTENT:
2754 locked_ref->ref_mod -= ref->ref_mod;
2755 break;
2756 case BTRFS_DROP_DELAYED_REF:
2757 locked_ref->ref_mod += ref->ref_mod;
2758 break;
2759 default:
2760 WARN_ON(1);
2761 }
2762 atomic_dec(&delayed_refs->num_entries);
2763
		/*
		 * Record the must_insert_reserved flag before we drop the
		 * spin lock.
		 */
2768 must_insert_reserved = locked_ref->must_insert_reserved;
2769 locked_ref->must_insert_reserved = 0;
2770
2771 extent_op = locked_ref->extent_op;
2772 locked_ref->extent_op = NULL;
2773 spin_unlock(&locked_ref->lock);
2774
2775 ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
2776 must_insert_reserved);
2777
2778 btrfs_free_delayed_extent_op(extent_op);
2779 if (ret) {
2780 unselect_delayed_ref_head(delayed_refs, locked_ref);
2781 btrfs_put_delayed_ref(ref);
2782 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2783 ret);
2784 return ret;
2785 }
2786
2787 btrfs_put_delayed_ref(ref);
2788 count++;
2789 cond_resched();
2790 }
2791
	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */
2797 if (actual_count > 0) {
2798 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2799 u64 avg;
2800
		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
2805 spin_lock(&delayed_refs->lock);
2806 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2807 fs_info->avg_delayed_ref_runtime = avg >> 2;
2808 spin_unlock(&delayed_refs->lock);
2809 }
2810 return 0;
2811}
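
/*
 * The average update above is an exponentially weighted moving average with
 * a 3/4 weight on history.  Worked example (made-up numbers): with
 * avg_delayed_ref_runtime = 100000ns and a batch where runtime = 20000ns,
 * the new average is (100000 * 3 + 20000) >> 2 = 80000ns.
 */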
2812
2813#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order.  This
 * correlates in most cases to the order added.  To expose dependencies on
 * this order, we start to process the tree in the middle instead of the
 * beginning.
 */
2819static u64 find_middle(struct rb_root *root)
2820{
2821 struct rb_node *n = root->rb_node;
2822 struct btrfs_delayed_ref_node *entry;
2823 int alt = 1;
2824 u64 middle;
2825 u64 first = 0, last = 0;
2826
2827 n = rb_first(root);
2828 if (n) {
2829 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2830 first = entry->bytenr;
2831 }
2832 n = rb_last(root);
2833 if (n) {
2834 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2835 last = entry->bytenr;
2836 }
2837 n = root->rb_node;
2838
2839 while (n) {
2840 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2841 WARN_ON(!entry->in_tree);
2842
2843 middle = entry->bytenr;
2844
2845 if (alt)
2846 n = n->rb_left;
2847 else
2848 n = n->rb_right;
2849
2850 alt = 1 - alt;
2851 }
2852 return middle;
2853}
2854#endif
2855
2856static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2857{
2858 u64 num_bytes;
2859
2860 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2861 sizeof(struct btrfs_extent_inline_ref));
2862 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2863 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2864
	/*
	 * This is an optimistic estimate; leaves are never packed completely
	 * full, which is why the caller doubles the result.
	 */
2869 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2870}
2871
/*
 * Takes the number of bytes to be checksummed and figures out how many
 * leaves it would require to store the csums for that many bytes.
 */
2876u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2877{
2878 u64 csum_size;
2879 u64 num_csums_per_leaf;
2880 u64 num_csums;
2881
2882 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2883 num_csums_per_leaf = div64_u64(csum_size,
2884 (u64)btrfs_super_csum_size(fs_info->super_copy));
2885 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2886 num_csums += num_csums_per_leaf - 1;
2887 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2888 return num_csums;
2889}
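
/*
 * Worked example (assuming common mkfs defaults: 4KiB sectorsize, 16KiB
 * nodesize and 4-byte crc32c csums): BTRFS_MAX_ITEM_SIZE is a little under
 * 16KiB, so num_csums_per_leaf is roughly 16K / 4 = ~4000.  Reserving for
 * 1GiB of dirty data means 262144 sectors worth of csums, i.e. roughly
 * 262144 / 4000 = ~66 leaves.
 */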
2890
2891int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2892 struct btrfs_fs_info *fs_info)
2893{
2894 struct btrfs_block_rsv *global_rsv;
2895 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2896 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2897 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2898 u64 num_bytes, num_dirty_bgs_bytes;
2899 int ret = 0;
2900
2901 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
2902 num_heads = heads_to_leaves(fs_info, num_heads);
2903 if (num_heads > 1)
2904 num_bytes += (num_heads - 1) * fs_info->nodesize;
2905 num_bytes <<= 1;
2906 num_bytes += btrfs_csum_bytes_to_leaves(fs_info, csum_bytes) *
2907 fs_info->nodesize;
2908 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(fs_info,
2909 num_dirty_bgs);
2910 global_rsv = &fs_info->global_block_rsv;
2911
	/*
	 * If we can't allocate any more chunks lets make sure we have _lots_ of
	 * wiggle room since running delayed refs can create more delayed refs.
	 */
2916 if (global_rsv->space_info->full) {
2917 num_dirty_bgs_bytes <<= 1;
2918 num_bytes <<= 1;
2919 }
2920
2921 spin_lock(&global_rsv->lock);
2922 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2923 ret = 1;
2924 spin_unlock(&global_rsv->lock);
2925 return ret;
2926}
2927
2928int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2929 struct btrfs_fs_info *fs_info)
2930{
2931 u64 num_entries =
2932 atomic_read(&trans->transaction->delayed_refs.num_entries);
2933 u64 avg_runtime;
2934 u64 val;
2935
2936 smp_mb();
2937 avg_runtime = fs_info->avg_delayed_ref_runtime;
2938 val = num_entries * avg_runtime;
2939 if (val >= NSEC_PER_SEC)
2940 return 1;
2941 if (val >= NSEC_PER_SEC / 2)
2942 return 2;
2943
2944 return btrfs_check_space_for_delayed_refs(trans, fs_info);
2945}
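
/*
 * Worked example (made-up numbers): with avg_runtime = 10000ns and 200000
 * queued entries, val = 2 * NSEC_PER_SEC and the caller is told to throttle
 * hard (1).  At 60000 entries val = 0.6s, between half a second and a
 * second, so 2 is returned; below that the decision falls through to the
 * global reserve check above.
 */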
2946
2947struct async_delayed_refs {
2948 struct btrfs_root *root;
2949 u64 transid;
2950 int count;
2951 int error;
2952 int sync;
2953 struct completion wait;
2954 struct btrfs_work work;
2955};
2956
2957static inline struct async_delayed_refs *
2958to_async_delayed_refs(struct btrfs_work *work)
2959{
2960 return container_of(work, struct async_delayed_refs, work);
2961}
2962
2963static void delayed_ref_async_start(struct btrfs_work *work)
2964{
2965 struct async_delayed_refs *async = to_async_delayed_refs(work);
2966 struct btrfs_trans_handle *trans;
2967 struct btrfs_fs_info *fs_info = async->root->fs_info;
2968 int ret;
2969
	/* If the commit is already started, we don't need to wait here. */
2971 if (btrfs_transaction_blocked(fs_info))
2972 goto done;
2973
2974 trans = btrfs_join_transaction(async->root);
2975 if (IS_ERR(trans)) {
2976 async->error = PTR_ERR(trans);
2977 goto done;
2978 }
2979
	/*
	 * trans->sync means that when we call end_transaction, we won't
	 * wait on delayed refs.
	 */
2984 trans->sync = true;
2985
	/* Don't bother flushing if we got into a different transaction. */
2987 if (trans->transid > async->transid)
2988 goto end;
2989
2990 ret = btrfs_run_delayed_refs(trans, fs_info, async->count);
2991 if (ret)
2992 async->error = ret;
2993end:
2994 ret = btrfs_end_transaction(trans);
2995 if (ret && !async->error)
2996 async->error = ret;
2997done:
2998 if (async->sync)
2999 complete(&async->wait);
3000 else
3001 kfree(async);
3002}
3003
3004int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
3005 unsigned long count, u64 transid, int wait)
3006{
3007 struct async_delayed_refs *async;
3008 int ret;
3009
3010 async = kmalloc(sizeof(*async), GFP_NOFS);
3011 if (!async)
3012 return -ENOMEM;
3013
3014 async->root = fs_info->tree_root;
3015 async->count = count;
3016 async->error = 0;
3017 async->transid = transid;
3018 if (wait)
3019 async->sync = 1;
3020 else
3021 async->sync = 0;
3022 init_completion(&async->wait);
3023
3024 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
3025 delayed_ref_async_start, NULL, NULL);
3026
3027 btrfs_queue_work(fs_info->extent_workers, &async->work);
3028
3029 if (wait) {
3030 wait_for_completion(&async->wait);
3031 ret = async->error;
3032 kfree(async);
3033 return ret;
3034 }
3035 return 0;
3036}
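
/*
 * Illustrative sketch (not part of the original file): fire-and-forget
 * flushing of up to 64 ref heads, where 'cur_transid' is assumed to come
 * from the caller's transaction handle:
 *
 *	btrfs_async_run_delayed_refs(fs_info, 64, cur_transid, 0);
 *
 * Passing wait == 1 instead blocks on the completion and returns the
 * worker's error code.
 */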
3037
/*
 * This starts processing the delayed reference count updates and extent
 * insertions we have queued up so far.  count can be 0, which means to
 * process everything in the tree at the start of the run (but not newly
 * added entries), or it can be some target number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction.
 * Returns <0 on error and aborts the transaction.
 */
3048int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3049 struct btrfs_fs_info *fs_info, unsigned long count)
3050{
3051 struct rb_node *node;
3052 struct btrfs_delayed_ref_root *delayed_refs;
3053 struct btrfs_delayed_ref_head *head;
3054 int ret;
3055 int run_all = count == (unsigned long)-1;
3056 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
3057
	/* We'll clean this up in btrfs_cleanup_transaction. */
3059 if (trans->aborted)
3060 return 0;
3061
3062 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
3063 return 0;
3064
3065 delayed_refs = &trans->transaction->delayed_refs;
3066 if (count == 0)
3067 count = atomic_read(&delayed_refs->num_entries) * 2;
3068
3069again:
3070#ifdef SCRAMBLE_DELAYED_REFS
3071 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
3072#endif
3073 trans->can_flush_pending_bgs = false;
3074 ret = __btrfs_run_delayed_refs(trans, fs_info, count);
3075 if (ret < 0) {
3076 btrfs_abort_transaction(trans, ret);
3077 return ret;
3078 }
3079
3080 if (run_all) {
3081 if (!list_empty(&trans->new_bgs))
3082 btrfs_create_pending_block_groups(trans, fs_info);
3083
3084 spin_lock(&delayed_refs->lock);
3085 node = rb_first(&delayed_refs->href_root);
3086 if (!node) {
3087 spin_unlock(&delayed_refs->lock);
3088 goto out;
3089 }
3090 head = rb_entry(node, struct btrfs_delayed_ref_head,
3091 href_node);
3092 refcount_inc(&head->refs);
3093 spin_unlock(&delayed_refs->lock);
3094
		/* Mutex was contended, block until it's released and try again. */
3096 mutex_lock(&head->mutex);
3097 mutex_unlock(&head->mutex);
3098
3099 btrfs_put_delayed_ref_head(head);
3100 cond_resched();
3101 goto again;
3102 }
3103out:
3104 trans->can_flush_pending_bgs = can_flush_pending_bgs;
3105 return 0;
3106}
3107
3108int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3109 struct btrfs_fs_info *fs_info,
3110 u64 bytenr, u64 num_bytes, u64 flags,
3111 int level, int is_data)
3112{
3113 struct btrfs_delayed_extent_op *extent_op;
3114 int ret;
3115
3116 extent_op = btrfs_alloc_delayed_extent_op();
3117 if (!extent_op)
3118 return -ENOMEM;
3119
3120 extent_op->flags_to_set = flags;
3121 extent_op->update_flags = true;
3122 extent_op->update_key = false;
3123 extent_op->is_data = is_data ? true : false;
3124 extent_op->level = level;
3125
3126 ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
3127 num_bytes, extent_op);
3128 if (ret)
3129 btrfs_free_delayed_extent_op(extent_op);
3130 return ret;
3131}
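
/*
 * Illustrative sketch (not part of the original file): flagging a tree
 * block's extent item as FULL_BACKREF through a delayed extent op, with
 * 'trans' and the extent_buffer 'eb' assumed from the caller:
 *
 *	ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start, eb->len,
 *					  BTRFS_BLOCK_FLAG_FULL_BACKREF,
 *					  btrfs_header_level(eb), 0);
 */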
3132
3133static noinline int check_delayed_ref(struct btrfs_root *root,
3134 struct btrfs_path *path,
3135 u64 objectid, u64 offset, u64 bytenr)
3136{
3137 struct btrfs_delayed_ref_head *head;
3138 struct btrfs_delayed_ref_node *ref;
3139 struct btrfs_delayed_data_ref *data_ref;
3140 struct btrfs_delayed_ref_root *delayed_refs;
3141 struct btrfs_transaction *cur_trans;
3142 struct rb_node *node;
3143 int ret = 0;
3144
3145 cur_trans = root->fs_info->running_transaction;
3146 if (!cur_trans)
3147 return 0;
3148
3149 delayed_refs = &cur_trans->delayed_refs;
3150 spin_lock(&delayed_refs->lock);
3151 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3152 if (!head) {
3153 spin_unlock(&delayed_refs->lock);
3154 return 0;
3155 }
3156
3157 if (!mutex_trylock(&head->mutex)) {
3158 refcount_inc(&head->refs);
3159 spin_unlock(&delayed_refs->lock);
3160
3161 btrfs_release_path(path);
3162
		/*
		 * Mutex was contended, block until it's released and let the
		 * caller try again.
		 */
3167 mutex_lock(&head->mutex);
3168 mutex_unlock(&head->mutex);
3169 btrfs_put_delayed_ref_head(head);
3170 return -EAGAIN;
3171 }
3172 spin_unlock(&delayed_refs->lock);
3173
3174 spin_lock(&head->lock);
	/*
	 * Walk every queued ref on this head; anything that isn't our own
	 * plain data ref means the extent is, or is about to be, shared.
	 */
3179 for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
3180 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
3181
3182 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3183 ret = 1;
3184 break;
3185 }
3186
3187 data_ref = btrfs_delayed_node_to_data_ref(ref);
3188
		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
3193 if (data_ref->root != root->root_key.objectid ||
3194 data_ref->objectid != objectid ||
3195 data_ref->offset != offset) {
3196 ret = 1;
3197 break;
3198 }
3199 }
3200 spin_unlock(&head->lock);
3201 mutex_unlock(&head->mutex);
3202 return ret;
3203}
3204
3205static noinline int check_committed_ref(struct btrfs_root *root,
3206 struct btrfs_path *path,
3207 u64 objectid, u64 offset, u64 bytenr)
3208{
3209 struct btrfs_fs_info *fs_info = root->fs_info;
3210 struct btrfs_root *extent_root = fs_info->extent_root;
3211 struct extent_buffer *leaf;
3212 struct btrfs_extent_data_ref *ref;
3213 struct btrfs_extent_inline_ref *iref;
3214 struct btrfs_extent_item *ei;
3215 struct btrfs_key key;
3216 u32 item_size;
3217 int type;
3218 int ret;
3219
3220 key.objectid = bytenr;
3221 key.offset = (u64)-1;
3222 key.type = BTRFS_EXTENT_ITEM_KEY;
3223
3224 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3225 if (ret < 0)
3226 goto out;
3227 BUG_ON(ret == 0);
3228
3229 ret = -ENOENT;
3230 if (path->slots[0] == 0)
3231 goto out;
3232
3233 path->slots[0]--;
3234 leaf = path->nodes[0];
3235 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3236
3237 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
3238 goto out;
3239
3240 ret = 1;
3241 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3242#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3243 if (item_size < sizeof(*ei)) {
3244 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3245 goto out;
3246 }
3247#endif
3248 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3249
3250 if (item_size != sizeof(*ei) +
3251 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3252 goto out;
3253
3254 if (btrfs_extent_generation(leaf, ei) <=
3255 btrfs_root_last_snapshot(&root->root_item))
3256 goto out;
3257
3258 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3259
3260 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
3261 if (type != BTRFS_EXTENT_DATA_REF_KEY)
3262 goto out;
3263
3264 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3265 if (btrfs_extent_refs(leaf, ei) !=
3266 btrfs_extent_data_ref_count(leaf, ref) ||
3267 btrfs_extent_data_ref_root(leaf, ref) !=
3268 root->root_key.objectid ||
3269 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3270 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3271 goto out;
3272
3273 ret = 0;
3274out:
3275 return ret;
3276}
3277
3278int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
3279 u64 bytenr)
3280{
3281 struct btrfs_path *path;
3282 int ret;
3283 int ret2;
3284
3285 path = btrfs_alloc_path();
3286 if (!path)
3287 return -ENOENT;
3288
3289 do {
3290 ret = check_committed_ref(root, path, objectid,
3291 offset, bytenr);
3292 if (ret && ret != -ENOENT)
3293 goto out;
3294
3295 ret2 = check_delayed_ref(root, path, objectid,
3296 offset, bytenr);
3297 } while (ret2 == -EAGAIN);
3298
3299 if (ret2 && ret2 != -ENOENT) {
3300 ret = ret2;
3301 goto out;
3302 }
3303
3304 if (ret != -ENOENT || ret2 != -ENOENT)
3305 ret = 0;
3306out:
3307 btrfs_free_path(path);
3308 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3309 WARN_ON(ret > 0);
3310 return ret;
3311}
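
/*
 * Illustrative sketch (not part of the original file): the nocow write path
 * uses this to decide whether an extent may be overwritten in place.  A
 * non-zero result means a cross reference (or an error) was found and the
 * write must fall back to COW:
 *
 *	ret = btrfs_cross_ref_exist(root, ino, file_offset, disk_bytenr);
 *	if (ret)
 *		goto force_cow;
 */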
3312
3313static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3314 struct btrfs_root *root,
3315 struct extent_buffer *buf,
3316 int full_backref, int inc)
3317{
3318 struct btrfs_fs_info *fs_info = root->fs_info;
3319 u64 bytenr;
3320 u64 num_bytes;
3321 u64 parent;
3322 u64 ref_root;
3323 u32 nritems;
3324 struct btrfs_key key;
3325 struct btrfs_file_extent_item *fi;
3326 int i;
3327 int level;
3328 int ret = 0;
3329 int (*process_func)(struct btrfs_trans_handle *,
3330 struct btrfs_root *,
3331 u64, u64, u64, u64, u64, u64);
3332
3333
3334 if (btrfs_is_testing(fs_info))
3335 return 0;
3336
3337 ref_root = btrfs_header_owner(buf);
3338 nritems = btrfs_header_nritems(buf);
3339 level = btrfs_header_level(buf);
3340
3341 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3342 return 0;
3343
3344 if (inc)
3345 process_func = btrfs_inc_extent_ref;
3346 else
3347 process_func = btrfs_free_extent;
3348
3349 if (full_backref)
3350 parent = buf->start;
3351 else
3352 parent = 0;
3353
3354 for (i = 0; i < nritems; i++) {
3355 if (level == 0) {
3356 btrfs_item_key_to_cpu(buf, &key, i);
3357 if (key.type != BTRFS_EXTENT_DATA_KEY)
3358 continue;
3359 fi = btrfs_item_ptr(buf, i,
3360 struct btrfs_file_extent_item);
3361 if (btrfs_file_extent_type(buf, fi) ==
3362 BTRFS_FILE_EXTENT_INLINE)
3363 continue;
3364 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3365 if (bytenr == 0)
3366 continue;
3367
3368 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3369 key.offset -= btrfs_file_extent_offset(buf, fi);
3370 ret = process_func(trans, root, bytenr, num_bytes,
3371 parent, ref_root, key.objectid,
3372 key.offset);
3373 if (ret)
3374 goto fail;
3375 } else {
3376 bytenr = btrfs_node_blockptr(buf, i);
3377 num_bytes = fs_info->nodesize;
3378 ret = process_func(trans, root, bytenr, num_bytes,
3379 parent, ref_root, level - 1, 0);
3380 if (ret)
3381 goto fail;
3382 }
3383 }
3384 return 0;
3385fail:
3386 return ret;
3387}
3388
3389int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3390 struct extent_buffer *buf, int full_backref)
3391{
3392 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3393}
3394
3395int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3396 struct extent_buffer *buf, int full_backref)
3397{
3398 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3399}
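
/*
 * Illustrative sketch (not part of the original file): when a btree block
 * is COWed, callers typically add references for everything the new copy
 * ('cow') points to and drop the old copy's ('buf') references:
 *
 *	ret = btrfs_inc_ref(trans, root, cow, 1);
 *	if (!ret)
 *		ret = btrfs_dec_ref(trans, root, buf, 1);
 *
 * full_backref == 1 here assumes the block is shared and carries the
 * FULL_BACKREF flag.
 */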
3400
3401static int write_one_cache_group(struct btrfs_trans_handle *trans,
3402 struct btrfs_fs_info *fs_info,
3403 struct btrfs_path *path,
3404 struct btrfs_block_group_cache *cache)
3405{
3406 int ret;
3407 struct btrfs_root *extent_root = fs_info->extent_root;
3408 unsigned long bi;
3409 struct extent_buffer *leaf;
3410
3411 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3412 if (ret) {
3413 if (ret > 0)
3414 ret = -ENOENT;
3415 goto fail;
3416 }
3417
3418 leaf = path->nodes[0];
3419 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3420 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3421 btrfs_mark_buffer_dirty(leaf);
3422fail:
3423 btrfs_release_path(path);
3424 return ret;
3426}
3427
3428static struct btrfs_block_group_cache *
3429next_block_group(struct btrfs_fs_info *fs_info,
3430 struct btrfs_block_group_cache *cache)
3431{
3432 struct rb_node *node;
3433
3434 spin_lock(&fs_info->block_group_cache_lock);
3435
	/* If our block group was removed, we need a full search. */
3437 if (RB_EMPTY_NODE(&cache->cache_node)) {
3438 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3439
3440 spin_unlock(&fs_info->block_group_cache_lock);
3441 btrfs_put_block_group(cache);
		cache = btrfs_lookup_first_block_group(fs_info, next_bytenr);
		return cache;
3443 }
3444 node = rb_next(&cache->cache_node);
3445 btrfs_put_block_group(cache);
3446 if (node) {
3447 cache = rb_entry(node, struct btrfs_block_group_cache,
3448 cache_node);
3449 btrfs_get_block_group(cache);
3450 } else
3451 cache = NULL;
3452 spin_unlock(&fs_info->block_group_cache_lock);
3453 return cache;
3454}
3455
3456static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3457 struct btrfs_trans_handle *trans,
3458 struct btrfs_path *path)
3459{
3460 struct btrfs_fs_info *fs_info = block_group->fs_info;
3461 struct btrfs_root *root = fs_info->tree_root;
3462 struct inode *inode = NULL;
3463 struct extent_changeset *data_reserved = NULL;
3464 u64 alloc_hint = 0;
3465 int dcs = BTRFS_DC_ERROR;
3466 u64 num_pages = 0;
3467 int retries = 0;
3468 int ret = 0;
3469
	/*
	 * If this block group is smaller than 100 megs don't bother caching
	 * the block group.
	 */
3474 if (block_group->key.offset < (100 * SZ_1M)) {
3475 spin_lock(&block_group->lock);
3476 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3477 spin_unlock(&block_group->lock);
3478 return 0;
3479 }
3480
3481 if (trans->aborted)
3482 return 0;
3483again:
3484 inode = lookup_free_space_inode(fs_info, block_group, path);
3485 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3486 ret = PTR_ERR(inode);
3487 btrfs_release_path(path);
3488 goto out;
3489 }
3490
3491 if (IS_ERR(inode)) {
3492 BUG_ON(retries);
3493 retries++;
3494
3495 if (block_group->ro)
3496 goto out_free;
3497
3498 ret = create_free_space_inode(fs_info, trans, block_group,
3499 path);
3500 if (ret)
3501 goto out_free;
3502 goto again;
3503 }
3504
	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up
	 * next time.
	 */
3510 BTRFS_I(inode)->generation = 0;
3511 ret = btrfs_update_inode(trans, root, inode);
3512 if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache.  So to
		 * limit our exposure to horrible edge cases lets just abort
		 * the transaction, this only happens in really bad situations
		 * anyway.
		 */
3523 btrfs_abort_transaction(trans, ret);
3524 goto out_put;
3525 }
3526 WARN_ON(ret);
3527
	/* We've already setup this transaction, go ahead and exit. */
3529 if (block_group->cache_generation == trans->transid &&
3530 i_size_read(inode)) {
3531 dcs = BTRFS_DC_SETUP;
3532 goto out_put;
3533 }
3534
3535 if (i_size_read(inode) > 0) {
3536 ret = btrfs_check_trunc_cache_free_space(fs_info,
3537 &fs_info->global_block_rsv);
3538 if (ret)
3539 goto out_put;
3540
3541 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
3542 if (ret)
3543 goto out_put;
3544 }
3545
3546 spin_lock(&block_group->lock);
3547 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3548 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
		/*
		 * Don't bother trying to write stuff out _if_
		 * a) we're not cached, or
		 * b) the space cache is disabled (nospace_cache or the v2
		 *    free space tree is in use).
		 */
3555 dcs = BTRFS_DC_WRITTEN;
3556 spin_unlock(&block_group->lock);
3557 goto out_put;
3558 }
3559 spin_unlock(&block_group->lock);
3560
	/*
	 * We hit an ENOSPC when setting up the cache in this transaction, just
	 * skip doing the setup, we've already cleared the cache so we're safe.
	 */
3565 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3566 ret = -ENOSPC;
3567 goto out_put;
3568 }
3569
	/*
	 * Try to preallocate enough space based on how big the block group is.
	 * Keep in mind this has to include any pinned space which could end up
	 * taking up quite a bit since it's not folded into the other space
	 * info.
	 */
3576 num_pages = div_u64(block_group->key.offset, SZ_256M);
3577 if (!num_pages)
3578 num_pages = 1;
3579
3580 num_pages *= 16;
3581 num_pages *= PAGE_SIZE;
3582
3583 ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
3584 if (ret)
3585 goto out_put;
3586
3587 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3588 num_pages, num_pages,
3589 &alloc_hint);
3590
	/*
	 * Our cache requires contiguous chunks so that we don't modify a bunch
	 * of metadata or split extents when writing the cache out, which means
	 * we can enospc if we are heavily fragmented in addition to just
	 * normal out of space conditions.  Treat ENOSPC as non-fatal here and
	 * simply skip caching this block group for the transaction.
	 */
3598 if (!ret)
3599 dcs = BTRFS_DC_SETUP;
3600 else if (ret == -ENOSPC)
3601 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3602
3603out_put:
3604 iput(inode);
3605out_free:
3606 btrfs_release_path(path);
3607out:
3608 spin_lock(&block_group->lock);
3609 if (!ret && dcs == BTRFS_DC_SETUP)
3610 block_group->cache_generation = trans->transid;
3611 block_group->disk_cache_state = dcs;
3612 spin_unlock(&block_group->lock);
3613
3614 extent_changeset_free(data_reserved);
3615 return ret;
3616}
3617
3618int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3619 struct btrfs_fs_info *fs_info)
3620{
3621 struct btrfs_block_group_cache *cache, *tmp;
3622 struct btrfs_transaction *cur_trans = trans->transaction;
3623 struct btrfs_path *path;
3624
3625 if (list_empty(&cur_trans->dirty_bgs) ||
3626 !btrfs_test_opt(fs_info, SPACE_CACHE))
3627 return 0;
3628
3629 path = btrfs_alloc_path();
3630 if (!path)
3631 return -ENOMEM;
3632
	/* Could add new block groups, use _safe just in case. */
3634 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3635 dirty_list) {
3636 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3637 cache_save_setup(cache, trans, path);
3638 }
3639
3640 btrfs_free_path(path);
3641 return 0;
3642}
3643
/*
 * Transaction commit does final block group cache writeback during a critical
 * section where nothing is allowed to change the FS.  This is required in
 * order for the cache to actually match the block group, but can introduce a
 * lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group cache
 * IO.  There's a chance we'll be racing with others and have to try again,
 * and if we fail this time we only have one more chance to make progress in
 * the final transaction commit.
 */
3656int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3657 struct btrfs_fs_info *fs_info)
3658{
3659 struct btrfs_block_group_cache *cache;
3660 struct btrfs_transaction *cur_trans = trans->transaction;
3661 int ret = 0;
3662 int should_put;
3663 struct btrfs_path *path = NULL;
3664 LIST_HEAD(dirty);
3665 struct list_head *io = &cur_trans->io_bgs;
3666 int num_started = 0;
3667 int loops = 0;
3668
3669 spin_lock(&cur_trans->dirty_bgs_lock);
3670 if (list_empty(&cur_trans->dirty_bgs)) {
3671 spin_unlock(&cur_trans->dirty_bgs_lock);
3672 return 0;
3673 }
3674 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3675 spin_unlock(&cur_trans->dirty_bgs_lock);
3676
3677again:
	/*
	 * Make sure all the block groups on our dirty list actually exist.
	 */
3682 btrfs_create_pending_block_groups(trans, fs_info);
3683
3684 if (!path) {
3685 path = btrfs_alloc_path();
3686 if (!path)
3687 return -ENOMEM;
3688 }
3689
	/*
	 * cache_write_mutex is here only to save us from balance or automatic
	 * removal of empty block groups deleting this block group while we are
	 * writing out the cache.
	 */
3695 mutex_lock(&trans->transaction->cache_write_mutex);
3696 while (!list_empty(&dirty)) {
3697 cache = list_first_entry(&dirty,
3698 struct btrfs_block_group_cache,
3699 dirty_list);
3700
		/*
		 * This can happen if something re-dirties a block group that
		 * is already under IO.  Just wait for it to finish and then
		 * do it all again.
		 */
3705 if (!list_empty(&cache->io_list)) {
3706 list_del_init(&cache->io_list);
3707 btrfs_wait_cache_io(trans, cache, path);
3708 btrfs_put_block_group(cache);
3709 }
3710
		/*
		 * Pull the block group off the dirty list while holding
		 * dirty_bgs_lock; btrfs_wait_cache_io() uses the dirty_list
		 * to decide whether to update the cache generation, so this
		 * must happen before the cache write starts.
		 */
3720 spin_lock(&cur_trans->dirty_bgs_lock);
3721 list_del_init(&cache->dirty_list);
3722 spin_unlock(&cur_trans->dirty_bgs_lock);
3723
3724 should_put = 1;
3725
3726 cache_save_setup(cache, trans, path);
3727
3728 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3729 cache->io_ctl.inode = NULL;
3730 ret = btrfs_write_out_cache(fs_info, trans,
3731 cache, path);
3732 if (ret == 0 && cache->io_ctl.inode) {
3733 num_started++;
3734 should_put = 0;
3735
				/*
				 * The cache_write_mutex is protecting the
				 * io_list.
				 */
3740 list_add_tail(&cache->io_list, io);
3741 } else {
				/*
				 * If we failed to write the cache, the
				 * generation will be bad and life goes on.
				 */
3746 ret = 0;
3747 }
3748 }
3749 if (!ret) {
3750 ret = write_one_cache_group(trans, fs_info,
3751 path, cache);
			/*
			 * Our block group might still be attached to the list
			 * of new block groups in the transaction handle of
			 * some other task (struct btrfs_trans_handle->new_bgs).
			 * This means its block group item isn't yet in the
			 * extent tree.  If this happens ignore the error, as
			 * we will try again later in the critical section of
			 * the transaction commit.
			 */
3761 if (ret == -ENOENT) {
3762 ret = 0;
3763 spin_lock(&cur_trans->dirty_bgs_lock);
3764 if (list_empty(&cache->dirty_list)) {
3765 list_add_tail(&cache->dirty_list,
3766 &cur_trans->dirty_bgs);
3767 btrfs_get_block_group(cache);
3768 }
3769 spin_unlock(&cur_trans->dirty_bgs_lock);
3770 } else if (ret) {
3771 btrfs_abort_transaction(trans, ret);
3772 }
3773 }
3774
		/* If it's not on the io list, we need to put the block group. */
3776 if (should_put)
3777 btrfs_put_block_group(cache);
3778
3779 if (ret)
3780 break;
3781
		/*
		 * Avoid blocking other tasks for too long.  It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
3787 mutex_unlock(&trans->transaction->cache_write_mutex);
3788 mutex_lock(&trans->transaction->cache_write_mutex);
3789 }
3790 mutex_unlock(&trans->transaction->cache_write_mutex);
3791
	/*
	 * Go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once).
	 */
3796 ret = btrfs_run_delayed_refs(trans, fs_info, 0);
3797 if (!ret && loops == 0) {
3798 loops++;
3799 spin_lock(&cur_trans->dirty_bgs_lock);
3800 list_splice_init(&cur_trans->dirty_bgs, &dirty);
		/*
		 * dirty_bgs_lock protects us from concurrent block group
		 * deletes too (not just cache_write_mutex).
		 */
3805 if (!list_empty(&dirty)) {
3806 spin_unlock(&cur_trans->dirty_bgs_lock);
3807 goto again;
3808 }
3809 spin_unlock(&cur_trans->dirty_bgs_lock);
3810 } else if (ret < 0) {
3811 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
3812 }
3813
3814 btrfs_free_path(path);
3815 return ret;
3816}
3817
3818int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3819 struct btrfs_fs_info *fs_info)
3820{
3821 struct btrfs_block_group_cache *cache;
3822 struct btrfs_transaction *cur_trans = trans->transaction;
3823 int ret = 0;
3824 int should_put;
3825 struct btrfs_path *path;
3826 struct list_head *io = &cur_trans->io_bgs;
3827 int num_started = 0;
3828
3829 path = btrfs_alloc_path();
3830 if (!path)
3831 return -ENOMEM;
3832
	/*
	 * Even though we are in the critical section of the transaction
	 * commit, we can still have concurrent tasks adding elements to this
	 * transaction's list of dirty block groups.  These tasks correspond to
	 * endio free space workers started when writeback finishes for a
	 * space cache, which run inode.c:btrfs_finish_ordered_io() and can
	 * allocate new block groups as a result of COWing nodes of the root
	 * tree when updating the free space inode.  The writeback for the
	 * space caches is triggered by an earlier call to
	 * btrfs_start_dirty_block_groups() and iterations of the following
	 * loop.
	 * Also we want to do the cache_save_setup first and then run the
	 * delayed refs to make sure we have the best chance at doing this all
	 * in one shot.
	 */
3848 spin_lock(&cur_trans->dirty_bgs_lock);
3849 while (!list_empty(&cur_trans->dirty_bgs)) {
3850 cache = list_first_entry(&cur_trans->dirty_bgs,
3851 struct btrfs_block_group_cache,
3852 dirty_list);
3853
		/*
		 * This can happen if cache_save_setup re-dirties a block group
		 * that is already under IO.  Just wait for it to finish and
		 * then do it all again.
		 */
3859 if (!list_empty(&cache->io_list)) {
3860 spin_unlock(&cur_trans->dirty_bgs_lock);
3861 list_del_init(&cache->io_list);
3862 btrfs_wait_cache_io(trans, cache, path);
3863 btrfs_put_block_group(cache);
3864 spin_lock(&cur_trans->dirty_bgs_lock);
3865 }
3866
		/*
		 * Don't remove from the dirty list until after we've waited on
		 * any pending IO.
		 */
3871 list_del_init(&cache->dirty_list);
3872 spin_unlock(&cur_trans->dirty_bgs_lock);
3873 should_put = 1;
3874
3875 cache_save_setup(cache, trans, path);
3876
3877 if (!ret)
3878 ret = btrfs_run_delayed_refs(trans, fs_info,
3879 (unsigned long) -1);
3880
3881 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3882 cache->io_ctl.inode = NULL;
3883 ret = btrfs_write_out_cache(fs_info, trans,
3884 cache, path);
3885 if (ret == 0 && cache->io_ctl.inode) {
3886 num_started++;
3887 should_put = 0;
3888 list_add_tail(&cache->io_list, io);
3889 } else {
3890
3891
3892
3893
3894 ret = 0;
3895 }
3896 }
3897 if (!ret) {
3898 ret = write_one_cache_group(trans, fs_info,
3899 path, cache);
			/*
			 * One of the free space endio workers might have
			 * created a new block group while updating a free
			 * space cache's inode (at
			 * inode.c:btrfs_finish_ordered_io()) and hasn't
			 * released its transaction handle yet, in which case
			 * the new block group is still attached to its
			 * transaction handle and its creation has not finished
			 * yet (no block group item in the extent tree yet).
			 * If this is the case, wait for all other tasks to
			 * finish their transaction handles and retry.
			 */
3913 if (ret == -ENOENT) {
3914 wait_event(cur_trans->writer_wait,
3915 atomic_read(&cur_trans->num_writers) == 1);
3916 ret = write_one_cache_group(trans, fs_info,
3917 path, cache);
3918 }
3919 if (ret)
3920 btrfs_abort_transaction(trans, ret);
3921 }
3922
		/* If it's not on the io list, we need to put the block group. */
3924 if (should_put)
3925 btrfs_put_block_group(cache);
3926 spin_lock(&cur_trans->dirty_bgs_lock);
3927 }
3928 spin_unlock(&cur_trans->dirty_bgs_lock);
3929
3930 while (!list_empty(io)) {
3931 cache = list_first_entry(io, struct btrfs_block_group_cache,
3932 io_list);
3933 list_del_init(&cache->io_list);
3934 btrfs_wait_cache_io(trans, cache, path);
3935 btrfs_put_block_group(cache);
3936 }
3937
3938 btrfs_free_path(path);
3939 return ret;
3940}
3941
3942int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
3943{
3944 struct btrfs_block_group_cache *block_group;
3945 int readonly = 0;
3946
3947 block_group = btrfs_lookup_block_group(fs_info, bytenr);
3948 if (!block_group || block_group->ro)
3949 readonly = 1;
3950 if (block_group)
3951 btrfs_put_block_group(block_group);
3952 return readonly;
3953}
3954
3955bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3956{
3957 struct btrfs_block_group_cache *bg;
3958 bool ret = true;
3959
3960 bg = btrfs_lookup_block_group(fs_info, bytenr);
3961 if (!bg)
3962 return false;
3963
3964 spin_lock(&bg->lock);
3965 if (bg->ro)
3966 ret = false;
3967 else
3968 atomic_inc(&bg->nocow_writers);
3969 spin_unlock(&bg->lock);
3970
	/* No put on block group, done by btrfs_dec_nocow_writers(). */
3972 if (!ret)
3973 btrfs_put_block_group(bg);
3974
3975 return ret;
3977}
3978
3979void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3980{
3981 struct btrfs_block_group_cache *bg;
3982
3983 bg = btrfs_lookup_block_group(fs_info, bytenr);
3984 ASSERT(bg);
3985 if (atomic_dec_and_test(&bg->nocow_writers))
3986 wake_up_atomic_t(&bg->nocow_writers);
3987
	/*
	 * Put twice: once for our lookup above and once for the lookup done by
	 * a previous call to btrfs_inc_nocow_writers().
	 */
3991 btrfs_put_block_group(bg);
3992 btrfs_put_block_group(bg);
3993}
3994
3995void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3996{
3997 wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait,
3998 TASK_UNINTERRUPTIBLE);
3999}
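
/*
 * Illustrative sketch of the nocow writer protocol (not part of the
 * original file): a writer pins the block group before an in-place write
 * and releases it when done, while tasks setting the group read-only use
 * btrfs_wait_nocow_writers() to drain the count:
 *
 *	if (btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
 *		... submit the in-place write ...
 *		btrfs_dec_nocow_writers(fs_info, disk_bytenr);
 *	}
 */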
4000
4001static const char *alloc_name(u64 flags)
4002{
4003 switch (flags) {
4004 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
4005 return "mixed";
4006 case BTRFS_BLOCK_GROUP_METADATA:
4007 return "metadata";
4008 case BTRFS_BLOCK_GROUP_DATA:
4009 return "data";
4010 case BTRFS_BLOCK_GROUP_SYSTEM:
4011 return "system";
4012 default:
4013 WARN_ON(1);
4014 return "invalid-combination";
	}
4016}
4017
4018static int create_space_info(struct btrfs_fs_info *info, u64 flags,
4019 struct btrfs_space_info **new)
4020{
4022 struct btrfs_space_info *space_info;
4023 int i;
4024 int ret;
4025
4026 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
4027 if (!space_info)
4028 return -ENOMEM;
4029
4030 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
4031 GFP_KERNEL);
4032 if (ret) {
4033 kfree(space_info);
4034 return ret;
4035 }
4036
4037 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
4038 INIT_LIST_HEAD(&space_info->block_groups[i]);
4039 init_rwsem(&space_info->groups_sem);
4040 spin_lock_init(&space_info->lock);
4041 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
4042 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4043 init_waitqueue_head(&space_info->wait);
4044 INIT_LIST_HEAD(&space_info->ro_bgs);
4045 INIT_LIST_HEAD(&space_info->tickets);
4046 INIT_LIST_HEAD(&space_info->priority_tickets);
4047
4048 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
4049 info->space_info_kobj, "%s",
4050 alloc_name(space_info->flags));
4051 if (ret) {
4052 percpu_counter_destroy(&space_info->total_bytes_pinned);
4053 kfree(space_info);
4054 return ret;
4055 }
4056
4057 *new = space_info;
4058 list_add_rcu(&space_info->list, &info->space_info);
4059 if (flags & BTRFS_BLOCK_GROUP_DATA)
4060 info->data_sinfo = space_info;
4061
4062 return ret;
4063}
4064
4065static void update_space_info(struct btrfs_fs_info *info, u64 flags,
4066 u64 total_bytes, u64 bytes_used,
4067 u64 bytes_readonly,
4068 struct btrfs_space_info **space_info)
4069{
4070 struct btrfs_space_info *found;
4071 int factor;
4072
4073 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
4074 BTRFS_BLOCK_GROUP_RAID10))
4075 factor = 2;
4076 else
4077 factor = 1;
4078
4079 found = __find_space_info(info, flags);
4080 ASSERT(found);
4081 spin_lock(&found->lock);
4082 found->total_bytes += total_bytes;
4083 found->disk_total += total_bytes * factor;
4084 found->bytes_used += bytes_used;
4085 found->disk_used += bytes_used * factor;
4086 found->bytes_readonly += bytes_readonly;
4087 if (total_bytes > 0)
4088 found->full = 0;
4089 space_info_add_new_bytes(info, found, total_bytes -
4090 bytes_used - bytes_readonly);
4091 spin_unlock(&found->lock);
4092 *space_info = found;
4093}
4094
4095static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
4096{
4097 u64 extra_flags = chunk_to_extended(flags) &
4098 BTRFS_EXTENDED_PROFILE_MASK;
4099
4100 write_seqlock(&fs_info->profiles_lock);
4101 if (flags & BTRFS_BLOCK_GROUP_DATA)
4102 fs_info->avail_data_alloc_bits |= extra_flags;
4103 if (flags & BTRFS_BLOCK_GROUP_METADATA)
4104 fs_info->avail_metadata_alloc_bits |= extra_flags;
4105 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4106 fs_info->avail_system_alloc_bits |= extra_flags;
4107 write_sequnlock(&fs_info->profiles_lock);
4108}
4109
/*
 * Returns target flags in extended format or 0 if restripe for this chunk
 * type is not in progress.
 *
 * Should be called with balance_lock held.
 */
4116static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
4117{
4118 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4119 u64 target = 0;
4120
4121 if (!bctl)
4122 return 0;
4123
4124 if (flags & BTRFS_BLOCK_GROUP_DATA &&
4125 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4126 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
4127 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
4128 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4129 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
4130 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
4131 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4132 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
4133 }
4134
4135 return target;
4136}
4137
/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
4145static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
4146{
4147 u64 num_devices = fs_info->fs_devices->rw_devices;
4148 u64 target;
4149 u64 raid_type;
4150 u64 allowed = 0;
4151
	/*
	 * See if restripe for this chunk type is in progress, if so try to
	 * reduce to the target profile.
	 */
4156 spin_lock(&fs_info->balance_lock);
4157 target = get_restripe_target(fs_info, flags);
4158 if (target) {
		/* Pick target profile only if it's already available. */
4160 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
4161 spin_unlock(&fs_info->balance_lock);
4162 return extended_to_chunk(target);
4163 }
4164 }
4165 spin_unlock(&fs_info->balance_lock);
4166
	/* First, mask out the RAID levels which aren't possible. */
4168 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
4169 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
4170 allowed |= btrfs_raid_group[raid_type];
4171 }
4172 allowed &= flags;
4173
4174 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
4175 allowed = BTRFS_BLOCK_GROUP_RAID6;
4176 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4177 allowed = BTRFS_BLOCK_GROUP_RAID5;
4178 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4179 allowed = BTRFS_BLOCK_GROUP_RAID10;
4180 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4181 allowed = BTRFS_BLOCK_GROUP_RAID1;
4182 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4183 allowed = BTRFS_BLOCK_GROUP_RAID0;
4184
4185 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
4186
4187 return extended_to_chunk(flags | allowed);
4188}
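
/*
 * Worked example (no balance running): on a filesystem with two rw devices
 * and flags offering RAID1 | RAID10, the devs_min mask drops RAID10 (it
 * needs at least 4 devices), and the if/else ladder then selects RAID1 as
 * the single profile returned.
 */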
4189
4190static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
4191{
4192 unsigned seq;
4193 u64 flags;
4194
4195 do {
4196 flags = orig_flags;
4197 seq = read_seqbegin(&fs_info->profiles_lock);
4198
4199 if (flags & BTRFS_BLOCK_GROUP_DATA)
4200 flags |= fs_info->avail_data_alloc_bits;
4201 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4202 flags |= fs_info->avail_system_alloc_bits;
4203 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
4204 flags |= fs_info->avail_metadata_alloc_bits;
4205 } while (read_seqretry(&fs_info->profiles_lock, seq));
4206
4207 return btrfs_reduce_alloc_profile(fs_info, flags);
4208}
4209
4210static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
4211{
4212 struct btrfs_fs_info *fs_info = root->fs_info;
4213 u64 flags;
4214 u64 ret;
4215
4216 if (data)
4217 flags = BTRFS_BLOCK_GROUP_DATA;
4218 else if (root == fs_info->chunk_root)
4219 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4220 else
4221 flags = BTRFS_BLOCK_GROUP_METADATA;
4222
4223 ret = get_alloc_profile(fs_info, flags);
4224 return ret;
4225}
4226
4227u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
4228{
4229 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
4230}
4231
4232u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
4233{
4234 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4235}
4236
4237u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4238{
4239 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4240}
4241
4242static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4243 bool may_use_included)
4244{
4245 ASSERT(s_info);
4246 return s_info->bytes_used + s_info->bytes_reserved +
4247 s_info->bytes_pinned + s_info->bytes_readonly +
4248 (may_use_included ? s_info->bytes_may_use : 0);
4249}
4250
4251int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
4252{
4253 struct btrfs_root *root = inode->root;
4254 struct btrfs_fs_info *fs_info = root->fs_info;
4255 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
4256 u64 used;
4257 int ret = 0;
4258 int need_commit = 2;
4259 int have_pinned_space;
4260
	/* Make sure bytes are sectorsize aligned. */
4262 bytes = ALIGN(bytes, fs_info->sectorsize);
4263
4264 if (btrfs_is_free_space_inode(inode)) {
4265 need_commit = 0;
4266 ASSERT(current->journal_info);
4267 }
4268
4269again:
	/* Make sure we have enough space to handle the data first. */
4271 spin_lock(&data_sinfo->lock);
4272 used = btrfs_space_info_used(data_sinfo, true);
4273
4274 if (used + bytes > data_sinfo->total_bytes) {
4275 struct btrfs_trans_handle *trans;
4276
		/*
		 * If we don't have enough free bytes in this space then we
		 * need to alloc a new chunk.
		 */
4281 if (!data_sinfo->full) {
4282 u64 alloc_target;
4283
4284 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4285 spin_unlock(&data_sinfo->lock);
4286
4287 alloc_target = btrfs_data_alloc_profile(fs_info);
4288
			/*
			 * It is ugly that we don't call nolock join
			 * transaction for the free space inode case here.
			 * But it is safe because we only do the action after
			 * writing out the cache.
			 */
4298 trans = btrfs_join_transaction(root);
4299 if (IS_ERR(trans))
4300 return PTR_ERR(trans);
4301
4302 ret = do_chunk_alloc(trans, fs_info, alloc_target,
4303 CHUNK_ALLOC_NO_FORCE);
4304 btrfs_end_transaction(trans);
4305 if (ret < 0) {
4306 if (ret != -ENOSPC)
4307 return ret;
4308 else {
4309 have_pinned_space = 1;
4310 goto commit_trans;
4311 }
4312 }
4313
4314 goto again;
4315 }
4316
		/*
		 * Check whether committing the transaction could free enough
		 * pinned space to satisfy this allocation.
		 */
4322 have_pinned_space = percpu_counter_compare(
4323 &data_sinfo->total_bytes_pinned,
4324 used + bytes - data_sinfo->total_bytes);
4325 spin_unlock(&data_sinfo->lock);
4326
		/* Commit the current transaction and try again. */
4328commit_trans:
4329 if (need_commit &&
4330 !atomic_read(&fs_info->open_ioctl_trans)) {
4331 need_commit--;
4332
4333 if (need_commit > 0) {
4334 btrfs_start_delalloc_roots(fs_info, 0, -1);
4335 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
4336 (u64)-1);
4337 }
4338
4339 trans = btrfs_join_transaction(root);
4340 if (IS_ERR(trans))
4341 return PTR_ERR(trans);
4342 if (have_pinned_space >= 0 ||
4343 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4344 &trans->transaction->flags) ||
4345 need_commit > 0) {
4346 ret = btrfs_commit_transaction(trans);
4347 if (ret)
4348 return ret;
4349
			/*
			 * The cleaner kthread might still be doing iput
			 * operations.  Wait for it to finish so that more
			 * space is released.
			 */
4354 mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
4355 mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
4356 goto again;
4357 } else {
4358 btrfs_end_transaction(trans);
4359 }
4360 }
4361
4362 trace_btrfs_space_reservation(fs_info,
4363 "space_info:enospc",
4364 data_sinfo->flags, bytes, 1);
4365 return -ENOSPC;
4366 }
4367 data_sinfo->bytes_may_use += bytes;
4368 trace_btrfs_space_reservation(fs_info, "space_info",
4369 data_sinfo->flags, bytes, 1);
4370 spin_unlock(&data_sinfo->lock);
4371
4372 return ret;
4373}
4374
4375int btrfs_check_data_free_space(struct inode *inode,
4376 struct extent_changeset **reserved, u64 start, u64 len)
4377{
4378 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4379 int ret;
4380
	/* Align the range. */
4382 len = round_up(start + len, fs_info->sectorsize) -
4383 round_down(start, fs_info->sectorsize);
4384 start = round_down(start, fs_info->sectorsize);
4385
4386 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
4387 if (ret < 0)
4388 return ret;
4389
	/* Use btrfs_qgroup_reserve_data() to reserve precious data space. */
4391 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
4392 if (ret < 0)
4393 btrfs_free_reserved_data_space_noquota(inode, start, len);
4394 else
4395 ret = 0;
4396 return ret;
4397}
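
/*
 * Illustrative sketch (not part of the original file): write paths pair the
 * reservation with a matching free on failure; 'pos' and 'count' are
 * assumed from the caller:
 *
 *	struct extent_changeset *data_reserved = NULL;
 *
 *	ret = btrfs_check_data_free_space(inode, &data_reserved, pos, count);
 *	if (ret < 0)
 *		return ret;
 *	...
 *	if (write_failed)
 *		btrfs_free_reserved_data_space(inode, data_reserved, pos,
 *					       count);
 *	extent_changeset_free(data_reserved);
 */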
4398
/*
 * Called if we need to clear a data reservation for this inode, normally in
 * an error case.
 *
 * This one will *NOT* use the accurate qgroup reserved space API, just for
 * cases which can't sleep and are sure they won't affect qgroup reserved
 * space.
 */
4407void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4408 u64 len)
4409{
4410 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4411 struct btrfs_space_info *data_sinfo;
4412
	/* Make sure the range is aligned to sectorsize. */
4414 len = round_up(start + len, fs_info->sectorsize) -
4415 round_down(start, fs_info->sectorsize);
4416 start = round_down(start, fs_info->sectorsize);
4417
4418 data_sinfo = fs_info->data_sinfo;
4419 spin_lock(&data_sinfo->lock);
4420 if (WARN_ON(data_sinfo->bytes_may_use < len))
4421 data_sinfo->bytes_may_use = 0;
4422 else
4423 data_sinfo->bytes_may_use -= len;
4424 trace_btrfs_space_reservation(fs_info, "space_info",
4425 data_sinfo->flags, len, 0);
4426 spin_unlock(&data_sinfo->lock);
4427}
4428
/*
 * Called if we need to clear a data reservation for this inode, normally in
 * an error case.
 *
 * This one will handle the per-inode data rsv map for accurate reserved
 * space framework.
 */
4436void btrfs_free_reserved_data_space(struct inode *inode,
4437 struct extent_changeset *reserved, u64 start, u64 len)
4438{
4439 struct btrfs_root *root = BTRFS_I(inode)->root;
4440
	/* Make sure the range is aligned to sectorsize. */
4442 len = round_up(start + len, root->fs_info->sectorsize) -
4443 round_down(start, root->fs_info->sectorsize);
4444 start = round_down(start, root->fs_info->sectorsize);
4445
4446 btrfs_free_reserved_data_space_noquota(inode, start, len);
4447 btrfs_qgroup_free_data(inode, reserved, start, len);
4448}
4449
4450static void force_metadata_allocation(struct btrfs_fs_info *info)
4451{
4452 struct list_head *head = &info->space_info;
4453 struct btrfs_space_info *found;
4454
4455 rcu_read_lock();
4456 list_for_each_entry_rcu(found, head, list) {
4457 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4458 found->force_alloc = CHUNK_ALLOC_FORCE;
4459 }
4460 rcu_read_unlock();
4461}
4462
4463static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4464{
4465 return (global->size << 1);
4466}
4467
4468static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
4469 struct btrfs_space_info *sinfo, int force)
4470{
4471 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4472 u64 bytes_used = btrfs_space_info_used(sinfo, false);
4473 u64 thresh;
4474
4475 if (force == CHUNK_ALLOC_FORCE)
4476 return 1;
4477
	/*
	 * We need to take into account the global rsv because for all intents
	 * and purposes it's used space.  Don't worry about locking the
	 * global_rsv, it doesn't change except when the transaction commits.
	 */
4483 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
4484 bytes_used += calc_global_rsv_need_space(global_rsv);
4485
	/*
	 * In limited mode, we want to have some free space up to about 1% of
	 * the FS size.
	 */
4490 if (force == CHUNK_ALLOC_LIMITED) {
4491 thresh = btrfs_super_total_bytes(fs_info->super_copy);
4492 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
4493
4494 if (sinfo->total_bytes - bytes_used < thresh)
4495 return 1;
4496 }
4497
4498 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
4499 return 0;
4500 return 1;
4501}
4502
4503static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4504{
4505 u64 num_dev;
4506
4507 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4508 BTRFS_BLOCK_GROUP_RAID0 |
4509 BTRFS_BLOCK_GROUP_RAID5 |
4510 BTRFS_BLOCK_GROUP_RAID6))
4511 num_dev = fs_info->fs_devices->rw_devices;
4512 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4513 num_dev = 2;
4514 else
4515 num_dev = 1;
4516
4517 return num_dev;
4518}
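
/*
 * Worked example: for the striped profiles the answer is whatever is
 * currently writable (e.g. 6 rw devices -> num_dev = 6), RAID1 is pinned at
 * 2, and single/DUP fall through to 1.  check_system_chunk() below uses
 * this to size the worst case number of device items a chunk change can
 * touch.
 */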
4519
/*
 * Reserve space in the system space_info for updating the chunk tree, since
 * adding or removing a chunk of the given type may require allocating a new
 * system chunk.
 */
4525void check_system_chunk(struct btrfs_trans_handle *trans,
4526 struct btrfs_fs_info *fs_info, u64 type)
4527{
4528 struct btrfs_space_info *info;
4529 u64 left;
4530 u64 thresh;
4531 int ret = 0;
4532 u64 num_devs;
4533
	/*
	 * Needed because we can end up allocating a system chunk and for an
	 * atomic and race free space reservation in the chunk block reserve.
	 */
4538 ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
4539
4540 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4541 spin_lock(&info->lock);
4542 left = info->total_bytes - btrfs_space_info_used(info, true);
4543 spin_unlock(&info->lock);
4544
4545 num_devs = get_profile_num_devs(fs_info, type);
4546
	/* num_devs device items to update and 1 chunk item to add or remove. */
4548 thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
4549 btrfs_calc_trans_metadata_size(fs_info, 1);
4550
4551 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4552 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4553 left, thresh, type);
4554 dump_space_info(fs_info, info, 0, 0);
4555 }
4556
4557 if (left < thresh) {
4558 u64 flags = btrfs_system_alloc_profile(fs_info);
4559
		/*
		 * Ignore failure to create system chunk.  We might end up not
		 * needing it, as some other task got scheduled and allocated
		 * it before we did.
		 */
		ret = btrfs_alloc_chunk(trans, fs_info, flags);
	}

	if (!ret) {
		ret = btrfs_block_rsv_add(fs_info->chunk_root,
					  &fs_info->chunk_block_rsv,
					  thresh, BTRFS_RESERVE_NO_FLUSH);
		if (!ret)
			trans->chunk_bytes_reserved += thresh;
	}
}
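
/*
 * If force is CHUNK_ALLOC_FORCE:
 *    - return 1 if it successfully allocates a chunk,
 *    - return errors including -ENOSPC otherwise.
 * If force is NOT CHUNK_ALLOC_FORCE:
 *    - return 0 if it doesn't need to allocate a new chunk,
 *    - return 1 if it successfully allocates a chunk,
 *    - return errors including -ENOSPC otherwise.
 */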
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	int wait_for_alloc = 0;
	int ret = 0;
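
	/* Don't re-enter if we're already allocating a chunk */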
	if (trans->allocating_chunk)
		return -ENOSPC;

	space_info = __find_space_info(fs_info, flags);
	if (!space_info) {
		ret = create_space_info(fs_info, flags, &space_info);
		if (ret)
			return ret;
	}

again:
	spin_lock(&space_info->lock);
	if (force < space_info->force_alloc)
		force = space_info->force_alloc;
	if (space_info->full) {
		if (should_alloc_chunk(fs_info, space_info, force))
			ret = -ENOSPC;
		else
			ret = 0;
		spin_unlock(&space_info->lock);
		return ret;
	}

	if (!should_alloc_chunk(fs_info, space_info, force)) {
		spin_unlock(&space_info->lock);
		return 0;
	} else if (space_info->chunk_alloc) {
		wait_for_alloc = 1;
	} else {
		space_info->chunk_alloc = 1;
	}

	spin_unlock(&space_info->lock);

	mutex_lock(&fs_info->chunk_mutex);
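
	/*
	 * The chunk_mutex is held throughout the entirety of a chunk
	 * allocation, so once we've acquired the chunk_mutex we know that the
	 * other guy is done and we need to recheck and see if we should
	 * allocate.
	 */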
	if (wait_for_alloc) {
		mutex_unlock(&fs_info->chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	trans->allocating_chunk = true;
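
	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */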
	if (btrfs_mixed_space_info(space_info))
		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
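
	/*
	 * If we're doing a data chunk, go ahead and make sure that we keep a
	 * reasonable number of metadata chunks allocated in the FS as well.
	 */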
	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
		fs_info->data_chunk_allocations++;
		if (!(fs_info->data_chunk_allocations %
		      fs_info->metadata_ratio))
			force_metadata_allocation(fs_info);
	}
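
	/*
	 * Check if we have enough space in SYSTEM chunk because we may need
	 * to update devices.
	 */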
	check_system_chunk(trans, fs_info, flags);

	ret = btrfs_alloc_chunk(trans, fs_info, flags);
	trans->allocating_chunk = false;

	spin_lock(&space_info->lock);
	if (ret < 0 && ret != -ENOSPC)
		goto out;
	if (ret)
		space_info->full = 1;
	else
		ret = 1;

	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
	space_info->chunk_alloc = 0;
	spin_unlock(&space_info->lock);
	mutex_unlock(&fs_info->chunk_mutex);
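
	/*
	 * When we allocate a new chunk we reserve space in the chunk block
	 * reserve to make sure we can COW nodes/leafs in the chunk tree or
	 * add new nodes/leafs to it if we end up needing to do it when
	 * inserting the chunk item and updating device items as part of the
	 * second phase of chunk allocation.  So make sure we don't accumulate
	 * a large number of pending block group creations in the transaction:
	 * flush them out, together with their reserved chunk metadata, once
	 * the reservation crosses a reasonable threshold.
	 */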
	if (trans->can_flush_pending_bgs &&
	    trans->chunk_bytes_reserved >= (u64)SZ_2M) {
		btrfs_create_pending_block_groups(trans, fs_info);
		btrfs_trans_release_chunk_metadata(trans);
	}
	return ret;
}

static int can_overcommit(struct btrfs_fs_info *fs_info,
			  struct btrfs_space_info *space_info, u64 bytes,
			  enum btrfs_reserve_flush_enum flush,
			  bool system_chunk)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 profile;
	u64 space_size;
	u64 avail;
	u64 used;
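
	/* We never overcommit data space */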
	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
		return 0;

	if (system_chunk)
		profile = btrfs_system_alloc_profile(fs_info);
	else
		profile = btrfs_metadata_alloc_profile(fs_info);

	used = btrfs_space_info_used(space_info, false);
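
	/*
	 * We only want to allow over committing if we have lots of actual space
	 * free, but if we don't have enough space to handle the global reserve
	 * space then we could end up having a real enospc problem when trying
	 * to allocate a chunk or some other such important allocation.
	 */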
	spin_lock(&global_rsv->lock);
	space_size = calc_global_rsv_need_space(global_rsv);
	spin_unlock(&global_rsv->lock);
	if (used + space_size >= space_info->total_bytes)
		return 0;

	used += space_info->bytes_may_use;

	avail = atomic64_read(&fs_info->free_chunk_space);
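
	/*
	 * If we have dup, raid1 or raid10 then only half of the free space is
	 * actually usable.  For raid56, the space info used doesn't include
	 * the parity drive, so we don't have to change the math.
	 */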
	if (profile & (BTRFS_BLOCK_GROUP_DUP |
		       BTRFS_BLOCK_GROUP_RAID1 |
		       BTRFS_BLOCK_GROUP_RAID10))
		avail >>= 1;
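
	/*
	 * If we aren't flushing all things, let us overcommit up to 1/2 of
	 * the space.  If we can flush, don't let us overcommit too much, let
	 * it overcommit up to 1/8 of the space.
	 */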
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		avail >>= 3;
	else
		avail >>= 1;

	if (used + bytes < space_info->total_bytes + avail)
		return 1;
	return 0;
}

static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
					 unsigned long nr_pages, int nr_items)
{
	struct super_block *sb = fs_info->sb;

	if (down_read_trylock(&sb->s_umount)) {
		writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
		up_read(&sb->s_umount);
	} else {
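		/*
		 * We needn't worry the filesystem going from r/w to r/o though
		 * we don't acquire ->s_umount mutex, because the filesystem
		 * should guarantee the delalloc inodes list be empty after
		 * the filesystem is readonly(all dirty pages are written to
		 * the disk).
		 */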
		btrfs_start_delalloc_roots(fs_info, 0, nr_items);
		if (!current->journal_info)
			btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
	}
}

static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
					u64 to_reclaim)
{
	u64 bytes;
	u64 nr;

	bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
	nr = div64_u64(to_reclaim, bytes);
	if (!nr)
		nr = 1;
	return nr;
}

#define EXTENT_SIZE_PER_ITEM	SZ_256K
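
/*
 * Shrink metadata reservation, in bytes, for delalloc.
 */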
static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
			    u64 orig, bool wait_ordered)
{
	struct btrfs_space_info *space_info;
	struct btrfs_trans_handle *trans;
	u64 delalloc_bytes;
	u64 max_reclaim;
	u64 items;
	long time_left;
	unsigned long nr_pages;
	int loops;
	enum btrfs_reserve_flush_enum flush;
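
	/* Calc the number of the pages we need flush for space reservation */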
	items = calc_reclaim_items_nr(fs_info, to_reclaim);
	to_reclaim = items * EXTENT_SIZE_PER_ITEM;

	trans = (struct btrfs_trans_handle *)current->journal_info;
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	delalloc_bytes = percpu_counter_sum_positive(
						&fs_info->delalloc_bytes);
	if (delalloc_bytes == 0) {
		if (trans)
			return;
		if (wait_ordered)
			btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
		return;
	}

	loops = 0;
	while (delalloc_bytes && loops < 3) {
		max_reclaim = min(delalloc_bytes, to_reclaim);
		nr_pages = max_reclaim >> PAGE_SHIFT;
		btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
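
		/*
		 * We need to wait for the async pages to actually start before
		 * we do anything.
		 */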
		max_reclaim = atomic_read(&fs_info->async_delalloc_pages);
		if (!max_reclaim)
			goto skip_async;

		if (max_reclaim <= nr_pages)
			max_reclaim = 0;
		else
			max_reclaim -= nr_pages;

		wait_event(fs_info->async_submit_wait,
			   atomic_read(&fs_info->async_delalloc_pages) <=
			   (int)max_reclaim);
skip_async:
		if (!trans)
			flush = BTRFS_RESERVE_FLUSH_ALL;
		else
			flush = BTRFS_RESERVE_NO_FLUSH;
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets) &&
		    list_empty(&space_info->priority_tickets)) {
			spin_unlock(&space_info->lock);
			break;
		}
		spin_unlock(&space_info->lock);

		loops++;
		if (wait_ordered && !trans) {
			btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
		} else {
			time_left = schedule_timeout_killable(1);
			if (time_left)
				break;
		}
		delalloc_bytes = percpu_counter_sum_positive(
						&fs_info->delalloc_bytes);
	}
}

struct reserve_ticket {
	u64 bytes;
	int error;
	struct list_head list;
	wait_queue_head_t wait;
};
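
/*
 * Check whether committing the transaction can reclaim enough pinned or
 * delayed space to satisfy the first reservation ticket, and commit it if so.
 * Returns -EAGAIN if we are already inside a running transaction, and
 * -ENOSPC if committing would not help.
 */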
static int may_commit_transaction(struct btrfs_fs_info *fs_info,
				  struct btrfs_space_info *space_info)
{
	struct reserve_ticket *ticket = NULL;
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
	struct btrfs_trans_handle *trans;
	u64 bytes;

	trans = (struct btrfs_trans_handle *)current->journal_info;
	if (trans)
		return -EAGAIN;

	spin_lock(&space_info->lock);
	if (!list_empty(&space_info->priority_tickets))
		ticket = list_first_entry(&space_info->priority_tickets,
					  struct reserve_ticket, list);
	else if (!list_empty(&space_info->tickets))
		ticket = list_first_entry(&space_info->tickets,
					  struct reserve_ticket, list);
	bytes = (ticket) ? ticket->bytes : 0;
	spin_unlock(&space_info->lock);

	if (!bytes)
		return 0;
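
	/* See if there is enough pinned space to make this reservation */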
	if (percpu_counter_compare(&space_info->total_bytes_pinned,
				   bytes) >= 0)
		goto commit;
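
	/*
	 * See if there is some space in the delayed insertion reservation for
	 * this reservation.
	 */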
	if (space_info != delayed_rsv->space_info)
		return -ENOSPC;

	spin_lock(&delayed_rsv->lock);
	if (delayed_rsv->size > bytes)
		bytes = 0;
	else
		bytes -= delayed_rsv->size;
	if (percpu_counter_compare(&space_info->total_bytes_pinned,
				   bytes) < 0) {
		spin_unlock(&delayed_rsv->lock);
		return -ENOSPC;
	}
	spin_unlock(&delayed_rsv->lock);

commit:
	trans = btrfs_join_transaction(fs_info->extent_root);
	if (IS_ERR(trans))
		return -ENOSPC;

	return btrfs_commit_transaction(trans);
}
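
/*
 * Try to flush some data based on policy set by @state. This is only advisory
 * and may fail for various reasons. The caller is supposed to examine the
 * state of @space_info to detect the outcome.
 */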
static void flush_space(struct btrfs_fs_info *fs_info,
			struct btrfs_space_info *space_info, u64 num_bytes,
			int state)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		if (state == FLUSH_DELAYED_ITEMS_NR)
			nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
		else
			nr = -1;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, fs_info, nr);
		btrfs_end_transaction(trans);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
		shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
				state == FLUSH_DELALLOC_WAIT);
		break;
	case ALLOC_CHUNK:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = do_chunk_alloc(trans, fs_info,
				     btrfs_metadata_alloc_profile(fs_info),
				     CHUNK_ALLOC_NO_FORCE);
		btrfs_end_transaction(trans);
		if (ret > 0 || ret == -ENOSPC)
			ret = 0;
		break;
	case COMMIT_TRANS:
		ret = may_commit_transaction(fs_info, space_info);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
				ret);
}

static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
				 struct btrfs_space_info *space_info,
				 bool system_chunk)
{
	struct reserve_ticket *ticket;
	u64 used;
	u64 expected;
	u64 to_reclaim = 0;

	list_for_each_entry(ticket, &space_info->tickets, list)
		to_reclaim += ticket->bytes;
	list_for_each_entry(ticket, &space_info->priority_tickets, list)
		to_reclaim += ticket->bytes;
	if (to_reclaim)
		return to_reclaim;

	to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
	if (can_overcommit(fs_info, space_info, to_reclaim,
			   BTRFS_RESERVE_FLUSH_ALL, system_chunk))
		return 0;

	used = btrfs_space_info_used(space_info, true);

	if (can_overcommit(fs_info, space_info, SZ_1M,
			   BTRFS_RESERVE_FLUSH_ALL, system_chunk))
		expected = div_factor_fine(space_info->total_bytes, 95);
	else
		expected = div_factor_fine(space_info->total_bytes, 90);

	if (used > expected)
		to_reclaim = used - expected;
	else
		to_reclaim = 0;
	to_reclaim = min(to_reclaim, space_info->bytes_may_use +
				     space_info->bytes_reserved);
	return to_reclaim;
}

static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
					struct btrfs_space_info *space_info,
					u64 used, bool system_chunk)
{
	u64 thresh = div_factor_fine(space_info->total_bytes, 98);
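
	/* If we're just plain full then async reclaim just slows us down. */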
	if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
		return 0;

	if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
					      system_chunk))
		return 0;

	return (used >= thresh && !btrfs_fs_closing(fs_info) &&
		!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}

static void wake_all_tickets(struct list_head *head)
{
	struct reserve_ticket *ticket;

	while (!list_empty(head)) {
		ticket = list_first_entry(head, struct reserve_ticket, list);
		list_del_init(&ticket->list);
		ticket->error = -ENOSPC;
		wake_up(&ticket->wait);
	}
}
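
/*
 * This is for normal flushers, which can wait as long as needed.  We will
 * loop and continuously try to flush as long as we are making progress.  We
 * count progress as clearing off tickets each time we have to loop.
 */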
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 to_reclaim;
	int flush_state;
	int commit_cycles = 0;
	u64 last_tickets_id;

	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
						      false);
	if (!to_reclaim) {
		space_info->flush = 0;
		spin_unlock(&space_info->lock);
		return;
	}
	last_tickets_id = space_info->tickets_id;
	spin_unlock(&space_info->lock);

	flush_state = FLUSH_DELAYED_ITEMS_NR;
	do {
		flush_space(fs_info, space_info, to_reclaim, flush_state);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}
		to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
							      space_info,
							      false);
		if (last_tickets_id == space_info->tickets_id) {
			flush_state++;
		} else {
			last_tickets_id = space_info->tickets_id;
			flush_state = FLUSH_DELAYED_ITEMS_NR;
			if (commit_cycles)
				commit_cycles--;
		}

		if (flush_state > COMMIT_TRANS) {
			commit_cycles++;
			if (commit_cycles > 2) {
				wake_all_tickets(&space_info->tickets);
				space_info->flush = 0;
			} else {
				flush_state = FLUSH_DELAYED_ITEMS_NR;
			}
		}
		spin_unlock(&space_info->lock);
	} while (flush_state <= COMMIT_TRANS);
}

void btrfs_init_async_reclaim_work(struct work_struct *work)
{
	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}

static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
					    struct btrfs_space_info *space_info,
					    struct reserve_ticket *ticket)
{
	u64 to_reclaim;
	int flush_state = FLUSH_DELAYED_ITEMS_NR;

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
						      false);
	if (!to_reclaim) {
		spin_unlock(&space_info->lock);
		return;
	}
	spin_unlock(&space_info->lock);

	do {
		flush_space(fs_info, space_info, to_reclaim, flush_state);
		flush_state++;
		spin_lock(&space_info->lock);
		if (ticket->bytes == 0) {
			spin_unlock(&space_info->lock);
			return;
		}
		spin_unlock(&space_info->lock);
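
		/*
		 * Priority flushers can't wait on delalloc without
		 * deadlocking.
		 */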
		if (flush_state == FLUSH_DELALLOC ||
		    flush_state == FLUSH_DELALLOC_WAIT)
			flush_state = ALLOC_CHUNK;
	} while (flush_state < COMMIT_TRANS);
}

static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
			       struct btrfs_space_info *space_info,
			       struct reserve_ticket *ticket, u64 orig_bytes)
{
	DEFINE_WAIT(wait);
	int ret = 0;

	spin_lock(&space_info->lock);
	while (ticket->bytes > 0 && ticket->error == 0) {
		ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
		if (ret) {
			ret = -EINTR;
			break;
		}
		spin_unlock(&space_info->lock);

		schedule();

		finish_wait(&ticket->wait, &wait);
		spin_lock(&space_info->lock);
	}
	if (!ret)
		ret = ticket->error;
	if (!list_empty(&ticket->list))
		list_del_init(&ticket->list);
	if (ticket->bytes && ticket->bytes < orig_bytes) {
		u64 num_bytes = orig_bytes - ticket->bytes;

		space_info->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
					      space_info->flags, num_bytes, 0);
	}
	spin_unlock(&space_info->lock);

	return ret;
}
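
/**
 * __reserve_metadata_bytes - try to reserve bytes from the space_info
 * @fs_info - the filesystem we are reserving from
 * @space_info - the space info we want to allocate from
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 * @system_chunk - whether this is a reservation on behalf of the chunk root
 *
 * This will reserve orig_bytes number of bytes from the space info.  If there
 * is not enough space it will make an attempt to flush some space from the
 * delalloc flushing stuff, punt on everything else, and then commit the
 * transaction if it does not have enough space.
 */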
static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info,
				    u64 orig_bytes,
				    enum btrfs_reserve_flush_enum flush,
				    bool system_chunk)
{
	struct reserve_ticket ticket;
	u64 used;
	int ret = 0;

	ASSERT(orig_bytes);
	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);

	spin_lock(&space_info->lock);
	ret = -ENOSPC;
	used = btrfs_space_info_used(space_info, true);
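
	/*
	 * If we have enough space then hooray, make our reservation and carry
	 * on.  If not see if we can overcommit, and if we can, hooray carry on.
	 * If not things get more complicated.
	 */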
	if (used + orig_bytes <= space_info->total_bytes) {
		space_info->bytes_may_use += orig_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
					      space_info->flags, orig_bytes, 1);
		ret = 0;
	} else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
				  system_chunk)) {
		space_info->bytes_may_use += orig_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
					      space_info->flags, orig_bytes, 1);
		ret = 0;
	}
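
	/*
	 * If we couldn't make a reservation then setup our reservation ticket
	 * and kick the async worker if it's not already running.
	 *
	 * If we are a priority flusher then we just need to add our ticket to
	 * the list and we will do our own flushing further down.
	 */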
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
		ticket.bytes = orig_bytes;
		ticket.error = 0;
		init_waitqueue_head(&ticket.wait);
		if (flush == BTRFS_RESERVE_FLUSH_ALL) {
			list_add_tail(&ticket.list, &space_info->tickets);
			if (!space_info->flush) {
				space_info->flush = 1;
				trace_btrfs_trigger_flush(fs_info,
							  space_info->flags,
							  orig_bytes, flush,
							  "enospc");
				queue_work(system_unbound_wq,
					   &fs_info->async_reclaim_work);
			}
		} else {
			list_add_tail(&ticket.list,
				      &space_info->priority_tickets);
		}
	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		used += orig_bytes;
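
		/*
		 * We will do the space reservation dance during log replay,
		 * which means we won't have fs_info->fs_root set, so don't do
		 * the async reclaim as we will panic.
		 */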
		if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
		    need_do_async_reclaim(fs_info, space_info,
					  used, system_chunk) &&
		    !work_busy(&fs_info->async_reclaim_work)) {
			trace_btrfs_trigger_flush(fs_info, space_info->flags,
						  orig_bytes, flush, "preempt");
			queue_work(system_unbound_wq,
				   &fs_info->async_reclaim_work);
		}
	}
	spin_unlock(&space_info->lock);
	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
		return ret;

	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		return wait_reserve_ticket(fs_info, space_info, &ticket,
					   orig_bytes);

	ret = 0;
	priority_reclaim_metadata_space(fs_info, space_info, &ticket);
	spin_lock(&space_info->lock);
	if (ticket.bytes) {
		if (ticket.bytes < orig_bytes) {
			u64 num_bytes = orig_bytes - ticket.bytes;

			space_info->bytes_may_use -= num_bytes;
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      num_bytes, 0);
		}
		list_del_init(&ticket.list);
		ret = -ENOSPC;
	}
	spin_unlock(&space_info->lock);
	ASSERT(list_empty(&ticket.list));
	return ret;
}
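
/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root - the root we're allocating for
 * @block_rsv - the block_rsv we're allocating for
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt to
 * flush some space from the delalloc shrinker, punt on everything else, and
 * then commit the transaction if it does not have enough space.
 */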
static int reserve_metadata_bytes(struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	int ret;
	bool system_chunk = (root == fs_info->chunk_root);

	ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
				       orig_bytes, flush, system_chunk);
	if (ret == -ENOSPC &&
	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
		if (block_rsv != global_rsv &&
		    !block_rsv_use_bytes(global_rsv, orig_bytes))
			ret = 0;
	}
	if (ret == -ENOSPC)
		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
					      block_rsv->space_info->flags,
					      orig_bytes, 1);
	return ret;
}

static struct btrfs_block_rsv *get_block_rsv(
					const struct btrfs_trans_handle *trans,
					const struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *block_rsv = NULL;

	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    (root == fs_info->csum_root && trans->adding_csums) ||
	    (root == fs_info->uuid_root))
		block_rsv = trans->block_rsv;

	if (!block_rsv)
		block_rsv = root->block_rsv;

	if (!block_rsv)
		block_rsv = &fs_info->empty_block_rsv;

	return block_rsv;
}

static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes)
{
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved >= num_bytes) {
		block_rsv->reserved -= num_bytes;
		if (block_rsv->reserved < block_rsv->size)
			block_rsv->full = 0;
		ret = 0;
	}
	spin_unlock(&block_rsv->lock);
	return ret;
}

static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
				u64 num_bytes, int update_size)
{
	spin_lock(&block_rsv->lock);
	block_rsv->reserved += num_bytes;
	if (update_size)
		block_rsv->size += num_bytes;
	else if (block_rsv->reserved >= block_rsv->size)
		block_rsv->full = 1;
	spin_unlock(&block_rsv->lock);
}

int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *dest, u64 num_bytes,
			     int min_factor)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != dest->space_info)
		return -ENOSPC;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, min_factor);
	if (global_rsv->reserved < min_bytes + num_bytes) {
		spin_unlock(&global_rsv->lock);
		return -ENOSPC;
	}
	global_rsv->reserved -= num_bytes;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	block_rsv_add_bytes(dest, num_bytes, 1);
	return 0;
}
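
/*
 * This is for space we already have accounted in space_info->bytes_may_use, so
 * basically when we're returning space from block_rsv's.
 */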
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head;
	u64 used;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
	bool check_overcommit = false;

	spin_lock(&space_info->lock);
	head = &space_info->priority_tickets;
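
	/*
	 * If we are over our limit then we need to check and see if we can
	 * overcommit, and if we can't then we just need to free up our space
	 * and not satisfy any requests.
	 */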
	used = btrfs_space_info_used(space_info, true);
	if (used - num_bytes >= space_info->total_bytes)
		check_overcommit = true;
again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
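
		/*
		 * We use 0 bytes because this space is already reserved, so
		 * adding the ticket space would be a double count.
		 */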
		if (check_overcommit &&
		    !can_overcommit(fs_info, space_info, 0, flush, false))
			break;
		if (num_bytes >= ticket->bytes) {
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		flush = BTRFS_RESERVE_FLUSH_ALL;
		goto again;
	}
	space_info->bytes_may_use -= num_bytes;
	trace_btrfs_space_reservation(fs_info, "space_info",
				      space_info->flags, num_bytes, 0);
	spin_unlock(&space_info->lock);
}
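
/*
 * This is for newly allocated space that isn't accounted in
 * space_info->bytes_may_use yet.  So if we allocate a chunk or unpin an
 * extent we use this helper.
 */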
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head = &space_info->priority_tickets;

again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		if (num_bytes >= ticket->bytes) {
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      ticket->bytes, 1);
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			space_info->bytes_may_use += ticket->bytes;
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      num_bytes, 1);
			space_info->bytes_may_use += num_bytes;
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		goto again;
	}
}

static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
				   struct btrfs_block_rsv *block_rsv,
				   struct btrfs_block_rsv *dest, u64 num_bytes)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;
	u64 ret;

	spin_lock(&block_rsv->lock);
	if (num_bytes == (u64)-1)
		num_bytes = block_rsv->size;
	block_rsv->size -= num_bytes;
	if (block_rsv->reserved >= block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	} else {
		num_bytes = 0;
	}
	spin_unlock(&block_rsv->lock);

	ret = num_bytes;
	if (num_bytes > 0) {
		if (dest) {
			spin_lock(&dest->lock);
			if (!dest->full) {
				u64 bytes_to_add;

				bytes_to_add = dest->size - dest->reserved;
				bytes_to_add = min(num_bytes, bytes_to_add);
				dest->reserved += bytes_to_add;
				if (dest->reserved >= dest->size)
					dest->full = 1;
				num_bytes -= bytes_to_add;
			}
			spin_unlock(&dest->lock);
		}
		if (num_bytes)
			space_info_add_old_bytes(fs_info, space_info,
						 num_bytes);
	}
	return ret;
}

int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
			    struct btrfs_block_rsv *dst, u64 num_bytes,
			    int update_size)
{
	int ret;

	ret = block_rsv_use_bytes(src, num_bytes);
	if (ret)
		return ret;

	block_rsv_add_bytes(dst, num_bytes, update_size);
	return 0;
}

void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
{
	memset(rsv, 0, sizeof(*rsv));
	spin_lock_init(&rsv->lock);
	rsv->type = type;
}

void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
				   struct btrfs_block_rsv *rsv,
				   unsigned short type)
{
	btrfs_init_block_rsv(rsv, type);
	rsv->space_info = __find_space_info(fs_info,
					    BTRFS_BLOCK_GROUP_METADATA);
}

struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
					      unsigned short type)
{
	struct btrfs_block_rsv *block_rsv;

	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
	if (!block_rsv)
		return NULL;

	btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
	return block_rsv;
}

void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_rsv *rsv)
{
	if (!rsv)
		return;
	btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
	kfree(rsv);
}

void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
{
	kfree(rsv);
}

int btrfs_block_rsv_add(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
			enum btrfs_reserve_flush_enum flush)
{
	int ret;

	if (num_bytes == 0)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 1);
		return 0;
	}

	return ret;
}

int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
{
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	num_bytes = div_factor(block_rsv->size, min_factor);
	if (block_rsv->reserved >= num_bytes)
		ret = 0;
	spin_unlock(&block_rsv->lock);

	return ret;
}

int btrfs_block_rsv_refill(struct btrfs_root *root,
			   struct btrfs_block_rsv *block_rsv, u64 min_reserved,
			   enum btrfs_reserve_flush_enum flush)
{
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	num_bytes = min_reserved;
	if (block_rsv->reserved >= num_bytes)
		ret = 0;
	else
		num_bytes -= block_rsv->reserved;
	spin_unlock(&block_rsv->lock);

	if (!ret)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 0);
		return 0;
	}

	return ret;
}
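
/**
 * btrfs_inode_rsv_refill - refill the inode block rsv.
 * @inode - the inode we are refilling.
 * @flush - the flushing restriction.
 *
 * Essentially the same as btrfs_block_rsv_refill, except it uses the
 * block_rsv->size as the minimum size.  We'll either refill the missing amount
 * or return if we already have enough space.  This will also handle the
 * reserve tracepoint for the block reserve.
 */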
int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
			   enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size)
		num_bytes = block_rsv->size - block_rsv->reserved;
	spin_unlock(&block_rsv->lock);

	if (num_bytes == 0)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 0);
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), num_bytes, 1);
	}
	return ret;
}
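
/**
 * btrfs_inode_rsv_release - release any excessive reservation.
 * @inode - the inode we need to release from.
 *
 * This is the same as btrfs_block_rsv_release, except that it handles the
 * tracepoint for the reservation.
 */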
void btrfs_inode_rsv_release(struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
	u64 released = 0;
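
	/*
	 * Since we statically set the block_rsv->size we just want to say we
	 * are releasing 0 bytes, and then we'll just get the reservation over
	 * the size free'd.
	 */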
	released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
	if (released > 0)
		trace_btrfs_space_reservation(fs_info, "delalloc",
					      btrfs_ino(inode), released, 0);
}

void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *block_rsv,
			     u64 num_bytes)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;

	if (global_rsv == block_rsv ||
	    block_rsv->space_info != global_rsv->space_info)
		global_rsv = NULL;
	block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
}

static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 num_bytes;
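
	/*
	 * The global block rsv is based on the size of the extent tree, the
	 * checksum tree and the tree of tree roots.  Ideally the global rsv
	 * is bigger, to keep a reserve big enough for COW and any other
	 * operations we may need to do.
	 */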
	num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
		btrfs_root_used(&fs_info->csum_root->root_item) +
		btrfs_root_used(&fs_info->tree_root->root_item);
	num_bytes = max_t(u64, num_bytes, SZ_16M);

	spin_lock(&sinfo->lock);
	spin_lock(&block_rsv->lock);

	block_rsv->size = min_t(u64, num_bytes, SZ_512M);

	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = btrfs_space_info_used(sinfo, true);
		if (sinfo->total_bytes > num_bytes) {
			num_bytes = sinfo->total_bytes - num_bytes;
			num_bytes = min(num_bytes,
					block_rsv->size - block_rsv->reserved);
			block_rsv->reserved += num_bytes;
			sinfo->bytes_may_use += num_bytes;
			trace_btrfs_space_reservation(fs_info, "space_info",
						      sinfo->flags, num_bytes,
						      1);
		}
	} else if (block_rsv->reserved > block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
		sinfo->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
					      sinfo->flags, num_bytes, 0);
		block_rsv->reserved = block_rsv->size;
	}

	if (block_rsv->reserved == block_rsv->size)
		block_rsv->full = 1;
	else
		block_rsv->full = 0;

	spin_unlock(&block_rsv->lock);
	spin_unlock(&sinfo->lock);
}

static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.space_info = space_info;

	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->global_block_rsv.space_info = space_info;
	fs_info->trans_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.space_info = space_info;
	fs_info->delayed_block_rsv.space_info = space_info;

	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
	if (fs_info->quota_root)
		fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;

	update_global_block_rsv(fs_info);
}

static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
				(u64)-1);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
	WARN_ON(fs_info->delayed_block_rsv.size > 0);
	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}

void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info)
{
	if (!trans->block_rsv) {
		ASSERT(!trans->bytes_reserved);
		return;
	}

	if (!trans->bytes_reserved)
		return;

	ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
	trace_btrfs_space_reservation(fs_info, "transaction",
				      trans->transid, trans->bytes_reserved, 0);
	btrfs_block_rsv_release(fs_info, trans->block_rsv,
				trans->bytes_reserved);
	trans->bytes_reserved = 0;
}
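
/*
 * To be called after all the new block groups attached to the transaction
 * handle have been created (btrfs_create_pending_block_groups()).
 */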
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	if (!trans->chunk_bytes_reserved)
		return;

	WARN_ON_ONCE(!list_empty(&trans->new_bgs));

	block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
				trans->chunk_bytes_reserved);
	trans->chunk_bytes_reserved = 0;
}
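
/* Can only return 0 or -ENOSPC */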
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
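
	/*
	 * We always use trans->block_rsv as the source here, since the space
	 * for the orphan item was reserved when the transaction was started.
	 */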
	struct btrfs_block_rsv *src_rsv = trans->block_rsv;
	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
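
	/*
	 * We need to hold space in order to delete our orphan item once we've
	 * added it, so this takes the reservation so we can release it later
	 * when we are truly done with the orphan item.
	 */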
	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
				      num_bytes, 1);
	return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}

void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
				      num_bytes, 0);
	btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
}
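
/*
 * btrfs_subvolume_reserve_metadata() - reserve space for a subvolume operation
 * @root: the root of the parent directory
 * @rsv: block reservation
 * @items: the number of items that we need to reserve for
 * @qgroup_reserved: used to return the reserved qgroup size
 * @use_global_rsv: allow falling back to the global block rsv
 *
 * Reserves metadata space for snapshot/subvolume creation and deletion,
 * optionally reserving qgroup metadata space for the new tree nodes as well.
 */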
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int items,
				     u64 *qgroup_reserved,
				     bool use_global_rsv)
{
	u64 num_bytes;
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
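		/* One for parent inode, two for dir entries */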
		num_bytes = 3 * fs_info->nodesize;
		ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
		if (ret)
			return ret;
	} else {
		num_bytes = 0;
	}

	*qgroup_reserved = num_bytes;

	num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
	rsv->space_info = __find_space_info(fs_info,
					    BTRFS_BLOCK_GROUP_METADATA);
	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);

	if (ret == -ENOSPC && use_global_rsv)
		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);

	if (ret && *qgroup_reserved)
		btrfs_qgroup_free_meta(root, *qgroup_reserved);

	return ret;
}

void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
				      struct btrfs_block_rsv *rsv)
{
	btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
}

static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
						 struct btrfs_inode *inode)
{
	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
	u64 reserve_size = 0;
	u64 csum_leaves;
	unsigned outstanding_extents;

	lockdep_assert_held(&inode->lock);
	outstanding_extents = inode->outstanding_extents;
	if (outstanding_extents)
		reserve_size = btrfs_calc_trans_metadata_size(fs_info,
						outstanding_extents + 1);
	csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
						 inode->csum_bytes);
	reserve_size += btrfs_calc_trans_metadata_size(fs_info,
						       csum_leaves);

	spin_lock(&block_rsv->lock);
	block_rsv->size = reserve_size;
	spin_unlock(&block_rsv->lock);
}

int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	unsigned nr_extents;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
	int ret = 0;
	bool delalloc_lock = true;
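
	/*
	 * If we are a free space inode we need to not flush since we will be
	 * in the middle of a transaction commit.  We also don't need the
	 * delalloc mutex since we won't race with anybody.
	 *
	 * If we have a transaction open (can happen if we call truncate_block
	 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
	 */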
	if (btrfs_is_free_space_inode(inode)) {
		flush = BTRFS_RESERVE_NO_FLUSH;
		delalloc_lock = false;
	} else if (current->journal_info) {
		flush = BTRFS_RESERVE_FLUSH_LIMIT;
	}

	if (flush != BTRFS_RESERVE_NO_FLUSH &&
	    btrfs_transaction_in_commit(fs_info))
		schedule_timeout(1);

	if (delalloc_lock)
		mutex_lock(&inode->delalloc_mutex);

	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
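
	/* Add our new extents and calculate the new rsv size. */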
	spin_lock(&inode->lock);
	nr_extents = count_max_extents(num_bytes);
	btrfs_mod_outstanding_extents(inode, nr_extents);
	inode->csum_bytes += num_bytes;
	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
	spin_unlock(&inode->lock);

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
		ret = btrfs_qgroup_reserve_meta(root,
				nr_extents * fs_info->nodesize, true);
		if (ret)
			goto out_fail;
	}

	ret = btrfs_inode_rsv_refill(inode, flush);
	if (unlikely(ret)) {
		btrfs_qgroup_free_meta(root,
				       nr_extents * fs_info->nodesize);
		goto out_fail;
	}

	if (delalloc_lock)
		mutex_unlock(&inode->delalloc_mutex);
	return 0;

out_fail:
	spin_lock(&inode->lock);
	nr_extents = count_max_extents(num_bytes);
	btrfs_mod_outstanding_extents(inode, -nr_extents);
	inode->csum_bytes -= num_bytes;
	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
	spin_unlock(&inode->lock);

	btrfs_inode_rsv_release(inode);
	if (delalloc_lock)
		mutex_unlock(&inode->delalloc_mutex);
	return ret;
}
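
/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for.
 * @num_bytes: the number of bytes we are releasing.
 *
 * This will release the metadata reservation for an inode.  This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations, or on error for the same reason.
 */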
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);

	num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
	spin_lock(&inode->lock);
	inode->csum_bytes -= num_bytes;
	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
	spin_unlock(&inode->lock);

	if (btrfs_is_testing(fs_info))
		return;

	btrfs_inode_rsv_release(inode);
}
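
/**
 * btrfs_delalloc_release_extents - release our outstanding_extents
 * @inode: the inode to balance the reservation for.
 * @num_bytes: the number of bytes we originally reserved with
 *
 * When we reserve space we increase outstanding_extents for the extents we may
 * add.  Once we've set the range as delalloc or created our ordered extents
 * we have outstanding_extents to track the real usage, so we use this to free
 * our temporarily tracked outstanding_extents.  This _must_ be used in
 * conjunction with btrfs_delalloc_reserve_metadata.
 */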
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	unsigned num_extents;

	spin_lock(&inode->lock);
	num_extents = count_max_extents(num_bytes);
	btrfs_mod_outstanding_extents(inode, -num_extents);
	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
	spin_unlock(&inode->lock);

	if (btrfs_is_testing(fs_info))
		return;

	btrfs_inode_rsv_release(inode);
}
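
/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
 * @inode: inode we're writing to
 * @reserved: mandatory parameter, records the actually reserved qgroup ranges
 * @start: start range we are writing to
 * @len: how long the range we are writing to
 *
 * This will do the following things:
 *
 * o reserve space in data space info for num bytes and reserve the
 *   corresponding qgroup space (done in btrfs_check_data_free_space)
 *
 * o reserve space for metadata space, based on the number of outstanding
 *   extents and how many csums will be needed (done in
 *   btrfs_delalloc_reserve_metadata)
 *
 * Return 0 for success.
 * Return <0 for error (-ENOSPC or -EDQUOT).
 */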
int btrfs_delalloc_reserve_space(struct inode *inode,
			struct extent_changeset **reserved, u64 start, u64 len)
{
	int ret;

	ret = btrfs_check_data_free_space(inode, reserved, start, len);
	if (ret < 0)
		return ret;
	ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
	if (ret < 0)
		btrfs_free_reserved_data_space(inode, *reserved, start, len);
	return ret;
}
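
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @reserved: the qgroup ranges recorded when the space was reserved
 * @start: start position of the space already reserved
 * @len: length of the space already reserved
 *
 * This function will release the metadata reservation and will also handle
 * the qgroup reserved data space.
 */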
void btrfs_delalloc_release_space(struct inode *inode,
				  struct extent_changeset *reserved,
				  u64 start, u64 len)
{
	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
	btrfs_free_reserved_data_space(inode, reserved, start, len);
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *info, u64 bytenr,
			      u64 num_bytes, int alloc)
{
	struct btrfs_block_group_cache *cache = NULL;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	int factor;
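
	/* Block accounting for super block */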
	spin_lock(&info->delalloc_root_lock);
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_root_lock);

	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -ENOENT;
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;
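
		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.  This
		 * is because we need the unpinning stage to actually add the
		 * space back to the block group, otherwise we will leak space.
		 */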
		if (!alloc && cache->cached == BTRFS_CACHE_NO)
			cache_block_group(cache, 1);

		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);

		if (btrfs_test_opt(info, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->pinned += num_bytes;
			cache->space_info->bytes_pinned += num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			trace_btrfs_space_reservation(info, "pinned",
						      cache->space_info->flags,
						      num_bytes, 1);
			percpu_counter_add(&cache->space_info->total_bytes_pinned,
					   num_bytes);
			set_extent_dirty(info->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
		}

		spin_lock(&trans->transaction->dirty_bgs_lock);
		if (list_empty(&cache->dirty_list)) {
			list_add_tail(&cache->dirty_list,
				      &trans->transaction->dirty_bgs);
			trans->transaction->num_dirty_bgs++;
			btrfs_get_block_group(cache);
		}
		spin_unlock(&trans->transaction->dirty_bgs_lock);
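
		/*
		 * No longer have used bytes in this block group, queue it for
		 * deletion.  We do this after adding the block group to the
		 * dirty list to avoid races between cleaner kthread and space
		 * cache writeout.
		 */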
		if (!alloc && old_val == 0) {
			spin_lock(&info->unused_bgs_lock);
			if (list_empty(&cache->bg_list)) {
				btrfs_get_block_group(cache);
				list_add_tail(&cache->bg_list,
					      &info->unused_bgs);
			}
			spin_unlock(&info->unused_bgs_lock);
		}

		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}

static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
{
	struct btrfs_block_group_cache *cache;
	u64 bytenr;

	spin_lock(&fs_info->block_group_cache_lock);
	bytenr = fs_info->first_logical_byte;
	spin_unlock(&fs_info->block_group_cache_lock);

	if (bytenr < (u64)-1)
		return bytenr;

	cache = btrfs_lookup_first_block_group(fs_info, search_start);
	if (!cache)
		return 0;

	bytenr = cache->key.objectid;
	btrfs_put_block_group(cache);

	return bytenr;
}

static int pin_down_extent(struct btrfs_fs_info *fs_info,
			   struct btrfs_block_group_cache *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	cache->space_info->bytes_pinned += num_bytes;
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	trace_btrfs_space_reservation(fs_info, "pinned",
				      cache->space_info->flags, num_bytes, 1);
	percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
	set_extent_dirty(fs_info->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	return 0;
}
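
/*
 * This function must be called within a transaction.
 */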
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group_cache *cache;

	cache = btrfs_lookup_block_group(fs_info, bytenr);
	BUG_ON(!cache);

	pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);

	btrfs_put_block_group(cache);
	return 0;
}
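
/*
 * This function must be called within a transaction.
 */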
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group_cache *cache;
	int ret;

	cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!cache)
		return -EINVAL;
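
	/*
	 * Pull in the free space cache (if any) so that our pin removes the
	 * free space from the cache.  We have load_only set to one because
	 * the slow code to read in the free extents does check the pinned
	 * extents.
	 */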
	cache_block_group(cache, 1);

	pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
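
	/* Remove us from the free space cache (if we're there at all) */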
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
	btrfs_put_block_group(cache);
	return ret;
}

static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
				   u64 start, u64 num_bytes)
{
	int ret;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_caching_control *caching_ctl;

	block_group = btrfs_lookup_block_group(fs_info, start);
	if (!block_group)
		return -EINVAL;

	cache_block_group(block_group, 0);
	caching_ctl = get_caching_control(block_group);

	if (!caching_ctl) {
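		/* Logic error */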
		BUG_ON(!block_group_cache_done(block_group));
		ret = btrfs_remove_free_space(block_group, start, num_bytes);
	} else {
		mutex_lock(&caching_ctl->mutex);

		if (start >= caching_ctl->progress) {
			ret = add_excluded_extent(fs_info, start, num_bytes);
		} else if (start + num_bytes <= caching_ctl->progress) {
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
		} else {
			num_bytes = caching_ctl->progress - start;
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
			if (ret)
				goto out_lock;

			num_bytes = (start + num_bytes) -
				    caching_ctl->progress;
			start = caching_ctl->progress;
			ret = add_excluded_extent(fs_info, start, num_bytes);
		}
out_lock:
		mutex_unlock(&caching_ctl->mutex);
		put_caching_control(caching_ctl);
	}
	btrfs_put_block_group(block_group);
	return ret;
}

int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
				 struct extent_buffer *eb)
{
	struct btrfs_file_extent_item *item;
	struct btrfs_key key;
	int found_type;
	int i;

	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
		return 0;

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(eb, item);
		if (found_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
			continue;
		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
		__exclude_logged_extent(fs_info, key.objectid, key.offset);
	}

	return 0;
}

static void
btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
{
	atomic_inc(&bg->reservations);
}

void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
					const u64 start)
{
	struct btrfs_block_group_cache *bg;

	bg = btrfs_lookup_block_group(fs_info, start);
	ASSERT(bg);
	if (atomic_dec_and_test(&bg->reservations))
		wake_up_atomic_t(&bg->reservations);
	btrfs_put_block_group(bg);
}

void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
{
	struct btrfs_space_info *space_info = bg->space_info;

	ASSERT(bg->ro);

	if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
		return;
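
	/*
	 * Our block group is read only but before we set it to read only,
	 * some task might have had allocated an extent from it already, but it
	 * has not yet created a respective ordered extent (and added it to a
	 * root's list of ordered extents).
	 * Therefore wait for any task currently allocating extents, since the
	 * block group's reservations counter is incremented while a read lock
	 * on the groups' semaphore is held and decremented after releasing
	 * the read access on that semaphore and creating the ordered extent.
	 */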
	down_write(&space_info->groups_sem);
	up_write(&space_info->groups_sem);

	wait_on_atomic_t(&bg->reservations, atomic_t_wait,
			 TASK_UNINTERRUPTIBLE);
}
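
/**
 * btrfs_add_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @ram_bytes:	The number of bytes of file content, which will be the same
 *		as @num_bytes except for the compress path
 * @num_bytes:	The number of bytes in question
 * @delalloc:	The blocks are allocated for the delalloc write
 *
 * This is called by the allocator when it reserves space.  If this is a
 * reservation and the block group has become read only we cannot make the
 * reservation and return -EAGAIN, otherwise this function always succeeds.
 */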
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
				    u64 ram_bytes, u64 num_bytes, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (cache->ro) {
		ret = -EAGAIN;
	} else {
		cache->reserved += num_bytes;
		space_info->bytes_reserved += num_bytes;

		trace_btrfs_space_reservation(cache->fs_info,
				"space_info", space_info->flags,
				ram_bytes, 0);
		space_info->bytes_may_use -= ram_bytes;
		if (delalloc)
			cache->delalloc_bytes += num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}
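
/**
 * btrfs_free_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @num_bytes:	The number of bytes in question
 * @delalloc:	The blocks are allocated for the delalloc write
 *
 * This is called by somebody who is freeing space that was never actually
 * used on disk.  For example if you reserve some space for a new leaf in
 * transaction A and before transaction A commits you free that leaf, you
 * call this with reserve set to 0 in order to clear the reservation.
 */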
static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
				     u64 num_bytes, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (cache->ro)
		space_info->bytes_readonly += num_bytes;
	cache->reserved -= num_bytes;
	space_info->bytes_reserved -= num_bytes;

	if (delalloc)
		cache->delalloc_bytes -= num_bytes;
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}

void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
	struct btrfs_caching_control *next;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_block_group_cache *cache;

	down_write(&fs_info->commit_root_sem);

	list_for_each_entry_safe(caching_ctl, next,
				 &fs_info->caching_block_groups, list) {
		cache = caching_ctl->block_group;
		if (block_group_cache_done(cache)) {
			cache->last_byte_to_unpin = (u64)-1;
			list_del_init(&caching_ctl->list);
			put_caching_control(caching_ctl);
		} else {
			cache->last_byte_to_unpin = caching_ctl->progress;
		}
	}

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];

	up_write(&fs_info->commit_root_sem);

	update_global_block_rsv(fs_info);
}
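
/*
 * Returns the free cluster for the given space info and sets empty_cluster to
 * what it should be based on the mount options.
 */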
static struct btrfs_free_cluster *
fetch_cluster_info(struct btrfs_fs_info *fs_info,
		   struct btrfs_space_info *space_info, u64 *empty_cluster)
{
	struct btrfs_free_cluster *ret = NULL;

	*empty_cluster = 0;
	if (btrfs_mixed_space_info(space_info))
		return ret;

	if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		ret = &fs_info->meta_alloc_cluster;
		if (btrfs_test_opt(fs_info, SSD))
			*empty_cluster = SZ_2M;
		else
			*empty_cluster = SZ_64K;
	} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
		   btrfs_test_opt(fs_info, SSD_SPREAD)) {
		*empty_cluster = SZ_2M;
		ret = &fs_info->data_alloc_cluster;
	}

	return ret;
}

static int unpin_extent_range(struct btrfs_fs_info *fs_info,
			      u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_free_cluster *cluster = NULL;
	u64 len;
	u64 total_unpinned = 0;
	u64 empty_cluster = 0;
	bool readonly;

	while (start <= end) {
		readonly = false;
		if (!cache ||
		    start >= cache->key.objectid + cache->key.offset) {
			if (cache)
				btrfs_put_block_group(cache);
			total_unpinned = 0;
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache);

			cluster = fetch_cluster_info(fs_info,
						     cache->space_info,
						     &empty_cluster);
			empty_cluster <<= 1;
		}

		len = cache->key.objectid + cache->key.offset - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			if (return_free_space)
				btrfs_add_free_space(cache, start, len);
		}

		start += len;
		total_unpinned += len;
		space_info = cache->space_info;
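
		/*
		 * If this space cluster has been marked as fragmented and
		 * we've unpinned enough in this block group to potentially
		 * allow a cluster to be created inside of it, go ahead and
		 * clear the fragmented flag.
		 */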
6728 if (cluster && cluster->fragmented &&
6729 total_unpinned > empty_cluster) {
6730 spin_lock(&cluster->lock);
6731 cluster->fragmented = 0;
6732 spin_unlock(&cluster->lock);
6733 }
6734
6735 spin_lock(&space_info->lock);
6736 spin_lock(&cache->lock);
6737 cache->pinned -= len;
6738 space_info->bytes_pinned -= len;
6739
6740 trace_btrfs_space_reservation(fs_info, "pinned",
6741 space_info->flags, len, 0);
6742 space_info->max_extent_size = 0;
6743 percpu_counter_add(&space_info->total_bytes_pinned, -len);
6744 if (cache->ro) {
6745 space_info->bytes_readonly += len;
6746 readonly = true;
6747 }
6748 spin_unlock(&cache->lock);
6749 if (!readonly && return_free_space &&
6750 global_rsv->space_info == space_info) {
6751 u64 to_add = len;
6752
6753 spin_lock(&global_rsv->lock);
6754 if (!global_rsv->full) {
6755 to_add = min(len, global_rsv->size -
6756 global_rsv->reserved);
6757 global_rsv->reserved += to_add;
6758 space_info->bytes_may_use += to_add;
6759 if (global_rsv->reserved >= global_rsv->size)
6760 global_rsv->full = 1;
6761 trace_btrfs_space_reservation(fs_info,
6762 "space_info",
6763 space_info->flags,
6764 to_add, 1);
6765 len -= to_add;
6766 }
6767 spin_unlock(&global_rsv->lock);
6768
6769 if (len)
6770 space_info_add_new_bytes(fs_info, space_info,
6771 len);
6772 }
6773 spin_unlock(&space_info->lock);
6774 }
6775
6776 if (cache)
6777 btrfs_put_block_group(cache);
6778 return 0;
6779}
6780
6781int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
6782 struct btrfs_fs_info *fs_info)
6783{
6784 struct btrfs_block_group_cache *block_group, *tmp;
6785 struct list_head *deleted_bgs;
6786 struct extent_io_tree *unpin;
6787 u64 start;
6788 u64 end;
6789 int ret;
6790
6791 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6792 unpin = &fs_info->freed_extents[1];
6793 else
6794 unpin = &fs_info->freed_extents[0];
6795
	while (!trans->aborted) {
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(fs_info, DISCARD))
			ret = btrfs_discard_extent(fs_info, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end);
		unpin_extent_range(fs_info, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		cond_resched();
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
	deleted_bgs = &trans->transaction->deleted_bgs;
	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
		u64 trimmed = 0;

		ret = -EROFS;
		if (!trans->aborted)
			ret = btrfs_discard_extent(fs_info,
						   block_group->key.objectid,
						   block_group->key.offset,
						   &trimmed);

		list_del_init(&block_group->bg_list);
		btrfs_put_block_group_trimming(block_group);
		btrfs_put_block_group(block_group);

		if (ret) {
			const char *errstr = btrfs_decode_error(ret);
			btrfs_warn(fs_info,
			   "discard failed while removing blockgroup: errno=%d %s",
				   ret, errstr);
		}
	}

	return 0;
}

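/*
 * Drop refs_to_drop references to the extent described by node.  When the
 * last reference goes away this deletes the extent item (and its inline
 * backrefs) from the extent tree, updates the free space accounting and,
 * for data extents, removes the checksums.
 */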
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_fs_info *info,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	int last_ref = 0;
	bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	BUG_ON(!is_data && refs_to_drop != 1);

	if (is_data)
		skinny_metadata = false;

	ret = lookup_extent_backref(trans, info, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * The extent item is expected to sit close to the backref
		 * we just found, so scan backwards a bounded number of
		 * slots looking for it.
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			BUG_ON(iref);
			ret = remove_extent_backref(trans, info, path, NULL,
						    refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			btrfs_release_path(path);
			path->leave_spinning = 1;

			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				skinny_metadata = false;
				key.objectid = bytenr;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				btrfs_err(info,
					  "umm, got %d back from search, was looking for %llu",
					  ret, bytenr);
				if (ret > 0)
					btrfs_print_leaf(path->nodes[0]);
			}
			if (ret < 0) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (WARN_ON(ret == -ENOENT)) {
		btrfs_print_leaf(path->nodes[0]);
		btrfs_err(info,
			  "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
			  bytenr, parent, root_objectid, owner_objectid,
			  owner_offset);
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, info, path, owner_objectid,
					     0);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		btrfs_release_path(path);
		path->leave_spinning = 1;

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			btrfs_err(info,
				  "umm, got %d back from search, was looking for %llu",
				  ret, bytenr);
			btrfs_print_leaf(path->nodes[0]);
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		btrfs_err(info,
			  "trying to drop %d refs but we only have %Lu for bytenr %Lu",
			  refs_to_drop, refs, bytenr);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, info, path,
						    iref, refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
		}
	} else {
		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		last_ref = 1;
		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
		}

		ret = add_to_free_space_tree(trans, info, bytenr, num_bytes);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = update_block_group(trans, info, bytenr, num_bytes, 0);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	btrfs_release_path(path);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * when we free a block, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree for
 * a given extent, and if there are no other delayed refs to be processed, it
 * removes it from the tree.
 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head)
		goto out_delayed_unlock;

	spin_lock(&head->lock);
	if (!RB_EMPTY_ROOT(&head->ref_tree))
		goto out;

	if (head->extent_op) {
		if (!head->must_insert_reserved)
			goto out;
		btrfs_free_delayed_extent_op(head->extent_op);
		head->extent_op = NULL;
	}

	/*
	 * waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	/*
	 * at this point we have a head with no other entries.  Go
	 * ahead and process it.
	 */
	rb_erase(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);

	/*
	 * we don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
	head->processing = 0;
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	BUG_ON(head->extent_op);
	if (head->must_insert_reserved)
		ret = 1;

	mutex_unlock(&head->mutex);
	btrfs_put_delayed_ref_head(head);
	return ret;
out:
	spin_unlock(&head->lock);

out_delayed_unlock:
	spin_unlock(&delayed_refs->lock);
	return 0;
}

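/*
 * Free a tree block: queue the delayed ref and, when this was the last
 * reference and the block was allocated in the current transaction, reclaim
 * or pin the space immediately instead of leaving it all to the delayed
 * ref machinery.
 */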
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int pin = 1;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		int old_ref_mod, new_ref_mod;

		btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
				   root->root_key.objectid,
				   btrfs_header_level(buf), 0,
				   BTRFS_DROP_DELAYED_REF);
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
						 buf->len, parent,
						 root->root_key.objectid,
						 btrfs_header_level(buf),
						 BTRFS_DROP_DELAYED_REF, NULL,
						 &old_ref_mod, &new_ref_mod);
		BUG_ON(ret); /* -ENOMEM */
		pin = old_ref_mod >= 0 && new_ref_mod < 0;
	}

	if (last_ref && btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group_cache *cache;

		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, buf->start);
			if (!ret)
				goto out;
		}

		pin = 0;
		cache = btrfs_lookup_block_group(fs_info, buf->start);

		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(fs_info, cache, buf->start,
					buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_free_reserved_bytes(cache, buf->len, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
	}
out:
	if (pin)
		add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
				 root->root_key.objectid);

	if (last_ref) {
		/*
		 * Deleting the buffer, clear the corrupt flag since it doesn't
		 * matter anymore.
		 */
		clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
	}
}

/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
		      u64 owner, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int old_ref_mod, new_ref_mod;
	int ret;

	if (btrfs_is_testing(fs_info))
		return 0;

	if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
		btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
				   root_objectid, owner, offset,
				   BTRFS_DROP_DELAYED_REF);

	/*
	 * tree log blocks never actually go into the extent allocation
	 * tree, just update pinning info and exit early.
	 */
	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
		/* unlocks the pinned mutex */
		btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
		old_ref_mod = new_ref_mod = 0;
		ret = 0;
	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
						 num_bytes, parent,
						 root_objectid, (int)owner,
						 BTRFS_DROP_DELAYED_REF, NULL,
						 &old_ref_mod, &new_ref_mod);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						 num_bytes, parent,
						 root_objectid, owner, offset,
						 0, BTRFS_DROP_DELAYED_REF,
						 &old_ref_mod, &new_ref_mod);
	}

	if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
		add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);

	return ret;
}

/*
 * when we wait for progress in the block group caching, it's because
 * our allocation attempt failed at least once.  So, we must sleep
 * and let some progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to
 * show up, and then it will check the block group free space numbers
 * for our min num_bytes.  Another option is to have it go ahead
 * and look in the rbtree for a free extent of a given size, but this
 * is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
 * any of the information in this block group.
 */
static noinline void
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
				u64 num_bytes)
{
	struct btrfs_caching_control *caching_ctl;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return;

	wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
		   (cache->free_space_ctl->free_space >= num_bytes));

	put_caching_control(caching_ctl);
}

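/*
 * Wait until the whole block group has been cached, or until caching failed.
 */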
static noinline int
wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;

	wait_event(caching_ctl->wait, block_group_cache_done(cache));
	if (cache->cached == BTRFS_CACHE_ERROR)
		ret = -EIO;
	put_caching_control(caching_ctl);
	return ret;
}

int __get_raid_index(u64 flags)
{
	if (flags & BTRFS_BLOCK_GROUP_RAID10)
		return BTRFS_RAID_RAID10;
	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
		return BTRFS_RAID_RAID1;
	else if (flags & BTRFS_BLOCK_GROUP_DUP)
		return BTRFS_RAID_DUP;
	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
		return BTRFS_RAID_RAID0;
	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
		return BTRFS_RAID_RAID5;
	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
		return BTRFS_RAID_RAID6;

	return BTRFS_RAID_SINGLE;
}

int get_block_group_index(struct btrfs_block_group_cache *cache)
{
	return __get_raid_index(cache->flags);
}

static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10]	= "raid10",
	[BTRFS_RAID_RAID1]	= "raid1",
	[BTRFS_RAID_DUP]	= "dup",
	[BTRFS_RAID_RAID0]	= "raid0",
	[BTRFS_RAID_SINGLE]	= "single",
	[BTRFS_RAID_RAID5]	= "raid5",
	[BTRFS_RAID_RAID6]	= "raid6",
};

static const char *get_raid_name(enum btrfs_raid_types type)
{
	if (type >= BTRFS_NR_RAID_TYPES)
		return NULL;

	return btrfs_raid_type_names[type];
}

enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT = 0,
	LOOP_CACHING_WAIT = 1,
	LOOP_ALLOC_CHUNK = 2,
	LOOP_NO_EMPTY_SIZE = 3,
};

static inline void
btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	if (delalloc)
		down_read(&cache->data_rwsem);
}

static inline void
btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	btrfs_get_block_group(cache);
	if (delalloc)
		down_read(&cache->data_rwsem);
}

static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group_cache *used_bg = NULL;

	spin_lock(&cluster->refill_lock);
	while (1) {
		used_bg = cluster->block_group;
		if (!used_bg)
			return NULL;

		if (used_bg == block_group)
			return used_bg;

		btrfs_get_block_group(used_bg);

		if (!delalloc)
			return used_bg;

		if (down_read_trylock(&used_bg->data_rwsem))
			return used_bg;

		spin_unlock(&cluster->refill_lock);

		/* We should only have one-level nested. */
		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);

		spin_lock(&cluster->refill_lock);
		if (used_bg == cluster->block_group)
			return used_bg;

		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}
}

static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}

/*
 * walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 */
static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
				u64 ram_bytes, u64 num_bytes, u64 empty_size,
				u64 hint_byte, struct btrfs_key *ins,
				u64 flags, int delalloc)
{
	int ret = 0;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_free_cluster *last_ptr = NULL;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 search_start = 0;
	u64 max_extent_size = 0;
	u64 empty_cluster = 0;
	struct btrfs_space_info *space_info;
	int loop = 0;
	int index = __get_raid_index(flags);
	bool failed_cluster_refill = false;
	bool failed_alloc = false;
	bool use_cluster = true;
	bool have_caching_bg = false;
	bool orig_have_caching_bg = false;
	bool full_search = false;

	WARN_ON(num_bytes < fs_info->sectorsize);
	ins->type = BTRFS_EXTENT_ITEM_KEY;
	ins->objectid = 0;
	ins->offset = 0;

	trace_find_free_extent(fs_info, num_bytes, empty_size, flags);

	space_info = __find_space_info(fs_info, flags);
	if (!space_info) {
		btrfs_err(fs_info, "No space info for %llu", flags);
		return -ENOSPC;
	}

	/*
	 * If our free space is heavily fragmented we may not be able to make
	 * big contiguous allocations, so instead of doing the expensive search
	 * for free space, simply return ENOSPC with our max_extent_size so we
	 * can go ahead and search for a fragmented slot if we need to.
	 */
	if (unlikely(space_info->max_extent_size)) {
		spin_lock(&space_info->lock);
		if (space_info->max_extent_size &&
		    num_bytes > space_info->max_extent_size) {
			ins->offset = space_info->max_extent_size;
			spin_unlock(&space_info->lock);
			return -ENOSPC;
		} else if (space_info->max_extent_size) {
			use_cluster = false;
		}
		spin_unlock(&space_info->lock);
	}

	last_ptr = fetch_cluster_info(fs_info, space_info, &empty_cluster);
	if (last_ptr) {
		spin_lock(&last_ptr->lock);
		if (last_ptr->block_group)
			hint_byte = last_ptr->window_start;
		if (last_ptr->fragmented) {
			/*
			 * We still set window_start so we can keep track of the
			 * last place we found an allocation to try and save
			 * some time.
			 */
			hint_byte = last_ptr->window_start;
			use_cluster = false;
		}
		spin_unlock(&last_ptr->lock);
	}

	search_start = max(search_start, first_logical_byte(fs_info, 0));
	search_start = max(search_start, hint_byte);
	if (search_start == hint_byte) {
		block_group = btrfs_lookup_block_group(fs_info, search_start);
		/*
		 * we don't want to use the block group if it doesn't match our
		 * allocation bits, or if it's not cached.
		 *
		 * However if we are re-searching with an ideal block group
		 * again, we are on our last chance, so use it if we can.
		 */
		if (block_group && block_group_bits(block_group, flags) &&
		    block_group->cached != BTRFS_CACHE_NO) {
			down_read(&space_info->groups_sem);
			if (list_empty(&block_group->list) ||
			    block_group->ro) {
				/*
				 * someone is removing this block group,
				 * we can't jump into the have_block_group
				 * target because our list pointers are not
				 * valid
				 */
				btrfs_put_block_group(block_group);
				up_read(&space_info->groups_sem);
			} else {
				index = get_block_group_index(block_group);
				btrfs_lock_block_group(block_group, delalloc);
				goto have_block_group;
			}
		} else if (block_group) {
			btrfs_put_block_group(block_group);
		}
	}
search:
	have_caching_bg = false;
	if (index == 0 || index == __get_raid_index(flags))
		full_search = true;
	down_read(&space_info->groups_sem);
	list_for_each_entry(block_group, &space_info->block_groups[index],
			    list) {
		u64 offset;
		int cached;

		/* If the block group is read-only, we can skip it entirely. */
		if (unlikely(block_group->ro))
			continue;

		btrfs_grab_block_group(block_group, delalloc);
		search_start = block_group->key.objectid;

		/*
		 * this can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */
		if (!block_group_bits(block_group, flags)) {
			u64 extra = BTRFS_BLOCK_GROUP_DUP |
				BTRFS_BLOCK_GROUP_RAID1 |
				BTRFS_BLOCK_GROUP_RAID5 |
				BTRFS_BLOCK_GROUP_RAID6 |
				BTRFS_BLOCK_GROUP_RAID10;

			/*
			 * if they asked for extra copies and this block group
			 * doesn't have any, we can skip it right away
			 */
			if ((flags & extra) && !(block_group->flags & extra))
				goto loop;
		}

have_block_group:
		cached = block_group_cache_done(block_group);
		if (unlikely(!cached)) {
			have_caching_bg = true;
			ret = cache_block_group(block_group, 0);
			BUG_ON(ret < 0);
			ret = 0;
		}

		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
			goto loop;

		/*
		 * Ok we want to try and use the cluster allocator, so
		 * lets look there
		 */
		if (last_ptr && use_cluster) {
			struct btrfs_block_group_cache *used_block_group;
			unsigned long aligned_cluster;
			/*
			 * the refill lock keeps out other
			 * people trying to start a new cluster
			 */
			used_block_group = btrfs_lock_cluster(block_group,
							      last_ptr,
							      delalloc);
			if (!used_block_group)
				goto refill_cluster;

			if (used_block_group != block_group &&
			    (used_block_group->ro ||
			     !block_group_bits(used_block_group, flags)))
				goto release_cluster;

			offset = btrfs_alloc_from_cluster(used_block_group,
						last_ptr,
						num_bytes,
						used_block_group->key.objectid,
						&max_extent_size);
			if (offset) {
				/* we have a block, we're done */
				spin_unlock(&last_ptr->refill_lock);
				trace_btrfs_reserve_extent_cluster(fs_info,
						used_block_group,
						search_start, num_bytes);
				if (used_block_group != block_group) {
					btrfs_release_block_group(block_group,
								  delalloc);
					block_group = used_block_group;
				}
				goto checks;
			}

			WARN_ON(last_ptr->block_group != used_block_group);
release_cluster:
			/* If we are on LOOP_NO_EMPTY_SIZE, we can't
			 * set up new clusters, so lets just skip it
			 * and let the allocator find whatever block
			 * it can find. If we reach this point, we
			 * will have tried the cluster allocator
			 * plenty of times and not have found
			 * anything, so we are likely way too
			 * fragmented for the clustering stuff to find
			 * anything.
			 *
			 * However, if the cluster is taken from the
			 * current block group, release the cluster
			 * first, so that we can use it when things
			 * are similar to the current block group.
			 */
			if (loop >= LOOP_NO_EMPTY_SIZE &&
			    used_block_group != block_group) {
				spin_unlock(&last_ptr->refill_lock);
				btrfs_release_block_group(used_block_group,
							  delalloc);
				goto unclustered_alloc;
			}

			/*
			 * This cluster didn't work out, free it and
			 * start over
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);

			if (used_block_group != block_group)
				btrfs_release_block_group(used_block_group,
							  delalloc);
refill_cluster:
			if (loop >= LOOP_NO_EMPTY_SIZE) {
				spin_unlock(&last_ptr->refill_lock);
				goto unclustered_alloc;
			}

			aligned_cluster = max_t(unsigned long,
					empty_cluster + empty_size,
					block_group->full_stripe_len);

			/* allocate a cluster in this block group */
			ret = btrfs_find_space_cluster(fs_info, block_group,
						       last_ptr, search_start,
						       num_bytes,
						       aligned_cluster);
			if (ret == 0) {
				/*
				 * now pull our allocation out of this
				 * cluster
				 */
				offset = btrfs_alloc_from_cluster(block_group,
							last_ptr,
							num_bytes,
							search_start,
							&max_extent_size);
				if (offset) {
					/* we found one, proceed */
					spin_unlock(&last_ptr->refill_lock);
					trace_btrfs_reserve_extent_cluster(fs_info,
						block_group, search_start,
						num_bytes);
					goto checks;
				}
			} else if (!cached && loop > LOOP_CACHING_NOWAIT
				   && !failed_cluster_refill) {
				spin_unlock(&last_ptr->refill_lock);

				failed_cluster_refill = true;
				wait_block_group_cache_progress(block_group,
				       num_bytes + empty_cluster + empty_size);
				goto have_block_group;
			}

			/*
			 * at this point we either didn't find a cluster
			 * or we weren't able to allocate a block from our
			 * cluster.  Free the cluster we've been trying
			 * to use, and go to the next block group
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);
			spin_unlock(&last_ptr->refill_lock);
			goto loop;
		}

unclustered_alloc:
		/*
		 * We are doing an unclustered alloc, set the fragmented flag so
		 * we don't bother trying to setup a cluster again until we get
		 * more space.
		 */
		if (unlikely(last_ptr)) {
			spin_lock(&last_ptr->lock);
			last_ptr->fragmented = 1;
			spin_unlock(&last_ptr->lock);
		}
		if (cached) {
			struct btrfs_free_space_ctl *ctl =
				block_group->free_space_ctl;

			spin_lock(&ctl->tree_lock);
			if (ctl->free_space <
			    num_bytes + empty_cluster + empty_size) {
				if (ctl->free_space > max_extent_size)
					max_extent_size = ctl->free_space;
				spin_unlock(&ctl->tree_lock);
				goto loop;
			}
			spin_unlock(&ctl->tree_lock);
		}

		offset = btrfs_find_space_for_alloc(block_group, search_start,
						    num_bytes, empty_size,
						    &max_extent_size);
		/*
		 * If we didn't find a chunk, and we haven't failed on this
		 * block group before, and this block group is in the middle of
		 * caching and we are ok with waiting, then go ahead and wait
		 * for progress to be made, and set failed_alloc to true.
		 *
		 * If failed_alloc is true then we've already waited on this
		 * block group once and should move on to the next block group.
		 */
		if (!offset && !failed_alloc && !cached &&
		    loop > LOOP_CACHING_NOWAIT) {
			wait_block_group_cache_progress(block_group,
						num_bytes + empty_size);
			failed_alloc = true;
			goto have_block_group;
		} else if (!offset) {
			goto loop;
		}
checks:
		search_start = ALIGN(offset, fs_info->stripesize);

		/* move on to the next group */
		if (search_start + num_bytes >
		    block_group->key.objectid + block_group->key.offset) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}

		if (offset < search_start)
			btrfs_add_free_space(block_group, offset,
					     search_start - offset);
		BUG_ON(offset > search_start);

		ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
					       num_bytes, delalloc);
		if (ret == -EAGAIN) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}
		btrfs_inc_block_group_reservations(block_group);

		/* we are all good, lets return */
		ins->objectid = search_start;
		ins->offset = num_bytes;

		trace_btrfs_reserve_extent(fs_info, block_group,
					   search_start, num_bytes);
		btrfs_release_block_group(block_group, delalloc);
		break;
loop:
		failed_cluster_refill = false;
		failed_alloc = false;
		BUG_ON(index != get_block_group_index(block_group));
		btrfs_release_block_group(block_group, delalloc);
		cond_resched();
	}
	up_read(&space_info->groups_sem);

	if ((loop == LOOP_CACHING_NOWAIT) && have_caching_bg
		&& !orig_have_caching_bg)
		orig_have_caching_bg = true;

	if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
		goto search;

	if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
		goto search;

	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *			again
	 */
	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
		index = 0;
		if (loop == LOOP_CACHING_NOWAIT) {
			/*
			 * We want to skip the LOOP_CACHING_WAIT step if we
			 * don't have any uncached bgs and we've already done a
			 * full search through.
			 */
			if (orig_have_caching_bg || !full_search)
				loop = LOOP_CACHING_WAIT;
			else
				loop = LOOP_ALLOC_CHUNK;
		} else {
			loop++;
		}

		if (loop == LOOP_ALLOC_CHUNK) {
			struct btrfs_trans_handle *trans;
			int exist = 0;

			trans = current->journal_info;
			if (trans)
				exist = 1;
			else
				trans = btrfs_join_transaction(root);

			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}

			ret = do_chunk_alloc(trans, fs_info, flags,
					     CHUNK_ALLOC_FORCE);

			/*
			 * If we can't allocate a new chunk we've already looped
			 * through at least once, move on to the NO_EMPTY_SIZE
			 * case.
			 */
			if (ret == -ENOSPC)
				loop = LOOP_NO_EMPTY_SIZE;

			/*
			 * Do not bail out on ENOSPC since we
			 * can do more things.
			 */
			if (ret < 0 && ret != -ENOSPC)
				btrfs_abort_transaction(trans, ret);
			else
				ret = 0;
			if (!exist)
				btrfs_end_transaction(trans);
			if (ret)
				goto out;
		}

		if (loop == LOOP_NO_EMPTY_SIZE) {
			/*
			 * Don't loop again if we already have no empty_size and
			 * no empty_cluster.
			 */
			if (empty_size == 0 &&
			    empty_cluster == 0) {
				ret = -ENOSPC;
				goto out;
			}
			empty_size = 0;
			empty_cluster = 0;
		}

		goto search;
	} else if (!ins->objectid) {
		ret = -ENOSPC;
	} else if (ins->objectid) {
		if (!use_cluster && last_ptr) {
			spin_lock(&last_ptr->lock);
			last_ptr->window_start = ins->objectid;
			spin_unlock(&last_ptr->lock);
		}
		ret = 0;
	}
out:
	if (ret == -ENOSPC) {
		/*
		 * Use ins->offset to report the largest free extent we saw,
		 * so the caller knows how far it can shrink the request.
		 */
		spin_lock(&space_info->lock);
		space_info->max_extent_size = max_extent_size;
		spin_unlock(&space_info->lock);
		ins->offset = max_extent_size;
	}
	return ret;
}

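/*
 * Debugging helper: dump the usage counters for a space_info and, when
 * dump_block_groups is set, for every block group inside it.
 */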
static void dump_space_info(struct btrfs_fs_info *fs_info,
			    struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups)
{
	struct btrfs_block_group_cache *cache;
	int index = 0;

	spin_lock(&info->lock);
	btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
		   info->flags,
		   info->total_bytes - btrfs_space_info_used(info, true),
		   info->full ? "" : "not ");
	btrfs_info(fs_info,
		"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
		info->total_bytes, info->bytes_used, info->bytes_pinned,
		info->bytes_reserved, info->bytes_may_use,
		info->bytes_readonly);
	spin_unlock(&info->lock);

	if (!dump_block_groups)
		return;

	down_read(&info->groups_sem);
again:
	list_for_each_entry(cache, &info->block_groups[index], list) {
		spin_lock(&cache->lock);
		btrfs_info(fs_info,
			"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
			cache->key.objectid, cache->key.offset,
			btrfs_block_group_used(&cache->item), cache->pinned,
			cache->reserved, cache->ro ? "[readonly]" : "");
		btrfs_dump_free_space(cache, bytes);
		spin_unlock(&cache->lock);
	}
	if (++index < BTRFS_NR_RAID_TYPES)
		goto again;
	up_read(&info->groups_sem);
}

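/*
 * Reserve an extent of at most num_bytes and at least min_alloc_size.  On
 * -ENOSPC the request is halved (kept sector aligned and no smaller than
 * min_alloc_size) and retried before giving up for good.
 */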
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	bool final_tried = num_bytes == min_alloc_size;
	u64 flags;
	int ret;

	flags = get_alloc_profile_by_root(root, is_data);
again:
	WARN_ON(num_bytes < fs_info->sectorsize);
	ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
			       hint_byte, ins, flags, delalloc);
	if (!ret && !is_data) {
		btrfs_dec_block_group_reservations(fs_info, ins->objectid);
	} else if (ret == -ENOSPC) {
		if (!final_tried && ins->offset) {
			num_bytes = min(num_bytes >> 1, ins->offset);
			num_bytes = round_down(num_bytes,
					       fs_info->sectorsize);
			num_bytes = max(num_bytes, min_alloc_size);
			ram_bytes = num_bytes;
			if (num_bytes == min_alloc_size)
				final_tried = true;
			goto again;
		} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
			struct btrfs_space_info *sinfo;

			sinfo = __find_space_info(fs_info, flags);
			btrfs_err(fs_info,
				  "allocation failed flags %llu, wanted %llu",
				  flags, num_bytes);
			if (sinfo)
				dump_space_info(fs_info, sinfo, num_bytes, 1);
		}
	}

	return ret;
}

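/*
 * Common helper for dropping a reservation: either pin the extent down
 * again or hand it back to the free space cache, discarding it first when
 * the "discard" mount option is enabled.
 */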
static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
					u64 start, u64 len,
					int pin, int delalloc)
{
	struct btrfs_block_group_cache *cache;
	int ret = 0;

	cache = btrfs_lookup_block_group(fs_info, start);
	if (!cache) {
		btrfs_err(fs_info, "Unable to find block group for %llu",
			  start);
		return -ENOSPC;
	}

	if (pin)
		pin_down_extent(fs_info, cache, start, len, 1);
	else {
		if (btrfs_test_opt(fs_info, DISCARD))
			ret = btrfs_discard_extent(fs_info, start, len, NULL);
		btrfs_add_free_space(cache, start, len);
		btrfs_free_reserved_bytes(cache, len, delalloc);
		trace_btrfs_reserved_extent_free(fs_info, start, len);
	}

	btrfs_put_block_group(cache);
	return ret;
}

int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 len, int delalloc)
{
	return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
}

int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
				       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
}

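/*
 * Insert the extent item and its inline backref (shared or keyed, depending
 * on parent) for a newly allocated data extent, then update the free space
 * tree and the block group counters.
 */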
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_fs_info *fs_info,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	int ret;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
					  ins->offset);
	if (ret)
		return ret;

	ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}
	trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
	return ret;
}

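/*
 * Same as alloc_reserved_file_extent() but for tree blocks: with the
 * skinny-metadata feature the level is encoded in the key and no
 * btrfs_tree_block_info is stored in the item.
 */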
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins)
{
	int ret;
	struct btrfs_extent_item *extent_item;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	u64 num_bytes = ins->offset;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	if (!skinny_metadata)
		size += sizeof(*block_info);

	path = btrfs_alloc_path();
	if (!path) {
		btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
						   fs_info->nodesize);
		return -ENOMEM;
	}

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
						   fs_info->nodesize);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
		num_bytes = fs_info->nodesize;
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, key);
		btrfs_set_tree_block_level(leaf, block_info, level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	if (parent > 0) {
		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
					  num_bytes);
	if (ret)
		return ret;

	ret = update_block_group(trans, fs_info, ins->objectid,
				 fs_info->nodesize, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}

	trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid,
					  fs_info->nodesize);
	return ret;
}

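/*
 * Record a newly allocated file extent as a delayed ref; the extent item
 * itself is inserted when the delayed ref is run.
 */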
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
			   root->root_key.objectid, owner, offset,
			   BTRFS_ADD_DELAYED_EXTENT);

	ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
					 ins->offset, 0,
					 root->root_key.objectid, owner,
					 offset, ram_bytes,
					 BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
	return ret;
}

/*
 * this is used by the tree logging recovery code.  It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	int ret;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;

	/*
	 * Mixed block groups will exclude before processing the log so we only
	 * need to do the exclude dance if this fs isn't mixed.
	 */
	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(fs_info, ins->objectid,
					      ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	space_info->bytes_reserved += ins->offset;
	block_group->reserved += ins->offset;
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid,
					 0, owner, offset, ins, 1);
	btrfs_put_block_group(block_group);
	return ret;
}

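/*
 * Initialize a freshly allocated tree block: set its generation and lockdep
 * class, mark it uptodate and track it in the right dirty extent tree (log
 * or transaction).
 */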
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, int level)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(fs_info, bytenr);
	if (IS_ERR(buf))
		return buf;

	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
	btrfs_tree_lock(buf);
	clean_tree_block(fs_info, buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

	btrfs_set_lock_blocking(buf);
	set_extent_buffer_uptodate(buf);

	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		buf->log_index = root->log_transid % 2;
		/*
		 * we allow two log transactions at a time, use different
		 * EXTENT bit to differentiate dirty pages.
		 */
		if (buf->log_index == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					 buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
				       buf->start + buf->len - 1);
	} else {
		buf->log_index = -1;
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
				 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->dirty = true;
	/* this returns a buffer locked for blocking */
	return buf;
}

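/*
 * Take blocksize bytes for a new tree block out of the root's block reserve,
 * falling back to a fresh reservation and finally to the global reserve.
 */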
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
	      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	int ret;
	bool global_updated = false;

	block_rsv = get_block_rsv(trans, root);

	if (unlikely(block_rsv->size == 0))
		goto try_reserve;
again:
	ret = block_rsv_use_bytes(block_rsv, blocksize);
	if (!ret)
		return block_rsv;

	if (block_rsv->failfast)
		return ERR_PTR(ret);

	if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
		global_updated = true;
		update_global_block_rsv(fs_info);
		goto again;
	}

	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
		static DEFINE_RATELIMIT_STATE(_rs,
				DEFAULT_RATELIMIT_INTERVAL * 10,
				/*DEFAULT_RATELIMIT_BURST*/ 1);
		if (__ratelimit(&_rs))
			WARN(1, KERN_DEBUG
				"BTRFS: block rsv returned %d\n", ret);
	}
try_reserve:
	ret = reserve_metadata_bytes(root, block_rsv, blocksize,
				     BTRFS_RESERVE_NO_FLUSH);
	if (!ret)
		return block_rsv;

	/*
	 * If we couldn't reserve metadata bytes try and use some from
	 * the global reserve if its space info is the same as the global
	 * reservation.
	 */
	if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
	    block_rsv->space_info == global_rsv->space_info) {
		ret = block_rsv_use_bytes(global_rsv, blocksize);
		if (!ret)
			return global_rsv;
	}
	return ERR_PTR(ret);
}

static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
	block_rsv_add_bytes(block_rsv, blocksize, 0);
	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 */
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u64 parent, u64 root_objectid,
					     const struct btrfs_disk_key *key,
					     int level, u64 hint,
					     u64 empty_size)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key ins;
	struct btrfs_block_rsv *block_rsv;
	struct extent_buffer *buf;
	struct btrfs_delayed_extent_op *extent_op;
	u64 flags = 0;
	int ret;
	u32 blocksize = fs_info->nodesize;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	if (btrfs_is_testing(fs_info)) {
		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
					    level);
		if (!IS_ERR(buf))
			root->alloc_bytenr += blocksize;
		return buf;
	}
#endif

	block_rsv = use_block_rsv(trans, root, blocksize);
	if (IS_ERR(block_rsv))
		return ERR_CAST(block_rsv);

	ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
				   empty_size, hint, &ins, 0, 0);
	if (ret)
		goto out_unuse;

	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_free_reserved;
	}

	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent == 0)
			parent = ins.objectid;
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else
		BUG_ON(parent > 0);

	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		extent_op = btrfs_alloc_delayed_extent_op();
		if (!extent_op) {
			ret = -ENOMEM;
			goto out_free_buf;
		}
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		extent_op->update_key = skinny_metadata ? false : true;
		extent_op->update_flags = true;
		extent_op->is_data = false;
		extent_op->level = level;

		btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
				   root_objectid, level, 0,
				   BTRFS_ADD_DELAYED_EXTENT);
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
						 ins.offset, parent,
						 root_objectid, level,
						 BTRFS_ADD_DELAYED_EXTENT,
						 extent_op, NULL, NULL);
		if (ret)
			goto out_free_delayed;
	}
	return buf;

out_free_delayed:
	btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
	free_extent_buffer(buf);
out_free_reserved:
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
out_unuse:
	unuse_block_rsv(fs_info, block_rsv, blocksize);
	return ERR_PTR(ret);
}

struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];
	u64 flags[BTRFS_MAX_LEVEL];
	struct btrfs_key update_progress;
	int stage;
	int level;
	int shared_level;
	int update_ref;
	int keep_locks;
	int reada_slot;
	int reada_count;
	int for_reloc;
};

#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2

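/*
 * Read ahead the lower level tree blocks referenced by the node at
 * path->nodes[wc->level], skipping blocks the walk will not visit, and
 * adapt the readahead window to how the caller is progressing.
 */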
static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct walk_control *wc,
				     struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 refs;
	u64 flags;
	u32 nritems;
	struct btrfs_key key;
	struct extent_buffer *eb;
	int ret;
	int slot;
	int nread = 0;

	if (path->slots[wc->level] < wc->reada_slot) {
		wc->reada_count = wc->reada_count * 2 / 3;
		wc->reada_count = max(wc->reada_count, 2);
	} else {
		wc->reada_count = wc->reada_count * 3 / 2;
		wc->reada_count = min_t(int, wc->reada_count,
					BTRFS_NODEPTRS_PER_BLOCK(fs_info));
	}

	eb = path->nodes[wc->level];
	nritems = btrfs_header_nritems(eb);

	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
		if (nread >= wc->reada_count)
			break;

		cond_resched();
		bytenr = btrfs_node_blockptr(eb, slot);
		generation = btrfs_node_ptr_generation(eb, slot);

		if (slot == path->slots[wc->level])
			goto reada;

		if (wc->stage == UPDATE_BACKREF &&
		    generation <= root->root_key.offset)
			continue;

		/* We don't lock the tree block, it's OK to be racy here */
		ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
					       wc->level - 1, 1, &refs,
					       &flags);
		/* We don't care about errors in readahead. */
		if (ret < 0)
			continue;
		BUG_ON(refs == 0);

		if (wc->stage == DROP_REFERENCE) {
			if (refs == 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		readahead_tree_block(fs_info, bytenr);
		nread++;
	}
	wc->reada_slot = slot;
}

/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, fs_info,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
						  eb->len, flag,
						  btrfs_header_level(eb), 0);
		BUG_ON(ret); /* -ENOMEM */
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}

/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need to update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, the callers of this function
 * skip the subtree rooted at the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = fs_info->nodesize;

	next = find_extent_buffer(fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(fs_info, bytenr);
		if (IS_ERR(next))
			return PTR_ERR(next);

		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
					       level - 1);
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0)
		goto out_unlock;

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(fs_info, "Missing references.");
		ret = -EIO;
		goto out_unlock;
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(fs_info, bytenr, generation);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	level--;
	ASSERT(level == btrfs_header_level(next));
	if (level != btrfs_header_level(next)) {
		btrfs_err(root->fs_info, "mismatched level");
		ret = -EIO;
		goto out_unlock;
	}
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			ASSERT(root->root_key.objectid ==
			       btrfs_header_owner(path->nodes[level]));
			if (root->root_key.objectid !=
			    btrfs_header_owner(path->nodes[level])) {
				btrfs_err(root->fs_info,
					  "mismatched block owner");
				ret = -EIO;
				goto out_unlock;
			}
			parent = 0;
		}

		if (need_account) {
			ret = btrfs_qgroup_trace_subtree(trans, root, next,
							 generation, level - 1);
			if (ret) {
				btrfs_err_rl(fs_info,
					     "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
					     ret);
			}
		}
		ret = btrfs_free_extent(trans, root, bytenr, blocksize,
					parent, root->root_key.objectid,
					level - 1, 0);
		if (ret)
			goto out_unlock;
	}

	*lookup_info = 1;
	ret = 1;

out_unlock:
	btrfs_tree_unlock(next);
	free_extent_buffer(next);

	return ret;
}

/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, fs_info,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret); /* -ENOMEM */
			ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb);
			if (ret) {
				btrfs_err_rl(fs_info,
					     "error %d accounting leaf items. Quota is out of sync, rescan required.",
					     ret);
			}
		}
		/* make block locked assertion in clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		}
		clean_tree_block(fs_info, eb);
	}

	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(eb));
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level + 1]));
	}

	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;
}

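/*
 * Walk down the tree from wc->level, processing each block with
 * walk_down_proc() and descending with do_walk_down() until we hit a leaf
 * or a subtree that should be skipped.
 */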
static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc)
{
	int level = wc->level;
	int lookup_info = 1;
	int ret;

	while (level >= 0) {
		ret = walk_down_proc(trans, root, path, wc, lookup_info);
		if (ret > 0)
			break;

		if (level == 0)
			break;

		if (path->slots[level] >=
		    btrfs_header_nritems(path->nodes[level]))
			break;

		ret = do_walk_down(trans, root, path, wc, &lookup_info);
		if (ret > 0) {
			path->slots[level]++;
			continue;
		} else if (ret < 0)
			return ret;
		level = wc->level;
	}
	return 0;
}

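/*
 * Walk back up the tree, dropping our references with walk_up_proc() and
 * advancing to the next sibling slot when one exists.  Returns 1 once the
 * whole tree has been walked.
 */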
static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int max_level)
{
	int level = wc->level;
	int ret;

	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
	while (level < max_level && path->nodes[level]) {
		wc->level = level;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			path->slots[level]++;
			return 0;
		} else {
			ret = walk_up_proc(trans, root, path, wc);
			if (ret > 0)
				return 0;

			if (path->locks[level]) {
				btrfs_tree_unlock_rw(path->nodes[level],
						     path->locks[level]);
				path->locks[level] = 0;
			}
			free_extent_buffer(path->nodes[level]);
			path->nodes[level] = NULL;
			level++;
		}
	}
	return 1;
}
9028
9029 /*
9030 * drop a subvolume tree.
9031 *
9032 * this function traverses the tree freeing any blocks that are
9033 * only referenced by the tree.
9034 *
9035 * when a shared tree block is found, this function decreases its
9036 * reference count by one. if update_ref is true, this function
9037 * also makes sure backrefs for the shared block and all lower
9038 * level blocks are properly updated.
9039 *
9040 * If called with for_reloc == 0, may exit early with -EAGAIN
9041 */
9042int btrfs_drop_snapshot(struct btrfs_root *root,
9043 struct btrfs_block_rsv *block_rsv, int update_ref,
9044 int for_reloc)
9045{
9046 struct btrfs_fs_info *fs_info = root->fs_info;
9047 struct btrfs_path *path;
9048 struct btrfs_trans_handle *trans;
9049 struct btrfs_root *tree_root = fs_info->tree_root;
9050 struct btrfs_root_item *root_item = &root->root_item;
9051 struct walk_control *wc;
9052 struct btrfs_key key;
9053 int err = 0;
9054 int ret;
9055 int level;
9056 bool root_dropped = false;
9057
9058 btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
9059
9060 path = btrfs_alloc_path();
9061 if (!path) {
9062 err = -ENOMEM;
9063 goto out;
9064 }
9065
9066 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9067 if (!wc) {
9068 btrfs_free_path(path);
9069 err = -ENOMEM;
9070 goto out;
9071 }
9072
9073 trans = btrfs_start_transaction(tree_root, 0);
9074 if (IS_ERR(trans)) {
9075 err = PTR_ERR(trans);
9076 goto out_free;
9077 }
9078
9079 if (block_rsv)
9080 trans->block_rsv = block_rsv;
9081
9082 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
9083 level = btrfs_header_level(root->node);
9084 path->nodes[level] = btrfs_lock_root_node(root);
9085 btrfs_set_lock_blocking(path->nodes[level]);
9086 path->slots[level] = 0;
9087 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9088 memset(&wc->update_progress, 0,
9089 sizeof(wc->update_progress));
9090 } else {
9091 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
9092 memcpy(&wc->update_progress, &key,
9093 sizeof(wc->update_progress));
9094
9095 level = root_item->drop_level;
9096 BUG_ON(level == 0);
9097 path->lowest_level = level;
9098 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9099 path->lowest_level = 0;
9100 if (ret < 0) {
9101 err = ret;
9102 goto out_end_trans;
9103 }
9104 WARN_ON(ret > 0);
9105
9106 /*
9107 * unlock our path, this is safe because only this
9108 * function is allowed to delete this snapshot
9109 */
9110 btrfs_unlock_up_safe(path, 0);
9111
9112 level = btrfs_header_level(root->node);
9113 while (1) {
9114 btrfs_tree_lock(path->nodes[level]);
9115 btrfs_set_lock_blocking(path->nodes[level]);
9116 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9117
9118 ret = btrfs_lookup_extent_info(trans, fs_info,
9119 path->nodes[level]->start,
9120 level, 1, &wc->refs[level],
9121 &wc->flags[level]);
9122 if (ret < 0) {
9123 err = ret;
9124 goto out_end_trans;
9125 }
9126 BUG_ON(wc->refs[level] == 0);
9127
9128 if (level == root_item->drop_level)
9129 break;
9130
9131 btrfs_tree_unlock(path->nodes[level]);
9132 path->locks[level] = 0;
9133 WARN_ON(wc->refs[level] != 1);
9134 level--;
9135 }
9136 }
9137
9138 wc->level = level;
9139 wc->shared_level = -1;
9140 wc->stage = DROP_REFERENCE;
9141 wc->update_ref = update_ref;
9142 wc->keep_locks = 0;
9143 wc->for_reloc = for_reloc;
9144 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9145
9146 while (1) {
9147
9148 ret = walk_down_tree(trans, root, path, wc);
9149 if (ret < 0) {
9150 err = ret;
9151 break;
9152 }
9153
9154 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9155 if (ret < 0) {
9156 err = ret;
9157 break;
9158 }
9159
9160 if (ret > 0) {
9161 BUG_ON(wc->stage != DROP_REFERENCE);
9162 break;
9163 }
9164
9165 if (wc->stage == DROP_REFERENCE) {
9166 level = wc->level;
9167 btrfs_node_key(path->nodes[level],
9168 &root_item->drop_progress,
9169 path->slots[level]);
9170 root_item->drop_level = level;
9171 }
9172
9173 BUG_ON(wc->level == 0);
9174 if (btrfs_should_end_transaction(trans) ||
9175 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
9176 ret = btrfs_update_root(trans, tree_root,
9177 &root->root_key,
9178 root_item);
9179 if (ret) {
9180 btrfs_abort_transaction(trans, ret);
9181 err = ret;
9182 goto out_end_trans;
9183 }
9184
9185 btrfs_end_transaction_throttle(trans);
9186 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
9187 btrfs_debug(fs_info,
9188 "drop snapshot early exit");
9189 err = -EAGAIN;
9190 goto out_free;
9191 }
9192
9193 trans = btrfs_start_transaction(tree_root, 0);
9194 if (IS_ERR(trans)) {
9195 err = PTR_ERR(trans);
9196 goto out_free;
9197 }
9198 if (block_rsv)
9199 trans->block_rsv = block_rsv;
9200 }
9201 }
9202 btrfs_release_path(path);
9203 if (err)
9204 goto out_end_trans;
9205
9206 ret = btrfs_del_root(trans, fs_info, &root->root_key);
9207 if (ret) {
9208 btrfs_abort_transaction(trans, ret);
9209 err = ret;
9210 goto out_end_trans;
9211 }
9212
9213 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9214 ret = btrfs_find_root(tree_root, &root->root_key, path,
9215 NULL, NULL);
9216 if (ret < 0) {
9217 btrfs_abort_transaction(trans, ret);
9218 err = ret;
9219 goto out_end_trans;
9220 } else if (ret > 0) {
9221 /* if we fail to delete the orphan item this time
9222 * around, it'll get picked up the next time.
9223 *
9224 * The most common failure here is just -ENOENT.
9225 */
9226 btrfs_del_orphan_item(trans, tree_root,
9227 root->root_key.objectid);
9228 }
9229 }
9230
9231 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9232 btrfs_add_dropped_root(trans, root);
9233 } else {
9234 free_extent_buffer(root->node);
9235 free_extent_buffer(root->commit_root);
9236 btrfs_put_fs_root(root);
9237 }
9238 root_dropped = true;
9239out_end_trans:
9240 btrfs_end_transaction_throttle(trans);
9241out_free:
9242 kfree(wc);
9243 btrfs_free_path(path);
9244out:
9245 /*
9246 * If we did not completely drop the root (e.g. early exit with
9247 * -EAGAIN or a transaction error), add it back to the dead roots
9248 * list so the cleaner kthread retries the drop later; otherwise
9249 * the partially deleted root would never be cleaned up.  Reloc
9250 * roots are handled by the relocation code itself.
9251 */
9252 if (!for_reloc && !root_dropped)
9253 btrfs_add_dead_root(root);
9254 if (err && err != -EAGAIN)
9255 btrfs_handle_fs_error(fs_info, err, NULL);
9256 return err;
9257}
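/*
 * Example (editor's sketch, not part of the original file): the typical
 * caller is the cleaner kthread, which picks a dead root and drops it,
 * enabling update_ref for roots using the mixed backref scheme, roughly:
 *
 *	if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV)
 *		ret = btrfs_drop_snapshot(root, NULL, 0, 0);
 *	else
 *		ret = btrfs_drop_snapshot(root, NULL, 1, 0);
 *
 * -EAGAIN only means the cleaner backed off; the root stays on the dead
 * roots list (see the out: label above) and the drop is retried later.
 */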
9258
9259 /*
9260 * drop subtree rooted at tree block 'node'.
9261 *
9262 * NOTE: this function will unlock and release tree block 'node'
9263 * only used by relocation code
9264 */
9265int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9266 struct btrfs_root *root,
9267 struct extent_buffer *node,
9268 struct extent_buffer *parent)
9269{
9270 struct btrfs_fs_info *fs_info = root->fs_info;
9271 struct btrfs_path *path;
9272 struct walk_control *wc;
9273 int level;
9274 int parent_level;
9275 int ret = 0;
9276 int wret;
9277
9278 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9279
9280 path = btrfs_alloc_path();
9281 if (!path)
9282 return -ENOMEM;
9283
9284 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9285 if (!wc) {
9286 btrfs_free_path(path);
9287 return -ENOMEM;
9288 }
9289
9290 btrfs_assert_tree_locked(parent);
9291 parent_level = btrfs_header_level(parent);
9292 extent_buffer_get(parent);
9293 path->nodes[parent_level] = parent;
9294 path->slots[parent_level] = btrfs_header_nritems(parent);
9295
9296 btrfs_assert_tree_locked(node);
9297 level = btrfs_header_level(node);
9298 path->nodes[level] = node;
9299 path->slots[level] = 0;
9300 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9301
9302 wc->refs[parent_level] = 1;
9303 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9304 wc->level = level;
9305 wc->shared_level = -1;
9306 wc->stage = DROP_REFERENCE;
9307 wc->update_ref = 0;
9308 wc->keep_locks = 1;
9309 wc->for_reloc = 1;
9310 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9311
9312 while (1) {
9313 wret = walk_down_tree(trans, root, path, wc);
9314 if (wret < 0) {
9315 ret = wret;
9316 break;
9317 }
9318
9319 wret = walk_up_tree(trans, root, path, wc, parent_level);
9320 if (wret < 0)
9321 ret = wret;
9322 if (wret != 0)
9323 break;
9324 }
9325
9326 kfree(wc);
9327 btrfs_free_path(path);
9328 return ret;
9329}
9330
9331static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9332{
9333 u64 num_devices;
9334 u64 stripped;
9335
9336 /*
9337 * if restripe for this chunk_type is on pick target profile and
9338 * return, otherwise do the usual balance
9339 */
9340 stripped = get_restripe_target(fs_info, flags);
9341 if (stripped)
9342 return extended_to_chunk(stripped);
9343
9344 num_devices = fs_info->fs_devices->rw_devices;
9345
9346 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9347 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9348 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9349
9350 if (num_devices == 1) {
9351 stripped |= BTRFS_BLOCK_GROUP_DUP;
9352 stripped = flags & ~stripped;
9353
9354 /* turn raid0 into single device chunks */
9355 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9356 return stripped;
9357
9358 /* turn mirroring into duplication */
9359 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9360 BTRFS_BLOCK_GROUP_RAID10))
9361 return stripped | BTRFS_BLOCK_GROUP_DUP;
9362 } else {
9363 /* they already had raid on here, just return */
9364 if (flags & stripped)
9365 return flags;
9366
9367 stripped |= BTRFS_BLOCK_GROUP_DUP;
9368 stripped = flags & ~stripped;
9369
9370
9371 if (flags & BTRFS_BLOCK_GROUP_DUP)
9372 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9373
9374 /* this is drive concat, leave it alone */
9375 }
9376
9377 return flags;
9378}
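/*
 * Worked example (editor's note): with a single rw device, metadata
 * created as RAID1 cannot keep two copies on separate devices, so
 * update_block_group_flags() maps RAID1/RAID10 to DUP and RAID0 to
 * single.  With two or more devices the mapping runs the other way:
 * DUP becomes RAID1, and profiles that already stripe or mirror are
 * returned unchanged.
 */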
9379
9380static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9381{
9382 struct btrfs_space_info *sinfo = cache->space_info;
9383 u64 num_bytes;
9384 u64 min_allocable_bytes;
9385 int ret = -ENOSPC;
9386
9387 /*
9388 * We need some metadata space and system metadata space for
9389 * allocating chunks in some corner cases until we force to set
9390 * it to be readonly.
9391 */
9392 if ((sinfo->flags &
9393 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9394 !force)
9395 min_allocable_bytes = SZ_1M;
9396 else
9397 min_allocable_bytes = 0;
9398
9399 spin_lock(&sinfo->lock);
9400 spin_lock(&cache->lock);
9401
9402 if (cache->ro) {
9403 cache->ro++;
9404 ret = 0;
9405 goto out;
9406 }
9407
9408 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9409 cache->bytes_super - btrfs_block_group_used(&cache->item);
9410
9411 if (btrfs_space_info_used(sinfo, true) + num_bytes +
9412 min_allocable_bytes <= sinfo->total_bytes) {
9413 sinfo->bytes_readonly += num_bytes;
9414 cache->ro++;
9415 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9416 ret = 0;
9417 }
9418out:
9419 spin_unlock(&cache->lock);
9420 spin_unlock(&sinfo->lock);
9421 return ret;
9422}
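/*
 * Worked example (editor's note, all numbers are illustrative): for a
 * 1GiB metadata block group with 100MiB used, 50MiB pinned, 16MiB
 * reserved and 32MiB of super stripes, num_bytes is the still-unused
 * portion:
 *
 *	num_bytes = 1024M - 16M - 50M - 32M - 100M = 826M
 *
 * Marking the group read-only succeeds only if the rest of the space
 * info can absorb those 826MiB (plus the 1MiB slack for metadata and
 * system groups) without exceeding total_bytes.
 */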
9423
9424int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
9425 struct btrfs_block_group_cache *cache)
9426
9427{
9428 struct btrfs_trans_handle *trans;
9429 u64 alloc_flags;
9430 int ret;
9431
9432again:
9433 trans = btrfs_join_transaction(fs_info->extent_root);
9434 if (IS_ERR(trans))
9435 return PTR_ERR(trans);
9436
9437 /*
9438 * we're not allowed to set block groups readonly after the dirty
9439 * block groups cache has started writing.  If it already started,
9440 * back off and let this transaction commit
9441 */
9442 mutex_lock(&fs_info->ro_block_group_mutex);
9443 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9444 u64 transid = trans->transid;
9445
9446 mutex_unlock(&fs_info->ro_block_group_mutex);
9447 btrfs_end_transaction(trans);
9448
9449 ret = btrfs_wait_for_commit(fs_info, transid);
9450 if (ret)
9451 return ret;
9452 goto again;
9453 }
9454
9455 /*
9456 * if we are changing raid levels, try to allocate a corresponding
9457 * block group with the new raid level.
9458 */
9459 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9460 if (alloc_flags != cache->flags) {
9461 ret = do_chunk_alloc(trans, fs_info, alloc_flags,
9462 CHUNK_ALLOC_FORCE);
9463 /*
9464 * ENOSPC is allowed here, we may have enough space
9465 * already allocated at the new raid level to
9466 * carry on
9467 */
9468 if (ret == -ENOSPC)
9469 ret = 0;
9470 if (ret < 0)
9471 goto out;
9472 }
9473
9474 ret = inc_block_group_ro(cache, 0);
9475 if (!ret)
9476 goto out;
9477 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9478 ret = do_chunk_alloc(trans, fs_info, alloc_flags,
9479 CHUNK_ALLOC_FORCE);
9480 if (ret < 0)
9481 goto out;
9482 ret = inc_block_group_ro(cache, 0);
9483out:
9484 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9485 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9486 mutex_lock(&fs_info->chunk_mutex);
9487 check_system_chunk(trans, fs_info, alloc_flags);
9488 mutex_unlock(&fs_info->chunk_mutex);
9489 }
9490 mutex_unlock(&fs_info->ro_block_group_mutex);
9491
9492 btrfs_end_transaction(trans);
9493 return ret;
9494}
9495
9496int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
9497 struct btrfs_fs_info *fs_info, u64 type)
9498{
9499 u64 alloc_flags = get_alloc_profile(fs_info, type);
9500
9501 return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE);
9502}
9503
9504 /*
9505 * helper to account the unused space of all the readonly block groups
9506 * in the space_info. takes mirrors into account.
9507 */
9508u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9509{
9510 struct btrfs_block_group_cache *block_group;
9511 u64 free_bytes = 0;
9512 int factor;
9513
9514 /* it's df, we don't care if it's racy */
9515 if (list_empty(&sinfo->ro_bgs))
9516 return 0;
9517
9518 spin_lock(&sinfo->lock);
9519 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9520 spin_lock(&block_group->lock);
9521
9522 if (!block_group->ro) {
9523 spin_unlock(&block_group->lock);
9524 continue;
9525 }
9526
9527 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
9528 BTRFS_BLOCK_GROUP_RAID10 |
9529 BTRFS_BLOCK_GROUP_DUP))
9530 factor = 2;
9531 else
9532 factor = 1;
9533
9534 free_bytes += (block_group->key.offset -
9535 btrfs_block_group_used(&block_group->item)) *
9536 factor;
9537
9538 spin_unlock(&block_group->lock);
9539 }
9540 spin_unlock(&sinfo->lock);
9541
9542 return free_bytes;
9543}
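/*
 * Worked example (editor's note): a read-only RAID1 block group of 1GiB
 * with 400MiB used contributes (1024M - 400M) * 2 = 1248M of raw device
 * space here, since every logical byte occupies two bytes on disk for
 * RAID1/RAID10/DUP (factor 2); single and RAID0 groups use factor 1.
 */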
9544
9545void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
9546{
9547 struct btrfs_space_info *sinfo = cache->space_info;
9548 u64 num_bytes;
9549
9550 BUG_ON(!cache->ro);
9551
9552 spin_lock(&sinfo->lock);
9553 spin_lock(&cache->lock);
9554 if (!--cache->ro) {
9555 num_bytes = cache->key.offset - cache->reserved -
9556 cache->pinned - cache->bytes_super -
9557 btrfs_block_group_used(&cache->item);
9558 sinfo->bytes_readonly -= num_bytes;
9559 list_del_init(&cache->ro_list);
9560 }
9561 spin_unlock(&cache->lock);
9562 spin_unlock(&sinfo->lock);
9563}
9564
9565 /*
9566 * checks to see if it's even possible to relocate this block group.
9567 *
9568 * @return - -1 if it's not a good idea to relocate this block group,
9569 * 0 if it's ok to go ahead and try.
9570 */
9571int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
9572{
9573 struct btrfs_root *root = fs_info->extent_root;
9574 struct btrfs_block_group_cache *block_group;
9575 struct btrfs_space_info *space_info;
9576 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
9577 struct btrfs_device *device;
9578 struct btrfs_trans_handle *trans;
9579 u64 min_free;
9580 u64 dev_min = 1;
9581 u64 dev_nr = 0;
9582 u64 target;
9583 int debug;
9584 int index;
9585 int full = 0;
9586 int ret = 0;
9587
9588 debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG);
9589
9590 block_group = btrfs_lookup_block_group(fs_info, bytenr);
9591
9592 /* odd, couldn't find the block group, leave it alone */
9593 if (!block_group) {
9594 if (debug)
9595 btrfs_warn(fs_info,
9596 "can't find block group for bytenr %llu",
9597 bytenr);
9598 return -1;
9599 }
9600
9601 min_free = btrfs_block_group_used(&block_group->item);
9602
9603 /* no bytes used, we're good */
9604 if (!min_free)
9605 goto out;
9606
9607 space_info = block_group->space_info;
9608 spin_lock(&space_info->lock);
9609
9610 full = space_info->full;
9611
9612 /*
9613 * if this is the last block group we have in this space, we can't
9614 * relocate it unless we're able to allocate a new chunk below.
9615 *
9616 * Otherwise, we need to make sure we have room in the space to handle
9617 * all of the extents from this block group.  If we can, we're good
9618 */
9619 if ((space_info->total_bytes != block_group->key.offset) &&
9620 (btrfs_space_info_used(space_info, false) + min_free <
9621 space_info->total_bytes)) {
9622 spin_unlock(&space_info->lock);
9623 goto out;
9624 }
9625 spin_unlock(&space_info->lock);
9626
9627 /*
9628 * ok we don't have enough space, but maybe we have free space on our
9629 * devices to allocate new chunks for relocation, so loop through our
9630 * alloc devices and guess if we have enough space.  if this block
9631 * group is going to be restriped, run checks against the target
9632 * profile instead of the current one.
9633 */
9634 ret = -1;
9635
9636 /*
9637 * index:
9638 *	0: raid10
9639 *	1: raid1
9640 *	2: dup
9641 *	3: raid0
9642 *	4: single
9643 */
9644 target = get_restripe_target(fs_info, block_group->flags);
9645 if (target) {
9646 index = __get_raid_index(extended_to_chunk(target));
9647 } else {
9648 /*
9649 * this is just a balance, so if we were marked as full
9650 * we know there is no space for a new chunk
9651 */
9652 if (full) {
9653 if (debug)
9654 btrfs_warn(fs_info,
9655 "no space to alloc new chunk for block group %llu",
9656 block_group->key.objectid);
9657 goto out;
9658 }
9659
9660 index = get_block_group_index(block_group);
9661 }
9662
9663 if (index == BTRFS_RAID_RAID10) {
9664 dev_min = 4;
9665 /* Divide by 2 */
9666 min_free >>= 1;
9667 } else if (index == BTRFS_RAID_RAID1) {
9668 dev_min = 2;
9669 } else if (index == BTRFS_RAID_DUP) {
9670 /* Multiply by 2 */
9671 min_free <<= 1;
9672 } else if (index == BTRFS_RAID_RAID0) {
9673 dev_min = fs_devices->rw_devices;
9674 min_free = div64_u64(min_free, dev_min);
9675 }
9676
9677 /* We need to do this so that we can look at pending chunks */
9678 trans = btrfs_join_transaction(root);
9679 if (IS_ERR(trans)) {
9680 ret = PTR_ERR(trans);
9681 goto out;
9682 }
9683
9684 mutex_lock(&fs_info->chunk_mutex);
9685 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9686 u64 dev_offset;
9687
9688 /*
9689 * check to make sure we can actually find a chunk with enough
9690 * space to fit our block group in.
9691 */
9692 if (device->total_bytes > device->bytes_used + min_free &&
9693 !device->is_tgtdev_for_dev_replace) {
9694 ret = find_free_dev_extent(trans, device, min_free,
9695 &dev_offset, NULL);
9696 if (!ret)
9697 dev_nr++;
9698
9699 if (dev_nr >= dev_min)
9700 break;
9701
9702 ret = -1;
9703 }
9704 }
9705 if (debug && ret == -1)
9706 btrfs_warn(fs_info,
9707 "no space to allocate a new chunk for block group %llu",
9708 block_group->key.objectid);
9709 mutex_unlock(&fs_info->chunk_mutex);
9710 btrfs_end_transaction(trans);
9711out:
9712 btrfs_put_block_group(block_group);
9713 return ret;
9714}
9715
9716static int find_first_block_group(struct btrfs_fs_info *fs_info,
9717 struct btrfs_path *path,
9718 struct btrfs_key *key)
9719{
9720 struct btrfs_root *root = fs_info->extent_root;
9721 int ret = 0;
9722 struct btrfs_key found_key;
9723 struct extent_buffer *leaf;
9724 int slot;
9725
9726 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9727 if (ret < 0)
9728 goto out;
9729
9730 while (1) {
9731 slot = path->slots[0];
9732 leaf = path->nodes[0];
9733 if (slot >= btrfs_header_nritems(leaf)) {
9734 ret = btrfs_next_leaf(root, path);
9735 if (ret == 0)
9736 continue;
9737 if (ret < 0)
9738 goto out;
9739 break;
9740 }
9741 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9742
9743 if (found_key.objectid >= key->objectid &&
9744 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9745 struct extent_map_tree *em_tree;
9746 struct extent_map *em;
9747
9748 em_tree = &root->fs_info->mapping_tree.map_tree;
9749 read_lock(&em_tree->lock);
9750 em = lookup_extent_mapping(em_tree, found_key.objectid,
9751 found_key.offset);
9752 read_unlock(&em_tree->lock);
9753 if (!em) {
9754 btrfs_err(fs_info,
9755 "logical %llu len %llu found bg but no related chunk",
9756 found_key.objectid, found_key.offset);
9757 ret = -ENOENT;
9758 } else {
9759 ret = 0;
9760 }
9761 free_extent_map(em);
9762 goto out;
9763 }
9764 path->slots[0]++;
9765 }
9766out:
9767 return ret;
9768}
9769
9770void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9771{
9772 struct btrfs_block_group_cache *block_group;
9773 u64 last = 0;
9774
9775 while (1) {
9776 struct inode *inode;
9777
9778 block_group = btrfs_lookup_first_block_group(info, last);
9779 while (block_group) {
9780 spin_lock(&block_group->lock);
9781 if (block_group->iref)
9782 break;
9783 spin_unlock(&block_group->lock);
9784 block_group = next_block_group(info, block_group);
9785 }
9786 if (!block_group) {
9787 if (last == 0)
9788 break;
9789 last = 0;
9790 continue;
9791 }
9792
9793 inode = block_group->inode;
9794 block_group->iref = 0;
9795 block_group->inode = NULL;
9796 spin_unlock(&block_group->lock);
9797 ASSERT(block_group->io_ctl.inode == NULL);
9798 iput(inode);
9799 last = block_group->key.objectid + block_group->key.offset;
9800 btrfs_put_block_group(block_group);
9801 }
9802}
9803
9804 /*
9805 * Must be called only after stopping all workers, since we could have block
9806 * group caching kthreads running, and therefore they could race with us if
9807 * we freed the block groups before stopping them.
9808 */
9809int btrfs_free_block_groups(struct btrfs_fs_info *info)
9810{
9811 struct btrfs_block_group_cache *block_group;
9812 struct btrfs_space_info *space_info;
9813 struct btrfs_caching_control *caching_ctl;
9814 struct rb_node *n;
9815
9816 down_write(&info->commit_root_sem);
9817 while (!list_empty(&info->caching_block_groups)) {
9818 caching_ctl = list_entry(info->caching_block_groups.next,
9819 struct btrfs_caching_control, list);
9820 list_del(&caching_ctl->list);
9821 put_caching_control(caching_ctl);
9822 }
9823 up_write(&info->commit_root_sem);
9824
9825 spin_lock(&info->unused_bgs_lock);
9826 while (!list_empty(&info->unused_bgs)) {
9827 block_group = list_first_entry(&info->unused_bgs,
9828 struct btrfs_block_group_cache,
9829 bg_list);
9830 list_del_init(&block_group->bg_list);
9831 btrfs_put_block_group(block_group);
9832 }
9833 spin_unlock(&info->unused_bgs_lock);
9834
9835 spin_lock(&info->block_group_cache_lock);
9836 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9837 block_group = rb_entry(n, struct btrfs_block_group_cache,
9838 cache_node);
9839 rb_erase(&block_group->cache_node,
9840 &info->block_group_cache_tree);
9841 RB_CLEAR_NODE(&block_group->cache_node);
9842 spin_unlock(&info->block_group_cache_lock);
9843
9844 down_write(&block_group->space_info->groups_sem);
9845 list_del(&block_group->list);
9846 up_write(&block_group->space_info->groups_sem);
9847
9848 /*
9849 * We haven't cached this block group, which means we could
9850 * possibly have excluded extents on this block group.
9851 */
9852 if (block_group->cached == BTRFS_CACHE_NO ||
9853 block_group->cached == BTRFS_CACHE_ERROR)
9854 free_excluded_extents(info, block_group);
9855
9856 btrfs_remove_free_space_cache(block_group);
9857 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
9858 ASSERT(list_empty(&block_group->dirty_list));
9859 ASSERT(list_empty(&block_group->io_list));
9860 ASSERT(list_empty(&block_group->bg_list));
9861 ASSERT(atomic_read(&block_group->count) == 1);
9862 btrfs_put_block_group(block_group);
9863
9864 spin_lock(&info->block_group_cache_lock);
9865 }
9866 spin_unlock(&info->block_group_cache_lock);
9867
9868 /*
9869 * now that all the block groups are freed, go through and free all the
9870 * space_info structs.  This is only called during the final stages of
9871 * unmount, and so we know nobody is using them.  We call
9872 * synchronize_rcu() once before we start, once again after.
9873 */
9874 synchronize_rcu();
9875
9876 release_global_block_rsv(info);
9877
9878 while (!list_empty(&info->space_info)) {
9879 int i;
9880
9881 space_info = list_entry(info->space_info.next,
9882 struct btrfs_space_info,
9883 list);
9884
9885 /*
9886 * Do not hide this behind enospc_debug, this is actually
9887 * important and indicates a real bug if this happens.
9888 */
9889 if (WARN_ON(space_info->bytes_pinned > 0 ||
9890 space_info->bytes_reserved > 0 ||
9891 space_info->bytes_may_use > 0))
9892 dump_space_info(info, space_info, 0, 0);
9893 list_del(&space_info->list);
9894 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9895 struct kobject *kobj;
9896 kobj = space_info->block_group_kobjs[i];
9897 space_info->block_group_kobjs[i] = NULL;
9898 if (kobj) {
9899 kobject_del(kobj);
9900 kobject_put(kobj);
9901 }
9902 }
9903 kobject_del(&space_info->kobj);
9904 kobject_put(&space_info->kobj);
9905 }
9906 return 0;
9907}
9908
9909static void link_block_group(struct btrfs_block_group_cache *cache)
9910{
9911 struct btrfs_space_info *space_info = cache->space_info;
9912 int index = get_block_group_index(cache);
9913 bool first = false;
9914
9915 down_write(&space_info->groups_sem);
9916 if (list_empty(&space_info->block_groups[index]))
9917 first = true;
9918 list_add_tail(&cache->list, &space_info->block_groups[index]);
9919 up_write(&space_info->groups_sem);
9920
9921 if (first) {
9922 struct raid_kobject *rkobj;
9923 int ret;
9924
9925 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9926 if (!rkobj)
9927 goto out_err;
9928 rkobj->raid_type = index;
9929 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9930 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9931 "%s", get_raid_name(index));
9932 if (ret) {
9933 kobject_put(&rkobj->kobj);
9934 goto out_err;
9935 }
9936 space_info->block_group_kobjs[index] = &rkobj->kobj;
9937 }
9938
9939 return;
9940out_err:
9941 btrfs_warn(cache->fs_info,
9942 "failed to add kobject for block cache, ignoring");
9943}
9944
9945static struct btrfs_block_group_cache *
9946btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
9947 u64 start, u64 size)
9948{
9949 struct btrfs_block_group_cache *cache;
9950
9951 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9952 if (!cache)
9953 return NULL;
9954
9955 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
9956 GFP_NOFS);
9957 if (!cache->free_space_ctl) {
9958 kfree(cache);
9959 return NULL;
9960 }
9961
9962 cache->key.objectid = start;
9963 cache->key.offset = size;
9964 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9965
9966 cache->fs_info = fs_info;
9967 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
9968 set_free_space_tree_thresholds(cache);
9969
9970 atomic_set(&cache->count, 1);
9971 spin_lock_init(&cache->lock);
9972 init_rwsem(&cache->data_rwsem);
9973 INIT_LIST_HEAD(&cache->list);
9974 INIT_LIST_HEAD(&cache->cluster_list);
9975 INIT_LIST_HEAD(&cache->bg_list);
9976 INIT_LIST_HEAD(&cache->ro_list);
9977 INIT_LIST_HEAD(&cache->dirty_list);
9978 INIT_LIST_HEAD(&cache->io_list);
9979 btrfs_init_free_space_ctl(cache);
9980 atomic_set(&cache->trimming, 0);
9981 mutex_init(&cache->free_space_lock);
9982 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
9983
9984 return cache;
9985}
9986
9987int btrfs_read_block_groups(struct btrfs_fs_info *info)
9988{
9989 struct btrfs_path *path;
9990 int ret;
9991 struct btrfs_block_group_cache *cache;
9992 struct btrfs_space_info *space_info;
9993 struct btrfs_key key;
9994 struct btrfs_key found_key;
9995 struct extent_buffer *leaf;
9996 int need_clear = 0;
9997 u64 cache_gen;
9998 u64 feature;
9999 int mixed;
10000
10001 feature = btrfs_super_incompat_flags(info->super_copy);
10002 mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
10003
10004 key.objectid = 0;
10005 key.offset = 0;
10006 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10007 path = btrfs_alloc_path();
10008 if (!path)
10009 return -ENOMEM;
10010 path->reada = READA_FORWARD;
10011
10012 cache_gen = btrfs_super_cache_generation(info->super_copy);
10013 if (btrfs_test_opt(info, SPACE_CACHE) &&
10014 btrfs_super_generation(info->super_copy) != cache_gen)
10015 need_clear = 1;
10016 if (btrfs_test_opt(info, CLEAR_CACHE))
10017 need_clear = 1;
10018
10019 while (1) {
10020 ret = find_first_block_group(info, path, &key);
10021 if (ret > 0)
10022 break;
10023 if (ret != 0)
10024 goto error;
10025
10026 leaf = path->nodes[0];
10027 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10028
10029 cache = btrfs_create_block_group_cache(info, found_key.objectid,
10030 found_key.offset);
10031 if (!cache) {
10032 ret = -ENOMEM;
10033 goto error;
10034 }
10035
10036 if (need_clear) {
10037 /*
10038 * When we mount with an old space cache, we need to
10039 * set BTRFS_DC_CLEAR so the cache for this block group
10040 * is invalidated and rebuilt on the next commit.
10041 *
10042 * need_clear is set when the cache generation in the
10043 * super block does not match the current filesystem
10044 * generation, or when the filesystem was mounted with
10045 * -o clear_cache.
10046 */
10047 if (btrfs_test_opt(info, SPACE_CACHE))
10048 cache->disk_cache_state = BTRFS_DC_CLEAR;
10049 }
10050
10051 read_extent_buffer(leaf, &cache->item,
10052 btrfs_item_ptr_offset(leaf, path->slots[0]),
10053 sizeof(cache->item));
10054 cache->flags = btrfs_block_group_flags(&cache->item);
10055 if (!mixed &&
10056 ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
10057 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
10058 btrfs_err(info,
10059"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
10060 cache->key.objectid);
10061 ret = -EINVAL;
10062 goto error;
10063 }
10064
10065 key.objectid = found_key.objectid + found_key.offset;
10066 btrfs_release_path(path);
10067
10068 /*
10069 * We need to exclude the super stripes now so that the space info has
10070 * super bytes accounted for, otherwise we'll think we have more space
10071 * than we actually do.
10072 */
10073 ret = exclude_super_stripes(info, cache);
10074 if (ret) {
10075 /*
10076 * We may have excluded something, so call this just in
10077 * case.
10078 */
10079 free_excluded_extents(info, cache);
10080 btrfs_put_block_group(cache);
10081 goto error;
10082 }
10083
10084 /*
10085 * check for two cases, either we are full, and therefore
10086 * don't need to bother with the caching work since we won't
10087 * find any space, or we are empty, and we can just add all
10088 * the space in and be done with it.  This saves us _a lot_ of
10089 * time, particularly in the full case.
10090 */
10091 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
10092 cache->last_byte_to_unpin = (u64)-1;
10093 cache->cached = BTRFS_CACHE_FINISHED;
10094 free_excluded_extents(info, cache);
10095 } else if (btrfs_block_group_used(&cache->item) == 0) {
10096 cache->last_byte_to_unpin = (u64)-1;
10097 cache->cached = BTRFS_CACHE_FINISHED;
10098 add_new_free_space(cache, info,
10099 found_key.objectid,
10100 found_key.objectid +
10101 found_key.offset);
10102 free_excluded_extents(info, cache);
10103 }
10104
10105 ret = btrfs_add_block_group_cache(info, cache);
10106 if (ret) {
10107 btrfs_remove_free_space_cache(cache);
10108 btrfs_put_block_group(cache);
10109 goto error;
10110 }
10111
10112 trace_btrfs_add_block_group(info, cache, 0);
10113 update_space_info(info, cache->flags, found_key.offset,
10114 btrfs_block_group_used(&cache->item),
10115 cache->bytes_super, &space_info);
10116
10117 cache->space_info = space_info;
10118
10119 link_block_group(cache);
10120
10121 set_avail_alloc_bits(info, cache->flags);
10122 if (btrfs_chunk_readonly(info, cache->key.objectid)) {
10123 inc_block_group_ro(cache, 1);
10124 } else if (btrfs_block_group_used(&cache->item) == 0) {
10125 spin_lock(&info->unused_bgs_lock);
10126 /* Should always be true but just in case. */
10127 if (list_empty(&cache->bg_list)) {
10128 btrfs_get_block_group(cache);
10129 list_add_tail(&cache->bg_list,
10130 &info->unused_bgs);
10131 }
10132 spin_unlock(&info->unused_bgs_lock);
10133 }
10134 }
10135
10136 list_for_each_entry_rcu(space_info, &info->space_info, list) {
10137 if (!(get_alloc_profile(info, space_info->flags) &
10138 (BTRFS_BLOCK_GROUP_RAID10 |
10139 BTRFS_BLOCK_GROUP_RAID1 |
10140 BTRFS_BLOCK_GROUP_RAID5 |
10141 BTRFS_BLOCK_GROUP_RAID6 |
10142 BTRFS_BLOCK_GROUP_DUP)))
10143 continue;
10144 /*
10145 * avoid allocating from un-mirrored block group if there are
10146 * mirrored block groups.
10147 */
10148 list_for_each_entry(cache,
10149 &space_info->block_groups[BTRFS_RAID_RAID0],
10150 list)
10151 inc_block_group_ro(cache, 1);
10152 list_for_each_entry(cache,
10153 &space_info->block_groups[BTRFS_RAID_SINGLE],
10154 list)
10155 inc_block_group_ro(cache, 1);
10156 }
10157
10158 init_global_block_rsv(info);
10159 ret = 0;
10160error:
10161 btrfs_free_path(path);
10162 return ret;
10163}
10164
10165void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
10166 struct btrfs_fs_info *fs_info)
10167{
10168 struct btrfs_block_group_cache *block_group, *tmp;
10169 struct btrfs_root *extent_root = fs_info->extent_root;
10170 struct btrfs_block_group_item item;
10171 struct btrfs_key key;
10172 int ret = 0;
10173 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
10174
10175 trans->can_flush_pending_bgs = false;
10176 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
10177 if (ret)
10178 goto next;
10179
10180 spin_lock(&block_group->lock);
10181 memcpy(&item, &block_group->item, sizeof(item));
10182 memcpy(&key, &block_group->key, sizeof(key));
10183 spin_unlock(&block_group->lock);
10184
10185 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10186 sizeof(item));
10187 if (ret)
10188 btrfs_abort_transaction(trans, ret);
10189 ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid,
10190 key.offset);
10191 if (ret)
10192 btrfs_abort_transaction(trans, ret);
10193 add_block_group_free_space(trans, fs_info, block_group);
10194 /* already aborted the transaction if it failed. */
10195next:
10196 list_del_init(&block_group->bg_list);
10197 }
10198 trans->can_flush_pending_bgs = can_flush_pending_bgs;
10199}
10200
10201int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10202 struct btrfs_fs_info *fs_info, u64 bytes_used,
10203 u64 type, u64 chunk_offset, u64 size)
10204{
10205 struct btrfs_block_group_cache *cache;
10206 int ret;
10207
10208 btrfs_set_log_full_commit(fs_info, trans);
10209
10210 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
10211 if (!cache)
10212 return -ENOMEM;
10213
10214 btrfs_set_block_group_used(&cache->item, bytes_used);
10215 btrfs_set_block_group_chunk_objectid(&cache->item,
10216 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
10217 btrfs_set_block_group_flags(&cache->item, type);
10218
10219 cache->flags = type;
10220 cache->last_byte_to_unpin = (u64)-1;
10221 cache->cached = BTRFS_CACHE_FINISHED;
10222 cache->needs_free_space = 1;
10223 ret = exclude_super_stripes(fs_info, cache);
10224 if (ret) {
10225 /*
10226 * We may have excluded something, so call this just in
10227 * case.
10228 */
10229 free_excluded_extents(fs_info, cache);
10230 btrfs_put_block_group(cache);
10231 return ret;
10232 }
10233
10234 add_new_free_space(cache, fs_info, chunk_offset, chunk_offset + size);
10235
10236 free_excluded_extents(fs_info, cache);
10237
10238#ifdef CONFIG_BTRFS_DEBUG
10239 if (btrfs_should_fragment_free_space(cache)) {
10240 u64 new_bytes_used = size - bytes_used;
10241
10242 bytes_used += new_bytes_used >> 1;
10243 fragment_free_space(cache);
10244 }
10245#endif
10246
10247 /*
10248 * Ensure the corresponding space_info object is created and assigned
10249 * to our block group before adding it to the block group rbtree.
10250 */
10251 cache->space_info = __find_space_info(fs_info, cache->flags);
10252 if (!cache->space_info) {
10253 ret = create_space_info(fs_info, cache->flags,
10254 &cache->space_info);
10255 if (ret) {
10256 btrfs_remove_free_space_cache(cache);
10257 btrfs_put_block_group(cache);
10258 return ret;
10259 }
10260 }
10261
10262 ret = btrfs_add_block_group_cache(fs_info, cache);
10263 if (ret) {
10264 btrfs_remove_free_space_cache(cache);
10265 btrfs_put_block_group(cache);
10266 return ret;
10267 }
10268
10269 /*
10270 * Now that our block group has its ->space_info set and is inserted
10271 * in the rbtree, account its space and export it via sysfs.
10272 */
10273 trace_btrfs_add_block_group(fs_info, cache, 1);
10274 update_space_info(fs_info, cache->flags, size, bytes_used,
10275 cache->bytes_super, &cache->space_info);
10276 update_global_block_rsv(fs_info);
10277
10278 link_block_group(cache);
10279
10280 list_add_tail(&cache->bg_list, &trans->new_bgs);
10281
10282 set_avail_alloc_bits(fs_info, type);
10283 return 0;
10284}
10285
10286static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10287{
10288 u64 extra_flags = chunk_to_extended(flags) &
10289 BTRFS_EXTENDED_PROFILE_MASK;
10290
10291 write_seqlock(&fs_info->profiles_lock);
10292 if (flags & BTRFS_BLOCK_GROUP_DATA)
10293 fs_info->avail_data_alloc_bits &= ~extra_flags;
10294 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10295 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10296 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10297 fs_info->avail_system_alloc_bits &= ~extra_flags;
10298 write_sequnlock(&fs_info->profiles_lock);
10299}
10300
10301int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10302 struct btrfs_fs_info *fs_info, u64 group_start,
10303 struct extent_map *em)
10304{
10305 struct btrfs_root *root = fs_info->extent_root;
10306 struct btrfs_path *path;
10307 struct btrfs_block_group_cache *block_group;
10308 struct btrfs_free_cluster *cluster;
10309 struct btrfs_root *tree_root = fs_info->tree_root;
10310 struct btrfs_key key;
10311 struct inode *inode;
10312 struct kobject *kobj = NULL;
10313 int ret;
10314 int index;
10315 int factor;
10316 struct btrfs_caching_control *caching_ctl = NULL;
10317 bool remove_em;
10318
10319 block_group = btrfs_lookup_block_group(fs_info, group_start);
10320 BUG_ON(!block_group);
10321 BUG_ON(!block_group->ro);
10322
10323 /*
10324 * Free the reserved super bytes from this block group before
10325 * removing it.
10326 */
10327 free_excluded_extents(fs_info, block_group);
10328 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
10329 block_group->key.offset);
10330
10331 memcpy(&key, &block_group->key, sizeof(key));
10332 index = get_block_group_index(block_group);
10333 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
10334 BTRFS_BLOCK_GROUP_RAID1 |
10335 BTRFS_BLOCK_GROUP_RAID10))
10336 factor = 2;
10337 else
10338 factor = 1;
10339
10340 /* make sure this block group isn't part of an allocation cluster */
10341 cluster = &fs_info->data_alloc_cluster;
10342 spin_lock(&cluster->refill_lock);
10343 btrfs_return_cluster_to_free_space(block_group, cluster);
10344 spin_unlock(&cluster->refill_lock);
10345
10346 /*
10347 * make sure this block group isn't part of a metadata
10348 * allocation cluster
10349 */
10350 cluster = &fs_info->meta_alloc_cluster;
10351 spin_lock(&cluster->refill_lock);
10352 btrfs_return_cluster_to_free_space(block_group, cluster);
10353 spin_unlock(&cluster->refill_lock);
10354
10355 path = btrfs_alloc_path();
10356 if (!path) {
10357 ret = -ENOMEM;
10358 goto out;
10359 }
10360
10361 /*
10362 * get the inode first so any iput calls done for the io_list
10363 * entries don't cause the cleaner kthread to deadlock.
10364 */
10365 inode = lookup_free_space_inode(fs_info, block_group, path);
10366
10367 mutex_lock(&trans->transaction->cache_write_mutex);
10368 /*
10369 * make sure our free space cache IO is done before removing the
10370 * free space inode
10371 */
10372 spin_lock(&trans->transaction->dirty_bgs_lock);
10373 if (!list_empty(&block_group->io_list)) {
10374 list_del_init(&block_group->io_list);
10375
10376 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10377
10378 spin_unlock(&trans->transaction->dirty_bgs_lock);
10379 btrfs_wait_cache_io(trans, block_group, path);
10380 btrfs_put_block_group(block_group);
10381 spin_lock(&trans->transaction->dirty_bgs_lock);
10382 }
10383
10384 if (!list_empty(&block_group->dirty_list)) {
10385 list_del_init(&block_group->dirty_list);
10386 btrfs_put_block_group(block_group);
10387 }
10388 spin_unlock(&trans->transaction->dirty_bgs_lock);
10389 mutex_unlock(&trans->transaction->cache_write_mutex);
10390
10391 if (!IS_ERR(inode)) {
10392 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
10393 if (ret) {
10394 btrfs_add_delayed_iput(inode);
10395 goto out;
10396 }
10397 clear_nlink(inode);
10398 /* One for the block groups ref */
10399 spin_lock(&block_group->lock);
10400 if (block_group->iref) {
10401 block_group->iref = 0;
10402 block_group->inode = NULL;
10403 spin_unlock(&block_group->lock);
10404 iput(inode);
10405 } else {
10406 spin_unlock(&block_group->lock);
10407 }
10408 /* One for our lookup ref */
10409 btrfs_add_delayed_iput(inode);
10410 }
10411
10412 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10413 key.offset = block_group->key.objectid;
10414 key.type = 0;
10415
10416 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10417 if (ret < 0)
10418 goto out;
10419 if (ret > 0)
10420 btrfs_release_path(path);
10421 if (ret == 0) {
10422 ret = btrfs_del_item(trans, tree_root, path);
10423 if (ret)
10424 goto out;
10425 btrfs_release_path(path);
10426 }
10427
10428 spin_lock(&fs_info->block_group_cache_lock);
10429 rb_erase(&block_group->cache_node,
10430 &fs_info->block_group_cache_tree);
10431 RB_CLEAR_NODE(&block_group->cache_node);
10432
10433 if (fs_info->first_logical_byte == block_group->key.objectid)
10434 fs_info->first_logical_byte = (u64)-1;
10435 spin_unlock(&fs_info->block_group_cache_lock);
10436
10437 down_write(&block_group->space_info->groups_sem);
10438 /*
10439 * we must use list_del_init so people can check to see if they
10440 * are still on the list after taking the semaphore
10441 */
10442 list_del_init(&block_group->list);
10443 if (list_empty(&block_group->space_info->block_groups[index])) {
10444 kobj = block_group->space_info->block_group_kobjs[index];
10445 block_group->space_info->block_group_kobjs[index] = NULL;
10446 clear_avail_alloc_bits(fs_info, block_group->flags);
10447 }
10448 up_write(&block_group->space_info->groups_sem);
10449 if (kobj) {
10450 kobject_del(kobj);
10451 kobject_put(kobj);
10452 }
10453
10454 if (block_group->has_caching_ctl)
10455 caching_ctl = get_caching_control(block_group);
10456 if (block_group->cached == BTRFS_CACHE_STARTED)
10457 wait_block_group_cache_done(block_group);
10458 if (block_group->has_caching_ctl) {
10459 down_write(&fs_info->commit_root_sem);
10460 if (!caching_ctl) {
10461 struct btrfs_caching_control *ctl;
10462
10463 list_for_each_entry(ctl,
10464 &fs_info->caching_block_groups, list)
10465 if (ctl->block_group == block_group) {
10466 caching_ctl = ctl;
10467 refcount_inc(&caching_ctl->count);
10468 break;
10469 }
10470 }
10471 if (caching_ctl)
10472 list_del_init(&caching_ctl->list);
10473 up_write(&fs_info->commit_root_sem);
10474 if (caching_ctl) {
10475 /* Once for the caching bgs list and once for us. */
10476 put_caching_control(caching_ctl);
10477 put_caching_control(caching_ctl);
10478 }
10479 }
10480
10481 spin_lock(&trans->transaction->dirty_bgs_lock);
10482 if (!list_empty(&block_group->dirty_list)) {
10483 WARN_ON(1);
10484 }
10485 if (!list_empty(&block_group->io_list)) {
10486 WARN_ON(1);
10487 }
10488 spin_unlock(&trans->transaction->dirty_bgs_lock);
10489 btrfs_remove_free_space_cache(block_group);
10490
10491 spin_lock(&block_group->space_info->lock);
10492 list_del_init(&block_group->ro_list);
10493
10494 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
10495 WARN_ON(block_group->space_info->total_bytes
10496 < block_group->key.offset);
10497 WARN_ON(block_group->space_info->bytes_readonly
10498 < block_group->key.offset);
10499 WARN_ON(block_group->space_info->disk_total
10500 < block_group->key.offset * factor);
10501 }
10502 block_group->space_info->total_bytes -= block_group->key.offset;
10503 block_group->space_info->bytes_readonly -= block_group->key.offset;
10504 block_group->space_info->disk_total -= block_group->key.offset * factor;
10505
10506 spin_unlock(&block_group->space_info->lock);
10507
10508 memcpy(&key, &block_group->key, sizeof(key));
10509
10510 mutex_lock(&fs_info->chunk_mutex);
10511 if (!list_empty(&em->list)) {
10512 /* We're in the transaction->pending_chunks list. */
10513 free_extent_map(em);
10514 }
10515 spin_lock(&block_group->lock);
10516 block_group->removed = 1;
10517 /*
10518 * At this point trimming can't start on this block group, because we
10519 * removed the block group from the tree fs_info->block_group_cache_tree
10520 * so no one can find it anymore and even if someone already got this
10521 * block group before we removed it from the rbtree, they have already
10522 * incremented block_group->trimming - if they didn't, they won't find
10523 * any free space entries because we already removed them all when we
10524 * called btrfs_remove_free_space_cache().
10525 *
10526 * And we must not remove the extent map from the fs_info->mapping_tree
10527 * to prevent the same logical address range and physical device space
10528 * ranges from being reused for a new block group. This is because our
10529 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
10530 * completely transactionless, so while it is trimming a range the
10531 * currently running transaction might finish and a new one start,
10532 * allowing for new block groups to be created that can reuse the same
10533 * physical device locations unless we take this special care.
10534 *
10535 * There may also be an implicit trim operation if the file system
10536 * is mounted with -odiscard. The same protections must remain
10537 * in place until the extents have been discarded completely when
10538 * the transaction commit has completed.
10539 */
10540 remove_em = (atomic_read(&block_group->trimming) == 0);
10541
10542 /*
10543 * Make sure a trimmer task always sees the em in the pinned_chunks
10544 * list if it sees block_group->removed == 1 (needs barriers).
10545 */
10546 if (!remove_em) {
10547 /*
10548 * A concurrent trimming task incremented ->trimming before we set
10549 * ->removed, so the extent map must stay around until that task
10550 * finishes.  Move the em to the pinned_chunks list so the logical
10551 * address range and the underlying device extents are not handed
10552 * out to a newly allocated chunk while the discard is running.
10553 *
10554 * The last task to drop ->trimming (see
10555 * btrfs_put_block_group_trimming()) is then responsible for
10556 * removing the em from pinned_chunks and freeing it.
10557 */
10558 list_move_tail(&em->list, &fs_info->pinned_chunks);
10559 }
10560 spin_unlock(&block_group->lock);
10561
10562 if (remove_em) {
10563 struct extent_map_tree *em_tree;
10564
10565 em_tree = &fs_info->mapping_tree.map_tree;
10566 write_lock(&em_tree->lock);
10567 /*
10568 * The em might be in the pending_chunks list, so make sure the
10569 * chunk mutex is locked, since remove_extent_mapping() will
10570 * delete us from that list.
10571 */
10572 remove_extent_mapping(em_tree, em);
10573 write_unlock(&em_tree->lock);
10574 /* once for the tree */
10575 free_extent_map(em);
10576 }
10577
10578 mutex_unlock(&fs_info->chunk_mutex);
10579
10580 ret = remove_block_group_free_space(trans, fs_info, block_group);
10581 if (ret)
10582 goto out;
10583
10584 btrfs_put_block_group(block_group);
10585 btrfs_put_block_group(block_group);
10586
10587 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10588 if (ret > 0)
10589 ret = -EIO;
10590 if (ret < 0)
10591 goto out;
10592
10593 ret = btrfs_del_item(trans, root, path);
10594out:
10595 btrfs_free_path(path);
10596 return ret;
10597}
10598
10599struct btrfs_trans_handle *
10600btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10601 const u64 chunk_offset)
10602{
10603 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10604 struct extent_map *em;
10605 struct map_lookup *map;
10606 unsigned int num_items;
10607
10608 read_lock(&em_tree->lock);
10609 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10610 read_unlock(&em_tree->lock);
10611 ASSERT(em && em->start == chunk_offset);
10612
10613 /*
10614 * We need to reserve 3 + N units from the metadata space info in order
10615 * to remove a block group (done at btrfs_remove_chunk() and at
10616 * btrfs_remove_block_group()), which are used for:
10617 *
10618 * 1 unit for adding the free space inode's orphan (located in the tree
10619 * of tree roots).
10620 * 1 unit for deleting the block group item (located in the extent
10621 * tree).
10622 * 1 unit for deleting the free space item (located in tree of tree
10623 * roots).
10624 * N units for deleting N device extent items corresponding to each
10625 * stripe (located in the device tree).
10626 *
10627 * In order to remove a block group we also need to reserve units in the
10628 * system space info in order to update the chunk tree (update one or
10629 * more device items and remove one chunk item), but this is done at
10630 * btrfs_remove_chunk() through a call to check_system_chunk().
10631 */
10632 map = em->map_lookup;
10633 num_items = 3 + map->num_stripes;
10634 free_extent_map(em);
10635
10636 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10637 num_items, 1);
10638}
10639
10640 /*
10641 * Process the unused_bgs list and remove any that don't have any
10642 * allocated space inside of them.
10643 */
10644void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10645{
10646 struct btrfs_block_group_cache *block_group;
10647 struct btrfs_space_info *space_info;
10648 struct btrfs_trans_handle *trans;
10649 int ret = 0;
10650
10651 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
10652 return;
10653
10654 spin_lock(&fs_info->unused_bgs_lock);
10655 while (!list_empty(&fs_info->unused_bgs)) {
10656 u64 start, end;
10657 int trimming;
10658
10659 block_group = list_first_entry(&fs_info->unused_bgs,
10660 struct btrfs_block_group_cache,
10661 bg_list);
10662 list_del_init(&block_group->bg_list);
10663
10664 space_info = block_group->space_info;
10665
10666 if (ret || btrfs_mixed_space_info(space_info)) {
10667 btrfs_put_block_group(block_group);
10668 continue;
10669 }
10670 spin_unlock(&fs_info->unused_bgs_lock);
10671
10672 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10673
10674 /* Don't want to race with allocators so take the groups_sem */
10675 down_write(&space_info->groups_sem);
10676 spin_lock(&block_group->lock);
10677 if (block_group->reserved ||
10678 btrfs_block_group_used(&block_group->item) ||
10679 block_group->ro ||
10680 list_is_singular(&block_group->list)) {
10681 /*
10682 * We want to bail if we made new allocations or have
10683 * outstanding allocations in this block group.  We do
10684 * the ro check in case balance is currently acting on
10685 * this block group.
10686 */
10687 spin_unlock(&block_group->lock);
10688 up_write(&space_info->groups_sem);
10689 goto next;
10690 }
10691 spin_unlock(&block_group->lock);
10692
10693 /* We don't want to force the issue, only flip if it's ok. */
10694 ret = inc_block_group_ro(block_group, 0);
10695 up_write(&space_info->groups_sem);
10696 if (ret < 0) {
10697 ret = 0;
10698 goto next;
10699 }
10700
10701 /*
10702 * Want to do this before we do anything else so we can recover
10703 * properly if we fail to join the transaction.
10704 */
10705 trans = btrfs_start_trans_remove_block_group(fs_info,
10706 block_group->key.objectid);
10707 if (IS_ERR(trans)) {
10708 btrfs_dec_block_group_ro(block_group);
10709 ret = PTR_ERR(trans);
10710 goto next;
10711 }
10712
10713 /*
10714 * The block group may still have ranges recorded in freed_extents[];
10715 * clear them now, its pinned counters are fixed up just below.
10716 */
10717 start = block_group->key.objectid;
10718 end = start + block_group->key.offset - 1;
10719 /*
10720 * Hold the unused_bg_unpin_mutex lock to avoid racing with
10721 * btrfs_finish_extent_commit(). If we are at transaction N,
10722 * another task might be running finish_extent_commit() for the
10723 * previous transaction N - 1, and have seen a range belonging
10724 * to the block group in freed_extents[] before we were able to
10725 * clear the whole block group range from freed_extents[]. This
10726 * means that task can look up the block group after we
10727 * unpinned it from freed_extents[] and deleted it, leading to
10728 * a BUG_ON() at btrfs_unpin_extent_range().
10729 */
10730 mutex_lock(&fs_info->unused_bg_unpin_mutex);
10731 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
10732 EXTENT_DIRTY);
10733 if (ret) {
10734 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10735 btrfs_dec_block_group_ro(block_group);
10736 goto end_trans;
10737 }
10738 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
10739 EXTENT_DIRTY);
10740 if (ret) {
10741 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10742 btrfs_dec_block_group_ro(block_group);
10743 goto end_trans;
10744 }
10745 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10746
10747 /* Reset pinned so btrfs_put_block_group doesn't complain */
10748 spin_lock(&space_info->lock);
10749 spin_lock(&block_group->lock);
10750
10751 space_info->bytes_pinned -= block_group->pinned;
10752 space_info->bytes_readonly += block_group->pinned;
10753 percpu_counter_add(&space_info->total_bytes_pinned,
10754 -block_group->pinned);
10755 block_group->pinned = 0;
10756
10757 spin_unlock(&block_group->lock);
10758 spin_unlock(&space_info->lock);
10759
10760 /* DISCARD can flip during remount */
10761 trimming = btrfs_test_opt(fs_info, DISCARD);
10762
10763 /* Implicit trim during transaction commit. */
10764 if (trimming)
10765 btrfs_get_block_group_trimming(block_group);
10766 /*
10767 * Btrfs_remove_chunk will abort the transaction if things go
10768 * horribly wrong.
10769 */
10770
10771 ret = btrfs_remove_chunk(trans, fs_info,
10772 block_group->key.objectid);
10773
10774 if (ret) {
10775 if (trimming)
10776 btrfs_put_block_group_trimming(block_group);
10777 goto end_trans;
10778 }
10779
10780 /*
10781 * If we're not mounted with -odiscard, we can just forget
10782 * about this block group. Otherwise we'll need to wait
10783 * until transaction commit to do the actual discard.
10784 */
10785 if (trimming) {
10786 spin_lock(&fs_info->unused_bgs_lock);
10787 /*
10788 * A concurrent scrub might have added us to the list
10789 * fs_info->unused_bgs, so use a list_move operation
10790 * to add the block group to the deleted_bgs list.
10791 */
10792 list_move(&block_group->bg_list,
10793 &trans->transaction->deleted_bgs);
10794 spin_unlock(&fs_info->unused_bgs_lock);
10795 btrfs_get_block_group(block_group);
10796 }
10797end_trans:
10798 btrfs_end_transaction(trans);
10799next:
10800 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
10801 btrfs_put_block_group(block_group);
10802 spin_lock(&fs_info->unused_bgs_lock);
10803 }
10804 spin_unlock(&fs_info->unused_bgs_lock);
10805}
10806
10807int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10808{
10809 struct btrfs_space_info *space_info;
10810 struct btrfs_super_block *disk_super;
10811 u64 features;
10812 u64 flags;
10813 int mixed = 0;
10814 int ret;
10815
10816 disk_super = fs_info->super_copy;
10817 if (!btrfs_super_root(disk_super))
10818 return -EINVAL;
10819
10820 features = btrfs_super_incompat_flags(disk_super);
10821 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10822 mixed = 1;
10823
10824 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10825 ret = create_space_info(fs_info, flags, &space_info);
10826 if (ret)
10827 goto out;
10828
10829 if (mixed) {
10830 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10831 ret = create_space_info(fs_info, flags, &space_info);
10832 } else {
10833 flags = BTRFS_BLOCK_GROUP_METADATA;
10834 ret = create_space_info(fs_info, flags, &space_info);
10835 if (ret)
10836 goto out;
10837
10838 flags = BTRFS_BLOCK_GROUP_DATA;
10839 ret = create_space_info(fs_info, flags, &space_info);
10840 }
10841out:
10842 return ret;
10843}
10844
10845int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
10846 u64 start, u64 end)
10847{
10848 return unpin_extent_range(fs_info, start, end, false);
10849}
10850
10851 /*
10852 * It used to be that old block groups would be left around forever.
10853 * Iterating over them would be enough to trim unused space.  Since we
10854 * now automatically remove them, we also need to iterate over unallocated
10855 * space.
10856 *
10857 * We don't want a transaction for this since the discard may take a
10858 * substantial amount of time.  We don't require that a transaction be
10859 * running, but we do need to take a running transaction into account
10860 * to ensure that we're not discarding chunks that were released in
10861 * the current transaction.
10862 *
10863 * Holding the chunks lock will prevent other threads from allocating
10864 * or releasing chunks, but it won't prevent a running transaction
10865 * from committing and releasing the memory that the pending chunks
10866 * list head uses.  For that, we need to take a reference to the
10867 * transaction.
10868 */
10869static int btrfs_trim_free_extents(struct btrfs_device *device,
10870 u64 minlen, u64 *trimmed)
10871{
10872 u64 start = 0, len = 0;
10873 int ret;
10874
10875 *trimmed = 0;
10876
10877 /* Not writeable = nothing to do. */
10878 if (!device->writeable)
10879 return 0;
10880
10881 /* No free space = nothing to do. */
10882 if (device->total_bytes <= device->bytes_used)
10883 return 0;
10884
10885 ret = 0;
10886
10887 while (1) {
10888 struct btrfs_fs_info *fs_info = device->fs_info;
10889 struct btrfs_transaction *trans;
10890 u64 bytes;
10891
10892 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10893 if (ret)
10894 return ret;
10895
10896 down_read(&fs_info->commit_root_sem);
10897
10898 spin_lock(&fs_info->trans_lock);
10899 trans = fs_info->running_transaction;
10900 if (trans)
10901 refcount_inc(&trans->use_count);
10902 spin_unlock(&fs_info->trans_lock);
10903
10904 ret = find_free_dev_extent_start(trans, device, minlen, start,
10905 &start, &len);
10906 if (trans)
10907 btrfs_put_transaction(trans);
10908
10909 if (ret) {
10910 up_read(&fs_info->commit_root_sem);
10911 mutex_unlock(&fs_info->chunk_mutex);
10912 if (ret == -ENOSPC)
10913 ret = 0;
10914 break;
10915 }
10916
10917 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10918 up_read(&fs_info->commit_root_sem);
10919 mutex_unlock(&fs_info->chunk_mutex);
10920
10921 if (ret)
10922 break;
10923
10924 start += len;
10925 *trimmed += bytes;
10926
10927 if (fatal_signal_pending(current)) {
10928 ret = -ERESTARTSYS;
10929 break;
10930 }
10931
10932 cond_resched();
10933 }
10934
10935 return ret;
10936}
10937
10938int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
10939{
10940 struct btrfs_block_group_cache *cache = NULL;
10941 struct btrfs_device *device;
10942 struct list_head *devices;
10943 u64 group_trimmed;
10944 u64 start;
10945 u64 end;
10946 u64 trimmed = 0;
10947 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10948 int ret = 0;
10949
10950 /*
10951 * try to trim all FS space, our block group may start from non-zero.
10952 */
10953 if (range->len == total_bytes)
10954 cache = btrfs_lookup_first_block_group(fs_info, range->start);
10955 else
10956 cache = btrfs_lookup_block_group(fs_info, range->start);
10957
10958 while (cache) {
10959 if (cache->key.objectid >= (range->start + range->len)) {
10960 btrfs_put_block_group(cache);
10961 break;
10962 }
10963
10964 start = max(range->start, cache->key.objectid);
10965 end = min(range->start + range->len,
10966 cache->key.objectid + cache->key.offset);
10967
10968 if (end - start >= range->minlen) {
10969 if (!block_group_cache_done(cache)) {
10970 ret = cache_block_group(cache, 0);
10971 if (ret) {
10972 btrfs_put_block_group(cache);
10973 break;
10974 }
10975 ret = wait_block_group_cache_done(cache);
10976 if (ret) {
10977 btrfs_put_block_group(cache);
10978 break;
10979 }
10980 }
10981 ret = btrfs_trim_block_group(cache,
10982 &group_trimmed,
10983 start,
10984 end,
10985 range->minlen);
10986
10987 trimmed += group_trimmed;
10988 if (ret) {
10989 btrfs_put_block_group(cache);
10990 break;
10991 }
10992 }
10993
10994 cache = next_block_group(fs_info, cache);
10995 }
10996
10997 mutex_lock(&fs_info->fs_devices->device_list_mutex);
10998 devices = &fs_info->fs_devices->alloc_list;
10999 list_for_each_entry(device, devices, dev_alloc_list) {
11000 ret = btrfs_trim_free_extents(device, range->minlen,
11001 &group_trimmed);
11002 if (ret)
11003 break;
11004
11005 trimmed += group_trimmed;
11006 }
11007 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
11008
11009 range->len = trimmed;
11010 return ret;
11011}
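/*
 * Example (editor's sketch): btrfs_trim_fs() is reached from the FITRIM
 * ioctl; from user space the equivalent of "fstrim /mnt" is roughly:
 *
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = ULLONG_MAX,
 *		.minlen = 0,
 *	};
 *	ioctl(fd, FITRIM, &range);
 *
 * On return, range.len is overwritten with the number of bytes actually
 * trimmed, matching the assignment at the end of this function.
 */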
11012
11013 /*
11014 * btrfs_{start,end}_write_no_snapshotting() are similar to
11015 * mnt_{want,drop}_write(), they are used to prevent some tasks from
11016 * writing data into the page cache through nocow before the subvolume
11017 * is snapshotted, but flush the data onto disk after the snapshot
11018 * creation, or to prevent nocow writes while the subvolume is being
11019 * snapshotted.
11020 */
11021void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
11022{
11023 percpu_counter_dec(&root->subv_writers->counter);
11024 /*
11025 * Make sure counter is updated before we wake up waiters.
11026 */
11027 smp_mb();
11028 if (waitqueue_active(&root->subv_writers->wait))
11029 wake_up(&root->subv_writers->wait);
11030}
11031
11032int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
11033{
11034 if (atomic_read(&root->will_be_snapshotted))
11035 return 0;
11036
11037 percpu_counter_inc(&root->subv_writers->counter);
11038 /*
11039 * Make sure counter is updated before we check for snapshot creation.
11040 */
11041 smp_mb();
11042 if (atomic_read(&root->will_be_snapshotted)) {
11043 btrfs_end_write_no_snapshotting(root);
11044 return 0;
11045 }
11046 return 1;
11047}
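/*
 * Example (editor's sketch): nocow writers pair these calls around the
 * decision to overwrite an extent in place; the fallback label below is
 * hypothetical, only the pairing is prescribed:
 *
 *	if (!btrfs_start_write_no_snapshotting(root))
 *		goto fallback_to_cow;
 *	...check that the extent is safe to overwrite, then write...
 *	btrfs_end_write_no_snapshotting(root);
 *
 * A zero return means a snapshot is pending or in progress, so the
 * caller must take the cow path instead of writing in place.
 */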
11048
11049void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11050{
11051 while (true) {
11052 int ret;
11053
11054 ret = btrfs_start_write_no_snapshotting(root);
11055 if (ret)
11056 break;
11057 wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait,
11058 TASK_UNINTERRUPTIBLE);
11059 }
11060}
11061