// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"

#undef SCRAMBLE_DELAYED_REFS

/*
 * control flags for do_chunk_alloc's force field
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_fs_info *fs_info,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_fs_info *fs_info,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_fs_info *fs_info,
			    struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

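/* Return true only if the block group's flags include all of the given bits. */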
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);

		/*
		 * If not empty, someone is still holding mutex of
		 * full_stripe_lock, which can only be released by caller.
		 * And it will definitely cause use-after-free when caller
		 * tries to release full stripe lock.
		 *
		 * No better way to resolve, but only to warn.
		 */
		WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

/*
 * This adds the block group to the fs_info rb tree for the block group
 * cache.
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

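/*
 * Mark a byte range as excluded from allocation by setting EXTENT_UPTODATE
 * in both freed_extents trees; used to keep the super block mirrors from
 * ever being handed out as free space.
 */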
static int add_excluded_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE);
	set_extent_bits(&fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE);
	return 0;
}

static void free_excluded_extents(struct btrfs_fs_info *fs_info,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE);
	clear_extent_bits(&fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE);
}

static int exclude_super_stripes(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(fs_info, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(fs_info, cache->key.objectid,
				       bytenr, 0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(fs_info, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

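/* Grab a reference on the block group's caching control, if caching is running. */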
static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	refcount_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (refcount_dec_and_test(&ctl->count))
		kfree(ctl);
}

#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 start = block_group->key.objectid;
	u64 len = block_group->key.offset;
	u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
		fs_info->nodesize : fs_info->sectorsize;
	u64 step = chunk << 1;

	while (len > chunk) {
		btrfs_remove_free_space(block_group, start, chunk);
		start += step;
		if (len < step)
			len = 0;
		else
			len -= step;
	}
}
#endif

/*
 * This is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		       struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret); /* -ENOMEM or logic error */
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret); /* -ENOMEM or logic error */
	}

	return total_added;
}

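/*
 * Walk the commit root of the extent tree and populate the free space cache
 * with every hole between allocated extents in this block group.
 */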
static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret;
	bool wakeup = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

#ifdef CONFIG_BTRFS_DEBUG
	/*
	 * If we're fragmenting we don't want to make anybody think we can
	 * allocate from this block group until we've had a chance to fragment
	 * the free space.
	 */
	if (btrfs_should_fragment_free_space(block_group))
		wakeup = false;
#endif
	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it's read-only.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				if (wakeup)
					caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				mutex_lock(&caching_ctl->mutex);
				down_read(&fs_info->commit_root_sem);
				goto next;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			if (wakeup)
				caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->nodesize;
			else
				last = key.objectid + key.offset;

			if (total_found > CACHING_CTL_WAKE_UP) {
				total_found = 0;
				if (wakeup)
					wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

out:
	btrfs_free_path(path);
	return ret;
}

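/*
 * Worker for the caching_workers queue: load the block group's free space
 * from either the free space tree or the extent tree, then mark the group
 * cached and wake anybody waiting on it.
 */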
static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;

	mutex_lock(&caching_ctl->mutex);
	down_read(&fs_info->commit_root_sem);

	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		ret = load_free_space_tree(caching_ctl);
	else
		ret = load_extent_tree_free(caching_ctl);

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group)) {
		u64 bytes_used;

		spin_lock(&block_group->space_info->lock);
		spin_lock(&block_group->lock);
		bytes_used = block_group->key.offset -
			btrfs_block_group_used(&block_group->item);
		block_group->space_info->bytes_used += bytes_used >> 1;
		spin_unlock(&block_group->lock);
		spin_unlock(&block_group->space_info->lock);
		fragment_free_space(block_group);
	}
#endif

	caching_ctl->progress = (u64)-1;

	up_read(&fs_info->commit_root_sem);
	free_excluded_extents(fs_info, block_group);
	mutex_unlock(&caching_ctl->mutex);

	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	refcount_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);
	/*
	 * If somebody else is already loading the free space cache for this
	 * block group (cache->cached == BTRFS_CACHE_FAST), wait for that
	 * fast-path load to finish before looking at cache->cached again and
	 * deciding whether there is anything left for us to do.
	 */
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		refcount_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
		mutex_lock(&caching_ctl->mutex);
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
		if (ret == 1 &&
		    btrfs_should_fragment_free_space(cache)) {
			u64 bytes_used;

			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			bytes_used = cache->key.offset -
				btrfs_block_group_used(&cache->item);
			cache->space_info->bytes_used += bytes_used >> 1;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fragment_free_space(cache);
		}
#endif
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info, cache);
			return 0;
		}
	} else {
		/*
		 * We're either using the free space tree or no caching at all.
		 * Set cached to the appropriate value and wakeup any waiters.
		 */
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	refcount_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}

/*
 * Return the block group that starts at or after bytenr.
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 0);
}

/*
 * Return the block group that contains the given bytenr.
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 1);
}

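/* Find the space_info whose type flags match the given allocation flags. */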
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

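/*
 * Update the total_bytes_pinned counter of the space_info that owns this
 * extent; num_bytes may be negative to subtract bytes.
 */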
static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
			     u64 owner, u64 root_objectid)
{
	struct btrfs_space_info *space_info;
	u64 flags;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
			flags = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			flags = BTRFS_BLOCK_GROUP_METADATA;
	} else {
		flags = BTRFS_BLOCK_GROUP_DATA;
	}

	space_info = __find_space_info(fs_info, flags);
	ASSERT(space_info);
	percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
}

/*
 * After adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different.
	 */
	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and
			 * try again.
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref_head(head);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  Implicit back refs are optimized for
 * pointers in non-shared tree blocks: for a given pointer in a block they
 * record the block's owner tree and the pointer's key, which is enough to
 * find the block again by searching that tree.  Full back refs are used
 * for pointers in tree blocks that are no longer referenced by their
 * owner tree; they record the location of the tree block itself, so the
 * block can be found by searching the extent tree for its bytenr.  The
 * back refs of data extents are always implicit.
 *
 * The extent item in the extent tree records the total reference count
 * and is followed by the back reference entries.  Back refs are stored
 * inline in the extent item when they fit; otherwise they become separate
 * keyed items in the extent tree, ordered the same way as the inline
 * entries.
 *
 * The key objectid of a back ref is the first byte of the extent, the key
 * type distinguishes the kind of back ref, and the meaning of the key
 * offset depends on that type:
 *
 * - BTRFS_TREE_BLOCK_REF_KEY: offset is the objectid of the owner tree
 *   (implicit tree block ref).
 * - BTRFS_SHARED_BLOCK_REF_KEY: offset is the bytenr of the parent tree
 *   block (full tree block ref).
 * - BTRFS_EXTENT_DATA_REF_KEY: offset is a hash of the root objectid,
 *   inode objectid and file offset (implicit data ref).
 * - BTRFS_SHARED_DATA_REF_KEY: offset is the bytenr of the tree leaf that
 *   holds the file extent item (full data ref).
 */

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(fs_info, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memzero_extent_buffer(leaf, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

/*
 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
 * is_data == BTRFS_REF_TYPE_DATA, data type is required,
 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
 */
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has parent tree
				 * block, which must be aligned to
				 * nodesize.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has parent tree
				 * block, which must be aligned to
				 * nodesize.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else {
			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
			return type;
		}
	}

	btrfs_print_leaf((struct extent_buffer *)eb);
	btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
		  eb->start, type);
	WARN_ON(1);

	return BTRFS_REF_TYPE_INVALID;
}

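/*
 * Hash the (root, inode objectid, offset) triple that identifies an implicit
 * data back reference; the result is used as the key offset of the
 * EXTENT_DATA_REF item.
 */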
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

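/*
 * Find the keyed back reference item for a data extent: a SHARED_DATA_REF
 * if parent is set, otherwise an EXTENT_DATA_REF matching (root, owner,
 * offset).  Returns 0 with the path pointing at the item, or -ENOENT.
 */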
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_fs_info *fs_info,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

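/*
 * Insert (or bump the count of) a keyed data back reference item.  Shared
 * refs keep a single count; implicit refs may collide on the hashed key
 * offset, in which case we probe forward until we find the matching item.
 */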
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_fs_info *fs_info,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_fs_info *fs_info,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, fs_info->extent_root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

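/*
 * Return the reference count stored in the data back ref that the path (or
 * the given inline ref) points at.
 */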
static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;
	int type;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		/*
		 * If type is invalid, we should have bailed out earlier than
		 * this call.
		 */
		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
		ASSERT(type != BTRFS_REF_TYPE_INVALID);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

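/*
 * Look up the keyed back reference item for a tree block: a SHARED_BLOCK_REF
 * if parent is set, otherwise a TREE_BLOCK_REF keyed on the owner root.
 */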
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_fs_info *fs_info,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_fs_info *fs_info,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, fs_info->extent_root,
				      path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

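/*
 * Pick the back reference key type for an extent: shared (full) refs when a
 * parent block is given, implicit refs otherwise; tree block vs data is
 * decided by the owner objectid.
 */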
static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
	int needed;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, fs_info, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
		needed = BTRFS_REF_TYPE_DATA;
	else
		needed = BTRFS_REF_TYPE_BLOCK;

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
		if (type == BTRFS_REF_TYPE_INVALID) {
			err = -EINVAL;
			goto out;
		}

		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block.
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(fs_info, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, fs_info, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, fs_info, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, fs_info, path, bytenr,
					     parent, root_objectid, owner,
					     offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	/*
	 * If type is invalid, we should have bailed out after
	 * lookup_inline_extent_backref().
	 */
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	ASSERT(type != BTRFS_REF_TYPE_INVALID);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(fs_info, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, fs_info, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(fs_info, path, iref,
					     refs_to_add, extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(fs_info, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, fs_info, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, fs_info, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(fs_info, path, iref,
					     -refs_to_drop, NULL, last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, fs_info, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, fs_info->extent_root, path);
	}
	return ret;
}

#define in_range(b, first, len)	((b) >= (first) && (b) < (first) + (len))
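/*
 * Issue a discard for the given byte range on one device, carving out any
 * super block mirrors that fall inside the range.
 */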
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are
	 * discarding.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
			      &bbio, 0);
	/* Error condition is -ENOMEM */
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			struct request_queue *req_q;

			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}
			req_q = bdev_get_queue(stripe->dev->bdev);
			if (!blk_queue_discard(req_q))
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break;

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int old_ref_mod, new_ref_mod;
	int ret;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
			   owner, offset, BTRFS_ADD_DELAYED_REF);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
						 num_bytes, parent,
						 root_objectid, (int)owner,
						 BTRFS_ADD_DELAYED_REF, NULL,
						 &old_ref_mod, &new_ref_mod);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						 num_bytes, parent,
						 root_objectid, owner, offset,
						 0, BTRFS_ADD_DELAYED_REF,
						 &old_ref_mod, &new_ref_mod);
	}

	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
		add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);

	return ret;
}

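/*
 * Apply a delayed ref that adds references to an already existing extent:
 * try to bump the count in an inline backref first, and fall back to a
 * keyed backref item when the extent item has no room left (-EAGAIN).
 */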
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, fs_info, path, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, fs_info, path, bytenr, parent,
				    root_objectid, owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}

2267static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2268 struct btrfs_fs_info *fs_info,
2269 struct btrfs_delayed_ref_node *node,
2270 struct btrfs_delayed_extent_op *extent_op,
2271 int insert_reserved)
2272{
2273 int ret = 0;
2274 struct btrfs_delayed_data_ref *ref;
2275 struct btrfs_key ins;
2276 u64 parent = 0;
2277 u64 ref_root = 0;
2278 u64 flags = 0;
2279
2280 ins.objectid = node->bytenr;
2281 ins.offset = node->num_bytes;
2282 ins.type = BTRFS_EXTENT_ITEM_KEY;
2283
2284 ref = btrfs_delayed_node_to_data_ref(node);
2285 trace_run_delayed_data_ref(fs_info, node, ref, node->action);
2286
2287 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2288 parent = ref->parent;
2289 ref_root = ref->root;
2290
2291 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2292 if (extent_op)
2293 flags |= extent_op->flags_to_set;
2294 ret = alloc_reserved_file_extent(trans, fs_info,
2295 parent, ref_root, flags,
2296 ref->objectid, ref->offset,
2297 &ins, node->ref_mod);
2298 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2299 ret = __btrfs_inc_extent_ref(trans, fs_info, node, parent,
2300 ref_root, ref->objectid,
2301 ref->offset, node->ref_mod,
2302 extent_op);
2303 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2304 ret = __btrfs_free_extent(trans, fs_info, node, parent,
2305 ref_root, ref->objectid,
2306 ref->offset, node->ref_mod,
2307 extent_op);
2308 } else {
2309 BUG();
2310 }
2311 return ret;
2312}
2313
2314static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2315 struct extent_buffer *leaf,
2316 struct btrfs_extent_item *ei)
2317{
2318 u64 flags = btrfs_extent_flags(leaf, ei);
2319 if (extent_op->update_flags) {
2320 flags |= extent_op->flags_to_set;
2321 btrfs_set_extent_flags(leaf, ei, flags);
2322 }
2323
2324 if (extent_op->update_key) {
2325 struct btrfs_tree_block_info *bi;
2326 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2327 bi = (struct btrfs_tree_block_info *)(ei + 1);
2328 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2329 }
2330}
2331
2332static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2333 struct btrfs_fs_info *fs_info,
2334 struct btrfs_delayed_ref_head *head,
2335 struct btrfs_delayed_extent_op *extent_op)
2336{
2337 struct btrfs_key key;
2338 struct btrfs_path *path;
2339 struct btrfs_extent_item *ei;
2340 struct extent_buffer *leaf;
2341 u32 item_size;
2342 int ret;
2343 int err = 0;
2344 int metadata = !extent_op->is_data;
2345
2346 if (trans->aborted)
2347 return 0;
2348
2349 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2350 metadata = 0;
2351
2352 path = btrfs_alloc_path();
2353 if (!path)
2354 return -ENOMEM;
2355
2356 key.objectid = head->bytenr;
2357
2358 if (metadata) {
2359 key.type = BTRFS_METADATA_ITEM_KEY;
2360 key.offset = extent_op->level;
2361 } else {
2362 key.type = BTRFS_EXTENT_ITEM_KEY;
2363 key.offset = head->num_bytes;
2364 }
2365
2366again:
2367 path->reada = READA_FORWARD;
2368 path->leave_spinning = 1;
2369 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
2370 if (ret < 0) {
2371 err = ret;
2372 goto out;
2373 }
2374 if (ret > 0) {
2375 if (metadata) {
2376 if (path->slots[0] > 0) {
2377 path->slots[0]--;
2378 btrfs_item_key_to_cpu(path->nodes[0], &key,
2379 path->slots[0]);
2380 if (key.objectid == head->bytenr &&
2381 key.type == BTRFS_EXTENT_ITEM_KEY &&
2382 key.offset == head->num_bytes)
2383 ret = 0;
2384 }
2385 if (ret > 0) {
2386 btrfs_release_path(path);
2387 metadata = 0;
2388
2389 key.objectid = head->bytenr;
2390 key.offset = head->num_bytes;
2391 key.type = BTRFS_EXTENT_ITEM_KEY;
2392 goto again;
2393 }
2394 } else {
2395 err = -EIO;
2396 goto out;
2397 }
2398 }
2399
2400 leaf = path->nodes[0];
2401 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2402#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2403 if (item_size < sizeof(*ei)) {
2404 ret = convert_extent_item_v0(trans, fs_info, path, (u64)-1, 0);
2405 if (ret < 0) {
2406 err = ret;
2407 goto out;
2408 }
2409 leaf = path->nodes[0];
2410 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2411 }
2412#endif
2413 BUG_ON(item_size < sizeof(*ei));
2414 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2415 __run_delayed_extent_op(extent_op, leaf, ei);
2416
2417 btrfs_mark_buffer_dirty(leaf);
2418out:
2419 btrfs_free_path(path);
2420 return err;
2421}
2422
2423static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2424 struct btrfs_fs_info *fs_info,
2425 struct btrfs_delayed_ref_node *node,
2426 struct btrfs_delayed_extent_op *extent_op,
2427 int insert_reserved)
2428{
2429 int ret = 0;
2430 struct btrfs_delayed_tree_ref *ref;
2431 struct btrfs_key ins;
2432 u64 parent = 0;
2433 u64 ref_root = 0;
2434 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2435
2436 ref = btrfs_delayed_node_to_tree_ref(node);
2437 trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
2438
2439 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2440 parent = ref->parent;
2441 ref_root = ref->root;
2442
2443 ins.objectid = node->bytenr;
2444 if (skinny_metadata) {
2445 ins.offset = ref->level;
2446 ins.type = BTRFS_METADATA_ITEM_KEY;
2447 } else {
2448 ins.offset = node->num_bytes;
2449 ins.type = BTRFS_EXTENT_ITEM_KEY;
2450 }
2451
2452 if (node->ref_mod != 1) {
2453 btrfs_err(fs_info,
2454 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2455 node->bytenr, node->ref_mod, node->action, ref_root,
2456 parent);
2457 return -EIO;
2458 }
2459 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2460 BUG_ON(!extent_op || !extent_op->update_flags);
2461 ret = alloc_reserved_tree_block(trans, fs_info,
2462 parent, ref_root,
2463 extent_op->flags_to_set,
2464 &extent_op->key,
2465 ref->level, &ins);
2466 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2467 ret = __btrfs_inc_extent_ref(trans, fs_info, node,
2468 parent, ref_root,
2469 ref->level, 0, 1,
2470 extent_op);
2471 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2472 ret = __btrfs_free_extent(trans, fs_info, node,
2473 parent, ref_root,
2474 ref->level, 0, 1, extent_op);
2475 } else {
2476 BUG();
2477 }
2478 return ret;
2479}
2480
2481
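/* helper function to actually process a single delayed ref entry */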
2482static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2483 struct btrfs_fs_info *fs_info,
2484 struct btrfs_delayed_ref_node *node,
2485 struct btrfs_delayed_extent_op *extent_op,
2486 int insert_reserved)
2487{
2488 int ret = 0;
2489
2490 if (trans->aborted) {
2491 if (insert_reserved)
2492 btrfs_pin_extent(fs_info, node->bytenr,
2493 node->num_bytes, 1);
2494 return 0;
2495 }
2496
2497 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2498 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2499 ret = run_delayed_tree_ref(trans, fs_info, node, extent_op,
2500 insert_reserved);
2501 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2502 node->type == BTRFS_SHARED_DATA_REF_KEY)
2503 ret = run_delayed_data_ref(trans, fs_info, node, extent_op,
2504 insert_reserved);
2505 else
2506 BUG();
2507 return ret;
2508}
2509
2510static inline struct btrfs_delayed_ref_node *
2511select_delayed_ref(struct btrfs_delayed_ref_head *head)
2512{
2513 struct btrfs_delayed_ref_node *ref;
2514
2515 if (RB_EMPTY_ROOT(&head->ref_tree))
2516 return NULL;
2517
	/*
	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * This is to prevent a ref count from going down to zero, which deletes
	 * the extent item from the extent tree, when there still are references
	 * to add, which would fail because they would not find the extent item.
	 */
2524 if (!list_empty(&head->ref_add_list))
2525 return list_first_entry(&head->ref_add_list,
2526 struct btrfs_delayed_ref_node, add_list);
2527
2528 ref = rb_entry(rb_first(&head->ref_tree),
2529 struct btrfs_delayed_ref_node, ref_node);
2530 ASSERT(list_empty(&ref->add_list));
2531 return ref;
2532}
2533
2534static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2535 struct btrfs_delayed_ref_head *head)
2536{
2537 spin_lock(&delayed_refs->lock);
2538 head->processing = 0;
2539 delayed_refs->num_heads_ready++;
2540 spin_unlock(&delayed_refs->lock);
2541 btrfs_delayed_ref_unlock(head);
2542}
2543
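/*
 * Run the delayed extent op attached to a ref head, if any.  Returns 0 when
 * there was nothing to run (or the op could simply be dropped), 1 when an op
 * was run (head->lock has been dropped in that case), or a negative error.
 */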
2544static int cleanup_extent_op(struct btrfs_trans_handle *trans,
2545 struct btrfs_fs_info *fs_info,
2546 struct btrfs_delayed_ref_head *head)
2547{
2548 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
2549 int ret;
2550
2551 if (!extent_op)
2552 return 0;
2553 head->extent_op = NULL;
2554 if (head->must_insert_reserved) {
2555 btrfs_free_delayed_extent_op(extent_op);
2556 return 0;
2557 }
2558 spin_unlock(&head->lock);
2559 ret = run_delayed_extent_op(trans, fs_info, head, extent_op);
2560 btrfs_free_delayed_extent_op(extent_op);
2561 return ret ? ret : 1;
2562}
2563
2564static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2565 struct btrfs_fs_info *fs_info,
2566 struct btrfs_delayed_ref_head *head)
2567{
2568 struct btrfs_delayed_ref_root *delayed_refs;
2569 int ret;
2570
2571 delayed_refs = &trans->transaction->delayed_refs;
2572
2573 ret = cleanup_extent_op(trans, fs_info, head);
2574 if (ret < 0) {
2575 unselect_delayed_ref_head(delayed_refs, head);
2576 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2577 return ret;
2578 } else if (ret) {
2579 return ret;
2580 }
2581
	/*
	 * Need to drop our head ref lock and re-acquire the delayed ref lock
	 * and then re-check to make sure nobody got added.
	 */
2586 spin_unlock(&head->lock);
2587 spin_lock(&delayed_refs->lock);
2588 spin_lock(&head->lock);
2589 if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) {
2590 spin_unlock(&head->lock);
2591 spin_unlock(&delayed_refs->lock);
2592 return 1;
2593 }
2594 delayed_refs->num_heads--;
2595 rb_erase(&head->href_node, &delayed_refs->href_root);
2596 RB_CLEAR_NODE(&head->href_node);
2597 spin_unlock(&delayed_refs->lock);
2598 spin_unlock(&head->lock);
2599 atomic_dec(&delayed_refs->num_entries);
2600
2601 trace_run_delayed_ref_head(fs_info, head, 0);
2602
2603 if (head->total_ref_mod < 0) {
2604 struct btrfs_space_info *space_info;
2605 u64 flags;
2606
2607 if (head->is_data)
2608 flags = BTRFS_BLOCK_GROUP_DATA;
2609 else if (head->is_system)
2610 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2611 else
2612 flags = BTRFS_BLOCK_GROUP_METADATA;
2613 space_info = __find_space_info(fs_info, flags);
2614 ASSERT(space_info);
2615 percpu_counter_add(&space_info->total_bytes_pinned,
2616 -head->num_bytes);
2617
2618 if (head->is_data) {
2619 spin_lock(&delayed_refs->lock);
2620 delayed_refs->pending_csums -= head->num_bytes;
2621 spin_unlock(&delayed_refs->lock);
2622 }
2623 }
2624
2625 if (head->must_insert_reserved) {
2626 btrfs_pin_extent(fs_info, head->bytenr,
2627 head->num_bytes, 1);
2628 if (head->is_data) {
2629 ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2630 head->num_bytes);
2631 }
2632 }
2633
2634
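	/* Also free its reserved qgroup space */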
2635 btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
2636 head->qgroup_reserved);
2637 btrfs_delayed_ref_unlock(head);
2638 btrfs_put_delayed_ref_head(head);
2639 return 0;
2640}
2641
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2646static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2647 unsigned long nr)
2648{
2649 struct btrfs_fs_info *fs_info = trans->fs_info;
2650 struct btrfs_delayed_ref_root *delayed_refs;
2651 struct btrfs_delayed_ref_node *ref;
2652 struct btrfs_delayed_ref_head *locked_ref = NULL;
2653 struct btrfs_delayed_extent_op *extent_op;
2654 ktime_t start = ktime_get();
2655 int ret;
2656 unsigned long count = 0;
2657 unsigned long actual_count = 0;
2658 int must_insert_reserved = 0;
2659
2660 delayed_refs = &trans->transaction->delayed_refs;
2661 while (1) {
2662 if (!locked_ref) {
2663 if (count >= nr)
2664 break;
2665
2666 spin_lock(&delayed_refs->lock);
2667 locked_ref = btrfs_select_ref_head(trans);
2668 if (!locked_ref) {
2669 spin_unlock(&delayed_refs->lock);
2670 break;
2671 }
2672
			/* grab the lock that says we are going to process
			 * all the refs for this head */
2675 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2676 spin_unlock(&delayed_refs->lock);
2677
			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
2683 if (ret == -EAGAIN) {
2684 locked_ref = NULL;
2685 count++;
2686 continue;
2687 }
2688 }
2689
		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 * Or we can get node references of the same type that weren't
		 * merged when created due to bumps in the tree mod seq, and
		 * we need to merge them to prevent adding an inline extent
		 * backref before dropping it (triggering a BUG_ON at
		 * insert_inline_extent_backref() time).
		 */
2702 spin_lock(&locked_ref->lock);
2703 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2704 locked_ref);
2705
		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
2710 ref = select_delayed_ref(locked_ref);
2711
2712 if (ref && ref->seq &&
2713 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2714 spin_unlock(&locked_ref->lock);
2715 unselect_delayed_ref_head(delayed_refs, locked_ref);
2716 locked_ref = NULL;
2717 cond_resched();
2718 count++;
2719 continue;
2720 }
2721
		/*
		 * We're done processing refs in this ref_head, clean everything
		 * up and move on to the next ref_head.
		 */
2726 if (!ref) {
2727 ret = cleanup_ref_head(trans, fs_info, locked_ref);
			if (ret > 0) {
				/* We dropped our lock, we need to loop. */
				ret = 0;
2731 continue;
2732 } else if (ret) {
2733 return ret;
2734 }
2735 locked_ref = NULL;
2736 count++;
2737 continue;
2738 }
2739
2740 actual_count++;
2741 ref->in_tree = 0;
2742 rb_erase(&ref->ref_node, &locked_ref->ref_tree);
2743 RB_CLEAR_NODE(&ref->ref_node);
2744 if (!list_empty(&ref->add_list))
2745 list_del(&ref->add_list);
		/*
		 * when we play the delayed ref, also correct the ref_mod on
		 * head
		 */
2750 switch (ref->action) {
2751 case BTRFS_ADD_DELAYED_REF:
2752 case BTRFS_ADD_DELAYED_EXTENT:
2753 locked_ref->ref_mod -= ref->ref_mod;
2754 break;
2755 case BTRFS_DROP_DELAYED_REF:
2756 locked_ref->ref_mod += ref->ref_mod;
2757 break;
2758 default:
2759 WARN_ON(1);
2760 }
2761 atomic_dec(&delayed_refs->num_entries);
2762
		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
		 */
2767 must_insert_reserved = locked_ref->must_insert_reserved;
2768 locked_ref->must_insert_reserved = 0;
2769
2770 extent_op = locked_ref->extent_op;
2771 locked_ref->extent_op = NULL;
2772 spin_unlock(&locked_ref->lock);
2773
2774 ret = run_one_delayed_ref(trans, fs_info, ref, extent_op,
2775 must_insert_reserved);
2776
2777 btrfs_free_delayed_extent_op(extent_op);
2778 if (ret) {
2779 unselect_delayed_ref_head(delayed_refs, locked_ref);
2780 btrfs_put_delayed_ref(ref);
2781 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2782 ret);
2783 return ret;
2784 }
2785
2786 btrfs_put_delayed_ref(ref);
2787 count++;
2788 cond_resched();
2789 }
2790
	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */
2796 if (actual_count > 0) {
2797 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2798 u64 avg;
2799
		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
2804 spin_lock(&delayed_refs->lock);
2805 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2806 fs_info->avg_delayed_ref_runtime = avg >> 2;
2807 spin_unlock(&delayed_refs->lock);
2808 }
2809 return 0;
2810}
2811
2812#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 */
2818static u64 find_middle(struct rb_root *root)
2819{
2820 struct rb_node *n = root->rb_node;
2821 struct btrfs_delayed_ref_node *entry;
2822 int alt = 1;
2823 u64 middle;
2824 u64 first = 0, last = 0;
2825
2826 n = rb_first(root);
2827 if (n) {
2828 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2829 first = entry->bytenr;
2830 }
2831 n = rb_last(root);
2832 if (n) {
2833 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2834 last = entry->bytenr;
2835 }
2836 n = root->rb_node;
2837
2838 while (n) {
2839 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2840 WARN_ON(!entry->in_tree);
2841
2842 middle = entry->bytenr;
2843
2844 if (alt)
2845 n = n->rb_left;
2846 else
2847 n = n->rb_right;
2848
2849 alt = 1 - alt;
2850 }
2851 return middle;
2852}
2853#endif
2854
2855static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2856{
2857 u64 num_bytes;
2858
2859 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2860 sizeof(struct btrfs_extent_inline_ref));
2861 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2862 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2863
	/*
	 * We don't ever fill up leaves all the way so multiply by 2 just to be
	 * closer to what we're really going to want to use.
	 */
2868 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2869}
2870
/*
 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
 * would require to store the csums for that many bytes.
 */
2875u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2876{
2877 u64 csum_size;
2878 u64 num_csums_per_leaf;
2879 u64 num_csums;
2880
2881 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2882 num_csums_per_leaf = div64_u64(csum_size,
2883 (u64)btrfs_super_csum_size(fs_info->super_copy));
2884 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2885 num_csums += num_csums_per_leaf - 1;
2886 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2887 return num_csums;
2888}
2889
2890int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2891 struct btrfs_fs_info *fs_info)
2892{
2893 struct btrfs_block_rsv *global_rsv;
2894 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2895 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2896 unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
2897 u64 num_bytes, num_dirty_bgs_bytes;
2898 int ret = 0;
2899
2900 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
2901 num_heads = heads_to_leaves(fs_info, num_heads);
2902 if (num_heads > 1)
2903 num_bytes += (num_heads - 1) * fs_info->nodesize;
2904 num_bytes <<= 1;
2905 num_bytes += btrfs_csum_bytes_to_leaves(fs_info, csum_bytes) *
2906 fs_info->nodesize;
2907 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(fs_info,
2908 num_dirty_bgs);
2909 global_rsv = &fs_info->global_block_rsv;
2910
	/*
	 * If we can't allocate any more chunks lets make sure we have _lots_ of
	 * wiggle room since running delayed refs can create more delayed refs.
	 */
2915 if (global_rsv->space_info->full) {
2916 num_dirty_bgs_bytes <<= 1;
2917 num_bytes <<= 1;
2918 }
2919
2920 spin_lock(&global_rsv->lock);
2921 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2922 ret = 1;
2923 spin_unlock(&global_rsv->lock);
2924 return ret;
2925}
2926
2927int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2928 struct btrfs_fs_info *fs_info)
2929{
2930 u64 num_entries =
2931 atomic_read(&trans->transaction->delayed_refs.num_entries);
2932 u64 avg_runtime;
2933 u64 val;
2934
2935 smp_mb();
2936 avg_runtime = fs_info->avg_delayed_ref_runtime;
2937 val = num_entries * avg_runtime;
2938 if (val >= NSEC_PER_SEC)
2939 return 1;
2940 if (val >= NSEC_PER_SEC / 2)
2941 return 2;
2942
2943 return btrfs_check_space_for_delayed_refs(trans, fs_info);
2944}
2945
2946struct async_delayed_refs {
2947 struct btrfs_root *root;
2948 u64 transid;
2949 int count;
2950 int error;
2951 int sync;
2952 struct completion wait;
2953 struct btrfs_work work;
2954};
2955
2956static inline struct async_delayed_refs *
2957to_async_delayed_refs(struct btrfs_work *work)
2958{
2959 return container_of(work, struct async_delayed_refs, work);
2960}
2961
2962static void delayed_ref_async_start(struct btrfs_work *work)
2963{
2964 struct async_delayed_refs *async = to_async_delayed_refs(work);
2965 struct btrfs_trans_handle *trans;
2966 struct btrfs_fs_info *fs_info = async->root->fs_info;
2967 int ret;
2968
2969
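	/* if the commit is already started, we don't need to wait here */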
2970 if (btrfs_transaction_blocked(fs_info))
2971 goto done;
2972
2973 trans = btrfs_join_transaction(async->root);
2974 if (IS_ERR(trans)) {
2975 async->error = PTR_ERR(trans);
2976 goto done;
2977 }
2978
	/*
	 * trans->sync means that when we call end_transaction, we won't
	 * wait on delayed refs
	 */
2983 trans->sync = true;
2984
2985
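	/* Don't bother flushing if we got into a different transaction */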
2986 if (trans->transid > async->transid)
2987 goto end;
2988
2989 ret = btrfs_run_delayed_refs(trans, async->count);
2990 if (ret)
2991 async->error = ret;
2992end:
2993 ret = btrfs_end_transaction(trans);
2994 if (ret && !async->error)
2995 async->error = ret;
2996done:
2997 if (async->sync)
2998 complete(&async->wait);
2999 else
3000 kfree(async);
3001}
3002
3003int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
3004 unsigned long count, u64 transid, int wait)
3005{
3006 struct async_delayed_refs *async;
3007 int ret;
3008
3009 async = kmalloc(sizeof(*async), GFP_NOFS);
3010 if (!async)
3011 return -ENOMEM;
3012
3013 async->root = fs_info->tree_root;
3014 async->count = count;
3015 async->error = 0;
3016 async->transid = transid;
3017 if (wait)
3018 async->sync = 1;
3019 else
3020 async->sync = 0;
3021 init_completion(&async->wait);
3022
3023 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
3024 delayed_ref_async_start, NULL, NULL);
3025
3026 btrfs_queue_work(fs_info->extent_workers, &async->work);
3027
3028 if (wait) {
3029 wait_for_completion(&async->wait);
3030 ret = async->error;
3031 kfree(async);
3032 return ret;
3033 }
3034 return 0;
3035}
3036
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
3047int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3048 unsigned long count)
3049{
3050 struct btrfs_fs_info *fs_info = trans->fs_info;
3051 struct rb_node *node;
3052 struct btrfs_delayed_ref_root *delayed_refs;
3053 struct btrfs_delayed_ref_head *head;
3054 int ret;
3055 int run_all = count == (unsigned long)-1;
3056 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
3057
3058
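	/* We'll clean this up in btrfs_cleanup_transaction */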
3059 if (trans->aborted)
3060 return 0;
3061
3062 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
3063 return 0;
3064
3065 delayed_refs = &trans->transaction->delayed_refs;
3066 if (count == 0)
3067 count = atomic_read(&delayed_refs->num_entries) * 2;
3068
3069again:
3070#ifdef SCRAMBLE_DELAYED_REFS
3071 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
3072#endif
3073 trans->can_flush_pending_bgs = false;
3074 ret = __btrfs_run_delayed_refs(trans, count);
3075 if (ret < 0) {
3076 btrfs_abort_transaction(trans, ret);
3077 return ret;
3078 }
3079
3080 if (run_all) {
3081 if (!list_empty(&trans->new_bgs))
3082 btrfs_create_pending_block_groups(trans);
3083
3084 spin_lock(&delayed_refs->lock);
3085 node = rb_first(&delayed_refs->href_root);
3086 if (!node) {
3087 spin_unlock(&delayed_refs->lock);
3088 goto out;
3089 }
3090 head = rb_entry(node, struct btrfs_delayed_ref_head,
3091 href_node);
3092 refcount_inc(&head->refs);
3093 spin_unlock(&delayed_refs->lock);
3094
3095
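		/* Mutex was contended, block until it's released and try again */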
3096 mutex_lock(&head->mutex);
3097 mutex_unlock(&head->mutex);
3098
3099 btrfs_put_delayed_ref_head(head);
3100 cond_resched();
3101 goto again;
3102 }
3103out:
3104 trans->can_flush_pending_bgs = can_flush_pending_bgs;
3105 return 0;
3106}
3107
3108int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3109 struct btrfs_fs_info *fs_info,
3110 u64 bytenr, u64 num_bytes, u64 flags,
3111 int level, int is_data)
3112{
3113 struct btrfs_delayed_extent_op *extent_op;
3114 int ret;
3115
3116 extent_op = btrfs_alloc_delayed_extent_op();
3117 if (!extent_op)
3118 return -ENOMEM;
3119
3120 extent_op->flags_to_set = flags;
3121 extent_op->update_flags = true;
3122 extent_op->update_key = false;
3123 extent_op->is_data = is_data ? true : false;
3124 extent_op->level = level;
3125
3126 ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
3127 num_bytes, extent_op);
3128 if (ret)
3129 btrfs_free_delayed_extent_op(extent_op);
3130 return ret;
3131}
3132
3133static noinline int check_delayed_ref(struct btrfs_root *root,
3134 struct btrfs_path *path,
3135 u64 objectid, u64 offset, u64 bytenr)
3136{
3137 struct btrfs_delayed_ref_head *head;
3138 struct btrfs_delayed_ref_node *ref;
3139 struct btrfs_delayed_data_ref *data_ref;
3140 struct btrfs_delayed_ref_root *delayed_refs;
3141 struct btrfs_transaction *cur_trans;
3142 struct rb_node *node;
3143 int ret = 0;
3144
3145 spin_lock(&root->fs_info->trans_lock);
3146 cur_trans = root->fs_info->running_transaction;
3147 if (cur_trans)
3148 refcount_inc(&cur_trans->use_count);
3149 spin_unlock(&root->fs_info->trans_lock);
3150 if (!cur_trans)
3151 return 0;
3152
3153 delayed_refs = &cur_trans->delayed_refs;
3154 spin_lock(&delayed_refs->lock);
3155 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3156 if (!head) {
3157 spin_unlock(&delayed_refs->lock);
3158 btrfs_put_transaction(cur_trans);
3159 return 0;
3160 }
3161
3162 if (!mutex_trylock(&head->mutex)) {
3163 refcount_inc(&head->refs);
3164 spin_unlock(&delayed_refs->lock);
3165
3166 btrfs_release_path(path);
3167
		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
3172 mutex_lock(&head->mutex);
3173 mutex_unlock(&head->mutex);
3174 btrfs_put_delayed_ref_head(head);
3175 btrfs_put_transaction(cur_trans);
3176 return -EAGAIN;
3177 }
3178 spin_unlock(&delayed_refs->lock);
3179
3180 spin_lock(&head->lock);
	/*
	 * XXX: We should replace this with a proper search function in the
	 * future.
	 */
3185 for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
3186 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
3187
3188 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3189 ret = 1;
3190 break;
3191 }
3192
3193 data_ref = btrfs_delayed_node_to_data_ref(ref);
3194
		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
3199 if (data_ref->root != root->root_key.objectid ||
3200 data_ref->objectid != objectid ||
3201 data_ref->offset != offset) {
3202 ret = 1;
3203 break;
3204 }
3205 }
3206 spin_unlock(&head->lock);
3207 mutex_unlock(&head->mutex);
3208 btrfs_put_transaction(cur_trans);
3209 return ret;
3210}
3211
3212static noinline int check_committed_ref(struct btrfs_root *root,
3213 struct btrfs_path *path,
3214 u64 objectid, u64 offset, u64 bytenr)
3215{
3216 struct btrfs_fs_info *fs_info = root->fs_info;
3217 struct btrfs_root *extent_root = fs_info->extent_root;
3218 struct extent_buffer *leaf;
3219 struct btrfs_extent_data_ref *ref;
3220 struct btrfs_extent_inline_ref *iref;
3221 struct btrfs_extent_item *ei;
3222 struct btrfs_key key;
3223 u32 item_size;
3224 int type;
3225 int ret;
3226
3227 key.objectid = bytenr;
3228 key.offset = (u64)-1;
3229 key.type = BTRFS_EXTENT_ITEM_KEY;
3230
3231 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3232 if (ret < 0)
3233 goto out;
3234 BUG_ON(ret == 0);
3235
3236 ret = -ENOENT;
3237 if (path->slots[0] == 0)
3238 goto out;
3239
3240 path->slots[0]--;
3241 leaf = path->nodes[0];
3242 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3243
3244 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
3245 goto out;
3246
3247 ret = 1;
3248 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3249#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3250 if (item_size < sizeof(*ei)) {
3251 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3252 goto out;
3253 }
3254#endif
3255 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3256
3257 if (item_size != sizeof(*ei) +
3258 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3259 goto out;
3260
3261 if (btrfs_extent_generation(leaf, ei) <=
3262 btrfs_root_last_snapshot(&root->root_item))
3263 goto out;
3264
3265 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3266
3267 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
3268 if (type != BTRFS_EXTENT_DATA_REF_KEY)
3269 goto out;
3270
3271 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3272 if (btrfs_extent_refs(leaf, ei) !=
3273 btrfs_extent_data_ref_count(leaf, ref) ||
3274 btrfs_extent_data_ref_root(leaf, ref) !=
3275 root->root_key.objectid ||
3276 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3277 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3278 goto out;
3279
3280 ret = 0;
3281out:
3282 return ret;
3283}
3284
3285int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
3286 u64 bytenr)
3287{
3288 struct btrfs_path *path;
3289 int ret;
3290 int ret2;
3291
3292 path = btrfs_alloc_path();
3293 if (!path)
3294 return -ENOENT;
3295
3296 do {
3297 ret = check_committed_ref(root, path, objectid,
3298 offset, bytenr);
3299 if (ret && ret != -ENOENT)
3300 goto out;
3301
3302 ret2 = check_delayed_ref(root, path, objectid,
3303 offset, bytenr);
3304 } while (ret2 == -EAGAIN);
3305
3306 if (ret2 && ret2 != -ENOENT) {
3307 ret = ret2;
3308 goto out;
3309 }
3310
3311 if (ret != -ENOENT || ret2 != -ENOENT)
3312 ret = 0;
3313out:
3314 btrfs_free_path(path);
3315 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3316 WARN_ON(ret > 0);
3317 return ret;
3318}
3319
3320static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3321 struct btrfs_root *root,
3322 struct extent_buffer *buf,
3323 int full_backref, int inc)
3324{
3325 struct btrfs_fs_info *fs_info = root->fs_info;
3326 u64 bytenr;
3327 u64 num_bytes;
3328 u64 parent;
3329 u64 ref_root;
3330 u32 nritems;
3331 struct btrfs_key key;
3332 struct btrfs_file_extent_item *fi;
3333 int i;
3334 int level;
3335 int ret = 0;
3336 int (*process_func)(struct btrfs_trans_handle *,
3337 struct btrfs_root *,
3338 u64, u64, u64, u64, u64, u64);
3339
3340
3341 if (btrfs_is_testing(fs_info))
3342 return 0;
3343
3344 ref_root = btrfs_header_owner(buf);
3345 nritems = btrfs_header_nritems(buf);
3346 level = btrfs_header_level(buf);
3347
3348 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3349 return 0;
3350
3351 if (inc)
3352 process_func = btrfs_inc_extent_ref;
3353 else
3354 process_func = btrfs_free_extent;
3355
3356 if (full_backref)
3357 parent = buf->start;
3358 else
3359 parent = 0;
3360
3361 for (i = 0; i < nritems; i++) {
3362 if (level == 0) {
3363 btrfs_item_key_to_cpu(buf, &key, i);
3364 if (key.type != BTRFS_EXTENT_DATA_KEY)
3365 continue;
3366 fi = btrfs_item_ptr(buf, i,
3367 struct btrfs_file_extent_item);
3368 if (btrfs_file_extent_type(buf, fi) ==
3369 BTRFS_FILE_EXTENT_INLINE)
3370 continue;
3371 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3372 if (bytenr == 0)
3373 continue;
3374
3375 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3376 key.offset -= btrfs_file_extent_offset(buf, fi);
3377 ret = process_func(trans, root, bytenr, num_bytes,
3378 parent, ref_root, key.objectid,
3379 key.offset);
3380 if (ret)
3381 goto fail;
3382 } else {
3383 bytenr = btrfs_node_blockptr(buf, i);
3384 num_bytes = fs_info->nodesize;
3385 ret = process_func(trans, root, bytenr, num_bytes,
3386 parent, ref_root, level - 1, 0);
3387 if (ret)
3388 goto fail;
3389 }
3390 }
3391 return 0;
3392fail:
3393 return ret;
3394}
3395
3396int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3397 struct extent_buffer *buf, int full_backref)
3398{
3399 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3400}
3401
3402int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3403 struct extent_buffer *buf, int full_backref)
3404{
3405 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3406}
3407
3408static int write_one_cache_group(struct btrfs_trans_handle *trans,
3409 struct btrfs_fs_info *fs_info,
3410 struct btrfs_path *path,
3411 struct btrfs_block_group_cache *cache)
3412{
3413 int ret;
3414 struct btrfs_root *extent_root = fs_info->extent_root;
3415 unsigned long bi;
3416 struct extent_buffer *leaf;
3417
3418 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3419 if (ret) {
3420 if (ret > 0)
3421 ret = -ENOENT;
3422 goto fail;
3423 }
3424
3425 leaf = path->nodes[0];
3426 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3427 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3428 btrfs_mark_buffer_dirty(leaf);
3429fail:
3430 btrfs_release_path(path);
3431 return ret;
3432
3433}
3434
3435static struct btrfs_block_group_cache *
3436next_block_group(struct btrfs_fs_info *fs_info,
3437 struct btrfs_block_group_cache *cache)
3438{
3439 struct rb_node *node;
3440
3441 spin_lock(&fs_info->block_group_cache_lock);
3442
3443
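	/* If our block group was removed, we need a first block group */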
3444 if (RB_EMPTY_NODE(&cache->cache_node)) {
3445 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3446
3447 spin_unlock(&fs_info->block_group_cache_lock);
3448 btrfs_put_block_group(cache);
		cache = btrfs_lookup_first_block_group(fs_info, next_bytenr);
		return cache;
3450 }
3451 node = rb_next(&cache->cache_node);
3452 btrfs_put_block_group(cache);
3453 if (node) {
3454 cache = rb_entry(node, struct btrfs_block_group_cache,
3455 cache_node);
3456 btrfs_get_block_group(cache);
3457 } else
3458 cache = NULL;
3459 spin_unlock(&fs_info->block_group_cache_lock);
3460 return cache;
3461}
3462
3463static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3464 struct btrfs_trans_handle *trans,
3465 struct btrfs_path *path)
3466{
3467 struct btrfs_fs_info *fs_info = block_group->fs_info;
3468 struct btrfs_root *root = fs_info->tree_root;
3469 struct inode *inode = NULL;
3470 struct extent_changeset *data_reserved = NULL;
3471 u64 alloc_hint = 0;
3472 int dcs = BTRFS_DC_ERROR;
3473 u64 num_pages = 0;
3474 int retries = 0;
3475 int ret = 0;
3476
	/*
	 * If this block group is smaller than 100 megs don't bother caching
	 * the block group.
	 */
3481 if (block_group->key.offset < (100 * SZ_1M)) {
3482 spin_lock(&block_group->lock);
3483 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3484 spin_unlock(&block_group->lock);
3485 return 0;
3486 }
3487
3488 if (trans->aborted)
3489 return 0;
3490again:
3491 inode = lookup_free_space_inode(fs_info, block_group, path);
3492 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3493 ret = PTR_ERR(inode);
3494 btrfs_release_path(path);
3495 goto out;
3496 }
3497
3498 if (IS_ERR(inode)) {
3499 BUG_ON(retries);
3500 retries++;
3501
3502 if (block_group->ro)
3503 goto out_free;
3504
3505 ret = create_free_space_inode(fs_info, trans, block_group,
3506 path);
3507 if (ret)
3508 goto out_free;
3509 goto again;
3510 }
3511
	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up
	 * next time.
	 */
3517 BTRFS_I(inode)->generation = 0;
3518 ret = btrfs_update_inode(trans, root, inode);
3519 if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache.  So to
		 * limit our exposure to horrible edge cases lets just abort
		 * the transaction, this only happens in really bad situations
		 * anyway.
		 */
3530 btrfs_abort_transaction(trans, ret);
3531 goto out_put;
3532 }
3533 WARN_ON(ret);
3534
3535
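	/* We've already setup this transaction, go ahead and exit */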
3536 if (block_group->cache_generation == trans->transid &&
3537 i_size_read(inode)) {
3538 dcs = BTRFS_DC_SETUP;
3539 goto out_put;
3540 }
3541
3542 if (i_size_read(inode) > 0) {
3543 ret = btrfs_check_trunc_cache_free_space(fs_info,
3544 &fs_info->global_block_rsv);
3545 if (ret)
3546 goto out_put;
3547
3548 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
3549 if (ret)
3550 goto out_put;
3551 }
3552
3553 spin_lock(&block_group->lock);
3554 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3555 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
		/*
		 * don't bother trying to write stuff out _if_
		 * a) we're not cached,
		 * b) we're with nospace_cache mount option,
		 * c) we're with v2 space_cache (FREE_SPACE_TREE).
		 */
3562 dcs = BTRFS_DC_WRITTEN;
3563 spin_unlock(&block_group->lock);
3564 goto out_put;
3565 }
3566 spin_unlock(&block_group->lock);
3567
	/*
	 * We hit an ENOSPC when setting up the cache in this transaction, just
	 * skip doing the setup, we've already cleared the cache so we're safe.
	 */
3572 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3573 ret = -ENOSPC;
3574 goto out_put;
3575 }
3576
	/*
	 * Try to preallocate enough space based on how big the block group is.
	 * Keep in mind this has to include any pinned space which could end up
	 * taking up quite a bit since it's not folded into the other space
	 * cache.
	 */
3583 num_pages = div_u64(block_group->key.offset, SZ_256M);
3584 if (!num_pages)
3585 num_pages = 1;
3586
3587 num_pages *= 16;
3588 num_pages *= PAGE_SIZE;
3589
3590 ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
3591 if (ret)
3592 goto out_put;
3593
3594 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3595 num_pages, num_pages,
3596 &alloc_hint);
3597
	/*
	 * Our cache requires contiguous chunks so that we don't modify a bunch
	 * of metadata or split extents when writing the cache out, which means
	 * we can enospc if we are heavily fragmented in addition to just normal
	 * out of space conditions.  So if we hit this just skip setting up any
	 * other block groups for this transaction, maybe we'll unpin enough
	 * space the next time around.
	 */
3605 if (!ret)
3606 dcs = BTRFS_DC_SETUP;
3607 else if (ret == -ENOSPC)
3608 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3609
3610out_put:
3611 iput(inode);
3612out_free:
3613 btrfs_release_path(path);
3614out:
3615 spin_lock(&block_group->lock);
3616 if (!ret && dcs == BTRFS_DC_SETUP)
3617 block_group->cache_generation = trans->transid;
3618 block_group->disk_cache_state = dcs;
3619 spin_unlock(&block_group->lock);
3620
3621 extent_changeset_free(data_reserved);
3622 return ret;
3623}
3624
3625int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3626 struct btrfs_fs_info *fs_info)
3627{
3628 struct btrfs_block_group_cache *cache, *tmp;
3629 struct btrfs_transaction *cur_trans = trans->transaction;
3630 struct btrfs_path *path;
3631
3632 if (list_empty(&cur_trans->dirty_bgs) ||
3633 !btrfs_test_opt(fs_info, SPACE_CACHE))
3634 return 0;
3635
3636 path = btrfs_alloc_path();
3637 if (!path)
3638 return -ENOMEM;
3639
3640
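	/* Could add new block groups, use _safe just in case */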
3641 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3642 dirty_list) {
3643 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3644 cache_save_setup(cache, trans, path);
3645 }
3646
3647 btrfs_free_path(path);
3648 return 0;
3649}
3650
/*
 * Transaction commit does final block group cache writeback during a critical
 * section where nothing is allowed to change the FS.  This is required in
 * order for the cache to actually match the block group, but can introduce a
 * lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group cache IO.
 * There's a chance we'll have to redo some of it if the block group changes
 * again during the commit, but it greatly reduces the commit latency by
 * getting rid of the easy block groups while we're still allowing others to
 * join the commit.
 */
3663int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
3664{
3665 struct btrfs_fs_info *fs_info = trans->fs_info;
3666 struct btrfs_block_group_cache *cache;
3667 struct btrfs_transaction *cur_trans = trans->transaction;
3668 int ret = 0;
3669 int should_put;
3670 struct btrfs_path *path = NULL;
3671 LIST_HEAD(dirty);
3672 struct list_head *io = &cur_trans->io_bgs;
3673 int num_started = 0;
3674 int loops = 0;
3675
3676 spin_lock(&cur_trans->dirty_bgs_lock);
3677 if (list_empty(&cur_trans->dirty_bgs)) {
3678 spin_unlock(&cur_trans->dirty_bgs_lock);
3679 return 0;
3680 }
3681 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3682 spin_unlock(&cur_trans->dirty_bgs_lock);
3683
3684again:
	/*
	 * make sure all the block groups on our dirty list actually
	 * exist
	 */
3689 btrfs_create_pending_block_groups(trans);
3690
3691 if (!path) {
3692 path = btrfs_alloc_path();
3693 if (!path)
3694 return -ENOMEM;
3695 }
3696
	/*
	 * cache_write_mutex is here only to save us from balance or automatic
	 * removal of empty block groups deleting this block group while we are
	 * writing out the cache
	 */
3702 mutex_lock(&trans->transaction->cache_write_mutex);
3703 while (!list_empty(&dirty)) {
3704 cache = list_first_entry(&dirty,
3705 struct btrfs_block_group_cache,
3706 dirty_list);
3707
		/*
		 * this can happen if something re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
3712 if (!list_empty(&cache->io_list)) {
3713 list_del_init(&cache->io_list);
3714 btrfs_wait_cache_io(trans, cache, path);
3715 btrfs_put_block_group(cache);
3716 }
3717
		/*
		 * Pull the block group off the transaction's dirty list while
		 * holding dirty_bgs_lock.  If something re-dirties the block
		 * group while we are writing its cache out it will be added
		 * back to the list and we will see it again on the next loop,
		 * so this is safe.
		 */
3727 spin_lock(&cur_trans->dirty_bgs_lock);
3728 list_del_init(&cache->dirty_list);
3729 spin_unlock(&cur_trans->dirty_bgs_lock);
3730
3731 should_put = 1;
3732
3733 cache_save_setup(cache, trans, path);
3734
3735 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3736 cache->io_ctl.inode = NULL;
3737 ret = btrfs_write_out_cache(fs_info, trans,
3738 cache, path);
3739 if (ret == 0 && cache->io_ctl.inode) {
3740 num_started++;
3741 should_put = 0;
3742
				/*
				 * The cache_write_mutex is protecting the
				 * io_list, also refer to the definition of
				 * btrfs_transaction::io_bgs for more details
				 */
3748 list_add_tail(&cache->io_list, io);
3749 } else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
3754 ret = 0;
3755 }
3756 }
3757 if (!ret) {
3758 ret = write_one_cache_group(trans, fs_info,
3759 path, cache);
			/*
			 * Our block group might still be attached to the list
			 * of new block groups in the transaction handle of some
			 * other task (struct btrfs_trans_handle->new_bgs). This
			 * means its block group item isn't yet in the extent
			 * tree. If this happens ignore the error, as we will
			 * try again later in the critical section of the
			 * transaction commit.
			 */
3769 if (ret == -ENOENT) {
3770 ret = 0;
3771 spin_lock(&cur_trans->dirty_bgs_lock);
3772 if (list_empty(&cache->dirty_list)) {
3773 list_add_tail(&cache->dirty_list,
3774 &cur_trans->dirty_bgs);
3775 btrfs_get_block_group(cache);
3776 }
3777 spin_unlock(&cur_trans->dirty_bgs_lock);
3778 } else if (ret) {
3779 btrfs_abort_transaction(trans, ret);
3780 }
3781 }
3782
3783
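		/* if its not on the io list, we need to put the block group */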
3784 if (should_put)
3785 btrfs_put_block_group(cache);
3786
3787 if (ret)
3788 break;
3789
		/*
		 * Avoid blocking other tasks for too long. It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
3795 mutex_unlock(&trans->transaction->cache_write_mutex);
3796 mutex_lock(&trans->transaction->cache_write_mutex);
3797 }
3798 mutex_unlock(&trans->transaction->cache_write_mutex);
3799
	/*
	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
	 */
3804 ret = btrfs_run_delayed_refs(trans, 0);
3805 if (!ret && loops == 0) {
3806 loops++;
3807 spin_lock(&cur_trans->dirty_bgs_lock);
3808 list_splice_init(&cur_trans->dirty_bgs, &dirty);
		/*
		 * dirty_bgs_lock protects us from concurrent block group
		 * deletes too (not just cache_write_mutex).
		 */
3813 if (!list_empty(&dirty)) {
3814 spin_unlock(&cur_trans->dirty_bgs_lock);
3815 goto again;
3816 }
3817 spin_unlock(&cur_trans->dirty_bgs_lock);
3818 } else if (ret < 0) {
3819 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
3820 }
3821
3822 btrfs_free_path(path);
3823 return ret;
3824}
3825
3826int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3827 struct btrfs_fs_info *fs_info)
3828{
3829 struct btrfs_block_group_cache *cache;
3830 struct btrfs_transaction *cur_trans = trans->transaction;
3831 int ret = 0;
3832 int should_put;
3833 struct btrfs_path *path;
3834 struct list_head *io = &cur_trans->io_bgs;
3835 int num_started = 0;
3836
3837 path = btrfs_alloc_path();
3838 if (!path)
3839 return -ENOMEM;
3840
	/*
	 * Even though we are in the critical section of the transaction commit,
	 * we can still have concurrent tasks adding elements to this
	 * transaction's list of dirty block groups. These tasks correspond to
	 * endio free space workers started when writeback finishes for a
	 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
	 * allocate new block groups as a result of COWing nodes of the root
	 * tree when updating the free space inode. The writeback for the space
	 * caches is triggered by an earlier call to
	 * btrfs_start_dirty_block_groups() and iterations of the following
	 * loop.
	 * Also we want to do the cache_save_setup first and then run the
	 * delayed refs to make sure we have the best chance at doing this all
	 * in one shot.
	 */
3856 spin_lock(&cur_trans->dirty_bgs_lock);
3857 while (!list_empty(&cur_trans->dirty_bgs)) {
3858 cache = list_first_entry(&cur_trans->dirty_bgs,
3859 struct btrfs_block_group_cache,
3860 dirty_list);
3861
		/*
		 * this can happen if cache_save_setup re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
3867 if (!list_empty(&cache->io_list)) {
3868 spin_unlock(&cur_trans->dirty_bgs_lock);
3869 list_del_init(&cache->io_list);
3870 btrfs_wait_cache_io(trans, cache, path);
3871 btrfs_put_block_group(cache);
3872 spin_lock(&cur_trans->dirty_bgs_lock);
3873 }
3874
		/*
		 * don't remove from the dirty list until after we've waited
		 * on any pending IO
		 */
3879 list_del_init(&cache->dirty_list);
3880 spin_unlock(&cur_trans->dirty_bgs_lock);
3881 should_put = 1;
3882
3883 cache_save_setup(cache, trans, path);
3884
3885 if (!ret)
3886 ret = btrfs_run_delayed_refs(trans,
3887 (unsigned long) -1);
3888
3889 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3890 cache->io_ctl.inode = NULL;
3891 ret = btrfs_write_out_cache(fs_info, trans,
3892 cache, path);
3893 if (ret == 0 && cache->io_ctl.inode) {
3894 num_started++;
3895 should_put = 0;
3896 list_add_tail(&cache->io_list, io);
3897 } else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
3902 ret = 0;
3903 }
3904 }
3905 if (!ret) {
3906 ret = write_one_cache_group(trans, fs_info,
3907 path, cache);
			/*
			 * One of the free space endio workers might have
			 * created a new block group while updating a free space
			 * cache's inode (at inode.c:btrfs_finish_ordered_io())
			 * and hasn't released its transaction handle yet, in
			 * which case the new block group is still attached to
			 * its transaction handle and its creation has not
			 * finished yet (no block group item in the extent tree
			 * yet, etc). If this is the case, wait for all free
			 * space endio workers to finish and retry. This is a
			 * very rare case so no need for a more efficient and
			 * complex approach.
			 */
3921 if (ret == -ENOENT) {
3922 wait_event(cur_trans->writer_wait,
3923 atomic_read(&cur_trans->num_writers) == 1);
3924 ret = write_one_cache_group(trans, fs_info,
3925 path, cache);
3926 }
3927 if (ret)
3928 btrfs_abort_transaction(trans, ret);
3929 }
3930
3931
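		/* if its not on the io list, we need to put the block group */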
3932 if (should_put)
3933 btrfs_put_block_group(cache);
3934 spin_lock(&cur_trans->dirty_bgs_lock);
3935 }
3936 spin_unlock(&cur_trans->dirty_bgs_lock);
3937
	/*
	 * Refer to the definition of io_bgs member for details why it's safe
	 * to use it without any locking
	 */
3942 while (!list_empty(io)) {
3943 cache = list_first_entry(io, struct btrfs_block_group_cache,
3944 io_list);
3945 list_del_init(&cache->io_list);
3946 btrfs_wait_cache_io(trans, cache, path);
3947 btrfs_put_block_group(cache);
3948 }
3949
3950 btrfs_free_path(path);
3951 return ret;
3952}
3953
3954int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
3955{
3956 struct btrfs_block_group_cache *block_group;
3957 int readonly = 0;
3958
3959 block_group = btrfs_lookup_block_group(fs_info, bytenr);
3960 if (!block_group || block_group->ro)
3961 readonly = 1;
3962 if (block_group)
3963 btrfs_put_block_group(block_group);
3964 return readonly;
3965}
3966
3967bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3968{
3969 struct btrfs_block_group_cache *bg;
3970 bool ret = true;
3971
3972 bg = btrfs_lookup_block_group(fs_info, bytenr);
3973 if (!bg)
3974 return false;
3975
3976 spin_lock(&bg->lock);
3977 if (bg->ro)
3978 ret = false;
3979 else
3980 atomic_inc(&bg->nocow_writers);
3981 spin_unlock(&bg->lock);
3982
3983
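	/* no put on block group, done by btrfs_dec_nocow_writers */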
3984 if (!ret)
3985 btrfs_put_block_group(bg);
3986
3987 return ret;
3988
3989}
3990
3991void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3992{
3993 struct btrfs_block_group_cache *bg;
3994
3995 bg = btrfs_lookup_block_group(fs_info, bytenr);
3996 ASSERT(bg);
3997 if (atomic_dec_and_test(&bg->nocow_writers))
3998 wake_up_var(&bg->nocow_writers);
	/*
	 * Once for our lookup above and once for the lookup done by a previous
	 * call to btrfs_inc_nocow_writers()
	 */
4003 btrfs_put_block_group(bg);
4004 btrfs_put_block_group(bg);
4005}
4006
4007void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
4008{
4009 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
4010}
4011
4012static const char *alloc_name(u64 flags)
4013{
4014 switch (flags) {
4015 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
4016 return "mixed";
4017 case BTRFS_BLOCK_GROUP_METADATA:
4018 return "metadata";
4019 case BTRFS_BLOCK_GROUP_DATA:
4020 return "data";
4021 case BTRFS_BLOCK_GROUP_SYSTEM:
4022 return "system";
4023 default:
4024 WARN_ON(1);
4025 return "invalid-combination";
	}
4027}
4028
4029static int create_space_info(struct btrfs_fs_info *info, u64 flags,
4030 struct btrfs_space_info **new)
4031{
4032
4033 struct btrfs_space_info *space_info;
4034 int i;
4035 int ret;
4036
4037 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
4038 if (!space_info)
4039 return -ENOMEM;
4040
4041 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
4042 GFP_KERNEL);
4043 if (ret) {
4044 kfree(space_info);
4045 return ret;
4046 }
4047
4048 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
4049 INIT_LIST_HEAD(&space_info->block_groups[i]);
4050 init_rwsem(&space_info->groups_sem);
4051 spin_lock_init(&space_info->lock);
4052 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
4053 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4054 init_waitqueue_head(&space_info->wait);
4055 INIT_LIST_HEAD(&space_info->ro_bgs);
4056 INIT_LIST_HEAD(&space_info->tickets);
4057 INIT_LIST_HEAD(&space_info->priority_tickets);
4058
4059 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
4060 info->space_info_kobj, "%s",
4061 alloc_name(space_info->flags));
4062 if (ret) {
4063 percpu_counter_destroy(&space_info->total_bytes_pinned);
4064 kfree(space_info);
4065 return ret;
4066 }
4067
4068 *new = space_info;
4069 list_add_rcu(&space_info->list, &info->space_info);
4070 if (flags & BTRFS_BLOCK_GROUP_DATA)
4071 info->data_sinfo = space_info;
4072
4073 return ret;
4074}
4075
4076static void update_space_info(struct btrfs_fs_info *info, u64 flags,
4077 u64 total_bytes, u64 bytes_used,
4078 u64 bytes_readonly,
4079 struct btrfs_space_info **space_info)
4080{
4081 struct btrfs_space_info *found;
4082 int factor;
4083
4084 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
4085 BTRFS_BLOCK_GROUP_RAID10))
4086 factor = 2;
4087 else
4088 factor = 1;
4089
4090 found = __find_space_info(info, flags);
4091 ASSERT(found);
4092 spin_lock(&found->lock);
4093 found->total_bytes += total_bytes;
4094 found->disk_total += total_bytes * factor;
4095 found->bytes_used += bytes_used;
4096 found->disk_used += bytes_used * factor;
4097 found->bytes_readonly += bytes_readonly;
4098 if (total_bytes > 0)
4099 found->full = 0;
4100 space_info_add_new_bytes(info, found, total_bytes -
4101 bytes_used - bytes_readonly);
4102 spin_unlock(&found->lock);
4103 *space_info = found;
4104}
4105
4106static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
4107{
4108 u64 extra_flags = chunk_to_extended(flags) &
4109 BTRFS_EXTENDED_PROFILE_MASK;
4110
4111 write_seqlock(&fs_info->profiles_lock);
4112 if (flags & BTRFS_BLOCK_GROUP_DATA)
4113 fs_info->avail_data_alloc_bits |= extra_flags;
4114 if (flags & BTRFS_BLOCK_GROUP_METADATA)
4115 fs_info->avail_metadata_alloc_bits |= extra_flags;
4116 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4117 fs_info->avail_system_alloc_bits |= extra_flags;
4118 write_sequnlock(&fs_info->profiles_lock);
4119}
4120
/*
 * returns target flags in extended format or 0 if restripe for this chunk_type
 * is not in progress
 *
 * should be called with balance_lock held
 */
4127static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
4128{
4129 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4130 u64 target = 0;
4131
4132 if (!bctl)
4133 return 0;
4134
4135 if (flags & BTRFS_BLOCK_GROUP_DATA &&
4136 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4137 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
4138 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
4139 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4140 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
4141 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
4142 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4143 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
4144 }
4145
4146 return target;
4147}
4148
/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in progress
 * (either running or paused) picks the target profile (if it's already
 * available), otherwise falls back to plain reducing.
 */
4156static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
4157{
4158 u64 num_devices = fs_info->fs_devices->rw_devices;
4159 u64 target;
4160 u64 raid_type;
4161 u64 allowed = 0;
4162
	/*
	 * see if restripe for this chunk_type is in progress, if so
	 * try to reduce to the target profile
	 */
4167 spin_lock(&fs_info->balance_lock);
4168 target = get_restripe_target(fs_info, flags);
4169 if (target) {
4170
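		/* pick target profile only if it's already available */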
4171 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
4172 spin_unlock(&fs_info->balance_lock);
4173 return extended_to_chunk(target);
4174 }
4175 }
4176 spin_unlock(&fs_info->balance_lock);
4177
4178
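	/* First, mask out the RAID levels which aren't possible */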
4179 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
4180 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
4181 allowed |= btrfs_raid_group[raid_type];
4182 }
4183 allowed &= flags;
4184
4185 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
4186 allowed = BTRFS_BLOCK_GROUP_RAID6;
4187 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4188 allowed = BTRFS_BLOCK_GROUP_RAID5;
4189 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4190 allowed = BTRFS_BLOCK_GROUP_RAID10;
4191 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4192 allowed = BTRFS_BLOCK_GROUP_RAID1;
4193 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4194 allowed = BTRFS_BLOCK_GROUP_RAID0;
4195
4196 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
4197
4198 return extended_to_chunk(flags | allowed);
4199}
4200
4201static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
4202{
4203 unsigned seq;
4204 u64 flags;
4205
4206 do {
4207 flags = orig_flags;
4208 seq = read_seqbegin(&fs_info->profiles_lock);
4209
4210 if (flags & BTRFS_BLOCK_GROUP_DATA)
4211 flags |= fs_info->avail_data_alloc_bits;
4212 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4213 flags |= fs_info->avail_system_alloc_bits;
4214 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
4215 flags |= fs_info->avail_metadata_alloc_bits;
4216 } while (read_seqretry(&fs_info->profiles_lock, seq));
4217
4218 return btrfs_reduce_alloc_profile(fs_info, flags);
4219}
4220
4221static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
4222{
4223 struct btrfs_fs_info *fs_info = root->fs_info;
4224 u64 flags;
4225 u64 ret;
4226
4227 if (data)
4228 flags = BTRFS_BLOCK_GROUP_DATA;
4229 else if (root == fs_info->chunk_root)
4230 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4231 else
4232 flags = BTRFS_BLOCK_GROUP_METADATA;
4233
4234 ret = get_alloc_profile(fs_info, flags);
4235 return ret;
4236}
4237
4238u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
4239{
4240 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
4241}
4242
4243u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
4244{
4245 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4246}
4247
4248u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4249{
4250 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4251}
4252
4253static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4254 bool may_use_included)
4255{
4256 ASSERT(s_info);
4257 return s_info->bytes_used + s_info->bytes_reserved +
4258 s_info->bytes_pinned + s_info->bytes_readonly +
4259 (may_use_included ? s_info->bytes_may_use : 0);
4260}
4261
4262int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
4263{
4264 struct btrfs_root *root = inode->root;
4265 struct btrfs_fs_info *fs_info = root->fs_info;
4266 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
4267 u64 used;
4268 int ret = 0;
4269 int need_commit = 2;
4270 int have_pinned_space;
4271
4272
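	/* make sure bytes are sectorsize aligned */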
4273 bytes = ALIGN(bytes, fs_info->sectorsize);
4274
4275 if (btrfs_is_free_space_inode(inode)) {
4276 need_commit = 0;
4277 ASSERT(current->journal_info);
4278 }
4279
4280again:
4281
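	/* make sure we have enough space to handle the data first */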
4282 spin_lock(&data_sinfo->lock);
4283 used = btrfs_space_info_used(data_sinfo, true);
4284
4285 if (used + bytes > data_sinfo->total_bytes) {
4286 struct btrfs_trans_handle *trans;
4287
		/*
		 * if we don't have enough free bytes in this space then we
		 * need to alloc a new chunk.
		 */
4292 if (!data_sinfo->full) {
4293 u64 alloc_target;
4294
4295 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4296 spin_unlock(&data_sinfo->lock);
4297
4298 alloc_target = btrfs_data_alloc_profile(fs_info);
4299
			/*
			 * It is ugly that we don't call nolock join
			 * transaction for the free space inode case here.
			 * But it is safe because we only do the data space
			 * reservation for the free space cache in the
			 * transaction context, the common join transaction
			 * just increase the counter of the current transaction
			 * handler, doesn't try to acquire the trans_lock of
			 * the fs.
			 */
4309 trans = btrfs_join_transaction(root);
4310 if (IS_ERR(trans))
4311 return PTR_ERR(trans);
4312
4313 ret = do_chunk_alloc(trans, fs_info, alloc_target,
4314 CHUNK_ALLOC_NO_FORCE);
4315 btrfs_end_transaction(trans);
4316 if (ret < 0) {
4317 if (ret != -ENOSPC)
4318 return ret;
4319 else {
4320 have_pinned_space = 1;
4321 goto commit_trans;
4322 }
4323 }
4324
4325 goto again;
4326 }
4327
		/*
		 * If we don't have enough pinned space to deal with this
		 * allocation, and no removed chunk in current transaction,
		 * don't bother committing the transaction.
		 */
4333 have_pinned_space = percpu_counter_compare(
4334 &data_sinfo->total_bytes_pinned,
4335 used + bytes - data_sinfo->total_bytes);
4336 spin_unlock(&data_sinfo->lock);
4337
4338
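		/* commit the current transaction and try again */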
4339commit_trans:
4340 if (need_commit) {
4341 need_commit--;
4342
4343 if (need_commit > 0) {
4344 btrfs_start_delalloc_roots(fs_info, 0, -1);
4345 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
4346 (u64)-1);
4347 }
4348
4349 trans = btrfs_join_transaction(root);
4350 if (IS_ERR(trans))
4351 return PTR_ERR(trans);
4352 if (have_pinned_space >= 0 ||
4353 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4354 &trans->transaction->flags) ||
4355 need_commit > 0) {
4356 ret = btrfs_commit_transaction(trans);
4357 if (ret)
4358 return ret;
4359
			/*
			 * The cleaner kthread might still be doing iput
			 * operations. Wait for it to finish so that more space
			 * is released.
			 */
4364 mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
4365 mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
4366 goto again;
4367 } else {
4368 btrfs_end_transaction(trans);
4369 }
4370 }
4371
4372 trace_btrfs_space_reservation(fs_info,
4373 "space_info:enospc",
4374 data_sinfo->flags, bytes, 1);
4375 return -ENOSPC;
4376 }
4377 data_sinfo->bytes_may_use += bytes;
4378 trace_btrfs_space_reservation(fs_info, "space_info",
4379 data_sinfo->flags, bytes, 1);
4380 spin_unlock(&data_sinfo->lock);
4381
4382 return ret;
4383}
4384
4385int btrfs_check_data_free_space(struct inode *inode,
4386 struct extent_changeset **reserved, u64 start, u64 len)
4387{
4388 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4389 int ret;
4390
4391
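	/* align the range */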
4392 len = round_up(start + len, fs_info->sectorsize) -
4393 round_down(start, fs_info->sectorsize);
4394 start = round_down(start, fs_info->sectorsize);
4395
4396 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
4397 if (ret < 0)
4398 return ret;
4399
4400
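	/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */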
4401 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
4402 if (ret < 0)
4403 btrfs_free_reserved_data_space_noquota(inode, start, len);
4404 else
4405 ret = 0;
4406 return ret;
4407}
4408
/*
 * Called if we need to clear a data reservation for this inode
 * Normally in a error case.
 *
 * This one will *NOT* use accurate qgroup reserved space API, but for case
 * which we can't sleep and is sure it won't affect qgroup reserved space.
 * Like clear_bit_hook().
 */
4417void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4418 u64 len)
4419{
4420 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4421 struct btrfs_space_info *data_sinfo;
4422
4423
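	/* Make sure the range is aligned to sectorsize */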
4424 len = round_up(start + len, fs_info->sectorsize) -
4425 round_down(start, fs_info->sectorsize);
4426 start = round_down(start, fs_info->sectorsize);
4427
4428 data_sinfo = fs_info->data_sinfo;
4429 spin_lock(&data_sinfo->lock);
4430 if (WARN_ON(data_sinfo->bytes_may_use < len))
4431 data_sinfo->bytes_may_use = 0;
4432 else
4433 data_sinfo->bytes_may_use -= len;
4434 trace_btrfs_space_reservation(fs_info, "space_info",
4435 data_sinfo->flags, len, 0);
4436 spin_unlock(&data_sinfo->lock);
4437}
4438
/*
 * Called if we need to clear a data reservation for this inode
 * Normally in a error case.
 *
 * This one will handle the per-inode data rsv map for accurate reserved
 * space framework.
 */
4446void btrfs_free_reserved_data_space(struct inode *inode,
4447 struct extent_changeset *reserved, u64 start, u64 len)
4448{
4449 struct btrfs_root *root = BTRFS_I(inode)->root;
4450
4451
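	/* Make sure the range is aligned to sectorsize */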
4452 len = round_up(start + len, root->fs_info->sectorsize) -
4453 round_down(start, root->fs_info->sectorsize);
4454 start = round_down(start, root->fs_info->sectorsize);
4455
4456 btrfs_free_reserved_data_space_noquota(inode, start, len);
4457 btrfs_qgroup_free_data(inode, reserved, start, len);
4458}
4459
4460static void force_metadata_allocation(struct btrfs_fs_info *info)
4461{
4462 struct list_head *head = &info->space_info;
4463 struct btrfs_space_info *found;
4464
4465 rcu_read_lock();
4466 list_for_each_entry_rcu(found, head, list) {
4467 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4468 found->force_alloc = CHUNK_ALLOC_FORCE;
4469 }
4470 rcu_read_unlock();
4471}
4472
4473static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4474{
4475 return (global->size << 1);
4476}
4477
4478static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
4479 struct btrfs_space_info *sinfo, int force)
4480{
4481 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4482 u64 bytes_used = btrfs_space_info_used(sinfo, false);
4483 u64 thresh;
4484
4485 if (force == CHUNK_ALLOC_FORCE)
4486 return 1;
4487
	/*
	 * We need to take into account the global rsv because for all intents
	 * and purposes it's used space.  Don't worry about locking the
	 * global_rsv, it doesn't change except when the transaction commits.
	 */
4493 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
4494 bytes_used += calc_global_rsv_need_space(global_rsv);
4495
	/*
	 * in limited mode, we want to have some free space up to
	 * about 1% of the FS size.
	 */
4500 if (force == CHUNK_ALLOC_LIMITED) {
4501 thresh = btrfs_super_total_bytes(fs_info->super_copy);
4502 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
4503
4504 if (sinfo->total_bytes - bytes_used < thresh)
4505 return 1;
4506 }
4507
4508 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
4509 return 0;
4510 return 1;
4511}
4512
4513static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4514{
4515 u64 num_dev;
4516
4517 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4518 BTRFS_BLOCK_GROUP_RAID0 |
4519 BTRFS_BLOCK_GROUP_RAID5 |
4520 BTRFS_BLOCK_GROUP_RAID6))
4521 num_dev = fs_info->fs_devices->rw_devices;
4522 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4523 num_dev = 2;
4524 else
4525 num_dev = 1;
4526
4527 return num_dev;
4528}
4529
/*
 * Reserve space in the system space_info for a pending chunk allocation or
 * removal, so that updating the device items and inserting or removing the
 * chunk item cannot fail with ENOSPC later on.
 */
4535void check_system_chunk(struct btrfs_trans_handle *trans,
4536 struct btrfs_fs_info *fs_info, u64 type)
4537{
4538 struct btrfs_space_info *info;
4539 u64 left;
4540 u64 thresh;
4541 int ret = 0;
4542 u64 num_devs;
4543
	/*
	 * Needed because we can end up allocating a system chunk and for an
	 * atomic and race free space reservation in the chunk block reserve.
	 */
4548 lockdep_assert_held(&fs_info->chunk_mutex);
4549
4550 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4551 spin_lock(&info->lock);
4552 left = info->total_bytes - btrfs_space_info_used(info, true);
4553 spin_unlock(&info->lock);
4554
4555 num_devs = get_profile_num_devs(fs_info, type);
4556
4557
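	/* num_devs device items to update and 1 chunk item to add or remove */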
4558 thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
4559 btrfs_calc_trans_metadata_size(fs_info, 1);
4560
4561 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4562 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4563 left, thresh, type);
4564 dump_space_info(fs_info, info, 0, 0);
4565 }
4566
4567 if (left < thresh) {
4568 u64 flags = btrfs_system_alloc_profile(fs_info);
4569
4570 /*
4571 * Ignore failure to create system chunk. We might end up not
4572 * needing it, as we might not need to COW all nodes/leafs from
4573 * the paths we visit in the chunk tree (they were already COWed
4574 * or created in the current transaction for example).
4575 */
4576 ret = btrfs_alloc_chunk(trans, fs_info, flags);
4577 }
4578
4579 if (!ret) {
4580 ret = btrfs_block_rsv_add(fs_info->chunk_root,
4581 &fs_info->chunk_block_rsv,
4582 thresh, BTRFS_RESERVE_NO_FLUSH);
4583 if (!ret)
4584 trans->chunk_bytes_reserved += thresh;
4585 }
4586}
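/*
 * Rough numbers (illustrative, assuming 16KiB nodes and
 * BTRFS_MAX_LEVEL == 8): a RAID1 chunk (num_devs == 2) needs
 * thresh = 2 * 128KiB for the device item updates plus 256KiB for the
 * chunk item, i.e. 512KiB of SYSTEM space, a small fraction of a
 * typical 8-32MiB system chunk.
 */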
4587
4588/*
4589 * If force is CHUNK_ALLOC_FORCE:
4590 *    - return 1 if it successfully allocates a chunk,
4591 *    - return errors including -ENOSPC otherwise.
4592 * If force is NOT CHUNK_ALLOC_FORCE:
4593 *    - return 0 if it doesn't even need to allocate a new chunk,
4594 *    - return 1 if it successfully allocates a chunk,
4595 *    - return errors including -ENOSPC otherwise.
4596 */
4597static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4598 struct btrfs_fs_info *fs_info, u64 flags, int force)
4599{
4600 struct btrfs_space_info *space_info;
4601 int wait_for_alloc = 0;
4602 int ret = 0;
4603
4604 /* Don't re-enter if we're already allocating a chunk */
4605 if (trans->allocating_chunk)
4606 return -ENOSPC;
4607
4608 space_info = __find_space_info(fs_info, flags);
4609 ASSERT(space_info);
4610
4611again:
4612 spin_lock(&space_info->lock);
4613 if (force < space_info->force_alloc)
4614 force = space_info->force_alloc;
4615 if (space_info->full) {
4616 if (should_alloc_chunk(fs_info, space_info, force))
4617 ret = -ENOSPC;
4618 else
4619 ret = 0;
4620 spin_unlock(&space_info->lock);
4621 return ret;
4622 }
4623
4624 if (!should_alloc_chunk(fs_info, space_info, force)) {
4625 spin_unlock(&space_info->lock);
4626 return 0;
4627 } else if (space_info->chunk_alloc) {
4628 wait_for_alloc = 1;
4629 } else {
4630 space_info->chunk_alloc = 1;
4631 }
4632
4633 spin_unlock(&space_info->lock);
4634
4635 mutex_lock(&fs_info->chunk_mutex);
4636
4637 /*
4638 * The chunk_mutex is held throughout the entirety of a chunk
4639 * allocation, so once we've acquired the chunk_mutex we know that the
4640 * other guy is done and we need to recheck and see if we should
4641 * allocate.
4642 */
4643 if (wait_for_alloc) {
4644 mutex_unlock(&fs_info->chunk_mutex);
4645 wait_for_alloc = 0;
4646 cond_resched();
4647 goto again;
4648 }
4649
4650 trans->allocating_chunk = true;
4651
4652 /*
4653 * If we have a mixed space_info we want to make sure we keep allocating
4654 * mixed data+metadata chunks instead of individual ones.
4655 */
4656 if (btrfs_mixed_space_info(space_info))
4657 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4658
4659 /*
4660 * If we're doing a data chunk, go ahead and make sure that
4661 * we keep a reasonable number of metadata chunks allocated in the
4662 * FS as well.
4663 */
4664 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
4665 fs_info->data_chunk_allocations++;
4666 if (!(fs_info->data_chunk_allocations %
4667 fs_info->metadata_ratio))
4668 force_metadata_allocation(fs_info);
4669 }
4670
4671 /*
4672 * Check if we have enough space in SYSTEM chunk because we may need
4673 * to update devices.
4674 */
4675 check_system_chunk(trans, fs_info, flags);
4676
4677 ret = btrfs_alloc_chunk(trans, fs_info, flags);
4678 trans->allocating_chunk = false;
4679
4680 spin_lock(&space_info->lock);
4681 if (ret < 0 && ret != -ENOSPC)
4682 goto out;
4683 if (ret)
4684 space_info->full = 1;
4685 else
4686 ret = 1;
4687
4688 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4689out:
4690 space_info->chunk_alloc = 0;
4691 spin_unlock(&space_info->lock);
4692 mutex_unlock(&fs_info->chunk_mutex);
4693
4694 /*
4695 * When we allocate a new chunk we reserve space in the chunk block
4696 * reserve to make sure we can COW nodes/leafs in the chunk tree or
4697 * add new nodes/leafs to it if we end up needing to do it when
4698 * inserting the chunk item and updating device items as part of the
4699 * chunk btree update. Logically we need the chunk_bytes_reserved,
4700 * but we can't rely on a transaction commit happening soon to return
4701 * it, so once enough chunk reservations have piled up (2MiB here) and
4702 * the handle is allowed to flush, we create the pending block groups
4703 * and release the chunk metadata reservation ourselves. Otherwise a
4704 * long-running transaction could hold on to that space and starve the
4705 * system space_info in the meantime.
4706 */
4707 if (trans->can_flush_pending_bgs &&
4708 trans->chunk_bytes_reserved >= (u64)SZ_2M) {
4709 btrfs_create_pending_block_groups(trans);
4710 btrfs_trans_release_chunk_metadata(trans);
4711 }
4712 return ret;
4713}
4714
4715static int can_overcommit(struct btrfs_fs_info *fs_info,
4716 struct btrfs_space_info *space_info, u64 bytes,
4717 enum btrfs_reserve_flush_enum flush,
4718 bool system_chunk)
4719{
4720 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4721 u64 profile;
4722 u64 space_size;
4723 u64 avail;
4724 u64 used;
4725
4726 /* Don't overcommit when in mixed mode. */
4727 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4728 return 0;
4729
4730 if (system_chunk)
4731 profile = btrfs_system_alloc_profile(fs_info);
4732 else
4733 profile = btrfs_metadata_alloc_profile(fs_info);
4734
4735 used = btrfs_space_info_used(space_info, false);
4736
4737 /*
4738 * We only want to allow over committing if we have lots of actual space
4739 * free, but if we don't have enough space to handle the global reserve
4740 * space then we could end up having a real enospc problem when trying
4741 * to allocate a chunk or some other such important allocation.
4742 */
4743 spin_lock(&global_rsv->lock);
4744 space_size = calc_global_rsv_need_space(global_rsv);
4745 spin_unlock(&global_rsv->lock);
4746 if (used + space_size >= space_info->total_bytes)
4747 return 0;
4748
4749 used += space_info->bytes_may_use;
4750
4751 avail = atomic64_read(&fs_info->free_chunk_space);
4752
4753 /*
4754 * If we have dup, raid1 or raid10 then only half of the free
4755 * space is actually usable. For raid56, the space info used
4756 * doesn't include the parity drive, so we don't have to
4757 * change the math.
4758 */
4759 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4760 BTRFS_BLOCK_GROUP_RAID1 |
4761 BTRFS_BLOCK_GROUP_RAID10))
4762 avail >>= 1;
4763
4764 /*
4765 * If we aren't flushing all things, let us overcommit up to
4766 * 1/2th of the space. If we can flush, don't let us overcommit
4767 * too much, let it overcommit up to 1/8 of the space.
4768 */
4769 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4770 avail >>= 3;
4771 else
4772 avail >>= 1;
4773
4774 if (used + bytes < space_info->total_bytes + avail)
4775 return 1;
4776 return 0;
4777}
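/*
 * Worked example for can_overcommit() (illustrative, not from the
 * original source): a metadata space_info with total_bytes = 8GiB and
 * used = 7GiB, plus 4GiB of still-unallocated disk under RAID1 metadata.
 * RAID1 halves avail to 2GiB, and a FLUSH_ALL caller may overcommit by
 * 2GiB / 8 = 256MiB: a 128MiB reservation passes the final check
 * (7GiB + 128MiB < 8GiB + 256MiB) while a 2GiB one does not.
 */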
4778
4779static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
4780 unsigned long nr_pages, int nr_items)
4781{
4782 struct super_block *sb = fs_info->sb;
4783
4784 if (down_read_trylock(&sb->s_umount)) {
4785 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4786 up_read(&sb->s_umount);
4787 } else {
4788 /*
4789 * We needn't worry about the filesystem going from r/w to r/o even
4790 * though we don't acquire the ->s_umount mutex, because the
4791 * filesystem should guarantee the delalloc inodes list be empty
4792 * after the filesystem is read-only (all dirty pages have been
4793 * written to disk).
4794 */
4795 btrfs_start_delalloc_roots(fs_info, 0, nr_items);
4796 if (!current->journal_info)
4797 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
4798 }
4799}
4800
4801static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
4802 u64 to_reclaim)
4803{
4804 u64 bytes;
4805 u64 nr;
4806
4807 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
4808 nr = div64_u64(to_reclaim, bytes);
4809 if (!nr)
4810 nr = 1;
4811 return nr;
4812}
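/*
 * Illustrative arithmetic (assumes 16KiB nodes and BTRFS_MAX_LEVEL == 8,
 * so btrfs_calc_trans_metadata_size(fs_info, 1) == 16KiB * 8 * 2
 * = 256KiB): a 1MiB reclaim target maps to 1MiB / 256KiB = 4 items, and
 * anything smaller than one item's worth rounds up to a single item.
 */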
4813
4814#define EXTENT_SIZE_PER_ITEM SZ_256K
4815
4816/*
4817 * shrink metadata reservation for delalloc
4818 */
4819static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4820 u64 orig, bool wait_ordered)
4821{
4822 struct btrfs_space_info *space_info;
4823 struct btrfs_trans_handle *trans;
4824 u64 delalloc_bytes;
4825 u64 max_reclaim;
4826 u64 items;
4827 long time_left;
4828 unsigned long nr_pages;
4829 int loops;
4830
4831 /* Calc the number of the pages we need flush for space reservation */
4832 items = calc_reclaim_items_nr(fs_info, to_reclaim);
4833 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4834
4835 trans = (struct btrfs_trans_handle *)current->journal_info;
4836 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4837
4838 delalloc_bytes = percpu_counter_sum_positive(
4839 &fs_info->delalloc_bytes);
4840 if (delalloc_bytes == 0) {
4841 if (trans)
4842 return;
4843 if (wait_ordered)
4844 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4845 return;
4846 }
4847
4848 loops = 0;
4849 while (delalloc_bytes && loops < 3) {
4850 max_reclaim = min(delalloc_bytes, to_reclaim);
4851 nr_pages = max_reclaim >> PAGE_SHIFT;
4852 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
4853
4854 /*
4855 * We need to wait for the async pages to actually start before we do anything.
4856 */
4857 max_reclaim = atomic_read(&fs_info->async_delalloc_pages);
4858 if (!max_reclaim)
4859 goto skip_async;
4860
4861 if (max_reclaim <= nr_pages)
4862 max_reclaim = 0;
4863 else
4864 max_reclaim -= nr_pages;
4865
4866 wait_event(fs_info->async_submit_wait,
4867 atomic_read(&fs_info->async_delalloc_pages) <=
4868 (int)max_reclaim);
4869skip_async:
4870 spin_lock(&space_info->lock);
4871 if (list_empty(&space_info->tickets) &&
4872 list_empty(&space_info->priority_tickets)) {
4873 spin_unlock(&space_info->lock);
4874 break;
4875 }
4876 spin_unlock(&space_info->lock);
4877
4878 loops++;
4879 if (wait_ordered && !trans) {
4880 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4881 } else {
4882 time_left = schedule_timeout_killable(1);
4883 if (time_left)
4884 break;
4885 }
4886 delalloc_bytes = percpu_counter_sum_positive(
4887 &fs_info->delalloc_bytes);
4888 }
4889}
4890
4891struct reserve_ticket {
4892 u64 bytes;
4893 int error;
4894 struct list_head list;
4895 wait_queue_head_t wait;
4896};
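/*
 * Note added for clarity: a reserve_ticket lives on the stack of the
 * task in __reserve_metadata_bytes(). As flushers free space,
 * space_info_add_old_bytes()/space_info_add_new_bytes() hand it to the
 * oldest tickets first; ticket->bytes dropping to zero means the
 * reservation was satisfied, while ticket->error is set instead when
 * the flusher gives up.
 */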
4897
4898/**
4899 * may_commit_transaction - possibly commit the transaction if its ok to
4900 * @fs_info - the filesystem we're allocating for
4901 * @space_info - the space_info we want to reserve from
4902 *
4903 * This will check to make sure that committing the transaction will actually
4904 * get us somewhere and then commit the transaction if it does. Otherwise it
4905 * will return -ENOSPC, or -EAGAIN if we are already running inside a
4906 * transaction and therefore cannot commit from this context.
4907 */
4908static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4909 struct btrfs_space_info *space_info)
4910{
4911 struct reserve_ticket *ticket = NULL;
4912 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
4913 struct btrfs_trans_handle *trans;
4914 u64 bytes;
4915
4916 trans = (struct btrfs_trans_handle *)current->journal_info;
4917 if (trans)
4918 return -EAGAIN;
4919
4920 spin_lock(&space_info->lock);
4921 if (!list_empty(&space_info->priority_tickets))
4922 ticket = list_first_entry(&space_info->priority_tickets,
4923 struct reserve_ticket, list);
4924 else if (!list_empty(&space_info->tickets))
4925 ticket = list_first_entry(&space_info->tickets,
4926 struct reserve_ticket, list);
4927 bytes = (ticket) ? ticket->bytes : 0;
4928 spin_unlock(&space_info->lock);
4929
4930 if (!bytes)
4931 return 0;
4932
4933 /* See if there is enough pinned space to make this reservation */
4934 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4935 bytes) >= 0)
4936 goto commit;
4937
4938 /*
4939 * See if there is some space in the delayed insertion reservation for
4940 * this reservation.
4941 */
4942 if (space_info != delayed_rsv->space_info)
4943 return -ENOSPC;
4944
4945 spin_lock(&delayed_rsv->lock);
4946 if (delayed_rsv->size > bytes)
4947 bytes = 0;
4948 else
4949 bytes -= delayed_rsv->size;
4950 spin_unlock(&delayed_rsv->lock);
4951
4952 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4953 bytes) < 0) {
4954 return -ENOSPC;
4955 }
4956
4957commit:
4958 trans = btrfs_join_transaction(fs_info->extent_root);
4959 if (IS_ERR(trans))
4960 return -ENOSPC;
4961
4962 return btrfs_commit_transaction(trans);
4963}
4964
4965/*
4966 * Try to flush some data based on policy set by @state. This is only advisory
4967 * and may fail for various reasons. The caller is supposed to examine the
4968 * state of @space_info to detect the outcome.
4969 */
4970static void flush_space(struct btrfs_fs_info *fs_info,
4971 struct btrfs_space_info *space_info, u64 num_bytes,
4972 int state)
4973{
4974 struct btrfs_root *root = fs_info->extent_root;
4975 struct btrfs_trans_handle *trans;
4976 int nr;
4977 int ret = 0;
4978
4979 switch (state) {
4980 case FLUSH_DELAYED_ITEMS_NR:
4981 case FLUSH_DELAYED_ITEMS:
4982 if (state == FLUSH_DELAYED_ITEMS_NR)
4983 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
4984 else
4985 nr = -1;
4986
4987 trans = btrfs_join_transaction(root);
4988 if (IS_ERR(trans)) {
4989 ret = PTR_ERR(trans);
4990 break;
4991 }
4992 ret = btrfs_run_delayed_items_nr(trans, nr);
4993 btrfs_end_transaction(trans);
4994 break;
4995 case FLUSH_DELALLOC:
4996 case FLUSH_DELALLOC_WAIT:
4997 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
4998 state == FLUSH_DELALLOC_WAIT);
4999 break;
5000 case ALLOC_CHUNK:
5001 trans = btrfs_join_transaction(root);
5002 if (IS_ERR(trans)) {
5003 ret = PTR_ERR(trans);
5004 break;
5005 }
5006 ret = do_chunk_alloc(trans, fs_info,
5007 btrfs_metadata_alloc_profile(fs_info),
5008 CHUNK_ALLOC_NO_FORCE);
5009 btrfs_end_transaction(trans);
5010 if (ret > 0 || ret == -ENOSPC)
5011 ret = 0;
5012 break;
5013 case COMMIT_TRANS:
5014 ret = may_commit_transaction(fs_info, space_info);
5015 break;
5016 default:
5017 ret = -ENOSPC;
5018 break;
5019 }
5020
5021 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
5022 ret);
5023 return;
5024}
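/*
 * Summary added for orientation (assumes the usual enum order of the
 * flush states): the reclaimer escalates through
 * FLUSH_DELAYED_ITEMS_NR -> FLUSH_DELAYED_ITEMS -> FLUSH_DELALLOC ->
 * FLUSH_DELALLOC_WAIT -> ALLOC_CHUNK -> COMMIT_TRANS, i.e. from the
 * cheapest source of reclaimable metadata to a full transaction commit.
 */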
5025
5026static inline u64
5027btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
5028 struct btrfs_space_info *space_info,
5029 bool system_chunk)
5030{
5031 struct reserve_ticket *ticket;
5032 u64 used;
5033 u64 expected;
5034 u64 to_reclaim = 0;
5035
5036 list_for_each_entry(ticket, &space_info->tickets, list)
5037 to_reclaim += ticket->bytes;
5038 list_for_each_entry(ticket, &space_info->priority_tickets, list)
5039 to_reclaim += ticket->bytes;
5040 if (to_reclaim)
5041 return to_reclaim;
5042
5043 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
5044 if (can_overcommit(fs_info, space_info, to_reclaim,
5045 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
5046 return 0;
5047
5048 used = btrfs_space_info_used(space_info, true);
5049
5050 if (can_overcommit(fs_info, space_info, SZ_1M,
5051 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
5052 expected = div_factor_fine(space_info->total_bytes, 95);
5053 else
5054 expected = div_factor_fine(space_info->total_bytes, 90);
5055
5056 if (used > expected)
5057 to_reclaim = used - expected;
5058 else
5059 to_reclaim = 0;
5060 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
5061 space_info->bytes_reserved);
5062 return to_reclaim;
5063}
5064
5065static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
5066 struct btrfs_space_info *space_info,
5067 u64 used, bool system_chunk)
5068{
5069 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
5070
5071 /* If we're just plain full then async reclaim just slows us down. */
5072 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
5073 return 0;
5074
5075 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5076 system_chunk))
5077 return 0;
5078
5079 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
5080 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
5081}
5082
5083static void wake_all_tickets(struct list_head *head)
5084{
5085 struct reserve_ticket *ticket;
5086
5087 while (!list_empty(head)) {
5088 ticket = list_first_entry(head, struct reserve_ticket, list);
5089 list_del_init(&ticket->list);
5090 ticket->error = -ENOSPC;
5091 wake_up(&ticket->wait);
5092 }
5093}
5094
5095/*
5096 * This is for normal flushers: we can wait as long as needed, looping and
5097 * continuously trying to flush as long as we are making progress. We count
5098 * progress as clearing off tickets each time we have to loop.
5099 */
5100static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
5101{
5102 struct btrfs_fs_info *fs_info;
5103 struct btrfs_space_info *space_info;
5104 u64 to_reclaim;
5105 int flush_state;
5106 int commit_cycles = 0;
5107 u64 last_tickets_id;
5108
5109 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
5110 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5111
5112 spin_lock(&space_info->lock);
5113 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5114 false);
5115 if (!to_reclaim) {
5116 space_info->flush = 0;
5117 spin_unlock(&space_info->lock);
5118 return;
5119 }
5120 last_tickets_id = space_info->tickets_id;
5121 spin_unlock(&space_info->lock);
5122
5123 flush_state = FLUSH_DELAYED_ITEMS_NR;
5124 do {
5125 flush_space(fs_info, space_info, to_reclaim, flush_state);
5126 spin_lock(&space_info->lock);
5127 if (list_empty(&space_info->tickets)) {
5128 space_info->flush = 0;
5129 spin_unlock(&space_info->lock);
5130 return;
5131 }
5132 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
5133 space_info,
5134 false);
5135 if (last_tickets_id == space_info->tickets_id) {
5136 flush_state++;
5137 } else {
5138 last_tickets_id = space_info->tickets_id;
5139 flush_state = FLUSH_DELAYED_ITEMS_NR;
5140 if (commit_cycles)
5141 commit_cycles--;
5142 }
5143
5144 if (flush_state > COMMIT_TRANS) {
5145 commit_cycles++;
5146 if (commit_cycles > 2) {
5147 wake_all_tickets(&space_info->tickets);
5148 space_info->flush = 0;
5149 } else {
5150 flush_state = FLUSH_DELAYED_ITEMS_NR;
5151 }
5152 }
5153 spin_unlock(&space_info->lock);
5154 } while (flush_state <= COMMIT_TRANS);
5155}
5156
5157void btrfs_init_async_reclaim_work(struct work_struct *work)
5158{
5159 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5160}
5161
5162static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5163 struct btrfs_space_info *space_info,
5164 struct reserve_ticket *ticket)
5165{
5166 u64 to_reclaim;
5167 int flush_state = FLUSH_DELAYED_ITEMS_NR;
5168
5169 spin_lock(&space_info->lock);
5170 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5171 false);
5172 if (!to_reclaim) {
5173 spin_unlock(&space_info->lock);
5174 return;
5175 }
5176 spin_unlock(&space_info->lock);
5177
5178 do {
5179 flush_space(fs_info, space_info, to_reclaim, flush_state);
5180 flush_state++;
5181 spin_lock(&space_info->lock);
5182 if (ticket->bytes == 0) {
5183 spin_unlock(&space_info->lock);
5184 return;
5185 }
5186 spin_unlock(&space_info->lock);
5187
5188 /*
5189 * Priority flushers can't wait on delalloc without
5190 * deadlocking.
5191 */
5192 if (flush_state == FLUSH_DELALLOC ||
5193 flush_state == FLUSH_DELALLOC_WAIT)
5194 flush_state = ALLOC_CHUNK;
5195 } while (flush_state < COMMIT_TRANS);
5196}
5197
5198static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5199 struct btrfs_space_info *space_info,
5200 struct reserve_ticket *ticket, u64 orig_bytes)
5201
5202{
5203 DEFINE_WAIT(wait);
5204 int ret = 0;
5205
5206 spin_lock(&space_info->lock);
5207 while (ticket->bytes > 0 && ticket->error == 0) {
5208 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5209 if (ret) {
5210 ret = -EINTR;
5211 break;
5212 }
5213 spin_unlock(&space_info->lock);
5214
5215 schedule();
5216
5217 finish_wait(&ticket->wait, &wait);
5218 spin_lock(&space_info->lock);
5219 }
5220 if (!ret)
5221 ret = ticket->error;
5222 if (!list_empty(&ticket->list))
5223 list_del_init(&ticket->list);
5224 if (ticket->bytes && ticket->bytes < orig_bytes) {
5225 u64 num_bytes = orig_bytes - ticket->bytes;
5226 space_info->bytes_may_use -= num_bytes;
5227 trace_btrfs_space_reservation(fs_info, "space_info",
5228 space_info->flags, num_bytes, 0);
5229 }
5230 spin_unlock(&space_info->lock);
5231
5232 return ret;
5233}
5234
5235/**
5236 * __reserve_metadata_bytes - try to reserve bytes from the space_info
5237 * @fs_info - the filesystem
5238 * @space_info - the space info we want to allocate from
5239 * @orig_bytes - the number of bytes we want
5240 * @flush - whether or not we can flush to make our reservation
5241 * @system_chunk - whether this is for the system chunk space_info
5242 *
5243 * This will reserve orig_bytes number of bytes from the space info. If there
5244 * is not enough space it will make an attempt to flush out space to make
5245 * room, by flushing delalloc if possible or committing the transaction. If
5246 * flush is BTRFS_RESERVE_NO_FLUSH then no attempts to regain reservations
5247 * will be made and this will fail if there is not enough space already.
5248 */
5249static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
5250 struct btrfs_space_info *space_info,
5251 u64 orig_bytes,
5252 enum btrfs_reserve_flush_enum flush,
5253 bool system_chunk)
5254{
5255 struct reserve_ticket ticket;
5256 u64 used;
5257 int ret = 0;
5258
5259 ASSERT(orig_bytes);
5260 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5261
5262 spin_lock(&space_info->lock);
5263 ret = -ENOSPC;
5264 used = btrfs_space_info_used(space_info, true);
5265
5266 /*
5267 * If we have enough space then hooray, make our reservation and carry
5268 * on. If not see if we can overcommit, and if we can, hooray carry
5269 * on. If not things get more complicated.
5270 */
5271 if (used + orig_bytes <= space_info->total_bytes) {
5272 space_info->bytes_may_use += orig_bytes;
5273 trace_btrfs_space_reservation(fs_info, "space_info",
5274 space_info->flags, orig_bytes, 1);
5275 ret = 0;
5276 } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
5277 system_chunk)) {
5278 space_info->bytes_may_use += orig_bytes;
5279 trace_btrfs_space_reservation(fs_info, "space_info",
5280 space_info->flags, orig_bytes, 1);
5281 ret = 0;
5282 }
5283
5284 /*
5285 * If we couldn't make a reservation then setup our reservation ticket
5286 * and kick the async worker if it's not already running.
5287 *
5288 * If we are a priority flusher then we just need to add our ticket to
5289 * the list and we will do our own flushing further down.
5290 */
5291 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5292 ticket.bytes = orig_bytes;
5293 ticket.error = 0;
5294 init_waitqueue_head(&ticket.wait);
5295 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5296 list_add_tail(&ticket.list, &space_info->tickets);
5297 if (!space_info->flush) {
5298 space_info->flush = 1;
5299 trace_btrfs_trigger_flush(fs_info,
5300 space_info->flags,
5301 orig_bytes, flush,
5302 "enospc");
5303 queue_work(system_unbound_wq,
5304 &fs_info->async_reclaim_work);
5305 }
5306 } else {
5307 list_add_tail(&ticket.list,
5308 &space_info->priority_tickets);
5309 }
5310 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5311 used += orig_bytes;
5312 /*
5313 * We will do the space reservation dance during log replay,
5314 * which means we won't have fs_info->fs_root set, so don't do
5315 * the async reclaim as we will panic.
5316 */
5317 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
5318 need_do_async_reclaim(fs_info, space_info,
5319 used, system_chunk) &&
5320 !work_busy(&fs_info->async_reclaim_work)) {
5321 trace_btrfs_trigger_flush(fs_info, space_info->flags,
5322 orig_bytes, flush, "preempt");
5323 queue_work(system_unbound_wq,
5324 &fs_info->async_reclaim_work);
5325 }
5326 }
5327 spin_unlock(&space_info->lock);
5328 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5329 return ret;
5330
5331 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5332 return wait_reserve_ticket(fs_info, space_info, &ticket,
5333 orig_bytes);
5334
5335 ret = 0;
5336 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
5337 spin_lock(&space_info->lock);
5338 if (ticket.bytes) {
5339 if (ticket.bytes < orig_bytes) {
5340 u64 num_bytes = orig_bytes - ticket.bytes;
5341 space_info->bytes_may_use -= num_bytes;
5342 trace_btrfs_space_reservation(fs_info, "space_info",
5343 space_info->flags,
5344 num_bytes, 0);
5345
5346 }
5347 list_del_init(&ticket.list);
5348 ret = -ENOSPC;
5349 }
5350 spin_unlock(&space_info->lock);
5351 ASSERT(list_empty(&ticket.list));
5352 return ret;
5353}
5354
5355/**
5356 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5357 * @root - the root we're allocating for
5358 * @block_rsv - the block_rsv we're allocating for
5359 * @orig_bytes - the number of bytes we want
5360 * @flush - whether or not we can flush to make our reservation
5361 *
5362 * This will reserve orig_bytes number of bytes from the space info associated
5363 * with the block_rsv. If there is not enough space it will make an attempt
5364 * to flush out space to make room. It will do this by flushing delalloc if
5365 * possible or committing the transaction. If flush is BTRFS_RESERVE_NO_FLUSH
5366 * then no attempts to regain reservations will be made and this will fail
5367 * if there is not enough space already.
5368 */
5369static int reserve_metadata_bytes(struct btrfs_root *root,
5370 struct btrfs_block_rsv *block_rsv,
5371 u64 orig_bytes,
5372 enum btrfs_reserve_flush_enum flush)
5373{
5374 struct btrfs_fs_info *fs_info = root->fs_info;
5375 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5376 int ret;
5377 bool system_chunk = (root == fs_info->chunk_root);
5378
5379 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
5380 orig_bytes, flush, system_chunk);
5381 if (ret == -ENOSPC &&
5382 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5383 if (block_rsv != global_rsv &&
5384 !block_rsv_use_bytes(global_rsv, orig_bytes))
5385 ret = 0;
5386 }
5387 if (ret == -ENOSPC) {
5388 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
5389 block_rsv->space_info->flags,
5390 orig_bytes, 1);
5391
5392 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
5393 dump_space_info(fs_info, block_rsv->space_info,
5394 orig_bytes, 0);
5395 }
5396 return ret;
5397}
5398
5399static struct btrfs_block_rsv *get_block_rsv(
5400 const struct btrfs_trans_handle *trans,
5401 const struct btrfs_root *root)
5402{
5403 struct btrfs_fs_info *fs_info = root->fs_info;
5404 struct btrfs_block_rsv *block_rsv = NULL;
5405
5406 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5407 (root == fs_info->csum_root && trans->adding_csums) ||
5408 (root == fs_info->uuid_root))
5409 block_rsv = trans->block_rsv;
5410
5411 if (!block_rsv)
5412 block_rsv = root->block_rsv;
5413
5414 if (!block_rsv)
5415 block_rsv = &fs_info->empty_block_rsv;
5416
5417 return block_rsv;
5418}
5419
5420static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5421 u64 num_bytes)
5422{
5423 int ret = -ENOSPC;
5424 spin_lock(&block_rsv->lock);
5425 if (block_rsv->reserved >= num_bytes) {
5426 block_rsv->reserved -= num_bytes;
5427 if (block_rsv->reserved < block_rsv->size)
5428 block_rsv->full = 0;
5429 ret = 0;
5430 }
5431 spin_unlock(&block_rsv->lock);
5432 return ret;
5433}
5434
5435static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5436 u64 num_bytes, int update_size)
5437{
5438 spin_lock(&block_rsv->lock);
5439 block_rsv->reserved += num_bytes;
5440 if (update_size)
5441 block_rsv->size += num_bytes;
5442 else if (block_rsv->reserved >= block_rsv->size)
5443 block_rsv->full = 1;
5444 spin_unlock(&block_rsv->lock);
5445}
5446
5447int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5448 struct btrfs_block_rsv *dest, u64 num_bytes,
5449 int min_factor)
5450{
5451 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5452 u64 min_bytes;
5453
5454 if (global_rsv->space_info != dest->space_info)
5455 return -ENOSPC;
5456
5457 spin_lock(&global_rsv->lock);
5458 min_bytes = div_factor(global_rsv->size, min_factor);
5459 if (global_rsv->reserved < min_bytes + num_bytes) {
5460 spin_unlock(&global_rsv->lock);
5461 return -ENOSPC;
5462 }
5463 global_rsv->reserved -= num_bytes;
5464 if (global_rsv->reserved < global_rsv->size)
5465 global_rsv->full = 0;
5466 spin_unlock(&global_rsv->lock);
5467
5468 block_rsv_add_bytes(dest, num_bytes, 1);
5469 return 0;
5470}
5471
5472/*
5473 * This is for space we already have accounted in space_info->bytes_may_use,
5474 * so basically when we're returning space from block_rsv's.
5475 */
5476static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5477 struct btrfs_space_info *space_info,
5478 u64 num_bytes)
5479{
5480 struct reserve_ticket *ticket;
5481 struct list_head *head;
5482 u64 used;
5483 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5484 bool check_overcommit = false;
5485
5486 spin_lock(&space_info->lock);
5487 head = &space_info->priority_tickets;
5488
5489 /*
5490 * If we are overcommitted then we need to factor in the overcommit
5491 * amount with the amount of bytes that were freed, to make sure we are
5492 * not handing tickets more space than we can actually cover.
5493 */
5494 used = btrfs_space_info_used(space_info, true);
5495 if (used - num_bytes >= space_info->total_bytes)
5496 check_overcommit = true;
5497again:
5498 while (!list_empty(head) && num_bytes) {
5499 ticket = list_first_entry(head, struct reserve_ticket,
5500 list);
5501 /*
5502 * We use 0 bytes because this space is already reserved, so
5503 * adding the ticket space would be a double count.
5504 */
5505 if (check_overcommit &&
5506 !can_overcommit(fs_info, space_info, 0, flush, false))
5507 break;
5508 if (num_bytes >= ticket->bytes) {
5509 list_del_init(&ticket->list);
5510 num_bytes -= ticket->bytes;
5511 ticket->bytes = 0;
5512 space_info->tickets_id++;
5513 wake_up(&ticket->wait);
5514 } else {
5515 ticket->bytes -= num_bytes;
5516 num_bytes = 0;
5517 }
5518 }
5519
5520 if (num_bytes && head == &space_info->priority_tickets) {
5521 head = &space_info->tickets;
5522 flush = BTRFS_RESERVE_FLUSH_ALL;
5523 goto again;
5524 }
5525 space_info->bytes_may_use -= num_bytes;
5526 trace_btrfs_space_reservation(fs_info, "space_info",
5527 space_info->flags, num_bytes, 0);
5528 spin_unlock(&space_info->lock);
5529}
5530
5531/*
5532 * This is for newly allocated space that isn't accounted in
5533 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an
5534 * extent we use this helper.
5535 */
5536static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5537 struct btrfs_space_info *space_info,
5538 u64 num_bytes)
5539{
5540 struct reserve_ticket *ticket;
5541 struct list_head *head = &space_info->priority_tickets;
5542
5543again:
5544 while (!list_empty(head) && num_bytes) {
5545 ticket = list_first_entry(head, struct reserve_ticket,
5546 list);
5547 if (num_bytes >= ticket->bytes) {
5548 trace_btrfs_space_reservation(fs_info, "space_info",
5549 space_info->flags,
5550 ticket->bytes, 1);
5551 list_del_init(&ticket->list);
5552 num_bytes -= ticket->bytes;
5553 space_info->bytes_may_use += ticket->bytes;
5554 ticket->bytes = 0;
5555 space_info->tickets_id++;
5556 wake_up(&ticket->wait);
5557 } else {
5558 trace_btrfs_space_reservation(fs_info, "space_info",
5559 space_info->flags,
5560 num_bytes, 1);
5561 space_info->bytes_may_use += num_bytes;
5562 ticket->bytes -= num_bytes;
5563 num_bytes = 0;
5564 }
5565 }
5566
5567 if (num_bytes && head == &space_info->priority_tickets) {
5568 head = &space_info->tickets;
5569 goto again;
5570 }
5571}
5572
5573static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5574 struct btrfs_block_rsv *block_rsv,
5575 struct btrfs_block_rsv *dest, u64 num_bytes,
5576 u64 *qgroup_to_release_ret)
5577{
5578 struct btrfs_space_info *space_info = block_rsv->space_info;
5579 u64 qgroup_to_release = 0;
5580 u64 ret;
5581
5582 spin_lock(&block_rsv->lock);
5583 if (num_bytes == (u64)-1) {
5584 num_bytes = block_rsv->size;
5585 qgroup_to_release = block_rsv->qgroup_rsv_size;
5586 }
5587 block_rsv->size -= num_bytes;
5588 if (block_rsv->reserved >= block_rsv->size) {
5589 num_bytes = block_rsv->reserved - block_rsv->size;
5590 block_rsv->reserved = block_rsv->size;
5591 block_rsv->full = 1;
5592 } else {
5593 num_bytes = 0;
5594 }
5595 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5596 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5597 block_rsv->qgroup_rsv_size;
5598 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5599 } else {
5600 qgroup_to_release = 0;
5601 }
5602 spin_unlock(&block_rsv->lock);
5603
5604 ret = num_bytes;
5605 if (num_bytes > 0) {
5606 if (dest) {
5607 spin_lock(&dest->lock);
5608 if (!dest->full) {
5609 u64 bytes_to_add;
5610
5611 bytes_to_add = dest->size - dest->reserved;
5612 bytes_to_add = min(num_bytes, bytes_to_add);
5613 dest->reserved += bytes_to_add;
5614 if (dest->reserved >= dest->size)
5615 dest->full = 1;
5616 num_bytes -= bytes_to_add;
5617 }
5618 spin_unlock(&dest->lock);
5619 }
5620 if (num_bytes)
5621 space_info_add_old_bytes(fs_info, space_info,
5622 num_bytes);
5623 }
5624 if (qgroup_to_release_ret)
5625 *qgroup_to_release_ret = qgroup_to_release;
5626 return ret;
5627}
5628
5629int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5630 struct btrfs_block_rsv *dst, u64 num_bytes,
5631 int update_size)
5632{
5633 int ret;
5634
5635 ret = block_rsv_use_bytes(src, num_bytes);
5636 if (ret)
5637 return ret;
5638
5639 block_rsv_add_bytes(dst, num_bytes, update_size);
5640 return 0;
5641}
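/*
 * Typical usage (illustrative sketch, error handling elided; my_rsv is a
 * placeholder for any reserve set up with btrfs_init_metadata_block_rsv()):
 * move an already-reserved amount out of the transaction rsv into a
 * private rsv so it survives btrfs_end_transaction():
 *
 *	u64 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 *	int err = btrfs_block_rsv_migrate(trans->block_rsv, my_rsv,
 *					  bytes, 1);
 *
 * The migration fails with -ENOSPC instead of over-drafting the source.
 */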
5642
5643void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5644{
5645 memset(rsv, 0, sizeof(*rsv));
5646 spin_lock_init(&rsv->lock);
5647 rsv->type = type;
5648}
5649
5650void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
5651 struct btrfs_block_rsv *rsv,
5652 unsigned short type)
5653{
5654 btrfs_init_block_rsv(rsv, type);
5655 rsv->space_info = __find_space_info(fs_info,
5656 BTRFS_BLOCK_GROUP_METADATA);
5657}
5658
5659struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
5660 unsigned short type)
5661{
5662 struct btrfs_block_rsv *block_rsv;
5663
5664 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5665 if (!block_rsv)
5666 return NULL;
5667
5668 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
5669 return block_rsv;
5670}
5671
5672void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
5673 struct btrfs_block_rsv *rsv)
5674{
5675 if (!rsv)
5676 return;
5677 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5678 kfree(rsv);
5679}
5680
5681void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
5682{
5683 kfree(rsv);
5684}
5685
5686int btrfs_block_rsv_add(struct btrfs_root *root,
5687 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5688 enum btrfs_reserve_flush_enum flush)
5689{
5690 int ret;
5691
5692 if (num_bytes == 0)
5693 return 0;
5694
5695 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5696 if (!ret) {
5697 block_rsv_add_bytes(block_rsv, num_bytes, 1);
5698 return 0;
5699 }
5700
5701 return ret;
5702}
5703
5704int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
5705{
5706 u64 num_bytes = 0;
5707 int ret = -ENOSPC;
5708
5709 if (!block_rsv)
5710 return 0;
5711
5712 spin_lock(&block_rsv->lock);
5713 num_bytes = div_factor(block_rsv->size, min_factor);
5714 if (block_rsv->reserved >= num_bytes)
5715 ret = 0;
5716 spin_unlock(&block_rsv->lock);
5717
5718 return ret;
5719}
5720
5721int btrfs_block_rsv_refill(struct btrfs_root *root,
5722 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5723 enum btrfs_reserve_flush_enum flush)
5724{
5725 u64 num_bytes = 0;
5726 int ret = -ENOSPC;
5727
5728 if (!block_rsv)
5729 return 0;
5730
5731 spin_lock(&block_rsv->lock);
5732 num_bytes = min_reserved;
5733 if (block_rsv->reserved >= num_bytes)
5734 ret = 0;
5735 else
5736 num_bytes -= block_rsv->reserved;
5737 spin_unlock(&block_rsv->lock);
5738
5739 if (!ret)
5740 return 0;
5741
5742 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5743 if (!ret) {
5744 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5745 return 0;
5746 }
5747
5748 return ret;
5749}
5750
5751/**
5752 * btrfs_inode_rsv_refill - refill the inode block rsv.
5753 * @inode - the inode we are refilling.
5754 * @flush - the flushing restriction.
5755 *
5756 * Essentially the same as btrfs_block_rsv_refill, except it uses the
5757 * block_rsv->size as the minimum size. We'll either refill the missing
5758 * amount or return if we already have enough space. This will also handle
5759 * the reserve tracepoint for the reserved amount.
5760 */
5761static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
5762 enum btrfs_reserve_flush_enum flush)
5763{
5764 struct btrfs_root *root = inode->root;
5765 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5766 u64 num_bytes = 0;
5767 u64 qgroup_num_bytes = 0;
5768 int ret = -ENOSPC;
5769
5770 spin_lock(&block_rsv->lock);
5771 if (block_rsv->reserved < block_rsv->size)
5772 num_bytes = block_rsv->size - block_rsv->reserved;
5773 if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
5774 qgroup_num_bytes = block_rsv->qgroup_rsv_size -
5775 block_rsv->qgroup_rsv_reserved;
5776 spin_unlock(&block_rsv->lock);
5777
5778 if (num_bytes == 0)
5779 return 0;
5780
5781 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
5782 if (ret)
5783 return ret;
5784 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5785 if (!ret) {
5786 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5787 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5788 btrfs_ino(inode), num_bytes, 1);
5789
5790 /* Don't forget to increase qgroup_rsv_reserved */
5791 spin_lock(&block_rsv->lock);
5792 block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
5793 spin_unlock(&block_rsv->lock);
5794 } else
5795 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5796 return ret;
5797}
5798
5799/**
5800 * btrfs_inode_rsv_release - release any excessive reservation.
5801 * @inode - the inode we need to release from.
5802 * @qgroup_free - free or convert qgroup meta.
5803 *   Unlike normal operation, qgroup meta reservation needs to know if we are
5804 *   freeing qgroup reservation or just converting it into per-trans. Normally
5805 *   @qgroup_free is true for error handling, and false for normal release.
5806 *
5807 * This is the same as btrfs_block_rsv_release, except that it handles the
5808 * tracepoint for the reservation.
5809 */
5810static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
5811{
5812 struct btrfs_fs_info *fs_info = inode->root->fs_info;
5813 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5814 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5815 u64 released = 0;
5816 u64 qgroup_to_release = 0;
5817
5818 /*
5819 * Since we statically set the block_rsv->size we just want to say we
5820 * are releasing 0 bytes, and then we'll just get the reservation over
5821 * the size free'd.
5822 */
5823 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
5824 &qgroup_to_release);
5825 if (released > 0)
5826 trace_btrfs_space_reservation(fs_info, "delalloc",
5827 btrfs_ino(inode), released, 0);
5828 if (qgroup_free)
5829 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
5830 else
5831 btrfs_qgroup_convert_reserved_meta(inode->root,
5832 qgroup_to_release);
5833}
5834
5835void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5836 struct btrfs_block_rsv *block_rsv,
5837 u64 num_bytes)
5838{
5839 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5840
5841 if (global_rsv == block_rsv ||
5842 block_rsv->space_info != global_rsv->space_info)
5843 global_rsv = NULL;
5844 block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
5845}
5846
5847static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5848{
5849 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5850 struct btrfs_space_info *sinfo = block_rsv->space_info;
5851 u64 num_bytes;
5852
5853 /*
5854 * The global block rsv is based on the size of the extent tree, the
5855 * checksum tree and the tree of tree roots, since those are the trees
5856 * we have to be able to COW to make forward progress at all times.
5857 */
5858 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5859 btrfs_root_used(&fs_info->csum_root->root_item) +
5860 btrfs_root_used(&fs_info->tree_root->root_item);
5861 num_bytes = max_t(u64, num_bytes, SZ_16M);
5862
5863 spin_lock(&sinfo->lock);
5864 spin_lock(&block_rsv->lock);
5865
5866 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5867
5868 if (block_rsv->reserved < block_rsv->size) {
5869 num_bytes = btrfs_space_info_used(sinfo, true);
5870 if (sinfo->total_bytes > num_bytes) {
5871 num_bytes = sinfo->total_bytes - num_bytes;
5872 num_bytes = min(num_bytes,
5873 block_rsv->size - block_rsv->reserved);
5874 block_rsv->reserved += num_bytes;
5875 sinfo->bytes_may_use += num_bytes;
5876 trace_btrfs_space_reservation(fs_info, "space_info",
5877 sinfo->flags, num_bytes,
5878 1);
5879 }
5880 } else if (block_rsv->reserved > block_rsv->size) {
5881 num_bytes = block_rsv->reserved - block_rsv->size;
5882 sinfo->bytes_may_use -= num_bytes;
5883 trace_btrfs_space_reservation(fs_info, "space_info",
5884 sinfo->flags, num_bytes, 0);
5885 block_rsv->reserved = block_rsv->size;
5886 }
5887
5888 if (block_rsv->reserved == block_rsv->size)
5889 block_rsv->full = 1;
5890 else
5891 block_rsv->full = 0;
5892
5893 spin_unlock(&block_rsv->lock);
5894 spin_unlock(&sinfo->lock);
5895}
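/*
 * Sizing example (illustrative): with 100MiB used across the extent,
 * csum and tree roots combined, the target size is
 * clamp(100MiB, 16MiB, 512MiB) = 100MiB. Any shortfall is then topped
 * up from unreserved space in the metadata space_info and accounted as
 * bytes_may_use; any excess is handed back the same way.
 */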
5896
5897static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5898{
5899 struct btrfs_space_info *space_info;
5900
5901 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5902 fs_info->chunk_block_rsv.space_info = space_info;
5903
5904 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5905 fs_info->global_block_rsv.space_info = space_info;
5906 fs_info->trans_block_rsv.space_info = space_info;
5907 fs_info->empty_block_rsv.space_info = space_info;
5908 fs_info->delayed_block_rsv.space_info = space_info;
5909
5910 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
5911 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
5912 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5913 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5914 if (fs_info->quota_root)
5915 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5916 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5917
5918 update_global_block_rsv(fs_info);
5919}
5920
5921static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5922{
5923 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5924 (u64)-1, NULL);
5925 WARN_ON(fs_info->trans_block_rsv.size > 0);
5926 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5927 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5928 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5929 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5930 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5931}
5932
5933/*
5934 * To be called after all the new block groups attached to the transaction
5935 * handle have been created (btrfs_create_pending_block_groups()), to
5936 * release the reservation that was made for creating those chunks.
5937 */
5938void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5939{
5940 struct btrfs_fs_info *fs_info = trans->fs_info;
5941
5942 if (!trans->chunk_bytes_reserved)
5943 return;
5944
5945 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5946
5947 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5948 trans->chunk_bytes_reserved, NULL);
5949 trans->chunk_bytes_reserved = 0;
5950}
5951
5952/* Can only return 0 or -ENOSPC */
5953int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5954 struct btrfs_inode *inode)
5955{
5956 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
5957 struct btrfs_root *root = inode->root;
5958
5959 /*
5960 * We always use trans->block_rsv here as we will have reserved space
5961 * for our orphan when starting the transaction; get_block_rsv() could
5962 * pick the wrong block rsv for a reloc inode of a non-refcounted root.
5963 */
5964 struct btrfs_block_rsv *src_rsv = trans->block_rsv;
5965 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
5966
5967
5968 /*
5969 * One metadata unit is sufficient here: it covers inserting (and
5970 * later deleting) the orphan item.
5971 */
5972 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
5973
5974 trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
5975 num_bytes, 1);
5976 return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
5977}
5978
5979void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
5980{
5981 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
5982 struct btrfs_root *root = inode->root;
5983 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
5984
5985 trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
5986 num_bytes, 0);
5987 btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
5988}
5989
5990/*
5991 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
5992 * root: the root of the parent directory
5993 * rsv: block reservation
5994 * items: the number of items that we need do reservation
5995 * qgroup_reserved: used to return the reserved size in qgroup
5996 *
5997 * This function is used to reserve the space for snapshot/subvolume
5998 * creation and deletion. Those operations are different from the common
5999 * file/directory operations: they change two fs/file trees and the root
6000 * tree, and the number of items that qgroup reserves differs from the free
6001 * space reservation, so we can not use the space reservation mechanism in
6002 * start_transaction().
6003 */
6004int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
6005 struct btrfs_block_rsv *rsv,
6006 int items,
6007 u64 *qgroup_reserved,
6008 bool use_global_rsv)
6009{
6010 u64 num_bytes;
6011 int ret;
6012 struct btrfs_fs_info *fs_info = root->fs_info;
6013 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6014
6015 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
6016 /* One for parent inode, two for dir entries */
6017 num_bytes = 3 * fs_info->nodesize;
6018 ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
6019 if (ret)
6020 return ret;
6021 } else {
6022 num_bytes = 0;
6023 }
6024
6025 *qgroup_reserved = num_bytes;
6026
6027 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
6028 rsv->space_info = __find_space_info(fs_info,
6029 BTRFS_BLOCK_GROUP_METADATA);
6030 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
6031 BTRFS_RESERVE_FLUSH_ALL);
6032
6033 if (ret == -ENOSPC && use_global_rsv)
6034 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
6035
6036 if (ret && *qgroup_reserved)
6037 btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
6038
6039 return ret;
6040}
6041
6042void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
6043 struct btrfs_block_rsv *rsv)
6044{
6045 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
6046}
6047
6048static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6049 struct btrfs_inode *inode)
6050{
6051 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6052 u64 reserve_size = 0;
6053 u64 qgroup_rsv_size = 0;
6054 u64 csum_leaves;
6055 unsigned outstanding_extents;
6056
6057 lockdep_assert_held(&inode->lock);
6058 outstanding_extents = inode->outstanding_extents;
6059 if (outstanding_extents)
6060 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
6061 outstanding_extents + 1);
6062 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
6063 inode->csum_bytes);
6064 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6065 csum_leaves);
6066 /*
6067 * For qgroup rsv, the calculation is very simple:
6068 * account one nodesize for each outstanding extent
6069 *
6070 * This is overestimating in most cases.
6071 */
6072 qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
6073
6074 spin_lock(&block_rsv->lock);
6075 block_rsv->size = reserve_size;
6076 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
6077 spin_unlock(&block_rsv->lock);
6078}
6079
6080int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6081{
6082 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
6083 unsigned nr_extents;
6084 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6085 int ret = 0;
6086 bool delalloc_lock = true;
6087
6088 /*
6089 * If we are a free space inode we need to not flush since we will be in
6090 * the middle of a transaction commit. We also don't need the delalloc
6091 * mutex since we won't race with anybody.
6092 *
6093 * If we have a transaction open (can happen if we call truncate_block
6094 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
6095 */
6096 if (btrfs_is_free_space_inode(inode)) {
6097 flush = BTRFS_RESERVE_NO_FLUSH;
6098 delalloc_lock = false;
6099 } else {
6100 if (current->journal_info)
6101 flush = BTRFS_RESERVE_FLUSH_LIMIT;
6102
6103 if (btrfs_transaction_in_commit(fs_info))
6104 schedule_timeout(1);
6105 }
6106
6107 if (delalloc_lock)
6108 mutex_lock(&inode->delalloc_mutex);
6109
6110 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6111
6112 /* Add our new extents and calculate the new rsv size. */
6113 spin_lock(&inode->lock);
6114 nr_extents = count_max_extents(num_bytes);
6115 btrfs_mod_outstanding_extents(inode, nr_extents);
6116 inode->csum_bytes += num_bytes;
6117 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6118 spin_unlock(&inode->lock);
6119
6120 ret = btrfs_inode_rsv_refill(inode, flush);
6121 if (unlikely(ret))
6122 goto out_fail;
6123
6124 if (delalloc_lock)
6125 mutex_unlock(&inode->delalloc_mutex);
6126 return 0;
6127
6128out_fail:
6129 spin_lock(&inode->lock);
6130 nr_extents = count_max_extents(num_bytes);
6131 btrfs_mod_outstanding_extents(inode, -nr_extents);
6132 inode->csum_bytes -= num_bytes;
6133 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6134 spin_unlock(&inode->lock);
6135
6136 btrfs_inode_rsv_release(inode, true);
6137 if (delalloc_lock)
6138 mutex_unlock(&inode->delalloc_mutex);
6139 return ret;
6140}
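/*
 * Illustrative example (assumes BTRFS_MAX_EXTENT_SIZE == 128MiB): a 4KiB
 * buffered write reserves for one outstanding extent plus its csum
 * leaves, while a 300MiB write counts DIV_ROUND_UP(300MiB, 128MiB) = 3
 * potential extents up front; whatever turns out to be unused is handed
 * back later through btrfs_delalloc_release_extents() and
 * btrfs_delalloc_release_metadata().
 */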
6141
6142/**
6143 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
6144 * @inode: the inode to release the reservation for.
6145 * @num_bytes: the number of bytes we are releasing.
6146 * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
6147 *
6148 * This will release the metadata reservation for an inode. This can be called
6149 * once we complete IO for a given set of bytes to release their metadata
6150 * reservations, or on error for the same reason.
6151 */
6152void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6153 bool qgroup_free)
6154{
6155 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
6156
6157 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6158 spin_lock(&inode->lock);
6159 inode->csum_bytes -= num_bytes;
6160 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6161 spin_unlock(&inode->lock);
6162
6163 if (btrfs_is_testing(fs_info))
6164 return;
6165
6166 btrfs_inode_rsv_release(inode, qgroup_free);
6167}
6168
6169/**
6170 * btrfs_delalloc_release_extents - release our outstanding_extents
6171 * @inode: the inode to balance the reservation for.
6172 * @num_bytes: the number of bytes we originally reserved with
6173 * @qgroup_free: do we need to free qgroup meta reservation or convert them.
6174 *
6175 * When we reserve space we increase outstanding_extents for the extents we may
6176 * add. Once we've set the range as delalloc or created our ordered extents we
6177 * have outstanding_extents to track the real usage, so we use this to free our
6178 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
6179 * with btrfs_delalloc_reserve_metadata.
6180 */
6181void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6182 bool qgroup_free)
6183{
6184 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
6185 unsigned num_extents;
6186
6187 spin_lock(&inode->lock);
6188 num_extents = count_max_extents(num_bytes);
6189 btrfs_mod_outstanding_extents(inode, -num_extents);
6190 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6191 spin_unlock(&inode->lock);
6192
6193 if (btrfs_is_testing(fs_info))
6194 return;
6195
6196 btrfs_inode_rsv_release(inode, qgroup_free);
6197}
6198
6199/**
6200 * btrfs_delalloc_reserve_space - reserve data and metadata space for
6201 * delalloc
6202 * @inode: inode we're writing to
6203 * @start: start range we are writing to
6204 * @len: how long the range we are writing to
6205 * @reserved: mandatory parameter, record actually reserved qgroup ranges of
6206 *            current reservation.
6207 *
6208 * This will do the following things
6209 *
6210 * o reserve space in data space info for num bytes
6211 *   and reserve precious corresponding qgroup space
6212 *   (Done in check_data_free_space)
6213 *
6214 * o reserve space for metadata space, based on the number of outstanding
6215 *   extents and how much csums will be needed
6216 *   also reserve metadata space in a per root over-reserve method.
6217 * o add to the inodes->delalloc_bytes
6218 * o add it to the fs_info's delalloc inodes list.
6219 *   (Above 3 all done in delalloc_reserve_metadata)
6220 *
6221 * Return 0 for success
6222 * Return <0 for error (-ENOSPC or -EQUOT)
6223 */
6224int btrfs_delalloc_reserve_space(struct inode *inode,
6225 struct extent_changeset **reserved, u64 start, u64 len)
6226{
6227 int ret;
6228
6229 ret = btrfs_check_data_free_space(inode, reserved, start, len);
6230 if (ret < 0)
6231 return ret;
6232 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
6233 if (ret < 0)
6234 btrfs_free_reserved_data_space(inode, *reserved, start, len);
6235 return ret;
6236}
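/*
 * Typical caller pattern (illustrative sketch, error paths elided):
 *
 *	struct extent_changeset *data_reserved = NULL;
 *
 *	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
 *					   pos, count);
 *	...dirty the pages...
 *	btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
 *	extent_changeset_free(data_reserved);
 *
 * If something fails after the reservation, btrfs_delalloc_release_space()
 * undoes both the data and the metadata halves in one call.
 */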
6237
6238/**
6239 * btrfs_delalloc_release_space - release data and metadata space for delalloc
6240 * @inode: inode we're releasing space for
6241 * @start: start position of the space already reserved
6242 * @len: the len of the space already reserved
6243 * @qgroup_free: release reserved qgroup space or convert it to per-trans
6244 *
6245 * This function will release the metadata space that was not used and will
6246 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
6247 * list if there are no delalloc bytes left.
6248 * Also it will handle the qgroup reserved space.
6249 */
6250void btrfs_delalloc_release_space(struct inode *inode,
6251 struct extent_changeset *reserved,
6252 u64 start, u64 len, bool qgroup_free)
6253{
6254 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
6255 btrfs_free_reserved_data_space(inode, reserved, start, len);
6256}
6257
6258static int update_block_group(struct btrfs_trans_handle *trans,
6259 struct btrfs_fs_info *info, u64 bytenr,
6260 u64 num_bytes, int alloc)
6261{
6262 struct btrfs_block_group_cache *cache = NULL;
6263 u64 total = num_bytes;
6264 u64 old_val;
6265 u64 byte_in_group;
6266 int factor;
6267
6268 /* Block accounting for super block */
6269 spin_lock(&info->delalloc_root_lock);
6270 old_val = btrfs_super_bytes_used(info->super_copy);
6271 if (alloc)
6272 old_val += num_bytes;
6273 else
6274 old_val -= num_bytes;
6275 btrfs_set_super_bytes_used(info->super_copy, old_val);
6276 spin_unlock(&info->delalloc_root_lock);
6277
6278 while (total) {
6279 cache = btrfs_lookup_block_group(info, bytenr);
6280 if (!cache)
6281 return -ENOENT;
6282 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
6283 BTRFS_BLOCK_GROUP_RAID1 |
6284 BTRFS_BLOCK_GROUP_RAID10))
6285 factor = 2;
6286 else
6287 factor = 1;
6288
6289 /*
6290 * If this block group has free space cache written out, we need to load
6291 * it when removing space, because the unpinning stage actually adds the
6292 * space back to the block group, otherwise we would leak space.
6293 */
6294 if (!alloc && cache->cached == BTRFS_CACHE_NO)
6295 cache_block_group(cache, 1);
6296
6297 byte_in_group = bytenr - cache->key.objectid;
6298 WARN_ON(byte_in_group > cache->key.offset);
6299
6300 spin_lock(&cache->space_info->lock);
6301 spin_lock(&cache->lock);
6302
6303 if (btrfs_test_opt(info, SPACE_CACHE) &&
6304 cache->disk_cache_state < BTRFS_DC_CLEAR)
6305 cache->disk_cache_state = BTRFS_DC_CLEAR;
6306
6307 old_val = btrfs_block_group_used(&cache->item);
6308 num_bytes = min(total, cache->key.offset - byte_in_group);
6309 if (alloc) {
6310 old_val += num_bytes;
6311 btrfs_set_block_group_used(&cache->item, old_val);
6312 cache->reserved -= num_bytes;
6313 cache->space_info->bytes_reserved -= num_bytes;
6314 cache->space_info->bytes_used += num_bytes;
6315 cache->space_info->disk_used += num_bytes * factor;
6316 spin_unlock(&cache->lock);
6317 spin_unlock(&cache->space_info->lock);
6318 } else {
6319 old_val -= num_bytes;
6320 btrfs_set_block_group_used(&cache->item, old_val);
6321 cache->pinned += num_bytes;
6322 cache->space_info->bytes_pinned += num_bytes;
6323 cache->space_info->bytes_used -= num_bytes;
6324 cache->space_info->disk_used -= num_bytes * factor;
6325 spin_unlock(&cache->lock);
6326 spin_unlock(&cache->space_info->lock);
6327
6328 trace_btrfs_space_reservation(info, "pinned",
6329 cache->space_info->flags,
6330 num_bytes, 1);
6331 percpu_counter_add(&cache->space_info->total_bytes_pinned,
6332 num_bytes);
6333 set_extent_dirty(info->pinned_extents,
6334 bytenr, bytenr + num_bytes - 1,
6335 GFP_NOFS | __GFP_NOFAIL);
6336 }
6337
6338 spin_lock(&trans->transaction->dirty_bgs_lock);
6339 if (list_empty(&cache->dirty_list)) {
6340 list_add_tail(&cache->dirty_list,
6341 &trans->transaction->dirty_bgs);
6342 trans->transaction->num_dirty_bgs++;
6343 btrfs_get_block_group(cache);
6344 }
6345 spin_unlock(&trans->transaction->dirty_bgs_lock);
6346
6347 /*
6348 * No longer have used bytes in this block group, queue it for
6349 * deletion. We do this after adding the block group to the
6350 * dirty list to avoid races between cleaner kthread and space
6351 * cache writeout.
6352 */
6353 if (!alloc && old_val == 0) {
6354 spin_lock(&info->unused_bgs_lock);
6355 if (list_empty(&cache->bg_list)) {
6356 btrfs_get_block_group(cache);
6357 list_add_tail(&cache->bg_list,
6358 &info->unused_bgs);
6359 }
6360 spin_unlock(&info->unused_bgs_lock);
6361 }
6362
6363 btrfs_put_block_group(cache);
6364 total -= num_bytes;
6365 bytenr += num_bytes;
6366 }
6367 return 0;
6368}
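/*
 * Note on @factor (added for clarity): profiles that keep two copies
 * (DUP/RAID1/RAID10) consume twice the logical bytes on disk, so freeing
 * a 1MiB extent in a RAID1 block group drops disk_used by 2MiB while
 * bytes_used drops by 1MiB.
 */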
6369
6370static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
6371{
6372 struct btrfs_block_group_cache *cache;
6373 u64 bytenr;
6374
6375 spin_lock(&fs_info->block_group_cache_lock);
6376 bytenr = fs_info->first_logical_byte;
6377 spin_unlock(&fs_info->block_group_cache_lock);
6378
6379 if (bytenr < (u64)-1)
6380 return bytenr;
6381
6382 cache = btrfs_lookup_first_block_group(fs_info, search_start);
6383 if (!cache)
6384 return 0;
6385
6386 bytenr = cache->key.objectid;
6387 btrfs_put_block_group(cache);
6388
6389 return bytenr;
6390}
6391
6392static int pin_down_extent(struct btrfs_fs_info *fs_info,
6393 struct btrfs_block_group_cache *cache,
6394 u64 bytenr, u64 num_bytes, int reserved)
6395{
6396 spin_lock(&cache->space_info->lock);
6397 spin_lock(&cache->lock);
6398 cache->pinned += num_bytes;
6399 cache->space_info->bytes_pinned += num_bytes;
6400 if (reserved) {
6401 cache->reserved -= num_bytes;
6402 cache->space_info->bytes_reserved -= num_bytes;
6403 }
6404 spin_unlock(&cache->lock);
6405 spin_unlock(&cache->space_info->lock);
6406
6407 trace_btrfs_space_reservation(fs_info, "pinned",
6408 cache->space_info->flags, num_bytes, 1);
6409 percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
6410 set_extent_dirty(fs_info->pinned_extents, bytenr,
6411 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6412 return 0;
6413}
6414
6415/*
6416 * This function must be called within a transaction.
6417 */
6418int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
6419 u64 bytenr, u64 num_bytes, int reserved)
6420{
6421 struct btrfs_block_group_cache *cache;
6422
6423 cache = btrfs_lookup_block_group(fs_info, bytenr);
6424 BUG_ON(!cache); /* Logic error */
6425
6426 pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);
6427
6428 btrfs_put_block_group(cache);
6429 return 0;
6430}
6431
6432/*
6433 * Pin an extent during log replay; must also be called within a transaction.
6434 */
6435int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
6436 u64 bytenr, u64 num_bytes)
6437{
6438 struct btrfs_block_group_cache *cache;
6439 int ret;
6440
6441 cache = btrfs_lookup_block_group(fs_info, bytenr);
6442 if (!cache)
6443 return -EINVAL;
6444
6445
6446 /*
6447 * Pull in the free space cache (if any) so that our pin removes the
6448 * free space from the cache. We have load_only set to one because the
6449 * slow code to read in the free extents does check the pinned extents.
6450 */
6451 cache_block_group(cache, 1);
6452
6453 pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
6454
6455 /* Remove us from the free space cache (if we're there at all) */
6456 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
6457 btrfs_put_block_group(cache);
6458 return ret;
6459}
6460
6461static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
6462 u64 start, u64 num_bytes)
6463{
6464 int ret;
6465 struct btrfs_block_group_cache *block_group;
6466 struct btrfs_caching_control *caching_ctl;
6467
6468 block_group = btrfs_lookup_block_group(fs_info, start);
6469 if (!block_group)
6470 return -EINVAL;
6471
6472 cache_block_group(block_group, 0);
6473 caching_ctl = get_caching_control(block_group);
6474
6475 if (!caching_ctl) {
6476 /* Logic error */
6477 BUG_ON(!block_group_cache_done(block_group));
6478 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6479 } else {
6480 mutex_lock(&caching_ctl->mutex);
6481
6482 if (start >= caching_ctl->progress) {
6483 ret = add_excluded_extent(fs_info, start, num_bytes);
6484 } else if (start + num_bytes <= caching_ctl->progress) {
6485 ret = btrfs_remove_free_space(block_group,
6486 start, num_bytes);
6487 } else {
6488 num_bytes = caching_ctl->progress - start;
6489 ret = btrfs_remove_free_space(block_group,
6490 start, num_bytes);
6491 if (ret)
6492 goto out_lock;
6493
6494 num_bytes = (start + num_bytes) -
6495 caching_ctl->progress;
6496 start = caching_ctl->progress;
6497 ret = add_excluded_extent(fs_info, start, num_bytes);
6498 }
6499out_lock:
6500 mutex_unlock(&caching_ctl->mutex);
6501 put_caching_control(caching_ctl);
6502 }
6503 btrfs_put_block_group(block_group);
6504 return ret;
6505}
6506
6507int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
6508 struct extent_buffer *eb)
6509{
6510 struct btrfs_file_extent_item *item;
6511 struct btrfs_key key;
6512 int found_type;
6513 int i;
6514
6515 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
6516 return 0;
6517
6518 for (i = 0; i < btrfs_header_nritems(eb); i++) {
6519 btrfs_item_key_to_cpu(eb, &key, i);
6520 if (key.type != BTRFS_EXTENT_DATA_KEY)
6521 continue;
6522 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
6523 found_type = btrfs_file_extent_type(eb, item);
6524 if (found_type == BTRFS_FILE_EXTENT_INLINE)
6525 continue;
6526 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
6527 continue;
6528 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
6529 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
6530 __exclude_logged_extent(fs_info, key.objectid, key.offset);
6531 }
6532
6533 return 0;
6534}
6535
6536static void
6537btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
6538{
6539 atomic_inc(&bg->reservations);
6540}
6541
6542void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
6543 const u64 start)
6544{
6545 struct btrfs_block_group_cache *bg;
6546
6547 bg = btrfs_lookup_block_group(fs_info, start);
6548 ASSERT(bg);
6549 if (atomic_dec_and_test(&bg->reservations))
6550 wake_up_var(&bg->reservations);
6551 btrfs_put_block_group(bg);
6552}
6553
6554void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6555{
6556 struct btrfs_space_info *space_info = bg->space_info;
6557
6558 ASSERT(bg->ro);
6559
6560 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
6561 return;
6562
6563 /*
6564 * Our block group is read-only, but before it was set read-only there
6565 * was time to submit delalloc or start allocations against it. Taking
6566 * and dropping groups_sem acts as a barrier: any allocator that grabbed
6567 * the semaphore before the block group was removed from the space_info
6568 * lists has finished by the time we get it, and any allocator that
6569 * comes after sees bg->ro and skips the group. After that, waiting for
6570 * bg->reservations to drop to zero is enough to know that every
6571 * outstanding reservation against this group has been released.
6572 */
6573 down_write(&space_info->groups_sem);
6574 up_write(&space_info->groups_sem);
6575
6576 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
6577}
6578
6579/**
6580 * btrfs_add_reserved_bytes - update the block_group and space info counters
6581 * @cache:      The cache we are manipulating
6582 * @ram_bytes:  The number of bytes of file content, which will be the same
6583 *              as @num_bytes except for the compress path
6584 * @num_bytes:  The number of bytes in question
6585 * @delalloc:   The blocks are allocated for the delalloc write
6586 *
6587 * This is called by the allocator when it reserves space. If this is a
6588 * reservation and the block group has become read only we cannot make the
6589 * reservation and return -EAGAIN, otherwise this function always succeeds.
6590 */
6591static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6592 u64 ram_bytes, u64 num_bytes, int delalloc)
6593{
6594 struct btrfs_space_info *space_info = cache->space_info;
6595 int ret = 0;
6596
6597 spin_lock(&space_info->lock);
6598 spin_lock(&cache->lock);
6599 if (cache->ro) {
6600 ret = -EAGAIN;
6601 } else {
6602 cache->reserved += num_bytes;
6603 space_info->bytes_reserved += num_bytes;
6604
6605 trace_btrfs_space_reservation(cache->fs_info,
6606 "space_info", space_info->flags,
6607 ram_bytes, 0);
6608 space_info->bytes_may_use -= ram_bytes;
6609 if (delalloc)
6610 cache->delalloc_bytes += num_bytes;
6611 }
6612 spin_unlock(&cache->lock);
6613 spin_unlock(&space_info->lock);
6614 return ret;
6615}
6616
6617
6618/**
6619 * btrfs_free_reserved_bytes - update the block_group and space info counters
6620 * @cache:      The cache we are manipulating
6621 * @num_bytes:  The number of bytes in question
6622 * @delalloc:   The blocks are allocated for the delalloc write
6623 *
6624 * This is called by somebody who is freeing space that was never actually
6625 * used on disk. For example if you reserve some space for a new leaf in
6626 * transaction A and before transaction A commits you free that leaf, you
6627 * call this with reserve set to 0 in order to clear the reservation.
6628 */
6629static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
6630 u64 num_bytes, int delalloc)
6631{
6632 struct btrfs_space_info *space_info = cache->space_info;
6633 int ret = 0;
6634
6635 spin_lock(&space_info->lock);
6636 spin_lock(&cache->lock);
6637 if (cache->ro)
6638 space_info->bytes_readonly += num_bytes;
6639 cache->reserved -= num_bytes;
6640 space_info->bytes_reserved -= num_bytes;
6641
6642 if (delalloc)
6643 cache->delalloc_bytes -= num_bytes;
6644 spin_unlock(&cache->lock);
6645 spin_unlock(&space_info->lock);
6646 return ret;
6647}

6648void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
6649{
6650 struct btrfs_caching_control *next;
6651 struct btrfs_caching_control *caching_ctl;
6652 struct btrfs_block_group_cache *cache;
6653
6654 down_write(&fs_info->commit_root_sem);
6655
6656 list_for_each_entry_safe(caching_ctl, next,
6657 &fs_info->caching_block_groups, list) {
6658 cache = caching_ctl->block_group;
6659 if (block_group_cache_done(cache)) {
6660 cache->last_byte_to_unpin = (u64)-1;
6661 list_del_init(&caching_ctl->list);
6662 put_caching_control(caching_ctl);
6663 } else {
6664 cache->last_byte_to_unpin = caching_ctl->progress;
6665 }
6666 }
6667
6668 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6669 fs_info->pinned_extents = &fs_info->freed_extents[1];
6670 else
6671 fs_info->pinned_extents = &fs_info->freed_extents[0];
6672
6673 up_write(&fs_info->commit_root_sem);
6674
6675 update_global_block_rsv(fs_info);
6676}
6677
6678/*
6679 * Returns the free cluster for the given space info and sets empty_cluster
6680 * to what it should be, based on the mount options.
6681 */
6682static struct btrfs_free_cluster *
6683fetch_cluster_info(struct btrfs_fs_info *fs_info,
6684 struct btrfs_space_info *space_info, u64 *empty_cluster)
6685{
6686 struct btrfs_free_cluster *ret = NULL;
6687
6688 *empty_cluster = 0;
6689 if (btrfs_mixed_space_info(space_info))
6690 return ret;
6691
6692 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
6693 ret = &fs_info->meta_alloc_cluster;
6694 if (btrfs_test_opt(fs_info, SSD))
6695 *empty_cluster = SZ_2M;
6696 else
6697 *empty_cluster = SZ_64K;
6698 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
6699 btrfs_test_opt(fs_info, SSD_SPREAD)) {
6700 *empty_cluster = SZ_2M;
6701 ret = &fs_info->data_alloc_cluster;
6702 }
6703
6704 return ret;
6705}
6706
6707static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6708 u64 start, u64 end,
6709 const bool return_free_space)
6710{
6711 struct btrfs_block_group_cache *cache = NULL;
6712 struct btrfs_space_info *space_info;
6713 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6714 struct btrfs_free_cluster *cluster = NULL;
6715 u64 len;
6716 u64 total_unpinned = 0;
6717 u64 empty_cluster = 0;
6718 bool readonly;
6719
6720 while (start <= end) {
6721 readonly = false;
6722 if (!cache ||
6723 start >= cache->key.objectid + cache->key.offset) {
6724 if (cache)
6725 btrfs_put_block_group(cache);
6726 total_unpinned = 0;
6727 cache = btrfs_lookup_block_group(fs_info, start);
6728 BUG_ON(!cache); /* Logic error */
6729
6730 cluster = fetch_cluster_info(fs_info,
6731 cache->space_info,
6732 &empty_cluster);
6733 empty_cluster <<= 1;
6734 }
6735
6736 len = cache->key.objectid + cache->key.offset - start;
6737 len = min(len, end + 1 - start);
6738
6739 if (start < cache->last_byte_to_unpin) {
6740 len = min(len, cache->last_byte_to_unpin - start);
6741 if (return_free_space)
6742 btrfs_add_free_space(cache, start, len);
6743 }
6744
6745 start += len;
6746 total_unpinned += len;
6747 space_info = cache->space_info;
6748
6749 /*
6750 * If this space cluster has been marked as fragmented and we've
6751 * unpinned enough in this block group to potentially allow a
6752 * cluster to be created inside of it go ahead and clear the
6753 * fragmented check.
6754 */
6755 if (cluster && cluster->fragmented &&
6756 total_unpinned > empty_cluster) {
6757 spin_lock(&cluster->lock);
6758 cluster->fragmented = 0;
6759 spin_unlock(&cluster->lock);
6760 }
6761
6762 spin_lock(&space_info->lock);
6763 spin_lock(&cache->lock);
6764 cache->pinned -= len;
6765 space_info->bytes_pinned -= len;
6766
6767 trace_btrfs_space_reservation(fs_info, "pinned",
6768 space_info->flags, len, 0);
6769 space_info->max_extent_size = 0;
6770 percpu_counter_add(&space_info->total_bytes_pinned, -len);
6771 if (cache->ro) {
6772 space_info->bytes_readonly += len;
6773 readonly = true;
6774 }
6775 spin_unlock(&cache->lock);
6776 if (!readonly && return_free_space &&
6777 global_rsv->space_info == space_info) {
6778 u64 to_add = len;
6779
6780 spin_lock(&global_rsv->lock);
6781 if (!global_rsv->full) {
6782 to_add = min(len, global_rsv->size -
6783 global_rsv->reserved);
6784 global_rsv->reserved += to_add;
6785 space_info->bytes_may_use += to_add;
6786 if (global_rsv->reserved >= global_rsv->size)
6787 global_rsv->full = 1;
6788 trace_btrfs_space_reservation(fs_info,
6789 "space_info",
6790 space_info->flags,
6791 to_add, 1);
6792 len -= to_add;
6793 }
6794 spin_unlock(&global_rsv->lock);
6795
6796 if (len)
6797 space_info_add_new_bytes(fs_info, space_info,
6798 len);
6799 }
6800 spin_unlock(&space_info->lock);
6801 }
6802
6803 if (cache)
6804 btrfs_put_block_group(cache);
6805 return 0;
6806}
6807
6808int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
6809{
6810 struct btrfs_fs_info *fs_info = trans->fs_info;
6811 struct btrfs_block_group_cache *block_group, *tmp;
6812 struct list_head *deleted_bgs;
6813 struct extent_io_tree *unpin;
6814 u64 start;
6815 u64 end;
6816 int ret;
6817
6818 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6819 unpin = &fs_info->freed_extents[1];
6820 else
6821 unpin = &fs_info->freed_extents[0];
6822
6823 while (!trans->aborted) {
6824 mutex_lock(&fs_info->unused_bg_unpin_mutex);
6825 ret = find_first_extent_bit(unpin, 0, &start, &end,
6826 EXTENT_DIRTY, NULL);
6827 if (ret) {
6828 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6829 break;
6830 }
6831
6832 if (btrfs_test_opt(fs_info, DISCARD))
6833 ret = btrfs_discard_extent(fs_info, start,
6834 end + 1 - start, NULL);
6835
6836 clear_extent_dirty(unpin, start, end);
6837 unpin_extent_range(fs_info, start, end, true);
6838 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6839 cond_resched();
6840 }
6841
	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
6847 deleted_bgs = &trans->transaction->deleted_bgs;
6848 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6849 u64 trimmed = 0;
6850
6851 ret = -EROFS;
6852 if (!trans->aborted)
6853 ret = btrfs_discard_extent(fs_info,
6854 block_group->key.objectid,
6855 block_group->key.offset,
6856 &trimmed);
6857
6858 list_del_init(&block_group->bg_list);
6859 btrfs_put_block_group_trimming(block_group);
6860 btrfs_put_block_group(block_group);
6861
6862 if (ret) {
6863 const char *errstr = btrfs_decode_error(ret);
6864 btrfs_warn(fs_info,
6865 "discard failed while removing blockgroup: errno=%d %s",
6866 ret, errstr);
6867 }
6868 }
6869
6870 return 0;
6871}
6872
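/*
 * Drop refs_to_drop references to the extent described by @node.  The backref
 * item is updated or removed, and once the last reference is gone the extent
 * item itself is deleted along with its csums (for data), and the block group
 * and free space accounting is updated.
 */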
6873static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6874 struct btrfs_fs_info *info,
6875 struct btrfs_delayed_ref_node *node, u64 parent,
6876 u64 root_objectid, u64 owner_objectid,
6877 u64 owner_offset, int refs_to_drop,
6878 struct btrfs_delayed_extent_op *extent_op)
6879{
6880 struct btrfs_key key;
6881 struct btrfs_path *path;
6882 struct btrfs_root *extent_root = info->extent_root;
6883 struct extent_buffer *leaf;
6884 struct btrfs_extent_item *ei;
6885 struct btrfs_extent_inline_ref *iref;
6886 int ret;
6887 int is_data;
6888 int extent_slot = 0;
6889 int found_extent = 0;
6890 int num_to_del = 1;
6891 u32 item_size;
6892 u64 refs;
6893 u64 bytenr = node->bytenr;
6894 u64 num_bytes = node->num_bytes;
6895 int last_ref = 0;
6896 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
6897
6898 path = btrfs_alloc_path();
6899 if (!path)
6900 return -ENOMEM;
6901
6902 path->reada = READA_FORWARD;
6903 path->leave_spinning = 1;
6904
6905 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6906 BUG_ON(!is_data && refs_to_drop != 1);
6907
6908 if (is_data)
6909 skinny_metadata = false;
6910
6911 ret = lookup_extent_backref(trans, info, path, &iref,
6912 bytenr, num_bytes, parent,
6913 root_objectid, owner_objectid,
6914 owner_offset);
6915 if (ret == 0) {
6916 extent_slot = path->slots[0];
6917 while (extent_slot >= 0) {
6918 btrfs_item_key_to_cpu(path->nodes[0], &key,
6919 extent_slot);
6920 if (key.objectid != bytenr)
6921 break;
6922 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6923 key.offset == num_bytes) {
6924 found_extent = 1;
6925 break;
6926 }
6927 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6928 key.offset == owner_objectid) {
6929 found_extent = 1;
6930 break;
6931 }
6932 if (path->slots[0] - extent_slot > 5)
6933 break;
6934 extent_slot--;
6935 }
6936#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6937 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
6938 if (found_extent && item_size < sizeof(*ei))
6939 found_extent = 0;
6940#endif
6941 if (!found_extent) {
6942 BUG_ON(iref);
6943 ret = remove_extent_backref(trans, info, path, NULL,
6944 refs_to_drop,
6945 is_data, &last_ref);
6946 if (ret) {
6947 btrfs_abort_transaction(trans, ret);
6948 goto out;
6949 }
6950 btrfs_release_path(path);
6951 path->leave_spinning = 1;
6952
6953 key.objectid = bytenr;
6954 key.type = BTRFS_EXTENT_ITEM_KEY;
6955 key.offset = num_bytes;
6956
6957 if (!is_data && skinny_metadata) {
6958 key.type = BTRFS_METADATA_ITEM_KEY;
6959 key.offset = owner_objectid;
6960 }
6961
6962 ret = btrfs_search_slot(trans, extent_root,
6963 &key, path, -1, 1);
6964 if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have the old extent item.
				 */
6969 path->slots[0]--;
6970 btrfs_item_key_to_cpu(path->nodes[0], &key,
6971 path->slots[0]);
6972 if (key.objectid == bytenr &&
6973 key.type == BTRFS_EXTENT_ITEM_KEY &&
6974 key.offset == num_bytes)
6975 ret = 0;
6976 }
6977
6978 if (ret > 0 && skinny_metadata) {
6979 skinny_metadata = false;
6980 key.objectid = bytenr;
6981 key.type = BTRFS_EXTENT_ITEM_KEY;
6982 key.offset = num_bytes;
6983 btrfs_release_path(path);
6984 ret = btrfs_search_slot(trans, extent_root,
6985 &key, path, -1, 1);
6986 }
6987
6988 if (ret) {
6989 btrfs_err(info,
6990 "umm, got %d back from search, was looking for %llu",
6991 ret, bytenr);
6992 if (ret > 0)
6993 btrfs_print_leaf(path->nodes[0]);
6994 }
6995 if (ret < 0) {
6996 btrfs_abort_transaction(trans, ret);
6997 goto out;
6998 }
6999 extent_slot = path->slots[0];
7000 }
7001 } else if (WARN_ON(ret == -ENOENT)) {
7002 btrfs_print_leaf(path->nodes[0]);
7003 btrfs_err(info,
7004 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
7005 bytenr, parent, root_objectid, owner_objectid,
7006 owner_offset);
7007 btrfs_abort_transaction(trans, ret);
7008 goto out;
7009 } else {
7010 btrfs_abort_transaction(trans, ret);
7011 goto out;
7012 }
7013
7014 leaf = path->nodes[0];
7015 item_size = btrfs_item_size_nr(leaf, extent_slot);
7016#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7017 if (item_size < sizeof(*ei)) {
7018 BUG_ON(found_extent || extent_slot != path->slots[0]);
7019 ret = convert_extent_item_v0(trans, info, path, owner_objectid,
7020 0);
7021 if (ret < 0) {
7022 btrfs_abort_transaction(trans, ret);
7023 goto out;
7024 }
7025
7026 btrfs_release_path(path);
7027 path->leave_spinning = 1;
7028
7029 key.objectid = bytenr;
7030 key.type = BTRFS_EXTENT_ITEM_KEY;
7031 key.offset = num_bytes;
7032
7033 ret = btrfs_search_slot(trans, extent_root, &key, path,
7034 -1, 1);
7035 if (ret) {
7036 btrfs_err(info,
7037 "umm, got %d back from search, was looking for %llu",
7038 ret, bytenr);
7039 btrfs_print_leaf(path->nodes[0]);
7040 }
7041 if (ret < 0) {
7042 btrfs_abort_transaction(trans, ret);
7043 goto out;
7044 }
7045
7046 extent_slot = path->slots[0];
7047 leaf = path->nodes[0];
7048 item_size = btrfs_item_size_nr(leaf, extent_slot);
7049 }
7050#endif
7051 BUG_ON(item_size < sizeof(*ei));
7052 ei = btrfs_item_ptr(leaf, extent_slot,
7053 struct btrfs_extent_item);
7054 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
7055 key.type == BTRFS_EXTENT_ITEM_KEY) {
7056 struct btrfs_tree_block_info *bi;
7057 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
7058 bi = (struct btrfs_tree_block_info *)(ei + 1);
7059 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
7060 }
7061
7062 refs = btrfs_extent_refs(leaf, ei);
7063 if (refs < refs_to_drop) {
7064 btrfs_err(info,
7065 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
7066 refs_to_drop, refs, bytenr);
7067 ret = -EINVAL;
7068 btrfs_abort_transaction(trans, ret);
7069 goto out;
7070 }
7071 refs -= refs_to_drop;
7072
7073 if (refs > 0) {
7074 if (extent_op)
7075 __run_delayed_extent_op(extent_op, leaf, ei);
7076
		/*
		 * In the case of an inline back ref, the reference count
		 * will be updated by remove_extent_backref().
		 */
7080 if (iref) {
7081 BUG_ON(!found_extent);
7082 } else {
7083 btrfs_set_extent_refs(leaf, ei, refs);
7084 btrfs_mark_buffer_dirty(leaf);
7085 }
7086 if (found_extent) {
7087 ret = remove_extent_backref(trans, info, path,
7088 iref, refs_to_drop,
7089 is_data, &last_ref);
7090 if (ret) {
7091 btrfs_abort_transaction(trans, ret);
7092 goto out;
7093 }
7094 }
7095 } else {
7096 if (found_extent) {
7097 BUG_ON(is_data && refs_to_drop !=
7098 extent_data_ref_count(path, iref));
7099 if (iref) {
7100 BUG_ON(path->slots[0] != extent_slot);
7101 } else {
7102 BUG_ON(path->slots[0] != extent_slot + 1);
7103 path->slots[0] = extent_slot;
7104 num_to_del = 2;
7105 }
7106 }
7107
7108 last_ref = 1;
7109 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
7110 num_to_del);
7111 if (ret) {
7112 btrfs_abort_transaction(trans, ret);
7113 goto out;
7114 }
7115 btrfs_release_path(path);
7116
7117 if (is_data) {
7118 ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
7119 if (ret) {
7120 btrfs_abort_transaction(trans, ret);
7121 goto out;
7122 }
7123 }
7124
7125 ret = add_to_free_space_tree(trans, info, bytenr, num_bytes);
7126 if (ret) {
7127 btrfs_abort_transaction(trans, ret);
7128 goto out;
7129 }
7130
7131 ret = update_block_group(trans, info, bytenr, num_bytes, 0);
7132 if (ret) {
7133 btrfs_abort_transaction(trans, ret);
7134 goto out;
7135 }
7136 }
7137 btrfs_release_path(path);
7138
7139out:
7140 btrfs_free_path(path);
7141 return ret;
7142}
7143
/*
 * When we free a block it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree
 * for a given extent, and if there are no other delayed refs to be processed,
 * it removes it from the tree.
 */
7150static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
7151 u64 bytenr)
7152{
7153 struct btrfs_delayed_ref_head *head;
7154 struct btrfs_delayed_ref_root *delayed_refs;
7155 int ret = 0;
7156
7157 delayed_refs = &trans->transaction->delayed_refs;
7158 spin_lock(&delayed_refs->lock);
7159 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
7160 if (!head)
7161 goto out_delayed_unlock;
7162
7163 spin_lock(&head->lock);
7164 if (!RB_EMPTY_ROOT(&head->ref_tree))
7165 goto out;
7166
7167 if (head->extent_op) {
7168 if (!head->must_insert_reserved)
7169 goto out;
7170 btrfs_free_delayed_extent_op(head->extent_op);
7171 head->extent_op = NULL;
7172 }
7173
	/*
	 * Waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway.
	 */
7178 if (!mutex_trylock(&head->mutex))
7179 goto out;
7180
	/*
	 * At this point we have a head with no other entries.  Go ahead and
	 * process it.
	 */
7185 rb_erase(&head->href_node, &delayed_refs->href_root);
7186 RB_CLEAR_NODE(&head->href_node);
7187 atomic_dec(&delayed_refs->num_entries);
7188
	/*
	 * We don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
7193 delayed_refs->num_heads--;
7194 if (head->processing == 0)
7195 delayed_refs->num_heads_ready--;
7196 head->processing = 0;
7197 spin_unlock(&head->lock);
7198 spin_unlock(&delayed_refs->lock);
7199
7200 BUG_ON(head->extent_op);
7201 if (head->must_insert_reserved)
7202 ret = 1;
7203
7204 mutex_unlock(&head->mutex);
7205 btrfs_put_delayed_ref_head(head);
7206 return ret;
7207out:
7208 spin_unlock(&head->lock);
7209
7210out_delayed_unlock:
7211 spin_unlock(&delayed_refs->lock);
7212 return 0;
7213}
7214
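/*
 * Free a tree block.  If the block was created in the current transaction and
 * this was its last reference, the space can be reclaimed immediately;
 * otherwise the extent stays pinned until the transaction commits.
 */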
7215void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7216 struct btrfs_root *root,
7217 struct extent_buffer *buf,
7218 u64 parent, int last_ref)
7219{
7220 struct btrfs_fs_info *fs_info = root->fs_info;
7221 int pin = 1;
7222 int ret;
7223
7224 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7225 int old_ref_mod, new_ref_mod;
7226
7227 btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
7228 root->root_key.objectid,
7229 btrfs_header_level(buf), 0,
7230 BTRFS_DROP_DELAYED_REF);
7231 ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
7232 buf->len, parent,
7233 root->root_key.objectid,
7234 btrfs_header_level(buf),
7235 BTRFS_DROP_DELAYED_REF, NULL,
7236 &old_ref_mod, &new_ref_mod);
7237 BUG_ON(ret);
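		/*
		 * Only pin the block if the overall ref mod went from
		 * non-negative to negative, i.e. the extent is really freed
		 * by this transaction.
		 */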
7238 pin = old_ref_mod >= 0 && new_ref_mod < 0;
7239 }
7240
7241 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
7242 struct btrfs_block_group_cache *cache;
7243
7244 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7245 ret = check_ref_cleanup(trans, buf->start);
7246 if (!ret)
7247 goto out;
7248 }
7249
7250 pin = 0;
7251 cache = btrfs_lookup_block_group(fs_info, buf->start);
7252
7253 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
7254 pin_down_extent(fs_info, cache, buf->start,
7255 buf->len, 1);
7256 btrfs_put_block_group(cache);
7257 goto out;
7258 }
7259
7260 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7261
7262 btrfs_add_free_space(cache, buf->start, buf->len);
7263 btrfs_free_reserved_bytes(cache, buf->len, 0);
7264 btrfs_put_block_group(cache);
7265 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
7266 }
7267out:
7268 if (pin)
7269 add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
7270 root->root_key.objectid);
7271
7272 if (last_ref) {
		/*
		 * Deleting the buffer, clear the corrupt flag since it
		 * doesn't matter anymore.
		 */
7277 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
7278 }
7279}
7280
/* Can return -ENOMEM */
7282int btrfs_free_extent(struct btrfs_trans_handle *trans,
7283 struct btrfs_root *root,
7284 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
7285 u64 owner, u64 offset)
7286{
7287 struct btrfs_fs_info *fs_info = root->fs_info;
7288 int old_ref_mod, new_ref_mod;
7289 int ret;
7290
7291 if (btrfs_is_testing(fs_info))
7292 return 0;
7293
7294 if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
7295 btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
7296 root_objectid, owner, offset,
7297 BTRFS_DROP_DELAYED_REF);
7298
	/*
	 * Tree log blocks never actually go into the extent allocation tree,
	 * they are only tracked in memory.
	 */
7303 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
7304 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
		/* the extent stays pinned until the transaction commits */
7306 btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
7307 old_ref_mod = new_ref_mod = 0;
7308 ret = 0;
7309 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
7310 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
7311 num_bytes, parent,
7312 root_objectid, (int)owner,
7313 BTRFS_DROP_DELAYED_REF, NULL,
7314 &old_ref_mod, &new_ref_mod);
7315 } else {
7316 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
7317 num_bytes, parent,
7318 root_objectid, owner, offset,
7319 0, BTRFS_DROP_DELAYED_REF,
7320 &old_ref_mod, &new_ref_mod);
7321 }
7322
7323 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
7324 add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
7325
7326 return ret;
7327}
7328
/*
 * When we wait for progress in the block group caching, it's because our
 * allocation attempt failed at least once.  So, we must sleep and let some
 * progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to show
 * up, and then it will check the block group free space numbers for our min
 * num_bytes.  Another option is to have it go ahead and look in the rbtree
 * for a free extent of a given size, but this is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before
 * using any of the information in this block group.
 */
7343static noinline void
7344wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
7345 u64 num_bytes)
7346{
7347 struct btrfs_caching_control *caching_ctl;
7348
7349 caching_ctl = get_caching_control(cache);
7350 if (!caching_ctl)
7351 return;
7352
7353 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
7354 (cache->free_space_ctl->free_space >= num_bytes));
7355
7356 put_caching_control(caching_ctl);
7357}
7358
7359static noinline int
7360wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7361{
7362 struct btrfs_caching_control *caching_ctl;
7363 int ret = 0;
7364
7365 caching_ctl = get_caching_control(cache);
7366 if (!caching_ctl)
7367 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
7368
7369 wait_event(caching_ctl->wait, block_group_cache_done(cache));
7370 if (cache->cached == BTRFS_CACHE_ERROR)
7371 ret = -EIO;
7372 put_caching_control(caching_ctl);
7373 return ret;
7374}
7375
7376static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
7377 [BTRFS_RAID_RAID10] = "raid10",
7378 [BTRFS_RAID_RAID1] = "raid1",
7379 [BTRFS_RAID_DUP] = "dup",
7380 [BTRFS_RAID_RAID0] = "raid0",
7381 [BTRFS_RAID_SINGLE] = "single",
7382 [BTRFS_RAID_RAID5] = "raid5",
7383 [BTRFS_RAID_RAID6] = "raid6",
7384};
7385
7386static const char *get_raid_name(enum btrfs_raid_types type)
7387{
7388 if (type >= BTRFS_NR_RAID_TYPES)
7389 return NULL;
7390
7391 return btrfs_raid_type_names[type];
7392}
7393
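/*
 * Stages of the allocator search in find_free_extent(), from least to most
 * aggressive.  See the comment above the loop handling near the end of that
 * function.
 */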
7394enum btrfs_loop_type {
7395 LOOP_CACHING_NOWAIT = 0,
7396 LOOP_CACHING_WAIT = 1,
7397 LOOP_ALLOC_CHUNK = 2,
7398 LOOP_NO_EMPTY_SIZE = 3,
7399};
7400
7401static inline void
7402btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
7403 int delalloc)
7404{
7405 if (delalloc)
7406 down_read(&cache->data_rwsem);
7407}
7408
7409static inline void
7410btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
7411 int delalloc)
7412{
7413 btrfs_get_block_group(cache);
7414 if (delalloc)
7415 down_read(&cache->data_rwsem);
7416}
7417
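/*
 * Return the block group the cluster currently points to, with the cluster's
 * refill_lock held on return.  A reference is taken on the block group when
 * it differs from @block_group, and its data_rwsem is read-locked when
 * @delalloc is set.  Returns NULL if the cluster has no block group.
 */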
7418static struct btrfs_block_group_cache *
7419btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7420 struct btrfs_free_cluster *cluster,
7421 int delalloc)
7422{
7423 struct btrfs_block_group_cache *used_bg = NULL;
7424
7425 spin_lock(&cluster->refill_lock);
7426 while (1) {
7427 used_bg = cluster->block_group;
7428 if (!used_bg)
7429 return NULL;
7430
7431 if (used_bg == block_group)
7432 return used_bg;
7433
7434 btrfs_get_block_group(used_bg);
7435
7436 if (!delalloc)
7437 return used_bg;
7438
7439 if (down_read_trylock(&used_bg->data_rwsem))
7440 return used_bg;
7441
7442 spin_unlock(&cluster->refill_lock);
7443
		/* We should only have one-level nested. */
7445 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
7446
7447 spin_lock(&cluster->refill_lock);
7448 if (used_bg == cluster->block_group)
7449 return used_bg;
7450
7451 up_read(&used_bg->data_rwsem);
7452 btrfs_put_block_group(used_bg);
7453 }
7454}
7455
7456static inline void
7457btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7458 int delalloc)
7459{
7460 if (delalloc)
7461 up_read(&cache->data_rwsem);
7462 btrfs_put_block_group(cache);
7463}
7464
/*
 * Walk the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->type == BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 */
7476static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
7477 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7478 u64 hint_byte, struct btrfs_key *ins,
7479 u64 flags, int delalloc)
7480{
7481 int ret = 0;
7482 struct btrfs_root *root = fs_info->extent_root;
7483 struct btrfs_free_cluster *last_ptr = NULL;
7484 struct btrfs_block_group_cache *block_group = NULL;
7485 u64 search_start = 0;
7486 u64 max_extent_size = 0;
7487 u64 empty_cluster = 0;
7488 struct btrfs_space_info *space_info;
7489 int loop = 0;
7490 int index = btrfs_bg_flags_to_raid_index(flags);
7491 bool failed_cluster_refill = false;
7492 bool failed_alloc = false;
7493 bool use_cluster = true;
7494 bool have_caching_bg = false;
7495 bool orig_have_caching_bg = false;
7496 bool full_search = false;
7497
7498 WARN_ON(num_bytes < fs_info->sectorsize);
7499 ins->type = BTRFS_EXTENT_ITEM_KEY;
7500 ins->objectid = 0;
7501 ins->offset = 0;
7502
7503 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
7504
7505 space_info = __find_space_info(fs_info, flags);
7506 if (!space_info) {
7507 btrfs_err(fs_info, "No space info for %llu", flags);
7508 return -ENOSPC;
7509 }
7510
	/*
	 * If our free space is heavily fragmented we may not be able to make
	 * big contiguous allocations, so instead of doing the expensive search
	 * for free space, simply return ENOSPC with our max_extent_size so we
	 * can go ahead and search for a more manageable chunk.
	 *
	 * If our max_extent_size is large enough for our allocation simply
	 * disable clustering since we will likely not be able to find enough
	 * space to create a cluster and induce latency trying.
	 */
7521 if (unlikely(space_info->max_extent_size)) {
7522 spin_lock(&space_info->lock);
7523 if (space_info->max_extent_size &&
7524 num_bytes > space_info->max_extent_size) {
7525 ins->offset = space_info->max_extent_size;
7526 spin_unlock(&space_info->lock);
7527 return -ENOSPC;
7528 } else if (space_info->max_extent_size) {
7529 use_cluster = false;
7530 }
7531 spin_unlock(&space_info->lock);
7532 }
7533
7534 last_ptr = fetch_cluster_info(fs_info, space_info, &empty_cluster);
7535 if (last_ptr) {
7536 spin_lock(&last_ptr->lock);
7537 if (last_ptr->block_group)
7538 hint_byte = last_ptr->window_start;
7539 if (last_ptr->fragmented) {
			/*
			 * We still set window_start so we can keep track of
			 * the last place we found an allocation to try and
			 * save some time.
			 */
7545 hint_byte = last_ptr->window_start;
7546 use_cluster = false;
7547 }
7548 spin_unlock(&last_ptr->lock);
7549 }
7550
7551 search_start = max(search_start, first_logical_byte(fs_info, 0));
7552 search_start = max(search_start, hint_byte);
7553 if (search_start == hint_byte) {
7554 block_group = btrfs_lookup_block_group(fs_info, search_start);
7555
		/*
		 * We don't want to use the block group if it doesn't match
		 * our allocation bits, or if it's not cached.
		 *
		 * However if we are re-searching with an ideal block group
		 * picked out then we don't care that the block group is
		 * cached.
		 */
7562 if (block_group && block_group_bits(block_group, flags) &&
7563 block_group->cached != BTRFS_CACHE_NO) {
7564 down_read(&space_info->groups_sem);
7565 if (list_empty(&block_group->list) ||
7566 block_group->ro) {
				/*
				 * Someone is removing this block group,
				 * we can't jump into the have_block_group
				 * target because our list got deleted.
				 */
7573 btrfs_put_block_group(block_group);
7574 up_read(&space_info->groups_sem);
7575 } else {
7576 index = btrfs_bg_flags_to_raid_index(
7577 block_group->flags);
7578 btrfs_lock_block_group(block_group, delalloc);
7579 goto have_block_group;
7580 }
7581 } else if (block_group) {
7582 btrfs_put_block_group(block_group);
7583 }
7584 }
7585search:
7586 have_caching_bg = false;
7587 if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags))
7588 full_search = true;
7589 down_read(&space_info->groups_sem);
7590 list_for_each_entry(block_group, &space_info->block_groups[index],
7591 list) {
7592 u64 offset;
7593 int cached;
7594
		/* If the block group is read-only, we can skip it entirely. */
7596 if (unlikely(block_group->ro))
7597 continue;
7598
7599 btrfs_grab_block_group(block_group, delalloc);
7600 search_start = block_group->key.objectid;
7601
		/*
		 * This can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */
7607 if (!block_group_bits(block_group, flags)) {
7608 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7609 BTRFS_BLOCK_GROUP_RAID1 |
7610 BTRFS_BLOCK_GROUP_RAID5 |
7611 BTRFS_BLOCK_GROUP_RAID6 |
7612 BTRFS_BLOCK_GROUP_RAID10;
7613
			/*
			 * If they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */
7619 if ((flags & extra) && !(block_group->flags & extra))
7620 goto loop;
7621 }
7622
7623have_block_group:
7624 cached = block_group_cache_done(block_group);
7625 if (unlikely(!cached)) {
7626 have_caching_bg = true;
7627 ret = cache_block_group(block_group, 0);
7628 BUG_ON(ret < 0);
7629 ret = 0;
7630 }
7631
7632 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
7633 goto loop;
7634
		/*
		 * Ok, we want to try and use the cluster allocator, so
		 * let's look there.
		 */
7639 if (last_ptr && use_cluster) {
7640 struct btrfs_block_group_cache *used_block_group;
7641 unsigned long aligned_cluster;
7642
			/*
			 * The refill lock keeps out other people trying to
			 * start a new cluster.
			 */
7646 used_block_group = btrfs_lock_cluster(block_group,
7647 last_ptr,
7648 delalloc);
7649 if (!used_block_group)
7650 goto refill_cluster;
7651
7652 if (used_block_group != block_group &&
7653 (used_block_group->ro ||
7654 !block_group_bits(used_block_group, flags)))
7655 goto release_cluster;
7656
7657 offset = btrfs_alloc_from_cluster(used_block_group,
7658 last_ptr,
7659 num_bytes,
7660 used_block_group->key.objectid,
7661 &max_extent_size);
7662 if (offset) {
				/* We have a block, we're done */
7664 spin_unlock(&last_ptr->refill_lock);
7665 trace_btrfs_reserve_extent_cluster(fs_info,
7666 used_block_group,
7667 search_start, num_bytes);
7668 if (used_block_group != block_group) {
7669 btrfs_release_block_group(block_group,
7670 delalloc);
7671 block_group = used_block_group;
7672 }
7673 goto checks;
7674 }
7675
7676 WARN_ON(last_ptr->block_group != used_block_group);
release_cluster:
			/*
			 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a
			 * new cluster, so let's just skip it and let the
			 * allocator find whatever block it can find.  If we
			 * reach this point, we will have tried the cluster
			 * allocator plenty of times and not have found
			 * anything, so we are likely way too fragmented for
			 * the clustering stuff to find anything.
			 *
			 * However, if the cluster is taken from the current
			 * block group, release the cluster first, so that we
			 * stand a better chance of succeeding in the
			 * unclustered allocation.
			 */
7693 if (loop >= LOOP_NO_EMPTY_SIZE &&
7694 used_block_group != block_group) {
7695 spin_unlock(&last_ptr->refill_lock);
7696 btrfs_release_block_group(used_block_group,
7697 delalloc);
7698 goto unclustered_alloc;
7699 }
7700
			/*
			 * This cluster didn't work out, free it and start
			 * over.
			 */
7705 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7706
7707 if (used_block_group != block_group)
7708 btrfs_release_block_group(used_block_group,
7709 delalloc);
7710refill_cluster:
7711 if (loop >= LOOP_NO_EMPTY_SIZE) {
7712 spin_unlock(&last_ptr->refill_lock);
7713 goto unclustered_alloc;
7714 }
7715
7716 aligned_cluster = max_t(unsigned long,
7717 empty_cluster + empty_size,
7718 block_group->full_stripe_len);
7719
			/* Allocate a cluster in this block group. */
7721 ret = btrfs_find_space_cluster(fs_info, block_group,
7722 last_ptr, search_start,
7723 num_bytes,
7724 aligned_cluster);
7725 if (ret == 0) {
				/*
				 * Now pull our allocation out of this
				 * cluster.
				 */
7730 offset = btrfs_alloc_from_cluster(block_group,
7731 last_ptr,
7732 num_bytes,
7733 search_start,
7734 &max_extent_size);
7735 if (offset) {
					/* We found one, proceed */
7737 spin_unlock(&last_ptr->refill_lock);
7738 trace_btrfs_reserve_extent_cluster(fs_info,
7739 block_group, search_start,
7740 num_bytes);
7741 goto checks;
7742 }
7743 } else if (!cached && loop > LOOP_CACHING_NOWAIT
7744 && !failed_cluster_refill) {
7745 spin_unlock(&last_ptr->refill_lock);
7746
7747 failed_cluster_refill = true;
7748 wait_block_group_cache_progress(block_group,
7749 num_bytes + empty_cluster + empty_size);
7750 goto have_block_group;
7751 }
7752
			/*
			 * At this point we either didn't find a cluster or we
			 * weren't able to allocate a block from our cluster.
			 * Free the cluster we've been trying to use, and go
			 * to the next block group.
			 */
7759 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7760 spin_unlock(&last_ptr->refill_lock);
7761 goto loop;
7762 }
7763
unclustered_alloc:
		/*
		 * We are doing an unclustered allocation, set the fragmented
		 * flag so we don't bother trying to set up a cluster again
		 * until we get more space.
		 */
7770 if (unlikely(last_ptr)) {
7771 spin_lock(&last_ptr->lock);
7772 last_ptr->fragmented = 1;
7773 spin_unlock(&last_ptr->lock);
7774 }
7775 if (cached) {
7776 struct btrfs_free_space_ctl *ctl =
7777 block_group->free_space_ctl;
7778
7779 spin_lock(&ctl->tree_lock);
7780 if (ctl->free_space <
7781 num_bytes + empty_cluster + empty_size) {
7782 if (ctl->free_space > max_extent_size)
7783 max_extent_size = ctl->free_space;
7784 spin_unlock(&ctl->tree_lock);
7785 goto loop;
7786 }
7787 spin_unlock(&ctl->tree_lock);
7788 }
7789
7790 offset = btrfs_find_space_for_alloc(block_group, search_start,
7791 num_bytes, empty_size,
7792 &max_extent_size);
7793
		/*
		 * If we didn't find a chunk, and we haven't failed on this
		 * block group before, and this block group is in the middle of
		 * caching and we are ok with waiting, then go ahead and wait
		 * for progress to be made, and set failed_alloc to true.
		 *
		 * If failed_alloc is true then we've already waited on this
		 * block group once and should move on to the next block group.
		 */
7802 if (!offset && !failed_alloc && !cached &&
7803 loop > LOOP_CACHING_NOWAIT) {
7804 wait_block_group_cache_progress(block_group,
7805 num_bytes + empty_size);
7806 failed_alloc = true;
7807 goto have_block_group;
7808 } else if (!offset) {
7809 goto loop;
7810 }
7811checks:
7812 search_start = ALIGN(offset, fs_info->stripesize);
7813
		/* Move on to the next group */
7815 if (search_start + num_bytes >
7816 block_group->key.objectid + block_group->key.offset) {
7817 btrfs_add_free_space(block_group, offset, num_bytes);
7818 goto loop;
7819 }
7820
7821 if (offset < search_start)
7822 btrfs_add_free_space(block_group, offset,
7823 search_start - offset);
7824 BUG_ON(offset > search_start);
7825
7826 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7827 num_bytes, delalloc);
7828 if (ret == -EAGAIN) {
7829 btrfs_add_free_space(block_group, offset, num_bytes);
7830 goto loop;
7831 }
7832 btrfs_inc_block_group_reservations(block_group);
7833
		/* We are all good, let's return */
7835 ins->objectid = search_start;
7836 ins->offset = num_bytes;
7837
7838 trace_btrfs_reserve_extent(fs_info, block_group,
7839 search_start, num_bytes);
7840 btrfs_release_block_group(block_group, delalloc);
7841 break;
7842loop:
7843 failed_cluster_refill = false;
7844 failed_alloc = false;
7845 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
7846 index);
7847 btrfs_release_block_group(block_group, delalloc);
7848 cond_resched();
7849 }
7850 up_read(&space_info->groups_sem);
7851
7852 if ((loop == LOOP_CACHING_NOWAIT) && have_caching_bg
7853 && !orig_have_caching_bg)
7854 orig_have_caching_bg = true;
7855
7856 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
7857 goto search;
7858
7859 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
7860 goto search;
7861
	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *			again
	 */
7870 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
7871 index = 0;
7872 if (loop == LOOP_CACHING_NOWAIT) {
			/*
			 * We want to skip the LOOP_CACHING_WAIT step if we
			 * don't have any uncached bgs and we've already done a
			 * full search through.
			 */
7878 if (orig_have_caching_bg || !full_search)
7879 loop = LOOP_CACHING_WAIT;
7880 else
7881 loop = LOOP_ALLOC_CHUNK;
7882 } else {
7883 loop++;
7884 }
7885
7886 if (loop == LOOP_ALLOC_CHUNK) {
7887 struct btrfs_trans_handle *trans;
7888 int exist = 0;
7889
7890 trans = current->journal_info;
7891 if (trans)
7892 exist = 1;
7893 else
7894 trans = btrfs_join_transaction(root);
7895
7896 if (IS_ERR(trans)) {
7897 ret = PTR_ERR(trans);
7898 goto out;
7899 }
7900
7901 ret = do_chunk_alloc(trans, fs_info, flags,
7902 CHUNK_ALLOC_FORCE);
7903
			/*
			 * If we can't allocate a new chunk we've already
			 * looped through at least once, move on to the
			 * NO_EMPTY_SIZE case.
			 */
7909 if (ret == -ENOSPC)
7910 loop = LOOP_NO_EMPTY_SIZE;
7911
			/*
			 * Do not bail out on ENOSPC since we can do more
			 * things.
			 */
7916 if (ret < 0 && ret != -ENOSPC)
7917 btrfs_abort_transaction(trans, ret);
7918 else
7919 ret = 0;
7920 if (!exist)
7921 btrfs_end_transaction(trans);
7922 if (ret)
7923 goto out;
7924 }
7925
7926 if (loop == LOOP_NO_EMPTY_SIZE) {
			/*
			 * Don't loop again if we already have no empty_size
			 * and no empty_cluster.
			 */
7931 if (empty_size == 0 &&
7932 empty_cluster == 0) {
7933 ret = -ENOSPC;
7934 goto out;
7935 }
7936 empty_size = 0;
7937 empty_cluster = 0;
7938 }
7939
7940 goto search;
7941 } else if (!ins->objectid) {
7942 ret = -ENOSPC;
7943 } else if (ins->objectid) {
7944 if (!use_cluster && last_ptr) {
7945 spin_lock(&last_ptr->lock);
7946 last_ptr->window_start = ins->objectid;
7947 spin_unlock(&last_ptr->lock);
7948 }
7949 ret = 0;
7950 }
7951out:
7952 if (ret == -ENOSPC) {
7953 spin_lock(&space_info->lock);
7954 space_info->max_extent_size = max_extent_size;
7955 spin_unlock(&space_info->lock);
7956 ins->offset = max_extent_size;
7957 }
7958 return ret;
7959}
7960
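/*
 * Dump the state of a space_info (and optionally each of its block groups)
 * to the kernel log; used for ENOSPC debugging.
 */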
7961static void dump_space_info(struct btrfs_fs_info *fs_info,
7962 struct btrfs_space_info *info, u64 bytes,
7963 int dump_block_groups)
7964{
7965 struct btrfs_block_group_cache *cache;
7966 int index = 0;
7967
7968 spin_lock(&info->lock);
7969 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
7970 info->flags,
7971 info->total_bytes - btrfs_space_info_used(info, true),
7972 info->full ? "" : "not ");
7973 btrfs_info(fs_info,
7974 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
7975 info->total_bytes, info->bytes_used, info->bytes_pinned,
7976 info->bytes_reserved, info->bytes_may_use,
7977 info->bytes_readonly);
7978 spin_unlock(&info->lock);
7979
7980 if (!dump_block_groups)
7981 return;
7982
7983 down_read(&info->groups_sem);
7984again:
7985 list_for_each_entry(cache, &info->block_groups[index], list) {
7986 spin_lock(&cache->lock);
7987 btrfs_info(fs_info,
7988 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
7989 cache->key.objectid, cache->key.offset,
7990 btrfs_block_group_used(&cache->item), cache->pinned,
7991 cache->reserved, cache->ro ? "[readonly]" : "");
7992 btrfs_dump_free_space(cache, bytes);
7993 spin_unlock(&cache->lock);
7994 }
7995 if (++index < BTRFS_NR_RAID_TYPES)
7996 goto again;
7997 up_read(&info->groups_sem);
7998}
7999
/*
 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
 *			  hole that is at least as big as @num_bytes.
 *
 * @root           -	The root that will contain this extent
 *
 * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
 *			is used for accounting purposes. This value differs
 *			from @num_bytes only in the case of compressed extents.
 *
 * @num_bytes      -	Number of bytes to allocate on-disk.
 *
 * @min_alloc_size -	Indicates the minimum amount of space that the
 *			allocator should try to satisfy. In some cases
 *			@num_bytes may be larger than what is required and if
 *			the filesystem is fragmented then allocation fails.
 *			However, the presence of @min_alloc_size gives a
 *			chance to try and satisfy the smaller allocation.
 *
 * @empty_size     -	A hint that you plan on doing more COW. This is the
 *			size in bytes the allocator should try to find free
 *			next to the block it returns.  This is just a hint and
 *			may be ignored by the allocator.
 *
 * @hint_byte      -	Hint to the allocator to start searching above the byte
 *			address passed. It might be ignored.
 *
 * @ins            -	This key is modified to record the found hole. It will
 *			have the following values:
 *			ins->objectid == start position
 *			ins->type == BTRFS_EXTENT_ITEM_KEY
 *			ins->offset == the size of the hole.
 *
 * @is_data        -	Boolean flag indicating whether an extent is
 *			allocated for data (true) or tree block (false)
 *
 * @delalloc       -	Boolean flag indicating whether this allocation is for
 *			delalloc or not. If 'true' data_rwsem of block groups
 *			is going to be acquired.
 *
 * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
 * case -ENOSPC is returned then @ins->offset will contain the size of the
 * largest available hole the allocator managed to find.
 */
8045int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
8046 u64 num_bytes, u64 min_alloc_size,
8047 u64 empty_size, u64 hint_byte,
8048 struct btrfs_key *ins, int is_data, int delalloc)
8049{
8050 struct btrfs_fs_info *fs_info = root->fs_info;
8051 bool final_tried = num_bytes == min_alloc_size;
8052 u64 flags;
8053 int ret;
8054
8055 flags = get_alloc_profile_by_root(root, is_data);
8056again:
8057 WARN_ON(num_bytes < fs_info->sectorsize);
8058 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
8059 hint_byte, ins, flags, delalloc);
8060 if (!ret && !is_data) {
8061 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
8062 } else if (ret == -ENOSPC) {
8063 if (!final_tried && ins->offset) {
8064 num_bytes = min(num_bytes >> 1, ins->offset);
8065 num_bytes = round_down(num_bytes,
8066 fs_info->sectorsize);
8067 num_bytes = max(num_bytes, min_alloc_size);
8068 ram_bytes = num_bytes;
8069 if (num_bytes == min_alloc_size)
8070 final_tried = true;
8071 goto again;
8072 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8073 struct btrfs_space_info *sinfo;
8074
8075 sinfo = __find_space_info(fs_info, flags);
8076 btrfs_err(fs_info,
8077 "allocation failed flags %llu, wanted %llu",
8078 flags, num_bytes);
8079 if (sinfo)
8080 dump_space_info(fs_info, sinfo, num_bytes, 1);
8081 }
8082 }
8083
8084 return ret;
8085}
8086
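/*
 * Return a reserved extent to its block group.  If @pin is set the range is
 * pinned until transaction commit, otherwise it is discarded (when the
 * DISCARD mount option is set) and handed back to the free space cache.
 */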
8087static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8088 u64 start, u64 len,
8089 int pin, int delalloc)
8090{
8091 struct btrfs_block_group_cache *cache;
8092 int ret = 0;
8093
8094 cache = btrfs_lookup_block_group(fs_info, start);
8095 if (!cache) {
8096 btrfs_err(fs_info, "Unable to find block group for %llu",
8097 start);
8098 return -ENOSPC;
8099 }
8100
8101 if (pin)
8102 pin_down_extent(fs_info, cache, start, len, 1);
8103 else {
8104 if (btrfs_test_opt(fs_info, DISCARD))
8105 ret = btrfs_discard_extent(fs_info, start, len, NULL);
8106 btrfs_add_free_space(cache, start, len);
8107 btrfs_free_reserved_bytes(cache, len, delalloc);
8108 trace_btrfs_reserved_extent_free(fs_info, start, len);
8109 }
8110
8111 btrfs_put_block_group(cache);
8112 return ret;
8113}
8114
8115int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8116 u64 start, u64 len, int delalloc)
8117{
8118 return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
8119}
8120
8121int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
8122 u64 start, u64 len)
8123{
8124 return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
8125}
8126
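/*
 * Insert the extent item and inline backref for a newly allocated data
 * extent into the extent tree, then update the free space tree and the
 * block group counters.
 */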
8127static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8128 struct btrfs_fs_info *fs_info,
8129 u64 parent, u64 root_objectid,
8130 u64 flags, u64 owner, u64 offset,
8131 struct btrfs_key *ins, int ref_mod)
8132{
8133 int ret;
8134 struct btrfs_extent_item *extent_item;
8135 struct btrfs_extent_inline_ref *iref;
8136 struct btrfs_path *path;
8137 struct extent_buffer *leaf;
8138 int type;
8139 u32 size;
8140
8141 if (parent > 0)
8142 type = BTRFS_SHARED_DATA_REF_KEY;
8143 else
8144 type = BTRFS_EXTENT_DATA_REF_KEY;
8145
8146 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
8147
8148 path = btrfs_alloc_path();
8149 if (!path)
8150 return -ENOMEM;
8151
8152 path->leave_spinning = 1;
8153 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8154 ins, size);
8155 if (ret) {
8156 btrfs_free_path(path);
8157 return ret;
8158 }
8159
8160 leaf = path->nodes[0];
8161 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8162 struct btrfs_extent_item);
8163 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
8164 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8165 btrfs_set_extent_flags(leaf, extent_item,
8166 flags | BTRFS_EXTENT_FLAG_DATA);
8167
8168 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8169 btrfs_set_extent_inline_ref_type(leaf, iref, type);
8170 if (parent > 0) {
8171 struct btrfs_shared_data_ref *ref;
8172 ref = (struct btrfs_shared_data_ref *)(iref + 1);
8173 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8174 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
8175 } else {
8176 struct btrfs_extent_data_ref *ref;
8177 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
8178 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
8179 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
8180 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
8181 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
8182 }
8183
8184 btrfs_mark_buffer_dirty(path->nodes[0]);
8185 btrfs_free_path(path);
8186
8187 ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
8188 ins->offset);
8189 if (ret)
8190 return ret;
8191
8192 ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1);
8193 if (ret) {
8194 btrfs_err(fs_info, "update block group failed for %llu %llu",
8195 ins->objectid, ins->offset);
8196 BUG();
8197 }
8198 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
8199 return ret;
8200}
8201
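/*
 * Insert the extent item (or skinny metadata item) and inline backref for a
 * newly allocated tree block into the extent tree.
 */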
8202static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
8203 struct btrfs_fs_info *fs_info,
8204 u64 parent, u64 root_objectid,
8205 u64 flags, struct btrfs_disk_key *key,
8206 int level, struct btrfs_key *ins)
8207{
8208 int ret;
8209 struct btrfs_extent_item *extent_item;
8210 struct btrfs_tree_block_info *block_info;
8211 struct btrfs_extent_inline_ref *iref;
8212 struct btrfs_path *path;
8213 struct extent_buffer *leaf;
8214 u32 size = sizeof(*extent_item) + sizeof(*iref);
8215 u64 num_bytes = ins->offset;
8216 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
8217
8218 if (!skinny_metadata)
8219 size += sizeof(*block_info);
8220
8221 path = btrfs_alloc_path();
8222 if (!path) {
8223 btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
8224 fs_info->nodesize);
8225 return -ENOMEM;
8226 }
8227
8228 path->leave_spinning = 1;
8229 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8230 ins, size);
8231 if (ret) {
8232 btrfs_free_path(path);
8233 btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
8234 fs_info->nodesize);
8235 return ret;
8236 }
8237
8238 leaf = path->nodes[0];
8239 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8240 struct btrfs_extent_item);
8241 btrfs_set_extent_refs(leaf, extent_item, 1);
8242 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8243 btrfs_set_extent_flags(leaf, extent_item,
8244 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
8245
8246 if (skinny_metadata) {
8247 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8248 num_bytes = fs_info->nodesize;
8249 } else {
8250 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
8251 btrfs_set_tree_block_key(leaf, block_info, key);
8252 btrfs_set_tree_block_level(leaf, block_info, level);
8253 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
8254 }
8255
8256 if (parent > 0) {
8257 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
8258 btrfs_set_extent_inline_ref_type(leaf, iref,
8259 BTRFS_SHARED_BLOCK_REF_KEY);
8260 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8261 } else {
8262 btrfs_set_extent_inline_ref_type(leaf, iref,
8263 BTRFS_TREE_BLOCK_REF_KEY);
8264 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
8265 }
8266
8267 btrfs_mark_buffer_dirty(leaf);
8268 btrfs_free_path(path);
8269
8270 ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
8271 num_bytes);
8272 if (ret)
8273 return ret;
8274
8275 ret = update_block_group(trans, fs_info, ins->objectid,
8276 fs_info->nodesize, 1);
8277 if (ret) {
8278 btrfs_err(fs_info, "update block group failed for %llu %llu",
8279 ins->objectid, ins->offset);
8280 BUG();
8281 }
8282
8283 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid,
8284 fs_info->nodesize);
8285 return ret;
8286}
8287
8288int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8289 struct btrfs_root *root, u64 owner,
8290 u64 offset, u64 ram_bytes,
8291 struct btrfs_key *ins)
8292{
8293 struct btrfs_fs_info *fs_info = root->fs_info;
8294 int ret;
8295
8296 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
8297
8298 btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
8299 root->root_key.objectid, owner, offset,
8300 BTRFS_ADD_DELAYED_EXTENT);
8301
8302 ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
8303 ins->offset, 0,
8304 root->root_key.objectid, owner,
8305 offset, ram_bytes,
8306 BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
8307 return ret;
8308}
8309
/*
 * This is used by the tree logging recovery code.  It records that an extent
 * has been allocated, so that replayed file extents are backed by a real
 * extent item.
 */
8315int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8316 struct btrfs_fs_info *fs_info,
8317 u64 root_objectid, u64 owner, u64 offset,
8318 struct btrfs_key *ins)
8319{
8320 int ret;
8321 struct btrfs_block_group_cache *block_group;
8322 struct btrfs_space_info *space_info;
8323
	/*
	 * Mixed block groups will exclude before processing the log so we
	 * only need to do the exclude dance if this fs isn't mixed.
	 */
8328 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8329 ret = __exclude_logged_extent(fs_info, ins->objectid,
8330 ins->offset);
8331 if (ret)
8332 return ret;
8333 }
8334
8335 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
8336 if (!block_group)
8337 return -EINVAL;
8338
8339 space_info = block_group->space_info;
8340 spin_lock(&space_info->lock);
8341 spin_lock(&block_group->lock);
8342 space_info->bytes_reserved += ins->offset;
8343 block_group->reserved += ins->offset;
8344 spin_unlock(&block_group->lock);
8345 spin_unlock(&space_info->lock);
8346
8347 ret = alloc_reserved_file_extent(trans, fs_info, 0, root_objectid,
8348 0, owner, offset, ins, 1);
8349 btrfs_put_block_group(block_group);
8350 return ret;
8351}
8352
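/*
 * Initialize a freshly allocated tree block: set its generation, lock it,
 * mark it uptodate and add it to the appropriate dirty pages tree so that it
 * is written out with the transaction (or with the log).
 */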
8353static struct extent_buffer *
8354btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8355 u64 bytenr, int level)
8356{
8357 struct btrfs_fs_info *fs_info = root->fs_info;
8358 struct extent_buffer *buf;
8359
8360 buf = btrfs_find_create_tree_block(fs_info, bytenr);
8361 if (IS_ERR(buf))
8362 return buf;
8363
8364 btrfs_set_header_generation(buf, trans->transid);
8365 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
8366 btrfs_tree_lock(buf);
8367 clean_tree_block(fs_info, buf);
8368 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
8369
8370 btrfs_set_lock_blocking(buf);
8371 set_extent_buffer_uptodate(buf);
8372
8373 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
8374 buf->log_index = root->log_transid % 2;
		/*
		 * We allow two log transactions at a time, use different
		 * EXTENT bits to differentiate dirty pages.
		 */
8379 if (buf->log_index == 0)
8380 set_extent_dirty(&root->dirty_log_pages, buf->start,
8381 buf->start + buf->len - 1, GFP_NOFS);
8382 else
8383 set_extent_new(&root->dirty_log_pages, buf->start,
8384 buf->start + buf->len - 1);
8385 } else {
8386 buf->log_index = -1;
8387 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
8388 buf->start + buf->len - 1, GFP_NOFS);
8389 }
8390 trans->dirty = true;
8391
8392 return buf;
8393}
8394
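/*
 * Reserve space for a new tree block, first from the root's block reserve,
 * then by reserving fresh metadata bytes, and finally by dipping into the
 * global reserve as a last resort.
 */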
8395static struct btrfs_block_rsv *
8396use_block_rsv(struct btrfs_trans_handle *trans,
8397 struct btrfs_root *root, u32 blocksize)
8398{
8399 struct btrfs_fs_info *fs_info = root->fs_info;
8400 struct btrfs_block_rsv *block_rsv;
8401 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
8402 int ret;
8403 bool global_updated = false;
8404
8405 block_rsv = get_block_rsv(trans, root);
8406
8407 if (unlikely(block_rsv->size == 0))
8408 goto try_reserve;
8409again:
8410 ret = block_rsv_use_bytes(block_rsv, blocksize);
8411 if (!ret)
8412 return block_rsv;
8413
8414 if (block_rsv->failfast)
8415 return ERR_PTR(ret);
8416
8417 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8418 global_updated = true;
8419 update_global_block_rsv(fs_info);
8420 goto again;
8421 }
8422
8423 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8424 static DEFINE_RATELIMIT_STATE(_rs,
8425 DEFAULT_RATELIMIT_INTERVAL * 10,
8426 1);
8427 if (__ratelimit(&_rs))
8428 WARN(1, KERN_DEBUG
8429 "BTRFS: block rsv returned %d\n", ret);
8430 }
8431try_reserve:
8432 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8433 BTRFS_RESERVE_NO_FLUSH);
8434 if (!ret)
8435 return block_rsv;
8436
	/*
	 * If we couldn't reserve metadata bytes try and use some from
	 * the global reserve if its space type is the same as the global
	 * reserve.
	 */
8441 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8442 block_rsv->space_info == global_rsv->space_info) {
8443 ret = block_rsv_use_bytes(global_rsv, blocksize);
8444 if (!ret)
8445 return global_rsv;
8446 }
8447 return ERR_PTR(ret);
8448}
8449
8450static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8451 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8452{
8453 block_rsv_add_bytes(block_rsv, blocksize, 0);
8454 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
8455}
8456
/*
 * Finds a free extent and does all the dirty work required for allocation.
 * Returns the tree buffer or an ERR_PTR on error.
 */
8461struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8462 struct btrfs_root *root,
8463 u64 parent, u64 root_objectid,
8464 const struct btrfs_disk_key *key,
8465 int level, u64 hint,
8466 u64 empty_size)
8467{
8468 struct btrfs_fs_info *fs_info = root->fs_info;
8469 struct btrfs_key ins;
8470 struct btrfs_block_rsv *block_rsv;
8471 struct extent_buffer *buf;
8472 struct btrfs_delayed_extent_op *extent_op;
8473 u64 flags = 0;
8474 int ret;
8475 u32 blocksize = fs_info->nodesize;
8476 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
8477
8478#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
8479 if (btrfs_is_testing(fs_info)) {
8480 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
8481 level);
8482 if (!IS_ERR(buf))
8483 root->alloc_bytenr += blocksize;
8484 return buf;
8485 }
8486#endif
8487
8488 block_rsv = use_block_rsv(trans, root, blocksize);
8489 if (IS_ERR(block_rsv))
8490 return ERR_CAST(block_rsv);
8491
8492 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
8493 empty_size, hint, &ins, 0, 0);
8494 if (ret)
8495 goto out_unuse;
8496
8497 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
8498 if (IS_ERR(buf)) {
8499 ret = PTR_ERR(buf);
8500 goto out_free_reserved;
8501 }
8502
8503 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
8504 if (parent == 0)
8505 parent = ins.objectid;
8506 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8507 } else
8508 BUG_ON(parent > 0);
8509
8510 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
8511 extent_op = btrfs_alloc_delayed_extent_op();
8512 if (!extent_op) {
8513 ret = -ENOMEM;
8514 goto out_free_buf;
8515 }
8516 if (key)
8517 memcpy(&extent_op->key, key, sizeof(extent_op->key));
8518 else
8519 memset(&extent_op->key, 0, sizeof(extent_op->key));
8520 extent_op->flags_to_set = flags;
		extent_op->update_key = !skinny_metadata;
8522 extent_op->update_flags = true;
8523 extent_op->is_data = false;
8524 extent_op->level = level;
8525
8526 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
8527 root_objectid, level, 0,
8528 BTRFS_ADD_DELAYED_EXTENT);
8529 ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
8530 ins.offset, parent,
8531 root_objectid, level,
8532 BTRFS_ADD_DELAYED_EXTENT,
8533 extent_op, NULL, NULL);
8534 if (ret)
8535 goto out_free_delayed;
8536 }
8537 return buf;
8538
8539out_free_delayed:
8540 btrfs_free_delayed_extent_op(extent_op);
8541out_free_buf:
8542 free_extent_buffer(buf);
8543out_free_reserved:
8544 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
8545out_unuse:
8546 unuse_block_rsv(fs_info, block_rsv, blocksize);
8547 return ERR_PTR(ret);
8548}
8549
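/*
 * State shared by walk_down_tree()/walk_up_tree() while dropping a snapshot:
 * per-level reference counts and flags, the current stage and level, and
 * readahead bookkeeping.
 */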
8550struct walk_control {
8551 u64 refs[BTRFS_MAX_LEVEL];
8552 u64 flags[BTRFS_MAX_LEVEL];
8553 struct btrfs_key update_progress;
8554 int stage;
8555 int level;
8556 int shared_level;
8557 int update_ref;
8558 int keep_locks;
8559 int reada_slot;
8560 int reada_count;
8561 int for_reloc;
8562};
8563
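/* Stages used by walk_control->stage while dropping a subtree. */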
8564#define DROP_REFERENCE 1
8565#define UPDATE_BACKREF 2
8566
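/*
 * Read ahead the tree blocks that walk_down_tree() is likely to visit next,
 * growing or shrinking the readahead window based on how much of the
 * previous batch was actually consumed.
 */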
8567static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
8568 struct btrfs_root *root,
8569 struct walk_control *wc,
8570 struct btrfs_path *path)
8571{
8572 struct btrfs_fs_info *fs_info = root->fs_info;
8573 u64 bytenr;
8574 u64 generation;
8575 u64 refs;
8576 u64 flags;
8577 u32 nritems;
8578 struct btrfs_key key;
8579 struct extent_buffer *eb;
8580 int ret;
8581 int slot;
8582 int nread = 0;
8583
8584 if (path->slots[wc->level] < wc->reada_slot) {
8585 wc->reada_count = wc->reada_count * 2 / 3;
8586 wc->reada_count = max(wc->reada_count, 2);
8587 } else {
8588 wc->reada_count = wc->reada_count * 3 / 2;
8589 wc->reada_count = min_t(int, wc->reada_count,
8590 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
8591 }
8592
8593 eb = path->nodes[wc->level];
8594 nritems = btrfs_header_nritems(eb);
8595
8596 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
8597 if (nread >= wc->reada_count)
8598 break;
8599
8600 cond_resched();
8601 bytenr = btrfs_node_blockptr(eb, slot);
8602 generation = btrfs_node_ptr_generation(eb, slot);
8603
8604 if (slot == path->slots[wc->level])
8605 goto reada;
8606
8607 if (wc->stage == UPDATE_BACKREF &&
8608 generation <= root->root_key.offset)
8609 continue;
8610
		/* We don't lock the tree block, it's OK to be racy here */
8612 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
8613 wc->level - 1, 1, &refs,
8614 &flags);
8615
8616 if (ret < 0)
8617 continue;
8618 BUG_ON(refs == 0);
8619
8620 if (wc->stage == DROP_REFERENCE) {
8621 if (refs == 1)
8622 goto reada;
8623
8624 if (wc->level == 1 &&
8625 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8626 continue;
8627 if (!wc->update_ref ||
8628 generation <= root->root_key.offset)
8629 continue;
8630 btrfs_node_key_to_cpu(eb, &key, slot);
8631 ret = btrfs_comp_cpu_keys(&key,
8632 &wc->update_progress);
8633 if (ret < 0)
8634 continue;
8635 } else {
8636 if (wc->level == 1 &&
8637 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8638 continue;
8639 }
8640reada:
8641 readahead_tree_block(fs_info, bytenr);
8642 nread++;
8643 }
8644 wc->reada_slot = slot;
8645}
8646
/*
 * Helper to process tree blocks while walking down the tree.
 *
 * When wc->stage == UPDATE_BACKREF, this function updates back refs for
 * pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8655static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8656 struct btrfs_root *root,
8657 struct btrfs_path *path,
8658 struct walk_control *wc, int lookup_info)
8659{
8660 struct btrfs_fs_info *fs_info = root->fs_info;
8661 int level = wc->level;
8662 struct extent_buffer *eb = path->nodes[level];
8663 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8664 int ret;
8665
8666 if (wc->stage == UPDATE_BACKREF &&
8667 btrfs_header_owner(eb) != root->root_key.objectid)
8668 return 1;
8669
	/*
	 * When the reference count of a tree block is 1, it won't increase
	 * again.  Once the full backref flag is set, we never clear it.
	 */
8674 if (lookup_info &&
8675 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
8676 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
8677 BUG_ON(!path->locks[level]);
8678 ret = btrfs_lookup_extent_info(trans, fs_info,
8679 eb->start, level, 1,
8680 &wc->refs[level],
8681 &wc->flags[level]);
8682 BUG_ON(ret == -ENOMEM);
8683 if (ret)
8684 return ret;
8685 BUG_ON(wc->refs[level] == 0);
8686 }
8687
8688 if (wc->stage == DROP_REFERENCE) {
8689 if (wc->refs[level] > 1)
8690 return 1;
8691
8692 if (path->locks[level] && !wc->keep_locks) {
8693 btrfs_tree_unlock_rw(eb, path->locks[level]);
8694 path->locks[level] = 0;
8695 }
8696 return 0;
8697 }
8698
	/* wc->stage == UPDATE_BACKREF */
8700 if (!(wc->flags[level] & flag)) {
8701 BUG_ON(!path->locks[level]);
8702 ret = btrfs_inc_ref(trans, root, eb, 1);
8703 BUG_ON(ret);
8704 ret = btrfs_dec_ref(trans, root, eb, 0);
8705 BUG_ON(ret);
8706 ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
8707 eb->len, flag,
8708 btrfs_header_level(eb), 0);
8709 BUG_ON(ret);
8710 wc->flags[level] |= flag;
8711 }
8712
	/*
	 * The block is shared by multiple trees, so it's not good to
	 * keep the tree lock.
	 */
8717 if (path->locks[level] && level > 0) {
8718 btrfs_tree_unlock_rw(eb, path->locks[level]);
8719 path->locks[level] = 0;
8720 }
8721 return 0;
8722}
8723
/*
 * Helper to process tree block pointers.
 *
 * When wc->stage == DROP_REFERENCE, this function checks the reference count
 * of the block pointed to.  If the block is shared and we need to update back
 * refs for the subtree rooted at the block, this function changes wc->stage
 * to UPDATE_BACKREF.  If the block is shared and there is no need to update
 * back refs for the subtree rooted at the block, this function simply unlocks
 * the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8737static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8738 struct btrfs_root *root,
8739 struct btrfs_path *path,
8740 struct walk_control *wc, int *lookup_info)
8741{
8742 struct btrfs_fs_info *fs_info = root->fs_info;
8743 u64 bytenr;
8744 u64 generation;
8745 u64 parent;
8746 u32 blocksize;
8747 struct btrfs_key key;
8748 struct btrfs_key first_key;
8749 struct extent_buffer *next;
8750 int level = wc->level;
8751 int reada = 0;
8752 int ret = 0;
8753 bool need_account = false;
8754
8755 generation = btrfs_node_ptr_generation(path->nodes[level],
8756 path->slots[level]);
8757
	/*
	 * If the lower level block was created before the snapshot was
	 * created, we know there is no need to update back refs for the
	 * subtree.
	 */
8762 if (wc->stage == UPDATE_BACKREF &&
8763 generation <= root->root_key.offset) {
8764 *lookup_info = 1;
8765 return 1;
8766 }
8767
8768 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8769 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
8770 path->slots[level]);
8771 blocksize = fs_info->nodesize;
8772
8773 next = find_extent_buffer(fs_info, bytenr);
8774 if (!next) {
8775 next = btrfs_find_create_tree_block(fs_info, bytenr);
8776 if (IS_ERR(next))
8777 return PTR_ERR(next);
8778
8779 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
8780 level - 1);
8781 reada = 1;
8782 }
8783 btrfs_tree_lock(next);
8784 btrfs_set_lock_blocking(next);
8785
8786 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
8787 &wc->refs[level - 1],
8788 &wc->flags[level - 1]);
8789 if (ret < 0)
8790 goto out_unlock;
8791
8792 if (unlikely(wc->refs[level - 1] == 0)) {
8793 btrfs_err(fs_info, "Missing references.");
8794 ret = -EIO;
8795 goto out_unlock;
8796 }
8797 *lookup_info = 0;
8798
8799 if (wc->stage == DROP_REFERENCE) {
8800 if (wc->refs[level - 1] > 1) {
8801 need_account = true;
8802 if (level == 1 &&
8803 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8804 goto skip;
8805
8806 if (!wc->update_ref ||
8807 generation <= root->root_key.offset)
8808 goto skip;
8809
8810 btrfs_node_key_to_cpu(path->nodes[level], &key,
8811 path->slots[level]);
8812 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
8813 if (ret < 0)
8814 goto skip;
8815
8816 wc->stage = UPDATE_BACKREF;
8817 wc->shared_level = level - 1;
8818 }
8819 } else {
8820 if (level == 1 &&
8821 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8822 goto skip;
8823 }
8824
8825 if (!btrfs_buffer_uptodate(next, generation, 0)) {
8826 btrfs_tree_unlock(next);
8827 free_extent_buffer(next);
8828 next = NULL;
8829 *lookup_info = 1;
8830 }
8831
8832 if (!next) {
8833 if (reada && level == 1)
8834 reada_walk_down(trans, root, wc, path);
8835 next = read_tree_block(fs_info, bytenr, generation, level - 1,
8836 &first_key);
8837 if (IS_ERR(next)) {
8838 return PTR_ERR(next);
8839 } else if (!extent_buffer_uptodate(next)) {
8840 free_extent_buffer(next);
8841 return -EIO;
8842 }
8843 btrfs_tree_lock(next);
8844 btrfs_set_lock_blocking(next);
8845 }
8846
8847 level--;
8848 ASSERT(level == btrfs_header_level(next));
8849 if (level != btrfs_header_level(next)) {
8850 btrfs_err(root->fs_info, "mismatched level");
8851 ret = -EIO;
8852 goto out_unlock;
8853 }
8854 path->nodes[level] = next;
8855 path->slots[level] = 0;
8856 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8857 wc->level = level;
8858 if (wc->level == 1)
8859 wc->reada_slot = 0;
8860 return 0;
8861skip:
8862 wc->refs[level - 1] = 0;
8863 wc->flags[level - 1] = 0;
8864 if (wc->stage == DROP_REFERENCE) {
8865 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8866 parent = path->nodes[level]->start;
8867 } else {
8868 ASSERT(root->root_key.objectid ==
8869 btrfs_header_owner(path->nodes[level]));
8870 if (root->root_key.objectid !=
8871 btrfs_header_owner(path->nodes[level])) {
8872 btrfs_err(root->fs_info,
8873 "mismatched block owner");
8874 ret = -EIO;
8875 goto out_unlock;
8876 }
8877 parent = 0;
8878 }
8879
8880 if (need_account) {
8881 ret = btrfs_qgroup_trace_subtree(trans, root, next,
8882 generation, level - 1);
8883 if (ret) {
8884 btrfs_err_rl(fs_info,
8885 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
8886 ret);
8887 }
8888 }
8889 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
8890 parent, root->root_key.objectid,
8891 level - 1, 0);
8892 if (ret)
8893 goto out_unlock;
8894 }
8895
8896 *lookup_info = 1;
8897 ret = 1;
8898
8899out_unlock:
8900 btrfs_tree_unlock(next);
8901 free_extent_buffer(next);
8902
8903 return ret;
8904}
8905
/*
 * Helper to process tree blocks while walking up the tree.
 *
 * When wc->stage == DROP_REFERENCE, this function drops the reference count
 * on the block.
 *
 * When wc->stage == UPDATE_BACKREF, this function changes wc->stage back to
 * DROP_REFERENCE if we changed wc->stage to UPDATE_BACKREF previously while
 * processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
8918static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
8919 struct btrfs_root *root,
8920 struct btrfs_path *path,
8921 struct walk_control *wc)
8922{
8923 struct btrfs_fs_info *fs_info = root->fs_info;
8924 int ret;
8925 int level = wc->level;
8926 struct extent_buffer *eb = path->nodes[level];
8927 u64 parent = 0;
8928
8929 if (wc->stage == UPDATE_BACKREF) {
8930 BUG_ON(wc->shared_level < level);
8931 if (level < wc->shared_level)
8932 goto out;
8933
8934 ret = find_next_key(path, level + 1, &wc->update_progress);
8935 if (ret > 0)
8936 wc->update_ref = 0;
8937
8938 wc->stage = DROP_REFERENCE;
8939 wc->shared_level = -1;
8940 path->slots[level] = 0;
8941
		/*
		 * Check the reference count again if the block isn't locked.
		 * We should start walking down the tree again if the
		 * reference count is one.
		 */
8947 if (!path->locks[level]) {
8948 BUG_ON(level == 0);
8949 btrfs_tree_lock(eb);
8950 btrfs_set_lock_blocking(eb);
8951 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8952
8953 ret = btrfs_lookup_extent_info(trans, fs_info,
8954 eb->start, level, 1,
8955 &wc->refs[level],
8956 &wc->flags[level]);
8957 if (ret < 0) {
8958 btrfs_tree_unlock_rw(eb, path->locks[level]);
8959 path->locks[level] = 0;
8960 return ret;
8961 }
8962 BUG_ON(wc->refs[level] == 0);
8963 if (wc->refs[level] == 1) {
8964 btrfs_tree_unlock_rw(eb, path->locks[level]);
8965 path->locks[level] = 0;
8966 return 1;
8967 }
8968 }
8969 }
8970
	/* wc->stage == DROP_REFERENCE */
8972 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
8973
8974 if (wc->refs[level] == 1) {
8975 if (level == 0) {
8976 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8977 ret = btrfs_dec_ref(trans, root, eb, 1);
8978 else
8979 ret = btrfs_dec_ref(trans, root, eb, 0);
8980 BUG_ON(ret);
8981 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, eb);
8982 if (ret) {
8983 btrfs_err_rl(fs_info,
8984 "error %d accounting leaf items. Quota is out of sync, rescan required.",
8985 ret);
8986 }
8987 }
8988
8989 if (!path->locks[level] &&
8990 btrfs_header_generation(eb) == trans->transid) {
8991 btrfs_tree_lock(eb);
8992 btrfs_set_lock_blocking(eb);
8993 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8994 }
8995 clean_tree_block(fs_info, eb);
8996 }
8997
8998 if (eb == root->node) {
8999 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9000 parent = eb->start;
9001 else
9002 BUG_ON(root->root_key.objectid !=
9003 btrfs_header_owner(eb));
9004 } else {
9005 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9006 parent = path->nodes[level + 1]->start;
9007 else
9008 BUG_ON(root->root_key.objectid !=
9009 btrfs_header_owner(path->nodes[level + 1]));
9010 }
9011
9012 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
9013out:
9014 wc->refs[level] = 0;
9015 wc->flags[level] = 0;
9016 return 0;
9017}
9018
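/*
 * Walk down the tree from wc->level, using walk_down_proc() and
 * do_walk_down() to decide whether each child block needs to be descended
 * into, skipped, or freed.
 */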
9019static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
9020 struct btrfs_root *root,
9021 struct btrfs_path *path,
9022 struct walk_control *wc)
9023{
9024 int level = wc->level;
9025 int lookup_info = 1;
9026 int ret;
9027
9028 while (level >= 0) {
9029 ret = walk_down_proc(trans, root, path, wc, lookup_info);
9030 if (ret > 0)
9031 break;
9032
9033 if (level == 0)
9034 break;
9035
9036 if (path->slots[level] >=
9037 btrfs_header_nritems(path->nodes[level]))
9038 break;
9039
9040 ret = do_walk_down(trans, root, path, wc, &lookup_info);
9041 if (ret > 0) {
9042 path->slots[level]++;
9043 continue;
9044 } else if (ret < 0)
9045 return ret;
9046 level = wc->level;
9047 }
9048 return 0;
9049}
9050
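/*
 * Walk back up the tree, finishing each node with walk_up_proc() and
 * returning to the caller (ret == 0) as soon as a sibling slot remains
 * to walk down from. Returns 1 once everything up to max_level has
 * been processed and the walk is complete.
 */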
9051static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
9052 struct btrfs_root *root,
9053 struct btrfs_path *path,
9054 struct walk_control *wc, int max_level)
9055{
9056 int level = wc->level;
9057 int ret;
9058
9059 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
9060 while (level < max_level && path->nodes[level]) {
9061 wc->level = level;
9062 if (path->slots[level] + 1 <
9063 btrfs_header_nritems(path->nodes[level])) {
9064 path->slots[level]++;
9065 return 0;
9066 } else {
9067 ret = walk_up_proc(trans, root, path, wc);
9068 if (ret > 0)
9069 return 0;
9070
9071 if (path->locks[level]) {
9072 btrfs_tree_unlock_rw(path->nodes[level],
9073 path->locks[level]);
9074 path->locks[level] = 0;
9075 }
9076 free_extent_buffer(path->nodes[level]);
9077 path->nodes[level] = NULL;
9078 level++;
9079 }
9080 }
9081 return 1;
9082}
9083
/*
 * drop a subvolume tree.
 *
 * this function traverses the tree freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN.
 */
9097int btrfs_drop_snapshot(struct btrfs_root *root,
9098 struct btrfs_block_rsv *block_rsv, int update_ref,
9099 int for_reloc)
9100{
9101 struct btrfs_fs_info *fs_info = root->fs_info;
9102 struct btrfs_path *path;
9103 struct btrfs_trans_handle *trans;
9104 struct btrfs_root *tree_root = fs_info->tree_root;
9105 struct btrfs_root_item *root_item = &root->root_item;
9106 struct walk_control *wc;
9107 struct btrfs_key key;
9108 int err = 0;
9109 int ret;
9110 int level;
9111 bool root_dropped = false;
9112
9113 btrfs_debug(fs_info, "Drop subvolume %llu", root->objectid);
9114
9115 path = btrfs_alloc_path();
9116 if (!path) {
9117 err = -ENOMEM;
9118 goto out;
9119 }
9120
9121 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9122 if (!wc) {
9123 btrfs_free_path(path);
9124 err = -ENOMEM;
9125 goto out;
9126 }
9127
9128 trans = btrfs_start_transaction(tree_root, 0);
9129 if (IS_ERR(trans)) {
9130 err = PTR_ERR(trans);
9131 goto out_free;
9132 }
9133
9134 if (block_rsv)
9135 trans->block_rsv = block_rsv;
9136
9137 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
9138 level = btrfs_header_level(root->node);
9139 path->nodes[level] = btrfs_lock_root_node(root);
9140 btrfs_set_lock_blocking(path->nodes[level]);
9141 path->slots[level] = 0;
9142 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9143 memset(&wc->update_progress, 0,
9144 sizeof(wc->update_progress));
9145 } else {
9146 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
9147 memcpy(&wc->update_progress, &key,
9148 sizeof(wc->update_progress));
9149
9150 level = root_item->drop_level;
9151 BUG_ON(level == 0);
9152 path->lowest_level = level;
9153 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9154 path->lowest_level = 0;
9155 if (ret < 0) {
9156 err = ret;
9157 goto out_end_trans;
9158 }
9159 WARN_ON(ret > 0);

 /*
 * unlock our path, this is safe because only this
 * function is allowed to delete this snapshot
 */
9165 btrfs_unlock_up_safe(path, 0);
9166
9167 level = btrfs_header_level(root->node);
9168 while (1) {
9169 btrfs_tree_lock(path->nodes[level]);
9170 btrfs_set_lock_blocking(path->nodes[level]);
9171 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9172
9173 ret = btrfs_lookup_extent_info(trans, fs_info,
9174 path->nodes[level]->start,
9175 level, 1, &wc->refs[level],
9176 &wc->flags[level]);
9177 if (ret < 0) {
9178 err = ret;
9179 goto out_end_trans;
9180 }
9181 BUG_ON(wc->refs[level] == 0);
9182
9183 if (level == root_item->drop_level)
9184 break;
9185
9186 btrfs_tree_unlock(path->nodes[level]);
9187 path->locks[level] = 0;
9188 WARN_ON(wc->refs[level] != 1);
9189 level--;
9190 }
9191 }
9192
9193 wc->level = level;
9194 wc->shared_level = -1;
9195 wc->stage = DROP_REFERENCE;
9196 wc->update_ref = update_ref;
9197 wc->keep_locks = 0;
9198 wc->for_reloc = for_reloc;
9199 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9200
9201 while (1) {
9202
9203 ret = walk_down_tree(trans, root, path, wc);
9204 if (ret < 0) {
9205 err = ret;
9206 break;
9207 }
9208
9209 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9210 if (ret < 0) {
9211 err = ret;
9212 break;
9213 }
9214
9215 if (ret > 0) {
9216 BUG_ON(wc->stage != DROP_REFERENCE);
9217 break;
9218 }
9219
9220 if (wc->stage == DROP_REFERENCE) {
9221 level = wc->level;
9222 btrfs_node_key(path->nodes[level],
9223 &root_item->drop_progress,
9224 path->slots[level]);
9225 root_item->drop_level = level;
9226 }
9227
9228 BUG_ON(wc->level == 0);
9229 if (btrfs_should_end_transaction(trans) ||
9230 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
9231 ret = btrfs_update_root(trans, tree_root,
9232 &root->root_key,
9233 root_item);
9234 if (ret) {
9235 btrfs_abort_transaction(trans, ret);
9236 err = ret;
9237 goto out_end_trans;
9238 }
9239
9240 btrfs_end_transaction_throttle(trans);
9241 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
9242 btrfs_debug(fs_info,
9243 "drop snapshot early exit");
9244 err = -EAGAIN;
9245 goto out_free;
9246 }
9247
9248 trans = btrfs_start_transaction(tree_root, 0);
9249 if (IS_ERR(trans)) {
9250 err = PTR_ERR(trans);
9251 goto out_free;
9252 }
9253 if (block_rsv)
9254 trans->block_rsv = block_rsv;
9255 }
9256 }
9257 btrfs_release_path(path);
9258 if (err)
9259 goto out_end_trans;
9260
9261 ret = btrfs_del_root(trans, fs_info, &root->root_key);
9262 if (ret) {
9263 btrfs_abort_transaction(trans, ret);
9264 err = ret;
9265 goto out_end_trans;
9266 }
9267
9268 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9269 ret = btrfs_find_root(tree_root, &root->root_key, path,
9270 NULL, NULL);
9271 if (ret < 0) {
9272 btrfs_abort_transaction(trans, ret);
9273 err = ret;
9274 goto out_end_trans;
9275 } else if (ret > 0) {
 /*
 * if we fail to delete the orphan item this time
 * around, it'll get picked up the next time.
 *
 * The most common failure here is just -ENOENT.
 */
9281 btrfs_del_orphan_item(trans, tree_root,
9282 root->root_key.objectid);
9283 }
9284 }
9285
9286 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9287 btrfs_add_dropped_root(trans, root);
9288 } else {
9289 free_extent_buffer(root->node);
9290 free_extent_buffer(root->commit_root);
9291 btrfs_put_fs_root(root);
9292 }
9293 root_dropped = true;
9294out_end_trans:
9295 btrfs_end_transaction_throttle(trans);
9296out_free:
9297 kfree(wc);
9298 btrfs_free_path(path);
9299out:
 /*
 * So if we need to stop dropping the snapshot for whatever reason we
 * need to make sure to add it back to the dead root list so that we
 * keep it around to make sure nothing's leaked.  If we drop it from
 * the radix tree after we've cleaned it up we know there's no
 * pending stuff on it so no matter what we can continue on.
 */
9307 if (!for_reloc && !root_dropped)
9308 btrfs_add_dead_root(root);
9309 if (err && err != -EAGAIN)
9310 btrfs_handle_fs_error(fs_info, err, NULL);
9311 return err;
9312}
9313
/*
 * drop subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'.
 * It is only used by the relocation code.
 */
9320int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9321 struct btrfs_root *root,
9322 struct extent_buffer *node,
9323 struct extent_buffer *parent)
9324{
9325 struct btrfs_fs_info *fs_info = root->fs_info;
9326 struct btrfs_path *path;
9327 struct walk_control *wc;
9328 int level;
9329 int parent_level;
9330 int ret = 0;
9331 int wret;
9332
9333 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9334
9335 path = btrfs_alloc_path();
9336 if (!path)
9337 return -ENOMEM;
9338
9339 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9340 if (!wc) {
9341 btrfs_free_path(path);
9342 return -ENOMEM;
9343 }
9344
9345 btrfs_assert_tree_locked(parent);
9346 parent_level = btrfs_header_level(parent);
9347 extent_buffer_get(parent);
9348 path->nodes[parent_level] = parent;
9349 path->slots[parent_level] = btrfs_header_nritems(parent);
9350
9351 btrfs_assert_tree_locked(node);
9352 level = btrfs_header_level(node);
9353 path->nodes[level] = node;
9354 path->slots[level] = 0;
9355 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9356
9357 wc->refs[parent_level] = 1;
9358 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9359 wc->level = level;
9360 wc->shared_level = -1;
9361 wc->stage = DROP_REFERENCE;
9362 wc->update_ref = 0;
9363 wc->keep_locks = 1;
9364 wc->for_reloc = 1;
9365 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9366
9367 while (1) {
9368 wret = walk_down_tree(trans, root, path, wc);
9369 if (wret < 0) {
9370 ret = wret;
9371 break;
9372 }
9373
9374 wret = walk_up_tree(trans, root, path, wc, parent_level);
9375 if (wret < 0)
9376 ret = wret;
9377 if (wret != 0)
9378 break;
9379 }
9380
9381 kfree(wc);
9382 btrfs_free_path(path);
9383 return ret;
9384}
9385
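/*
 * Compute the target block group profile for @flags when the current
 * profile can no longer be met (e.g. after device removal). A pending
 * restripe target takes precedence; otherwise RAID1/RAID10 degrade to
 * DUP and RAID0 to single on a one-device filesystem, while DUP is
 * promoted to RAID1 when enough read-write devices are available.
 */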
9386static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9387{
9388 u64 num_devices;
9389 u64 stripped;

 /*
 * if restripe for this chunk_type is on pick target profile and
 * return, otherwise do the usual balance
 */
9395 stripped = get_restripe_target(fs_info, flags);
9396 if (stripped)
9397 return extended_to_chunk(stripped);
9398
9399 num_devices = fs_info->fs_devices->rw_devices;
9400
9401 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9402 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9403 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9404
9405 if (num_devices == 1) {
9406 stripped |= BTRFS_BLOCK_GROUP_DUP;
9407 stripped = flags & ~stripped;
9408
 /* turn raid0 into single device chunks */
9410 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9411 return stripped;
9412
 /* turn mirroring into duplication */
9414 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9415 BTRFS_BLOCK_GROUP_RAID10))
9416 return stripped | BTRFS_BLOCK_GROUP_DUP;
9417 } else {
 /* they already had raid on here, just return */
9419 if (flags & stripped)
9420 return flags;
9421
9422 stripped |= BTRFS_BLOCK_GROUP_DUP;
9423 stripped = flags & ~stripped;
9424
 /* switch duplicated blocks with raid1 */
9426 if (flags & BTRFS_BLOCK_GROUP_DUP)
9427 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9428
 /* this is drive concat, leave it alone */
9430 }
9431
9432 return flags;
9433}
9434
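/*
 * Mark a block group read-only, or take another ro reference if it
 * already is. Returns -ENOSPC when flipping the group read-only would
 * leave its space_info without enough allocatable bytes; unless @force
 * is set, metadata and system groups additionally keep a 1MiB cushion.
 */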
9435static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9436{
9437 struct btrfs_space_info *sinfo = cache->space_info;
9438 u64 num_bytes;
9439 u64 min_allocable_bytes;
9440 int ret = -ENOSPC;
9441
 /*
 * We need some metadata space and system metadata space for
 * allocating chunks in some corner cases until we force the
 * block group to be read-only.
 */
9447 if ((sinfo->flags &
9448 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9449 !force)
9450 min_allocable_bytes = SZ_1M;
9451 else
9452 min_allocable_bytes = 0;
9453
9454 spin_lock(&sinfo->lock);
9455 spin_lock(&cache->lock);
9456
9457 if (cache->ro) {
9458 cache->ro++;
9459 ret = 0;
9460 goto out;
9461 }
9462
9463 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9464 cache->bytes_super - btrfs_block_group_used(&cache->item);
9465
9466 if (btrfs_space_info_used(sinfo, true) + num_bytes +
9467 min_allocable_bytes <= sinfo->total_bytes) {
9468 sinfo->bytes_readonly += num_bytes;
9469 cache->ro++;
9470 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9471 ret = 0;
9472 }
9473out:
9474 spin_unlock(&cache->lock);
9475 spin_unlock(&sinfo->lock);
9476 return ret;
9477}
9478
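/*
 * Set a block group read-only for scrub or relocation. If the dirty
 * block group write out already started in the current transaction we
 * wait for that transaction to commit and retry, and we may force a
 * chunk allocation first so the remaining writable space can absorb
 * new writes.
 */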
9479int btrfs_inc_block_group_ro(struct btrfs_fs_info *fs_info,
9480 struct btrfs_block_group_cache *cache)
9481
9482{
9483 struct btrfs_trans_handle *trans;
9484 u64 alloc_flags;
9485 int ret;
9486
9487again:
9488 trans = btrfs_join_transaction(fs_info->extent_root);
9489 if (IS_ERR(trans))
9490 return PTR_ERR(trans);
9491
 /*
 * we're not allowed to set block groups readonly after the dirty
 * block groups cache has started writing.  If it already started,
 * back off and let this transaction commit.
 */
9497 mutex_lock(&fs_info->ro_block_group_mutex);
9498 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9499 u64 transid = trans->transid;
9500
9501 mutex_unlock(&fs_info->ro_block_group_mutex);
9502 btrfs_end_transaction(trans);
9503
9504 ret = btrfs_wait_for_commit(fs_info, transid);
9505 if (ret)
9506 return ret;
9507 goto again;
9508 }

 /*
 * if we are changing raid levels, try to allocate a corresponding
 * block group with the new raid level.
 */
9514 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9515 if (alloc_flags != cache->flags) {
9516 ret = do_chunk_alloc(trans, fs_info, alloc_flags,
9517 CHUNK_ALLOC_FORCE);

 /*
 * ENOSPC is allowed here, we may have enough space already
 * allocated at the new raid level to carry on.
 */
9523 if (ret == -ENOSPC)
9524 ret = 0;
9525 if (ret < 0)
9526 goto out;
9527 }
9528
9529 ret = inc_block_group_ro(cache, 0);
9530 if (!ret)
9531 goto out;
9532 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9533 ret = do_chunk_alloc(trans, fs_info, alloc_flags,
9534 CHUNK_ALLOC_FORCE);
9535 if (ret < 0)
9536 goto out;
9537 ret = inc_block_group_ro(cache, 0);
9538out:
9539 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9540 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9541 mutex_lock(&fs_info->chunk_mutex);
9542 check_system_chunk(trans, fs_info, alloc_flags);
9543 mutex_unlock(&fs_info->chunk_mutex);
9544 }
9545 mutex_unlock(&fs_info->ro_block_group_mutex);
9546
9547 btrfs_end_transaction(trans);
9548 return ret;
9549}
9550
9551int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
9552 struct btrfs_fs_info *fs_info, u64 type)
9553{
9554 u64 alloc_flags = get_alloc_profile(fs_info, type);
9555
9556 return do_chunk_alloc(trans, fs_info, alloc_flags, CHUNK_ALLOC_FORCE);
9557}
9558
/*
 * helper to account the unused space of all the readonly block groups in
 * the space_info. takes mirrors into account.
 */
9563u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9564{
9565 struct btrfs_block_group_cache *block_group;
9566 u64 free_bytes = 0;
9567 int factor;
9568
 /* It's df, we don't care if it's racy */
9570 if (list_empty(&sinfo->ro_bgs))
9571 return 0;
9572
9573 spin_lock(&sinfo->lock);
9574 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9575 spin_lock(&block_group->lock);
9576
9577 if (!block_group->ro) {
9578 spin_unlock(&block_group->lock);
9579 continue;
9580 }
9581
9582 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
9583 BTRFS_BLOCK_GROUP_RAID10 |
9584 BTRFS_BLOCK_GROUP_DUP))
9585 factor = 2;
9586 else
9587 factor = 1;
9588
9589 free_bytes += (block_group->key.offset -
9590 btrfs_block_group_used(&block_group->item)) *
9591 factor;
9592
9593 spin_unlock(&block_group->lock);
9594 }
9595 spin_unlock(&sinfo->lock);
9596
9597 return free_bytes;
9598}
9599
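/*
 * Drop one read-only reference taken by inc_block_group_ro(). When the
 * last one goes away the group's unused bytes are removed from the
 * space_info's read-only accounting and the group leaves the ro_bgs
 * list.
 */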
9600void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
9601{
9602 struct btrfs_space_info *sinfo = cache->space_info;
9603 u64 num_bytes;
9604
9605 BUG_ON(!cache->ro);
9606
9607 spin_lock(&sinfo->lock);
9608 spin_lock(&cache->lock);
9609 if (!--cache->ro) {
9610 num_bytes = cache->key.offset - cache->reserved -
9611 cache->pinned - cache->bytes_super -
9612 btrfs_block_group_used(&cache->item);
9613 sinfo->bytes_readonly -= num_bytes;
9614 list_del_init(&cache->ro_list);
9615 }
9616 spin_unlock(&cache->lock);
9617 spin_unlock(&sinfo->lock);
9618}
9619
/*
 * checks to see if it's even possible to relocate this block group.
 *
 * @return - -1 if it's not a good idea to relocate this block group, 0 if
 * it's ok to go ahead and try.
 */
9626int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
9627{
9628 struct btrfs_root *root = fs_info->extent_root;
9629 struct btrfs_block_group_cache *block_group;
9630 struct btrfs_space_info *space_info;
9631 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
9632 struct btrfs_device *device;
9633 struct btrfs_trans_handle *trans;
9634 u64 min_free;
9635 u64 dev_min = 1;
9636 u64 dev_nr = 0;
9637 u64 target;
9638 int debug;
9639 int index;
9640 int full = 0;
9641 int ret = 0;
9642
9643 debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG);
9644
9645 block_group = btrfs_lookup_block_group(fs_info, bytenr);
9646
 /* odd, couldn't find the block group, leave it alone */
9648 if (!block_group) {
9649 if (debug)
9650 btrfs_warn(fs_info,
9651 "can't find block group for bytenr %llu",
9652 bytenr);
9653 return -1;
9654 }
9655
9656 min_free = btrfs_block_group_used(&block_group->item);
9657
 /* no bytes used, we're good */
9659 if (!min_free)
9660 goto out;
9661
9662 space_info = block_group->space_info;
9663 spin_lock(&space_info->lock);
9664
9665 full = space_info->full;

 /*
 * if this is the last block group we have in this space, we can't
 * relocate it unless we're able to allocate a new chunk below.
 *
 * Otherwise, we need to make sure we have room in the space to handle
 * all of the extents from this block group.  If we can, we're good.
 */
9674 if ((space_info->total_bytes != block_group->key.offset) &&
9675 (btrfs_space_info_used(space_info, false) + min_free <
9676 space_info->total_bytes)) {
9677 spin_unlock(&space_info->lock);
9678 goto out;
9679 }
9680 spin_unlock(&space_info->lock);

 /*
 * ok we don't have enough space, but maybe we have free space on our
 * devices to allocate new chunks for relocation, so loop through our
 * alloc devices and guess if we have enough space.  if this block
 * group is going to be restriped, run checks against the target
 * profile instead of the current one.
 */
9689 ret = -1;

 /*
 * index:
 *	0: raid10
 *	1: raid1
 *	2: dup
 *	3: raid0
 *	4: single
 */
9699 target = get_restripe_target(fs_info, block_group->flags);
9700 if (target) {
9701 index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
9702 } else {
 /*
 * this is just a balance, so if we were marked as full
 * we know there is no space for a new chunk
 */
9707 if (full) {
9708 if (debug)
9709 btrfs_warn(fs_info,
9710 "no space to alloc new chunk for block group %llu",
9711 block_group->key.objectid);
9712 goto out;
9713 }
9714
9715 index = btrfs_bg_flags_to_raid_index(block_group->flags);
9716 }
9717
9718 if (index == BTRFS_RAID_RAID10) {
9719 dev_min = 4;
 /* Divide by 2 */
9721 min_free >>= 1;
9722 } else if (index == BTRFS_RAID_RAID1) {
9723 dev_min = 2;
9724 } else if (index == BTRFS_RAID_DUP) {
 /* Multiply by 2 */
9726 min_free <<= 1;
9727 } else if (index == BTRFS_RAID_RAID0) {
9728 dev_min = fs_devices->rw_devices;
9729 min_free = div64_u64(min_free, dev_min);
9730 }
9731
 /* We need to do this so that we can look at pending chunks */
9733 trans = btrfs_join_transaction(root);
9734 if (IS_ERR(trans)) {
9735 ret = PTR_ERR(trans);
9736 goto out;
9737 }
9738
9739 mutex_lock(&fs_info->chunk_mutex);
9740 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9741 u64 dev_offset;

 /*
 * check to make sure we can actually find a chunk with enough
 * space to fit our block group in.
 */
9747 if (device->total_bytes > device->bytes_used + min_free &&
9748 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
9749 ret = find_free_dev_extent(trans, device, min_free,
9750 &dev_offset, NULL);
9751 if (!ret)
9752 dev_nr++;
9753
9754 if (dev_nr >= dev_min)
9755 break;
9756
9757 ret = -1;
9758 }
9759 }
9760 if (debug && ret == -1)
9761 btrfs_warn(fs_info,
9762 "no space to allocate a new chunk for block group %llu",
9763 block_group->key.objectid);
9764 mutex_unlock(&fs_info->chunk_mutex);
9765 btrfs_end_transaction(trans);
9766out:
9767 btrfs_put_block_group(block_group);
9768 return ret;
9769}
9770
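/*
 * Find the first block group item at or after @key and verify a chunk
 * mapping exists for it. Returns 0 with the path at the item, > 0 if
 * no such item exists, or a negative error (-ENOENT if the block group
 * has no matching chunk).
 */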
9771static int find_first_block_group(struct btrfs_fs_info *fs_info,
9772 struct btrfs_path *path,
9773 struct btrfs_key *key)
9774{
9775 struct btrfs_root *root = fs_info->extent_root;
9776 int ret = 0;
9777 struct btrfs_key found_key;
9778 struct extent_buffer *leaf;
9779 int slot;
9780
9781 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9782 if (ret < 0)
9783 goto out;
9784
9785 while (1) {
9786 slot = path->slots[0];
9787 leaf = path->nodes[0];
9788 if (slot >= btrfs_header_nritems(leaf)) {
9789 ret = btrfs_next_leaf(root, path);
9790 if (ret == 0)
9791 continue;
9792 if (ret < 0)
9793 goto out;
9794 break;
9795 }
9796 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9797
9798 if (found_key.objectid >= key->objectid &&
9799 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9800 struct extent_map_tree *em_tree;
9801 struct extent_map *em;
9802
9803 em_tree = &root->fs_info->mapping_tree.map_tree;
9804 read_lock(&em_tree->lock);
9805 em = lookup_extent_mapping(em_tree, found_key.objectid,
9806 found_key.offset);
9807 read_unlock(&em_tree->lock);
9808 if (!em) {
9809 btrfs_err(fs_info,
9810 "logical %llu len %llu found bg but no related chunk",
9811 found_key.objectid, found_key.offset);
9812 ret = -ENOENT;
9813 } else {
9814 ret = 0;
9815 }
9816 free_extent_map(em);
9817 goto out;
9818 }
9819 path->slots[0]++;
9820 }
9821out:
9822 return ret;
9823}
9824
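/*
 * Release the inode references block groups keep on their free space
 * cache inodes. Used during unmount; scans all block groups (rescanning
 * from the start in case some were skipped) and iputs any inode still
 * held via ->iref.
 */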
9825void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9826{
9827 struct btrfs_block_group_cache *block_group;
9828 u64 last = 0;
9829
9830 while (1) {
9831 struct inode *inode;
9832
9833 block_group = btrfs_lookup_first_block_group(info, last);
9834 while (block_group) {
9835 spin_lock(&block_group->lock);
9836 if (block_group->iref)
9837 break;
9838 spin_unlock(&block_group->lock);
9839 block_group = next_block_group(info, block_group);
9840 }
9841 if (!block_group) {
9842 if (last == 0)
9843 break;
9844 last = 0;
9845 continue;
9846 }
9847
9848 inode = block_group->inode;
9849 block_group->iref = 0;
9850 block_group->inode = NULL;
9851 spin_unlock(&block_group->lock);
9852 ASSERT(block_group->io_ctl.inode == NULL);
9853 iput(inode);
9854 last = block_group->key.objectid + block_group->key.offset;
9855 btrfs_put_block_group(block_group);
9856 }
9857}
9858
/*
 * Must be called only after stopping all workers, since we could have block
 * group caching kthreads running, and therefore they could race with us if
 * we freed the block groups before stopping them.
 */
9864int btrfs_free_block_groups(struct btrfs_fs_info *info)
9865{
9866 struct btrfs_block_group_cache *block_group;
9867 struct btrfs_space_info *space_info;
9868 struct btrfs_caching_control *caching_ctl;
9869 struct rb_node *n;
9870
9871 down_write(&info->commit_root_sem);
9872 while (!list_empty(&info->caching_block_groups)) {
9873 caching_ctl = list_entry(info->caching_block_groups.next,
9874 struct btrfs_caching_control, list);
9875 list_del(&caching_ctl->list);
9876 put_caching_control(caching_ctl);
9877 }
9878 up_write(&info->commit_root_sem);
9879
9880 spin_lock(&info->unused_bgs_lock);
9881 while (!list_empty(&info->unused_bgs)) {
9882 block_group = list_first_entry(&info->unused_bgs,
9883 struct btrfs_block_group_cache,
9884 bg_list);
9885 list_del_init(&block_group->bg_list);
9886 btrfs_put_block_group(block_group);
9887 }
9888 spin_unlock(&info->unused_bgs_lock);
9889
9890 spin_lock(&info->block_group_cache_lock);
9891 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9892 block_group = rb_entry(n, struct btrfs_block_group_cache,
9893 cache_node);
9894 rb_erase(&block_group->cache_node,
9895 &info->block_group_cache_tree);
9896 RB_CLEAR_NODE(&block_group->cache_node);
9897 spin_unlock(&info->block_group_cache_lock);
9898
9899 down_write(&block_group->space_info->groups_sem);
9900 list_del(&block_group->list);
9901 up_write(&block_group->space_info->groups_sem);
9902
 /*
 * We haven't cached this block group, which means we could
 * possibly have excluded extents on this block group.
 */
9907 if (block_group->cached == BTRFS_CACHE_NO ||
9908 block_group->cached == BTRFS_CACHE_ERROR)
9909 free_excluded_extents(info, block_group);
9910
9911 btrfs_remove_free_space_cache(block_group);
9912 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
9913 ASSERT(list_empty(&block_group->dirty_list));
9914 ASSERT(list_empty(&block_group->io_list));
9915 ASSERT(list_empty(&block_group->bg_list));
9916 ASSERT(atomic_read(&block_group->count) == 1);
9917 btrfs_put_block_group(block_group);
9918
9919 spin_lock(&info->block_group_cache_lock);
9920 }
9921 spin_unlock(&info->block_group_cache_lock);
9922
 /* now that all the block groups are freed, go through and free all the
 * space_info structs.  This is only called during the final stages of
 * unmount, and so we know nobody is using them.  We call
 * synchronize_rcu() once before we start, once for each space info
 * structure.
 */
9929 synchronize_rcu();
9930
9931 release_global_block_rsv(info);
9932
9933 while (!list_empty(&info->space_info)) {
9934 int i;
9935
9936 space_info = list_entry(info->space_info.next,
9937 struct btrfs_space_info,
9938 list);
9939
 /*
 * Do not hide this behind enospc_debug, this is actually
 * important and indicates a real bug if this happens.
 */
9944 if (WARN_ON(space_info->bytes_pinned > 0 ||
9945 space_info->bytes_reserved > 0 ||
9946 space_info->bytes_may_use > 0))
9947 dump_space_info(info, space_info, 0, 0);
9948 list_del(&space_info->list);
9949 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9950 struct kobject *kobj;
9951 kobj = space_info->block_group_kobjs[i];
9952 space_info->block_group_kobjs[i] = NULL;
9953 if (kobj) {
9954 kobject_del(kobj);
9955 kobject_put(kobj);
9956 }
9957 }
9958 kobject_del(&space_info->kobj);
9959 kobject_put(&space_info->kobj);
9960 }
9961 return 0;
9962}
9963
/* link_block_group will queue up kobjects to add when we're reclaim-safe */
9965void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
9966{
9967 struct btrfs_space_info *space_info;
9968 struct raid_kobject *rkobj;
9969 LIST_HEAD(list);
9970 int index;
9971 int ret = 0;
9972
9973 spin_lock(&fs_info->pending_raid_kobjs_lock);
9974 list_splice_init(&fs_info->pending_raid_kobjs, &list);
9975 spin_unlock(&fs_info->pending_raid_kobjs_lock);
9976
9977 list_for_each_entry(rkobj, &list, list) {
9978 space_info = __find_space_info(fs_info, rkobj->flags);
9979 index = btrfs_bg_flags_to_raid_index(rkobj->flags);
9980
9981 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9982 "%s", get_raid_name(index));
9983 if (ret) {
9984 kobject_put(&rkobj->kobj);
9985 break;
9986 }
9987 }
9988 if (ret)
9989 btrfs_warn(fs_info,
9990 "failed to add kobject for block cache, ignoring");
9991}
9992
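/*
 * Add a block group to its space_info's list for the group's raid
 * level. The first group of a given level also queues a raid kobject on
 * fs_info->pending_raid_kobjs; btrfs_add_raid_kobjects() adds it to
 * sysfs later, from a context where kobject_add() is safe.
 */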
9993static void link_block_group(struct btrfs_block_group_cache *cache)
9994{
9995 struct btrfs_space_info *space_info = cache->space_info;
9996 struct btrfs_fs_info *fs_info = cache->fs_info;
9997 int index = btrfs_bg_flags_to_raid_index(cache->flags);
9998 bool first = false;
9999
10000 down_write(&space_info->groups_sem);
10001 if (list_empty(&space_info->block_groups[index]))
10002 first = true;
10003 list_add_tail(&cache->list, &space_info->block_groups[index]);
10004 up_write(&space_info->groups_sem);
10005
10006 if (first) {
10007 struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
10008 if (!rkobj) {
10009 btrfs_warn(cache->fs_info,
10010 "couldn't alloc memory for raid level kobject");
10011 return;
10012 }
10013 rkobj->flags = cache->flags;
10014 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
10015
10016 spin_lock(&fs_info->pending_raid_kobjs_lock);
10017 list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
10018 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10019 space_info->block_group_kobjs[index] = &rkobj->kobj;
10020 }
10021}
10022
10023static struct btrfs_block_group_cache *
10024btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10025 u64 start, u64 size)
10026{
10027 struct btrfs_block_group_cache *cache;
10028
10029 cache = kzalloc(sizeof(*cache), GFP_NOFS);
10030 if (!cache)
10031 return NULL;
10032
10033 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
10034 GFP_NOFS);
10035 if (!cache->free_space_ctl) {
10036 kfree(cache);
10037 return NULL;
10038 }
10039
10040 cache->key.objectid = start;
10041 cache->key.offset = size;
10042 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10043
10044 cache->fs_info = fs_info;
10045 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
10046 set_free_space_tree_thresholds(cache);
10047
10048 atomic_set(&cache->count, 1);
10049 spin_lock_init(&cache->lock);
10050 init_rwsem(&cache->data_rwsem);
10051 INIT_LIST_HEAD(&cache->list);
10052 INIT_LIST_HEAD(&cache->cluster_list);
10053 INIT_LIST_HEAD(&cache->bg_list);
10054 INIT_LIST_HEAD(&cache->ro_list);
10055 INIT_LIST_HEAD(&cache->dirty_list);
10056 INIT_LIST_HEAD(&cache->io_list);
10057 btrfs_init_free_space_ctl(cache);
10058 atomic_set(&cache->trimming, 0);
10059 mutex_init(&cache->free_space_lock);
10060 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
10061
10062 return cache;
10063}
10064
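/*
 * Read all block group items from the extent tree at mount time, build
 * the in-memory block group caches, attach them to their space_infos,
 * and mark groups that are completely full or completely empty as
 * already cached so the caching kthreads can skip them.
 */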
10065int btrfs_read_block_groups(struct btrfs_fs_info *info)
10066{
10067 struct btrfs_path *path;
10068 int ret;
10069 struct btrfs_block_group_cache *cache;
10070 struct btrfs_space_info *space_info;
10071 struct btrfs_key key;
10072 struct btrfs_key found_key;
10073 struct extent_buffer *leaf;
10074 int need_clear = 0;
10075 u64 cache_gen;
10076 u64 feature;
10077 int mixed;
10078
10079 feature = btrfs_super_incompat_flags(info->super_copy);
10080 mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
10081
10082 key.objectid = 0;
10083 key.offset = 0;
10084 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10085 path = btrfs_alloc_path();
10086 if (!path)
10087 return -ENOMEM;
10088 path->reada = READA_FORWARD;
10089
10090 cache_gen = btrfs_super_cache_generation(info->super_copy);
10091 if (btrfs_test_opt(info, SPACE_CACHE) &&
10092 btrfs_super_generation(info->super_copy) != cache_gen)
10093 need_clear = 1;
10094 if (btrfs_test_opt(info, CLEAR_CACHE))
10095 need_clear = 1;
10096
10097 while (1) {
10098 ret = find_first_block_group(info, path, &key);
10099 if (ret > 0)
10100 break;
10101 if (ret != 0)
10102 goto error;
10103
10104 leaf = path->nodes[0];
10105 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10106
10107 cache = btrfs_create_block_group_cache(info, found_key.objectid,
10108 found_key.offset);
10109 if (!cache) {
10110 ret = -ENOMEM;
10111 goto error;
10112 }
10113
10114 if (need_clear) {
 /*
 * When we mount with old space cache, we need to
 * set BTRFS_DC_CLEAR and set dirty flag.
 *
 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
 *    truncate the old free space cache inode and
 *    setup a new one.
 * b) Setting 'dirty flag' makes sure that we flush
 *    the new space cache info onto disk.
 */
10125 if (btrfs_test_opt(info, SPACE_CACHE))
10126 cache->disk_cache_state = BTRFS_DC_CLEAR;
10127 }
10128
10129 read_extent_buffer(leaf, &cache->item,
10130 btrfs_item_ptr_offset(leaf, path->slots[0]),
10131 sizeof(cache->item));
10132 cache->flags = btrfs_block_group_flags(&cache->item);
10133 if (!mixed &&
10134 ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
10135 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
10136 btrfs_err(info,
10137"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
10138 cache->key.objectid);
10139 ret = -EINVAL;
10140 goto error;
10141 }
10142
10143 key.objectid = found_key.objectid + found_key.offset;
10144 btrfs_release_path(path);
10145
 /*
 * We need to exclude the super stripes now so that the space
 * info has super bytes accounted for, otherwise we'll think
 * we have more space than we actually do.
 */
10151 ret = exclude_super_stripes(info, cache);
10152 if (ret) {
 /*
 * We may have excluded something, so call this just in
 * case.
 */
10157 free_excluded_extents(info, cache);
10158 btrfs_put_block_group(cache);
10159 goto error;
10160 }
10161
 /*
 * check for two cases, either we are full, and therefore
 * don't need to bother with the caching work since we won't
 * find any space, or we are empty, and we can just add all
 * the space in and be done with it.  This saves us _a_lot_ of
 * time, particularly in the full case.
 */
10169 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
10170 cache->last_byte_to_unpin = (u64)-1;
10171 cache->cached = BTRFS_CACHE_FINISHED;
10172 free_excluded_extents(info, cache);
10173 } else if (btrfs_block_group_used(&cache->item) == 0) {
10174 cache->last_byte_to_unpin = (u64)-1;
10175 cache->cached = BTRFS_CACHE_FINISHED;
10176 add_new_free_space(cache, info,
10177 found_key.objectid,
10178 found_key.objectid +
10179 found_key.offset);
10180 free_excluded_extents(info, cache);
10181 }
10182
10183 ret = btrfs_add_block_group_cache(info, cache);
10184 if (ret) {
10185 btrfs_remove_free_space_cache(cache);
10186 btrfs_put_block_group(cache);
10187 goto error;
10188 }
10189
10190 trace_btrfs_add_block_group(info, cache, 0);
10191 update_space_info(info, cache->flags, found_key.offset,
10192 btrfs_block_group_used(&cache->item),
10193 cache->bytes_super, &space_info);
10194
10195 cache->space_info = space_info;
10196
10197 link_block_group(cache);
10198
10199 set_avail_alloc_bits(info, cache->flags);
10200 if (btrfs_chunk_readonly(info, cache->key.objectid)) {
10201 inc_block_group_ro(cache, 1);
10202 } else if (btrfs_block_group_used(&cache->item) == 0) {
10203 spin_lock(&info->unused_bgs_lock);
 /* Should always be true but just in case. */
10205 if (list_empty(&cache->bg_list)) {
10206 btrfs_get_block_group(cache);
10207 list_add_tail(&cache->bg_list,
10208 &info->unused_bgs);
10209 }
10210 spin_unlock(&info->unused_bgs_lock);
10211 }
10212 }
10213
10214 list_for_each_entry_rcu(space_info, &info->space_info, list) {
10215 if (!(get_alloc_profile(info, space_info->flags) &
10216 (BTRFS_BLOCK_GROUP_RAID10 |
10217 BTRFS_BLOCK_GROUP_RAID1 |
10218 BTRFS_BLOCK_GROUP_RAID5 |
10219 BTRFS_BLOCK_GROUP_RAID6 |
10220 BTRFS_BLOCK_GROUP_DUP)))
10221 continue;
 /*
 * avoid allocating from un-mirrored block group if there are
 * mirrored block groups.
 */
10226 list_for_each_entry(cache,
10227 &space_info->block_groups[BTRFS_RAID_RAID0],
10228 list)
10229 inc_block_group_ro(cache, 1);
10230 list_for_each_entry(cache,
10231 &space_info->block_groups[BTRFS_RAID_SINGLE],
10232 list)
10233 inc_block_group_ro(cache, 1);
10234 }
10235
10236 btrfs_add_raid_kobjects(info);
10237 init_global_block_rsv(info);
10238 ret = 0;
10239error:
10240 btrfs_free_path(path);
10241 return ret;
10242}
10243
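/*
 * Insert the block group items for all block groups created in this
 * transaction (trans->new_bgs). can_flush_pending_bgs is cleared for
 * the duration so the tree modifications below cannot recursively
 * trigger another flush of the same list.
 */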
10244void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
10245{
10246 struct btrfs_fs_info *fs_info = trans->fs_info;
10247 struct btrfs_block_group_cache *block_group, *tmp;
10248 struct btrfs_root *extent_root = fs_info->extent_root;
10249 struct btrfs_block_group_item item;
10250 struct btrfs_key key;
10251 int ret = 0;
10252 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
10253
10254 trans->can_flush_pending_bgs = false;
10255 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
10256 if (ret)
10257 goto next;
10258
10259 spin_lock(&block_group->lock);
10260 memcpy(&item, &block_group->item, sizeof(item));
10261 memcpy(&key, &block_group->key, sizeof(key));
10262 spin_unlock(&block_group->lock);
10263
10264 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10265 sizeof(item));
10266 if (ret)
10267 btrfs_abort_transaction(trans, ret);
10268 ret = btrfs_finish_chunk_alloc(trans, fs_info, key.objectid,
10269 key.offset);
10270 if (ret)
10271 btrfs_abort_transaction(trans, ret);
10272 add_block_group_free_space(trans, fs_info, block_group);
 /* already aborted the transaction if it failed. */
10274next:
10275 list_del_init(&block_group->bg_list);
10276 }
10277 trans->can_flush_pending_bgs = can_flush_pending_bgs;
10278}
10279
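/*
 * Create the in-memory block group for a freshly allocated chunk,
 * account its super stripes and initial free space, link it into its
 * space_info, and queue it on trans->new_bgs so the on-disk item is
 * inserted later by btrfs_create_pending_block_groups().
 */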
10280int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10281 struct btrfs_fs_info *fs_info, u64 bytes_used,
10282 u64 type, u64 chunk_offset, u64 size)
10283{
10284 struct btrfs_block_group_cache *cache;
10285 int ret;
10286
10287 btrfs_set_log_full_commit(fs_info, trans);
10288
10289 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
10290 if (!cache)
10291 return -ENOMEM;
10292
10293 btrfs_set_block_group_used(&cache->item, bytes_used);
10294 btrfs_set_block_group_chunk_objectid(&cache->item,
10295 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
10296 btrfs_set_block_group_flags(&cache->item, type);
10297
10298 cache->flags = type;
10299 cache->last_byte_to_unpin = (u64)-1;
10300 cache->cached = BTRFS_CACHE_FINISHED;
10301 cache->needs_free_space = 1;
10302 ret = exclude_super_stripes(fs_info, cache);
10303 if (ret) {
10304
10305
10306
10307
10308 free_excluded_extents(fs_info, cache);
10309 btrfs_put_block_group(cache);
10310 return ret;
10311 }
10312
10313 add_new_free_space(cache, fs_info, chunk_offset, chunk_offset + size);
10314
10315 free_excluded_extents(fs_info, cache);
10316
10317#ifdef CONFIG_BTRFS_DEBUG
10318 if (btrfs_should_fragment_free_space(cache)) {
10319 u64 new_bytes_used = size - bytes_used;
10320
10321 bytes_used += new_bytes_used >> 1;
10322 fragment_free_space(cache);
10323 }
10324#endif
10325
 /*
 * Ensure the corresponding space_info object is created and
 * assigned to our block group. We want our bg to be added to the
 * rbtree with its ->space_info set.
 */
10330 cache->space_info = __find_space_info(fs_info, cache->flags);
10331 ASSERT(cache->space_info);
10332
10333 ret = btrfs_add_block_group_cache(fs_info, cache);
10334 if (ret) {
10335 btrfs_remove_free_space_cache(cache);
10336 btrfs_put_block_group(cache);
10337 return ret;
10338 }
10339
 /*
 * Now that our block group has its ->space_info set and is inserted in
 * the rbtree, update the space info's counters.
 */
10344 trace_btrfs_add_block_group(fs_info, cache, 1);
10345 update_space_info(fs_info, cache->flags, size, bytes_used,
10346 cache->bytes_super, &cache->space_info);
10347 update_global_block_rsv(fs_info);
10348
10349 link_block_group(cache);
10350
10351 list_add_tail(&cache->bg_list, &trans->new_bgs);
10352
10353 set_avail_alloc_bits(fs_info, type);
10354 return 0;
10355}
10356
10357static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10358{
10359 u64 extra_flags = chunk_to_extended(flags) &
10360 BTRFS_EXTENDED_PROFILE_MASK;
10361
10362 write_seqlock(&fs_info->profiles_lock);
10363 if (flags & BTRFS_BLOCK_GROUP_DATA)
10364 fs_info->avail_data_alloc_bits &= ~extra_flags;
10365 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10366 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10367 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10368 fs_info->avail_system_alloc_bits &= ~extra_flags;
10369 write_sequnlock(&fs_info->profiles_lock);
10370}
10371
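/*
 * Remove an empty, read-only block group: drop its free space cache
 * inode and item, unlink it from the rbtree, sysfs and space_info
 * accounting, and delete the block group item. The underlying extent
 * map is only removed once no trimming task still references it.
 */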
10372int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10373 struct btrfs_fs_info *fs_info, u64 group_start,
10374 struct extent_map *em)
10375{
10376 struct btrfs_root *root = fs_info->extent_root;
10377 struct btrfs_path *path;
10378 struct btrfs_block_group_cache *block_group;
10379 struct btrfs_free_cluster *cluster;
10380 struct btrfs_root *tree_root = fs_info->tree_root;
10381 struct btrfs_key key;
10382 struct inode *inode;
10383 struct kobject *kobj = NULL;
10384 int ret;
10385 int index;
10386 int factor;
10387 struct btrfs_caching_control *caching_ctl = NULL;
10388 bool remove_em;
10389
10390 block_group = btrfs_lookup_block_group(fs_info, group_start);
10391 BUG_ON(!block_group);
10392 BUG_ON(!block_group->ro);
10393
 /*
 * Free the reserved super bytes from this block group before
 * removing it.
 */
10398 free_excluded_extents(fs_info, block_group);
10399 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
10400 block_group->key.offset);
10401
10402 memcpy(&key, &block_group->key, sizeof(key));
10403 index = btrfs_bg_flags_to_raid_index(block_group->flags);
10404 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
10405 BTRFS_BLOCK_GROUP_RAID1 |
10406 BTRFS_BLOCK_GROUP_RAID10))
10407 factor = 2;
10408 else
10409 factor = 1;
10410
 /* make sure this block group isn't part of an allocation cluster */
10412 cluster = &fs_info->data_alloc_cluster;
10413 spin_lock(&cluster->refill_lock);
10414 btrfs_return_cluster_to_free_space(block_group, cluster);
10415 spin_unlock(&cluster->refill_lock);
10416
 /*
 * make sure this block group isn't part of a metadata
 * allocation cluster
 */
10421 cluster = &fs_info->meta_alloc_cluster;
10422 spin_lock(&cluster->refill_lock);
10423 btrfs_return_cluster_to_free_space(block_group, cluster);
10424 spin_unlock(&cluster->refill_lock);
10425
10426 path = btrfs_alloc_path();
10427 if (!path) {
10428 ret = -ENOMEM;
10429 goto out;
10430 }
10431
 /*
 * get the inode first so any iput calls done for the io_list
 * aren't the final iput (no unlinks allowed now)
 */
10436 inode = lookup_free_space_inode(fs_info, block_group, path);
10437
10438 mutex_lock(&trans->transaction->cache_write_mutex);
 /*
 * make sure our free space cache IO is done before removing the
 * free space inode
 */
10443 spin_lock(&trans->transaction->dirty_bgs_lock);
10444 if (!list_empty(&block_group->io_list)) {
10445 list_del_init(&block_group->io_list);
10446
10447 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10448
10449 spin_unlock(&trans->transaction->dirty_bgs_lock);
10450 btrfs_wait_cache_io(trans, block_group, path);
10451 btrfs_put_block_group(block_group);
10452 spin_lock(&trans->transaction->dirty_bgs_lock);
10453 }
10454
10455 if (!list_empty(&block_group->dirty_list)) {
10456 list_del_init(&block_group->dirty_list);
10457 btrfs_put_block_group(block_group);
10458 }
10459 spin_unlock(&trans->transaction->dirty_bgs_lock);
10460 mutex_unlock(&trans->transaction->cache_write_mutex);
10461
10462 if (!IS_ERR(inode)) {
10463 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
10464 if (ret) {
10465 btrfs_add_delayed_iput(inode);
10466 goto out;
10467 }
10468 clear_nlink(inode);
10469
10470 spin_lock(&block_group->lock);
10471 if (block_group->iref) {
10472 block_group->iref = 0;
10473 block_group->inode = NULL;
10474 spin_unlock(&block_group->lock);
10475 iput(inode);
10476 } else {
10477 spin_unlock(&block_group->lock);
10478 }
10479
10480 btrfs_add_delayed_iput(inode);
10481 }
10482
10483 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10484 key.offset = block_group->key.objectid;
10485 key.type = 0;
10486
10487 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10488 if (ret < 0)
10489 goto out;
10490 if (ret > 0)
10491 btrfs_release_path(path);
10492 if (ret == 0) {
10493 ret = btrfs_del_item(trans, tree_root, path);
10494 if (ret)
10495 goto out;
10496 btrfs_release_path(path);
10497 }
10498
10499 spin_lock(&fs_info->block_group_cache_lock);
10500 rb_erase(&block_group->cache_node,
10501 &fs_info->block_group_cache_tree);
10502 RB_CLEAR_NODE(&block_group->cache_node);
10503
10504 if (fs_info->first_logical_byte == block_group->key.objectid)
10505 fs_info->first_logical_byte = (u64)-1;
10506 spin_unlock(&fs_info->block_group_cache_lock);
10507
10508 down_write(&block_group->space_info->groups_sem);
 /*
 * we must use list_del_init so people can check to see if they
 * are still on the list after taking the semaphore
 */
10513 list_del_init(&block_group->list);
10514 if (list_empty(&block_group->space_info->block_groups[index])) {
10515 kobj = block_group->space_info->block_group_kobjs[index];
10516 block_group->space_info->block_group_kobjs[index] = NULL;
10517 clear_avail_alloc_bits(fs_info, block_group->flags);
10518 }
10519 up_write(&block_group->space_info->groups_sem);
10520 if (kobj) {
10521 kobject_del(kobj);
10522 kobject_put(kobj);
10523 }
10524
10525 if (block_group->has_caching_ctl)
10526 caching_ctl = get_caching_control(block_group);
10527 if (block_group->cached == BTRFS_CACHE_STARTED)
10528 wait_block_group_cache_done(block_group);
10529 if (block_group->has_caching_ctl) {
10530 down_write(&fs_info->commit_root_sem);
10531 if (!caching_ctl) {
10532 struct btrfs_caching_control *ctl;
10533
10534 list_for_each_entry(ctl,
10535 &fs_info->caching_block_groups, list)
10536 if (ctl->block_group == block_group) {
10537 caching_ctl = ctl;
10538 refcount_inc(&caching_ctl->count);
10539 break;
10540 }
10541 }
10542 if (caching_ctl)
10543 list_del_init(&caching_ctl->list);
10544 up_write(&fs_info->commit_root_sem);
10545 if (caching_ctl) {
 /* Once for the caching bgs list and once for us. */
10547 put_caching_control(caching_ctl);
10548 put_caching_control(caching_ctl);
10549 }
10550 }
10551
10552 spin_lock(&trans->transaction->dirty_bgs_lock);
 WARN_ON(!list_empty(&block_group->dirty_list));
 WARN_ON(!list_empty(&block_group->io_list));
10559 spin_unlock(&trans->transaction->dirty_bgs_lock);
10560 btrfs_remove_free_space_cache(block_group);
10561
10562 spin_lock(&block_group->space_info->lock);
10563 list_del_init(&block_group->ro_list);
10564
10565 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
10566 WARN_ON(block_group->space_info->total_bytes
10567 < block_group->key.offset);
10568 WARN_ON(block_group->space_info->bytes_readonly
10569 < block_group->key.offset);
10570 WARN_ON(block_group->space_info->disk_total
10571 < block_group->key.offset * factor);
10572 }
10573 block_group->space_info->total_bytes -= block_group->key.offset;
10574 block_group->space_info->bytes_readonly -= block_group->key.offset;
10575 block_group->space_info->disk_total -= block_group->key.offset * factor;
10576
10577 spin_unlock(&block_group->space_info->lock);
10578
10579 memcpy(&key, &block_group->key, sizeof(key));
10580
10581 mutex_lock(&fs_info->chunk_mutex);
10582 if (!list_empty(&em->list)) {
 /* We're in the transaction->pending_chunks list. */
10584 free_extent_map(em);
10585 }
10586 spin_lock(&block_group->lock);
10587 block_group->removed = 1;

 /*
 * At this point trimming can't start on this block group, because we
 * removed the block group from the tree fs_info->block_group_cache_tree
 * so no one can find it anymore and even if someone already got this
 * block group before we removed it from the rbtree, they have already
 * incremented block_group->trimming - if they didn't, they won't find
 * any free space entries because we already removed them all when we
 * called btrfs_remove_free_space_cache().
 *
 * And we must not remove the extent map from the fs_info->mapping_tree
 * to prevent the same logical address range and physical device space
 * ranges from being reused for a new block group. This is because our
 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
 * completely transactionless, so while it is trimming a range the
 * currently running transaction might finish and a new one start,
 * allowing for new block groups to be created that can reuse the same
 * physical device locations unless we take this special care.
 *
 * There may also be an implicit trim operation if the file system
 * is mounted with -odiscard. The same protections must remain
 * in place until the extents have been discarded completely when
 * the transaction commit has completed.
 */
10611 remove_em = (atomic_read(&block_group->trimming) == 0);
 /*
 * Make sure a trimmer task always sees the em in the pinned_chunks
 * list if it sees block_group->removed == 1 (this needs several
 * memory barriers and pairing reads).
 */
10617 if (!remove_em) {
 /*
 * There are still trimming tasks referencing this block group, so
 * we cannot remove its extent map from the mapping tree yet. Move
 * the em to the pinned_chunks list instead, so the chunk's logical
 * and physical address space is not reused for a new block group
 * while the trim is running; the last trimming reference, dropped
 * via btrfs_put_block_group_trimming(), takes care of removing the
 * mapping.
 */
10629 list_move_tail(&em->list, &fs_info->pinned_chunks);
10630 }
10631 spin_unlock(&block_group->lock);
10632
10633 if (remove_em) {
10634 struct extent_map_tree *em_tree;
10635
10636 em_tree = &fs_info->mapping_tree.map_tree;
10637 write_lock(&em_tree->lock);
 /*
 * The em might be in the pending_chunks list, so make sure the
 * chunk mutex is locked, since remove_extent_mapping() will
 * delete us from that list.
 */
10643 remove_extent_mapping(em_tree, em);
10644 write_unlock(&em_tree->lock);
 /* once for the tree */
10646 free_extent_map(em);
10647 }
10648
10649 mutex_unlock(&fs_info->chunk_mutex);
10650
10651 ret = remove_block_group_free_space(trans, fs_info, block_group);
10652 if (ret)
10653 goto out;
10654
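 /* Once for our lookup reference and once for the rbtree reference. */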
10655 btrfs_put_block_group(block_group);
10656 btrfs_put_block_group(block_group);
10657
10658 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10659 if (ret > 0)
10660 ret = -EIO;
10661 if (ret < 0)
10662 goto out;
10663
10664 ret = btrfs_del_item(trans, root, path);
10665out:
10666 btrfs_free_path(path);
10667 return ret;
10668}
10669
10670struct btrfs_trans_handle *
10671btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10672 const u64 chunk_offset)
10673{
10674 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10675 struct extent_map *em;
10676 struct map_lookup *map;
10677 unsigned int num_items;
10678
10679 read_lock(&em_tree->lock);
10680 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10681 read_unlock(&em_tree->lock);
10682 ASSERT(em && em->start == chunk_offset);
10683
 /*
 * We need to reserve 3 + N units from the metadata space info in order
 * to remove a block group (done at btrfs_remove_chunk() and at
 * btrfs_remove_block_group()), which are used for:
 *
 * 1 unit for adding the free space inode's orphan (located in the tree
 * of tree roots).
 * 1 unit for deleting the block group item (located in the extent
 * tree).
 * 1 unit for deleting the free space item (located in tree of tree
 * roots).
 * N units for deleting N device extent items corresponding to each
 * stripe (located in the device tree).
 *
 * In order to remove a block group we also need to reserve units in the
 * system space info in order to update the chunk tree (update one or
 * more device items and remove one chunk item), but this is done at
 * btrfs_remove_chunk() through a call to check_system_chunk().
 */
10703 map = em->map_lookup;
10704 num_items = 3 + map->num_stripes;
10705 free_extent_map(em);
10706
10707 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10708 num_items, 1);
10709}
10710
/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 */
10715void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10716{
10717 struct btrfs_block_group_cache *block_group;
10718 struct btrfs_space_info *space_info;
10719 struct btrfs_trans_handle *trans;
10720 int ret = 0;
10721
10722 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
10723 return;
10724
10725 spin_lock(&fs_info->unused_bgs_lock);
10726 while (!list_empty(&fs_info->unused_bgs)) {
10727 u64 start, end;
10728 int trimming;
10729
10730 block_group = list_first_entry(&fs_info->unused_bgs,
10731 struct btrfs_block_group_cache,
10732 bg_list);
10733 list_del_init(&block_group->bg_list);
10734
10735 space_info = block_group->space_info;
10736
10737 if (ret || btrfs_mixed_space_info(space_info)) {
10738 btrfs_put_block_group(block_group);
10739 continue;
10740 }
10741 spin_unlock(&fs_info->unused_bgs_lock);
10742
10743 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10744
 /* Don't want to race with allocators so take the groups_sem */
10746 down_write(&space_info->groups_sem);
10747 spin_lock(&block_group->lock);
10748 if (block_group->reserved ||
10749 btrfs_block_group_used(&block_group->item) ||
10750 block_group->ro ||
10751 list_is_singular(&block_group->list)) {
 /*
 * We want to bail if we made new allocations or have
 * outstanding allocations in this block group.  We do
 * the ro check in case balance is currently acting on
 * this block group.
 */
10758 spin_unlock(&block_group->lock);
10759 up_write(&space_info->groups_sem);
10760 goto next;
10761 }
10762 spin_unlock(&block_group->lock);
10763
 /* We don't want to force the issue, only flip if it's ok. */
10765 ret = inc_block_group_ro(block_group, 0);
10766 up_write(&space_info->groups_sem);
10767 if (ret < 0) {
10768 ret = 0;
10769 goto next;
10770 }
10771
 /*
 * Want to do this before we do anything else so we can recover
 * properly if we fail to join the transaction.
 */
10776 trans = btrfs_start_trans_remove_block_group(fs_info,
10777 block_group->key.objectid);
10778 if (IS_ERR(trans)) {
10779 btrfs_dec_block_group_ro(block_group);
10780 ret = PTR_ERR(trans);
10781 goto next;
10782 }
10783
 /*
 * We could have pending pinned extents for this block group,
 * just delete them, we don't care about them anymore.
 */
10788 start = block_group->key.objectid;
10789 end = start + block_group->key.offset - 1;
10790
 /*
 * Hold the unused_bg_unpin_mutex lock to avoid racing with
 * btrfs_finish_extent_commit(). If we are at transaction N,
 * another task might be running finish_extent_commit() for the
 * previous transaction N - 1, and have seen a range belonging
 * to the block group in freed_extents[] before we were able to
 * clear the whole block group range from freed_extents[]. This
 * means that task can look up the block group after we unpinned
 * it from freed_extents[] and removed it, leading to a BUG_ON()
 * at btrfs_unpin_extent_range().
 */
10801 mutex_lock(&fs_info->unused_bg_unpin_mutex);
10802 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
10803 EXTENT_DIRTY);
10804 if (ret) {
10805 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10806 btrfs_dec_block_group_ro(block_group);
10807 goto end_trans;
10808 }
10809 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
10810 EXTENT_DIRTY);
10811 if (ret) {
10812 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10813 btrfs_dec_block_group_ro(block_group);
10814 goto end_trans;
10815 }
10816 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10817
 /* Reset pinned so btrfs_put_block_group doesn't complain */
10819 spin_lock(&space_info->lock);
10820 spin_lock(&block_group->lock);
10821
10822 space_info->bytes_pinned -= block_group->pinned;
10823 space_info->bytes_readonly += block_group->pinned;
10824 percpu_counter_add(&space_info->total_bytes_pinned,
10825 -block_group->pinned);
10826 block_group->pinned = 0;
10827
10828 spin_unlock(&block_group->lock);
10829 spin_unlock(&space_info->lock);
10830
 /* DISCARD can flip during remount */
10832 trimming = btrfs_test_opt(fs_info, DISCARD);
10833
 /* Implicit trim during transaction commit. */
10835 if (trimming)
10836 btrfs_get_block_group_trimming(block_group);
10837
 /*
 * Btrfs_remove_chunk will abort the transaction if things go
 * horribly wrong.
 */
10842 ret = btrfs_remove_chunk(trans, fs_info,
10843 block_group->key.objectid);
10844
10845 if (ret) {
10846 if (trimming)
10847 btrfs_put_block_group_trimming(block_group);
10848 goto end_trans;
10849 }
10850
 /*
 * If we're not mounted with -odiscard, we can just forget
 * about this block group. Otherwise we'll need to wait
 * until transaction commit to do the actual discard.
 */
10856 if (trimming) {
10857 spin_lock(&fs_info->unused_bgs_lock);
 /*
 * A concurrent scrub might have added us to the list
 * fs_info->unused_bgs, so use a list_move operation
 * to add the block group to the deleted_bgs list.
 */
10863 list_move(&block_group->bg_list,
10864 &trans->transaction->deleted_bgs);
10865 spin_unlock(&fs_info->unused_bgs_lock);
10866 btrfs_get_block_group(block_group);
10867 }
10868end_trans:
10869 btrfs_end_transaction(trans);
10870next:
10871 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
10872 btrfs_put_block_group(block_group);
10873 spin_lock(&fs_info->unused_bgs_lock);
10874 }
10875 spin_unlock(&fs_info->unused_bgs_lock);
10876}
10877
10878int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10879{
10880 struct btrfs_space_info *space_info;
10881 struct btrfs_super_block *disk_super;
10882 u64 features;
10883 u64 flags;
10884 int mixed = 0;
10885 int ret;
10886
10887 disk_super = fs_info->super_copy;
10888 if (!btrfs_super_root(disk_super))
10889 return -EINVAL;
10890
10891 features = btrfs_super_incompat_flags(disk_super);
10892 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10893 mixed = 1;
10894
10895 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10896 ret = create_space_info(fs_info, flags, &space_info);
10897 if (ret)
10898 goto out;
10899
10900 if (mixed) {
10901 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10902 ret = create_space_info(fs_info, flags, &space_info);
10903 } else {
10904 flags = BTRFS_BLOCK_GROUP_METADATA;
10905 ret = create_space_info(fs_info, flags, &space_info);
10906 if (ret)
10907 goto out;
10908
10909 flags = BTRFS_BLOCK_GROUP_DATA;
10910 ret = create_space_info(fs_info, flags, &space_info);
10911 }
10912out:
10913 return ret;
10914}
10915
10916int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
10917 u64 start, u64 end)
10918{
10919 return unpin_extent_range(fs_info, start, end, false);
10920}
10921
/*
 * It used to be that old block groups would be left around forever.
 * Iterating over them would be enough to trim unused space.  Since we
 * now automatically remove them, we also need to iterate over unallocated
 * space.
 *
 * We don't want a transaction for this since the discard may take a
 * substantial amount of time.  We don't require that a transaction be
 * running, but we do need to take a running transaction into account
 * to ensure that we're not discarding chunks that were released in
 * the current transaction.
 *
 * Holding the chunks lock will prevent other threads from allocating
 * or releasing chunks, but it won't prevent a running transaction
 * from committing and releasing the memory that the pending chunks
 * list head uses.  For that, we need to take a reference to the
 * transaction.
 */
10940static int btrfs_trim_free_extents(struct btrfs_device *device,
10941 u64 minlen, u64 *trimmed)
10942{
10943 u64 start = 0, len = 0;
10944 int ret;
10945
10946 *trimmed = 0;
10947
 /* Not writeable = nothing to do. */
10949 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
10950 return 0;
10951
 /* No free space = nothing to do. */
10953 if (device->total_bytes <= device->bytes_used)
10954 return 0;
10955
10956 ret = 0;
10957
10958 while (1) {
10959 struct btrfs_fs_info *fs_info = device->fs_info;
10960 struct btrfs_transaction *trans;
10961 u64 bytes;
10962
10963 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10964 if (ret)
10965 return ret;
10966
10967 down_read(&fs_info->commit_root_sem);
10968
10969 spin_lock(&fs_info->trans_lock);
10970 trans = fs_info->running_transaction;
10971 if (trans)
10972 refcount_inc(&trans->use_count);
10973 spin_unlock(&fs_info->trans_lock);
10974
10975 ret = find_free_dev_extent_start(trans, device, minlen, start,
10976 &start, &len);
10977 if (trans)
10978 btrfs_put_transaction(trans);
10979
10980 if (ret) {
10981 up_read(&fs_info->commit_root_sem);
10982 mutex_unlock(&fs_info->chunk_mutex);
10983 if (ret == -ENOSPC)
10984 ret = 0;
10985 break;
10986 }
10987
10988 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10989 up_read(&fs_info->commit_root_sem);
10990 mutex_unlock(&fs_info->chunk_mutex);
10991
10992 if (ret)
10993 break;
10994
10995 start += len;
10996 *trimmed += bytes;
10997
10998 if (fatal_signal_pending(current)) {
10999 ret = -ERESTARTSYS;
11000 break;
11001 }
11002
11003 cond_resched();
11004 }
11005
11006 return ret;
11007}
11008
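/*
 * FITRIM entry point: discard the free space of every block group that
 * overlaps the requested range, then trim the unallocated space of each
 * device. On return range->len holds the total number of bytes trimmed.
 */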
11009int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
11010{
11011 struct btrfs_block_group_cache *cache = NULL;
11012 struct btrfs_device *device;
11013 struct list_head *devices;
11014 u64 group_trimmed;
11015 u64 start;
11016 u64 end;
11017 u64 trimmed = 0;
11018 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11019 int ret = 0;
11020
 /*
 * try to trim all FS space, our block group may start from non-zero.
 */
11024 if (range->len == total_bytes)
11025 cache = btrfs_lookup_first_block_group(fs_info, range->start);
11026 else
11027 cache = btrfs_lookup_block_group(fs_info, range->start);
11028
11029 while (cache) {
11030 if (cache->key.objectid >= (range->start + range->len)) {
11031 btrfs_put_block_group(cache);
11032 break;
11033 }
11034
11035 start = max(range->start, cache->key.objectid);
11036 end = min(range->start + range->len,
11037 cache->key.objectid + cache->key.offset);
11038
11039 if (end - start >= range->minlen) {
11040 if (!block_group_cache_done(cache)) {
11041 ret = cache_block_group(cache, 0);
11042 if (ret) {
11043 btrfs_put_block_group(cache);
11044 break;
11045 }
11046 ret = wait_block_group_cache_done(cache);
11047 if (ret) {
11048 btrfs_put_block_group(cache);
11049 break;
11050 }
11051 }
11052 ret = btrfs_trim_block_group(cache,
11053 &group_trimmed,
11054 start,
11055 end,
11056 range->minlen);
11057
11058 trimmed += group_trimmed;
11059 if (ret) {
11060 btrfs_put_block_group(cache);
11061 break;
11062 }
11063 }
11064
11065 cache = next_block_group(fs_info, cache);
11066 }
11067
11068 mutex_lock(&fs_info->fs_devices->device_list_mutex);
11069 devices = &fs_info->fs_devices->alloc_list;
11070 list_for_each_entry(device, devices, dev_alloc_list) {
11071 ret = btrfs_trim_free_extents(device, range->minlen,
11072 &group_trimmed);
11073 if (ret)
11074 break;
11075
11076 trimmed += group_trimmed;
11077 }
11078 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
11079
11080 range->len = trimmed;
11081 return ret;
11082}
11083
/*
 * btrfs_{start,end}_write_no_snapshotting() are similar to
 * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
 * data into the page cache through nocow before the subvolume is snapshotted,
 * but flush the data onto disk after the snapshot creation, or to prevent
 * nocow writes while the subvolume is being snapshotted.
 */
11092void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
11093{
11094 percpu_counter_dec(&root->subv_writers->counter);
 /*
 * Make sure counter is updated before we wake up waiters.
 */
11098 smp_mb();
11099 if (waitqueue_active(&root->subv_writers->wait))
11100 wake_up(&root->subv_writers->wait);
11101}
11102
11103int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
11104{
11105 if (atomic_read(&root->will_be_snapshotted))
11106 return 0;
11107
11108 percpu_counter_inc(&root->subv_writers->counter);
 /*
 * Make sure counter is updated before we check for snapshot creation.
 */
11112 smp_mb();
11113 if (atomic_read(&root->will_be_snapshotted)) {
11114 btrfs_end_write_no_snapshotting(root);
11115 return 0;
11116 }
11117 return 1;
11118}
11119
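/*
 * Loop until no snapshot of this root is pending, returning with the
 * caller registered as a no-snapshotting writer; the caller must later
 * drop that with btrfs_end_write_no_snapshotting().
 */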
11120void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11121{
11122 while (true) {
11123 int ret;
11124
11125 ret = btrfs_start_write_no_snapshotting(root);
11126 if (ret)
11127 break;
11128 wait_var_event(&root->will_be_snapshotted,
11129 !atomic_read(&root->will_be_snapshotted));
11130 }
11131}
11132