/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"

#undef SCRAMBLE_DELAYED_REFS

/*
 * control flags for do_chunk_alloc's force field
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

/*
 * Control how reservations are dealt with.
 *
 * RESERVE_FREE - freeing a reservation.
 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
 *   ENOSPC accounting
 * RESERVE_ALLOC_NO_ACCOUNT - we are allocating space, but we won't update
 *   bytes_may_use as the ENOSPC accounting is done elsewhere
 */
enum {
	RESERVE_FREE = 0,
	RESERVE_ALLOC = 1,
	RESERVE_ALLOC_NO_ACCOUNT = 2,
};

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve,
				       int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved);
114
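/*
 * Return whether free space caching for this block group has finished,
 * either successfully (BTRFS_CACHE_FINISHED) or with an error
 * (BTRFS_CACHE_ERROR).
 */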
115static noinline int
116block_group_cache_done(struct btrfs_block_group_cache *cache)
117{
118 smp_mb();
119 return cache->cached == BTRFS_CACHE_FINISHED ||
120 cache->cached == BTRFS_CACHE_ERROR;
121}
122
123static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
124{
125 return (cache->flags & bits) == bits;
126}
127
128void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
129{
130 atomic_inc(&cache->count);
131}
132
133void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
134{
135 if (atomic_dec_and_test(&cache->count)) {
136 WARN_ON(cache->pinned > 0);
137 WARN_ON(cache->reserved > 0);
138 kfree(cache->free_space_ctl);
139 kfree(cache);
140 }
141}
142
/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
147static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
148 struct btrfs_block_group_cache *block_group)
149{
150 struct rb_node **p;
151 struct rb_node *parent = NULL;
152 struct btrfs_block_group_cache *cache;
153
154 spin_lock(&info->block_group_cache_lock);
155 p = &info->block_group_cache_tree.rb_node;
156
157 while (*p) {
158 parent = *p;
159 cache = rb_entry(parent, struct btrfs_block_group_cache,
160 cache_node);
161 if (block_group->key.objectid < cache->key.objectid) {
162 p = &(*p)->rb_left;
163 } else if (block_group->key.objectid > cache->key.objectid) {
164 p = &(*p)->rb_right;
165 } else {
166 spin_unlock(&info->block_group_cache_lock);
167 return -EEXIST;
168 }
169 }
170
171 rb_link_node(&block_group->cache_node, parent, p);
172 rb_insert_color(&block_group->cache_node,
173 &info->block_group_cache_tree);
174
175 if (info->first_logical_byte > block_group->key.objectid)
176 info->first_logical_byte = block_group->key.objectid;
177
178 spin_unlock(&info->block_group_cache_lock);
179
180 return 0;
181}
182
/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
187static struct btrfs_block_group_cache *
188block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
189 int contains)
190{
191 struct btrfs_block_group_cache *cache, *ret = NULL;
192 struct rb_node *n;
193 u64 end, start;
194
195 spin_lock(&info->block_group_cache_lock);
196 n = info->block_group_cache_tree.rb_node;
197
198 while (n) {
199 cache = rb_entry(n, struct btrfs_block_group_cache,
200 cache_node);
201 end = cache->key.objectid + cache->key.offset - 1;
202 start = cache->key.objectid;
203
204 if (bytenr < start) {
205 if (!contains && (!ret || start < ret->key.objectid))
206 ret = cache;
207 n = n->rb_left;
208 } else if (bytenr > start) {
209 if (contains && bytenr <= end) {
210 ret = cache;
211 break;
212 }
213 n = n->rb_right;
214 } else {
215 ret = cache;
216 break;
217 }
218 }
219 if (ret) {
220 btrfs_get_block_group(ret);
221 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
222 info->first_logical_byte = ret->key.objectid;
223 }
224 spin_unlock(&info->block_group_cache_lock);
225
226 return ret;
227}
228
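/*
 * Mark a byte range in both freed_extents trees so the free space caching
 * code treats it as unusable.  Used below to exclude superblock stripes
 * from the free space of a block group.
 */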
229static int add_excluded_extent(struct btrfs_root *root,
230 u64 start, u64 num_bytes)
231{
232 u64 end = start + num_bytes - 1;
233 set_extent_bits(&root->fs_info->freed_extents[0],
234 start, end, EXTENT_UPTODATE, GFP_NOFS);
235 set_extent_bits(&root->fs_info->freed_extents[1],
236 start, end, EXTENT_UPTODATE, GFP_NOFS);
237 return 0;
238}
239
240static void free_excluded_extents(struct btrfs_root *root,
241 struct btrfs_block_group_cache *cache)
242{
243 u64 start, end;
244
245 start = cache->key.objectid;
246 end = start + cache->key.offset - 1;
247
248 clear_extent_bits(&root->fs_info->freed_extents[0],
249 start, end, EXTENT_UPTODATE, GFP_NOFS);
250 clear_extent_bits(&root->fs_info->freed_extents[1],
251 start, end, EXTENT_UPTODATE, GFP_NOFS);
252}
253
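/*
 * Exclude the portions of this block group that overlap superblock mirrors
 * (and, when the group starts below BTRFS_SUPER_INFO_OFFSET, everything up
 * to that offset) so they are never added as free space.
 */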
254static int exclude_super_stripes(struct btrfs_root *root,
255 struct btrfs_block_group_cache *cache)
256{
257 u64 bytenr;
258 u64 *logical;
259 int stripe_len;
260 int i, nr, ret;
261
262 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
263 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
264 cache->bytes_super += stripe_len;
265 ret = add_excluded_extent(root, cache->key.objectid,
266 stripe_len);
267 if (ret)
268 return ret;
269 }
270
271 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
272 bytenr = btrfs_sb_offset(i);
273 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
274 cache->key.objectid, bytenr,
275 0, &logical, &nr, &stripe_len);
276 if (ret)
277 return ret;
278
279 while (nr--) {
280 u64 start, len;
281
282 if (logical[nr] > cache->key.objectid +
283 cache->key.offset)
284 continue;
285
286 if (logical[nr] + stripe_len <= cache->key.objectid)
287 continue;
288
289 start = logical[nr];
290 if (start < cache->key.objectid) {
291 start = cache->key.objectid;
292 len = (logical[nr] + stripe_len) - start;
293 } else {
294 len = min_t(u64, stripe_len,
295 cache->key.objectid +
296 cache->key.offset - start);
297 }
298
299 cache->bytes_super += len;
300 ret = add_excluded_extent(root, start, len);
301 if (ret) {
302 kfree(logical);
303 return ret;
304 }
305 }
306
307 kfree(logical);
308 }
309 return 0;
310}
311
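/*
 * Grab a reference to the caching control for this block group, or NULL if
 * no caching is in progress.  The caller must drop it again with
 * put_caching_control().
 */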
312static struct btrfs_caching_control *
313get_caching_control(struct btrfs_block_group_cache *cache)
314{
315 struct btrfs_caching_control *ctl;
316
317 spin_lock(&cache->lock);
318 if (!cache->caching_ctl) {
319 spin_unlock(&cache->lock);
320 return NULL;
321 }
322
323 ctl = cache->caching_ctl;
324 atomic_inc(&ctl->count);
325 spin_unlock(&cache->lock);
326 return ctl;
327}
328
329static void put_caching_control(struct btrfs_caching_control *ctl)
330{
331 if (atomic_dec_and_test(&ctl->count))
332 kfree(ctl);
333}
334
335#ifdef CONFIG_BTRFS_DEBUG
336static void fragment_free_space(struct btrfs_root *root,
337 struct btrfs_block_group_cache *block_group)
338{
339 u64 start = block_group->key.objectid;
340 u64 len = block_group->key.offset;
341 u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
342 root->nodesize : root->sectorsize;
343 u64 step = chunk << 1;
344
345 while (len > chunk) {
346 btrfs_remove_free_space(block_group, start, chunk);
347 start += step;
348 if (len < step)
349 len = 0;
350 else
351 len -= step;
352 }
353}
354#endif
355
/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
361u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
362 struct btrfs_fs_info *info, u64 start, u64 end)
363{
364 u64 extent_start, extent_end, size, total_added = 0;
365 int ret;
366
367 while (start < end) {
368 ret = find_first_extent_bit(info->pinned_extents, start,
369 &extent_start, &extent_end,
370 EXTENT_DIRTY | EXTENT_UPTODATE,
371 NULL);
372 if (ret)
373 break;
374
375 if (extent_start <= start) {
376 start = extent_end + 1;
377 } else if (extent_start > start && extent_start < end) {
378 size = extent_start - start;
379 total_added += size;
380 ret = btrfs_add_free_space(block_group, start,
381 size);
382 BUG_ON(ret);
383 start = extent_end + 1;
384 } else {
385 break;
386 }
387 }
388
389 if (start < end) {
390 size = end - start;
391 total_added += size;
392 ret = btrfs_add_free_space(block_group, start, size);
393 BUG_ON(ret);
394 }
395
396 return total_added;
397}
398
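/*
 * Walk the extent tree for this block group and record the gaps between
 * allocated extents as free space.  Used by the caching thread when the
 * free space tree is not available.
 */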
399static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
400{
401 struct btrfs_block_group_cache *block_group;
402 struct btrfs_fs_info *fs_info;
403 struct btrfs_root *extent_root;
404 struct btrfs_path *path;
405 struct extent_buffer *leaf;
406 struct btrfs_key key;
407 u64 total_found = 0;
408 u64 last = 0;
409 u32 nritems;
410 int ret;
411 bool wakeup = true;
412
413 block_group = caching_ctl->block_group;
414 fs_info = block_group->fs_info;
415 extent_root = fs_info->extent_root;
416
417 path = btrfs_alloc_path();
418 if (!path)
419 return -ENOMEM;
420
421 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
422
423#ifdef CONFIG_BTRFS_DEBUG
	/*
	 * If we're fragmenting we don't want to make anybody think we can
	 * allocate from this block group until we've had a chance to fragment
	 * the free space.
	 */
429 if (btrfs_should_fragment_free_space(extent_root, block_group))
430 wakeup = false;
431#endif
432
	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since its read-only
	 */
438 path->skip_locking = 1;
439 path->search_commit_root = 1;
440 path->reada = READA_FORWARD;
441
442 key.objectid = last;
443 key.offset = 0;
444 key.type = BTRFS_EXTENT_ITEM_KEY;
445
446next:
447 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
448 if (ret < 0)
449 goto out;
450
451 leaf = path->nodes[0];
452 nritems = btrfs_header_nritems(leaf);
453
454 while (1) {
455 if (btrfs_fs_closing(fs_info) > 1) {
456 last = (u64)-1;
457 break;
458 }
459
460 if (path->slots[0] < nritems) {
461 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
462 } else {
463 ret = find_next_key(path, 0, &key);
464 if (ret)
465 break;
466
467 if (need_resched() ||
468 rwsem_is_contended(&fs_info->commit_root_sem)) {
469 if (wakeup)
470 caching_ctl->progress = last;
471 btrfs_release_path(path);
472 up_read(&fs_info->commit_root_sem);
473 mutex_unlock(&caching_ctl->mutex);
474 cond_resched();
475 mutex_lock(&caching_ctl->mutex);
476 down_read(&fs_info->commit_root_sem);
477 goto next;
478 }
479
480 ret = btrfs_next_leaf(extent_root, path);
481 if (ret < 0)
482 goto out;
483 if (ret)
484 break;
485 leaf = path->nodes[0];
486 nritems = btrfs_header_nritems(leaf);
487 continue;
488 }
489
490 if (key.objectid < last) {
491 key.objectid = last;
492 key.offset = 0;
493 key.type = BTRFS_EXTENT_ITEM_KEY;
494
495 if (wakeup)
496 caching_ctl->progress = last;
497 btrfs_release_path(path);
498 goto next;
499 }
500
501 if (key.objectid < block_group->key.objectid) {
502 path->slots[0]++;
503 continue;
504 }
505
506 if (key.objectid >= block_group->key.objectid +
507 block_group->key.offset)
508 break;
509
510 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
511 key.type == BTRFS_METADATA_ITEM_KEY) {
512 total_found += add_new_free_space(block_group,
513 fs_info, last,
514 key.objectid);
515 if (key.type == BTRFS_METADATA_ITEM_KEY)
516 last = key.objectid +
517 fs_info->tree_root->nodesize;
518 else
519 last = key.objectid + key.offset;
520
521 if (total_found > CACHING_CTL_WAKE_UP) {
522 total_found = 0;
523 if (wakeup)
524 wake_up(&caching_ctl->wait);
525 }
526 }
527 path->slots[0]++;
528 }
529 ret = 0;
530
531 total_found += add_new_free_space(block_group, fs_info, last,
532 block_group->key.objectid +
533 block_group->key.offset);
534 caching_ctl->progress = (u64)-1;
535
536out:
537 btrfs_free_path(path);
538 return ret;
539}
540
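/*
 * Worker that fills in the free space cache for one block group, either from
 * the free space tree or by scanning the extent tree, then wakes up anybody
 * waiting on the caching control.
 */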
541static noinline void caching_thread(struct btrfs_work *work)
542{
543 struct btrfs_block_group_cache *block_group;
544 struct btrfs_fs_info *fs_info;
545 struct btrfs_caching_control *caching_ctl;
546 struct btrfs_root *extent_root;
547 int ret;
548
549 caching_ctl = container_of(work, struct btrfs_caching_control, work);
550 block_group = caching_ctl->block_group;
551 fs_info = block_group->fs_info;
552 extent_root = fs_info->extent_root;
553
554 mutex_lock(&caching_ctl->mutex);
555 down_read(&fs_info->commit_root_sem);
556
557 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
558 ret = load_free_space_tree(caching_ctl);
559 else
560 ret = load_extent_tree_free(caching_ctl);
561
562 spin_lock(&block_group->lock);
563 block_group->caching_ctl = NULL;
564 block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
565 spin_unlock(&block_group->lock);
566
567#ifdef CONFIG_BTRFS_DEBUG
568 if (btrfs_should_fragment_free_space(extent_root, block_group)) {
569 u64 bytes_used;
570
571 spin_lock(&block_group->space_info->lock);
572 spin_lock(&block_group->lock);
573 bytes_used = block_group->key.offset -
574 btrfs_block_group_used(&block_group->item);
575 block_group->space_info->bytes_used += bytes_used >> 1;
576 spin_unlock(&block_group->lock);
577 spin_unlock(&block_group->space_info->lock);
578 fragment_free_space(extent_root, block_group);
579 }
580#endif
581
582 caching_ctl->progress = (u64)-1;
583
584 up_read(&fs_info->commit_root_sem);
585 free_excluded_extents(fs_info->extent_root, block_group);
586 mutex_unlock(&caching_ctl->mutex);
587
588 wake_up(&caching_ctl->wait);
589
590 put_caching_control(caching_ctl);
591 btrfs_put_block_group(block_group);
592}
593
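/*
 * Start (or finish) caching of a block group's free space.  The on-disk free
 * space cache is tried first when BTRFS_MOUNT_SPACE_CACHE is enabled; if it
 * can't be used and load_cache_only isn't set, the work is handed off to the
 * caching thread.
 */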
594static int cache_block_group(struct btrfs_block_group_cache *cache,
595 int load_cache_only)
596{
597 DEFINE_WAIT(wait);
598 struct btrfs_fs_info *fs_info = cache->fs_info;
599 struct btrfs_caching_control *caching_ctl;
600 int ret = 0;
601
602 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
603 if (!caching_ctl)
604 return -ENOMEM;
605
606 INIT_LIST_HEAD(&caching_ctl->list);
607 mutex_init(&caching_ctl->mutex);
608 init_waitqueue_head(&caching_ctl->wait);
609 caching_ctl->block_group = cache;
610 caching_ctl->progress = cache->key.objectid;
611 atomic_set(&caching_ctl->count, 1);
612 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
613 caching_thread, NULL, NULL);
614
615 spin_lock(&cache->lock);
	/*
	 * This should be a rare occasion, but this could happen I think in the
	 * case where one thread starts to load the space cache info, and then
	 * some other thread starts a transaction commit which tries to do an
	 * allocation while the other thread is still loading the space cache
	 * info.  The previous loop should have kept us from choosing this
	 * block group, but if we've moved to the state where we will wait on
	 * caching block groups we need to first check if we're doing a fast
	 * load here, so we can wait for it to finish, otherwise we could end
	 * up allocating from a block group whose cache gets evicted for one
	 * reason or another.
	 */
628 while (cache->cached == BTRFS_CACHE_FAST) {
629 struct btrfs_caching_control *ctl;
630
631 ctl = cache->caching_ctl;
632 atomic_inc(&ctl->count);
633 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
634 spin_unlock(&cache->lock);
635
636 schedule();
637
638 finish_wait(&ctl->wait, &wait);
639 put_caching_control(ctl);
640 spin_lock(&cache->lock);
641 }
642
643 if (cache->cached != BTRFS_CACHE_NO) {
644 spin_unlock(&cache->lock);
645 kfree(caching_ctl);
646 return 0;
647 }
648 WARN_ON(cache->caching_ctl);
649 cache->caching_ctl = caching_ctl;
650 cache->cached = BTRFS_CACHE_FAST;
651 spin_unlock(&cache->lock);
652
653 if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
654 mutex_lock(&caching_ctl->mutex);
655 ret = load_free_space_cache(fs_info, cache);
656
657 spin_lock(&cache->lock);
658 if (ret == 1) {
659 cache->caching_ctl = NULL;
660 cache->cached = BTRFS_CACHE_FINISHED;
661 cache->last_byte_to_unpin = (u64)-1;
662 caching_ctl->progress = (u64)-1;
663 } else {
664 if (load_cache_only) {
665 cache->caching_ctl = NULL;
666 cache->cached = BTRFS_CACHE_NO;
667 } else {
668 cache->cached = BTRFS_CACHE_STARTED;
669 cache->has_caching_ctl = 1;
670 }
671 }
672 spin_unlock(&cache->lock);
673#ifdef CONFIG_BTRFS_DEBUG
674 if (ret == 1 &&
675 btrfs_should_fragment_free_space(fs_info->extent_root,
676 cache)) {
677 u64 bytes_used;
678
679 spin_lock(&cache->space_info->lock);
680 spin_lock(&cache->lock);
681 bytes_used = cache->key.offset -
682 btrfs_block_group_used(&cache->item);
683 cache->space_info->bytes_used += bytes_used >> 1;
684 spin_unlock(&cache->lock);
685 spin_unlock(&cache->space_info->lock);
686 fragment_free_space(fs_info->extent_root, cache);
687 }
688#endif
689 mutex_unlock(&caching_ctl->mutex);
690
691 wake_up(&caching_ctl->wait);
692 if (ret == 1) {
693 put_caching_control(caching_ctl);
694 free_excluded_extents(fs_info->extent_root, cache);
695 return 0;
696 }
697 } else {
		/*
		 * We're either using the free space tree or no caching at all.
		 * Set cached to the appropriate value and wakeup any waiters.
		 */
702 spin_lock(&cache->lock);
703 if (load_cache_only) {
704 cache->caching_ctl = NULL;
705 cache->cached = BTRFS_CACHE_NO;
706 } else {
707 cache->cached = BTRFS_CACHE_STARTED;
708 cache->has_caching_ctl = 1;
709 }
710 spin_unlock(&cache->lock);
711 wake_up(&caching_ctl->wait);
712 }
713
714 if (load_cache_only) {
715 put_caching_control(caching_ctl);
716 return 0;
717 }
718
719 down_write(&fs_info->commit_root_sem);
720 atomic_inc(&caching_ctl->count);
721 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
722 up_write(&fs_info->commit_root_sem);
723
724 btrfs_get_block_group(cache);
725
726 btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
727
728 return ret;
729}
730
/*
 * return the block group that starts at or after bytenr
 */
734static struct btrfs_block_group_cache *
735btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
736{
737 struct btrfs_block_group_cache *cache;
738
739 cache = block_group_cache_tree_search(info, bytenr, 0);
740
741 return cache;
742}
743
/*
 * return the block group that contains the given bytenr
 */
747struct btrfs_block_group_cache *btrfs_lookup_block_group(
748 struct btrfs_fs_info *info,
749 u64 bytenr)
750{
751 struct btrfs_block_group_cache *cache;
752
753 cache = block_group_cache_tree_search(info, bytenr, 1);
754
755 return cache;
756}
757
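/*
 * Find the space_info for the given block group type bits (data, metadata or
 * system), or NULL if none has been created yet.
 */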
758static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
759 u64 flags)
760{
761 struct list_head *head = &info->space_info;
762 struct btrfs_space_info *found;
763
764 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
765
766 rcu_read_lock();
767 list_for_each_entry_rcu(found, head, list) {
768 if (found->flags & flags) {
769 rcu_read_unlock();
770 return found;
771 }
772 }
773 rcu_read_unlock();
774 return NULL;
775}
776
/*
 * after adding space to the filesystem, we need to clear the full flags
 * in all the space infos.
 */
781void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
782{
783 struct list_head *head = &info->space_info;
784 struct btrfs_space_info *found;
785
786 rcu_read_lock();
787 list_for_each_entry_rcu(found, head, list)
788 found->full = 0;
789 rcu_read_unlock();
790}
791
/* simple helper to search for an existing data extent at a given offset */
793int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
794{
795 int ret;
796 struct btrfs_key key;
797 struct btrfs_path *path;
798
799 path = btrfs_alloc_path();
800 if (!path)
801 return -ENOMEM;
802
803 key.objectid = start;
804 key.offset = len;
805 key.type = BTRFS_EXTENT_ITEM_KEY;
806 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
807 0, 0);
808 btrfs_free_path(path);
809 return ret;
810}
811
/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */
821int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
822 struct btrfs_root *root, u64 bytenr,
823 u64 offset, int metadata, u64 *refs, u64 *flags)
824{
825 struct btrfs_delayed_ref_head *head;
826 struct btrfs_delayed_ref_root *delayed_refs;
827 struct btrfs_path *path;
828 struct btrfs_extent_item *ei;
829 struct extent_buffer *leaf;
830 struct btrfs_key key;
831 u32 item_size;
832 u64 num_refs;
833 u64 extent_flags;
834 int ret;
835
	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */
840 if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
841 offset = root->nodesize;
842 metadata = 0;
843 }
844
845 path = btrfs_alloc_path();
846 if (!path)
847 return -ENOMEM;
848
849 if (!trans) {
850 path->skip_locking = 1;
851 path->search_commit_root = 1;
852 }
853
854search_again:
855 key.objectid = bytenr;
856 key.offset = offset;
857 if (metadata)
858 key.type = BTRFS_METADATA_ITEM_KEY;
859 else
860 key.type = BTRFS_EXTENT_ITEM_KEY;
861
862 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
863 &key, path, 0, 0);
864 if (ret < 0)
865 goto out_free;
866
867 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
868 if (path->slots[0]) {
869 path->slots[0]--;
870 btrfs_item_key_to_cpu(path->nodes[0], &key,
871 path->slots[0]);
872 if (key.objectid == bytenr &&
873 key.type == BTRFS_EXTENT_ITEM_KEY &&
874 key.offset == root->nodesize)
875 ret = 0;
876 }
877 }
878
879 if (ret == 0) {
880 leaf = path->nodes[0];
881 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
882 if (item_size >= sizeof(*ei)) {
883 ei = btrfs_item_ptr(leaf, path->slots[0],
884 struct btrfs_extent_item);
885 num_refs = btrfs_extent_refs(leaf, ei);
886 extent_flags = btrfs_extent_flags(leaf, ei);
887 } else {
888#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
889 struct btrfs_extent_item_v0 *ei0;
890 BUG_ON(item_size != sizeof(*ei0));
891 ei0 = btrfs_item_ptr(leaf, path->slots[0],
892 struct btrfs_extent_item_v0);
893 num_refs = btrfs_extent_refs_v0(leaf, ei0);
894
895 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
896#else
897 BUG();
898#endif
899 }
900 BUG_ON(num_refs == 0);
901 } else {
902 num_refs = 0;
903 extent_flags = 0;
904 ret = 0;
905 }
906
907 if (!trans)
908 goto out;
909
910 delayed_refs = &trans->transaction->delayed_refs;
911 spin_lock(&delayed_refs->lock);
912 head = btrfs_find_delayed_ref_head(trans, bytenr);
913 if (head) {
914 if (!mutex_trylock(&head->mutex)) {
915 atomic_inc(&head->node.refs);
916 spin_unlock(&delayed_refs->lock);
917
918 btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and
			 * try again
			 */
924 mutex_lock(&head->mutex);
925 mutex_unlock(&head->mutex);
926 btrfs_put_delayed_ref(&head->node);
927 goto search_again;
928 }
929 spin_lock(&head->lock);
930 if (head->extent_op && head->extent_op->update_flags)
931 extent_flags |= head->extent_op->flags_to_set;
932 else
933 BUG_ON(num_refs == 0);
934
935 num_refs += head->node.ref_mod;
936 spin_unlock(&head->lock);
937 mutex_unlock(&head->mutex);
938 }
939 spin_unlock(&delayed_refs->lock);
940out:
941 WARN_ON(num_refs == 0);
942 if (refs)
943 *refs = num_refs;
944 if (flags)
945 *flags = extent_flags;
946out_free:
947 btrfs_free_path(path);
948 return ret;
949}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  Implicit back refs are optimized for
 * pointers in non-shared tree blocks: for a given pointer they record the
 * owner tree and enough key information to find the block by searching the
 * fs tree.  They are used for all pointers in data extents and in tree
 * blocks that are not shared.
 *
 * Full back refs are for pointers in tree blocks that are no longer used
 * only by their owner tree; they record the location of the parent block.
 * Full back refs are generic and could be used everywhere, but they are
 * more expensive to maintain, so they are only used once a block becomes
 * shared (for example after a snapshot).
 *
 * The key offset of an implicit data back ref (EXTENT_DATA_REF) is a hash
 * of the root objectid, the inode objectid and the file offset; the key
 * offset of a full data back ref (SHARED_DATA_REF) is the byte number of
 * the parent leaf.  For tree blocks, TREE_BLOCK_REF is keyed by the owner
 * root objectid and SHARED_BLOCK_REF by the byte number of the parent
 * block.
 *
 * Small numbers of back refs are stored inline in the extent item itself,
 * ordered by type; when they no longer fit they are moved out to separate
 * keyed items that follow the extent item in the extent tree.
 */
1057#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1058static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
1059 struct btrfs_root *root,
1060 struct btrfs_path *path,
1061 u64 owner, u32 extra_size)
1062{
1063 struct btrfs_extent_item *item;
1064 struct btrfs_extent_item_v0 *ei0;
1065 struct btrfs_extent_ref_v0 *ref0;
1066 struct btrfs_tree_block_info *bi;
1067 struct extent_buffer *leaf;
1068 struct btrfs_key key;
1069 struct btrfs_key found_key;
1070 u32 new_size = sizeof(*item);
1071 u64 refs;
1072 int ret;
1073
1074 leaf = path->nodes[0];
1075 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
1076
1077 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1078 ei0 = btrfs_item_ptr(leaf, path->slots[0],
1079 struct btrfs_extent_item_v0);
1080 refs = btrfs_extent_refs_v0(leaf, ei0);
1081
1082 if (owner == (u64)-1) {
1083 while (1) {
1084 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1085 ret = btrfs_next_leaf(root, path);
1086 if (ret < 0)
1087 return ret;
1088 BUG_ON(ret > 0);
1089 leaf = path->nodes[0];
1090 }
1091 btrfs_item_key_to_cpu(leaf, &found_key,
1092 path->slots[0]);
1093 BUG_ON(key.objectid != found_key.objectid);
1094 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
1095 path->slots[0]++;
1096 continue;
1097 }
1098 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1099 struct btrfs_extent_ref_v0);
1100 owner = btrfs_ref_objectid_v0(leaf, ref0);
1101 break;
1102 }
1103 }
1104 btrfs_release_path(path);
1105
1106 if (owner < BTRFS_FIRST_FREE_OBJECTID)
1107 new_size += sizeof(*bi);
1108
1109 new_size -= sizeof(*ei0);
1110 ret = btrfs_search_slot(trans, root, &key, path,
1111 new_size + extra_size, 1);
1112 if (ret < 0)
1113 return ret;
1114 BUG_ON(ret);
1115
1116 btrfs_extend_item(root, path, new_size);
1117
1118 leaf = path->nodes[0];
1119 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1120 btrfs_set_extent_refs(leaf, item, refs);
1121
1122 btrfs_set_extent_generation(leaf, item, 0);
1123 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1124 btrfs_set_extent_flags(leaf, item,
1125 BTRFS_EXTENT_FLAG_TREE_BLOCK |
1126 BTRFS_BLOCK_FLAG_FULL_BACKREF);
1127 bi = (struct btrfs_tree_block_info *)(item + 1);
1128
1129 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
1130 btrfs_set_tree_block_level(leaf, bi, (int)owner);
1131 } else {
1132 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
1133 }
1134 btrfs_mark_buffer_dirty(leaf);
1135 return 0;
1136}
1137#endif
1138
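/*
 * Hash of (root objectid, inode objectid, file offset) used as the key offset
 * for EXTENT_DATA_REF items, so implicit data back references can be indexed.
 */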
1139static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1140{
1141 u32 high_crc = ~(u32)0;
1142 u32 low_crc = ~(u32)0;
1143 __le64 lenum;
1144
1145 lenum = cpu_to_le64(root_objectid);
1146 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
1147 lenum = cpu_to_le64(owner);
1148 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1149 lenum = cpu_to_le64(offset);
1150 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1151
1152 return ((u64)high_crc << 31) ^ (u64)low_crc;
1153}
1154
1155static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1156 struct btrfs_extent_data_ref *ref)
1157{
1158 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1159 btrfs_extent_data_ref_objectid(leaf, ref),
1160 btrfs_extent_data_ref_offset(leaf, ref));
1161}
1162
1163static int match_extent_data_ref(struct extent_buffer *leaf,
1164 struct btrfs_extent_data_ref *ref,
1165 u64 root_objectid, u64 owner, u64 offset)
1166{
1167 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1168 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1169 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1170 return 0;
1171 return 1;
1172}
1173
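/*
 * Look up an existing keyed data back reference: either a SHARED_DATA_REF
 * keyed by the parent block, or an EXTENT_DATA_REF matching
 * (root, owner, offset).  Returns 0 with the path pointing at the item,
 * or -ENOENT.
 */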
1174static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
1175 struct btrfs_root *root,
1176 struct btrfs_path *path,
1177 u64 bytenr, u64 parent,
1178 u64 root_objectid,
1179 u64 owner, u64 offset)
1180{
1181 struct btrfs_key key;
1182 struct btrfs_extent_data_ref *ref;
1183 struct extent_buffer *leaf;
1184 u32 nritems;
1185 int ret;
1186 int recow;
1187 int err = -ENOENT;
1188
1189 key.objectid = bytenr;
1190 if (parent) {
1191 key.type = BTRFS_SHARED_DATA_REF_KEY;
1192 key.offset = parent;
1193 } else {
1194 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1195 key.offset = hash_extent_data_ref(root_objectid,
1196 owner, offset);
1197 }
1198again:
1199 recow = 0;
1200 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1201 if (ret < 0) {
1202 err = ret;
1203 goto fail;
1204 }
1205
1206 if (parent) {
1207 if (!ret)
1208 return 0;
1209#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1210 key.type = BTRFS_EXTENT_REF_V0_KEY;
1211 btrfs_release_path(path);
1212 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1213 if (ret < 0) {
1214 err = ret;
1215 goto fail;
1216 }
1217 if (!ret)
1218 return 0;
1219#endif
1220 goto fail;
1221 }
1222
1223 leaf = path->nodes[0];
1224 nritems = btrfs_header_nritems(leaf);
1225 while (1) {
1226 if (path->slots[0] >= nritems) {
1227 ret = btrfs_next_leaf(root, path);
1228 if (ret < 0)
1229 err = ret;
1230 if (ret)
1231 goto fail;
1232
1233 leaf = path->nodes[0];
1234 nritems = btrfs_header_nritems(leaf);
1235 recow = 1;
1236 }
1237
1238 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1239 if (key.objectid != bytenr ||
1240 key.type != BTRFS_EXTENT_DATA_REF_KEY)
1241 goto fail;
1242
1243 ref = btrfs_item_ptr(leaf, path->slots[0],
1244 struct btrfs_extent_data_ref);
1245
1246 if (match_extent_data_ref(leaf, ref, root_objectid,
1247 owner, offset)) {
1248 if (recow) {
1249 btrfs_release_path(path);
1250 goto again;
1251 }
1252 err = 0;
1253 break;
1254 }
1255 path->slots[0]++;
1256 }
1257fail:
1258 return err;
1259}
1260
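/*
 * Insert a new keyed data back reference item, or bump the count of an
 * existing one.  EXTENT_DATA_REF key offsets can collide on the hash, so on
 * -EEXIST we walk forward (key.offset++) until we find the matching ref or a
 * free slot.
 */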
1261static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1262 struct btrfs_root *root,
1263 struct btrfs_path *path,
1264 u64 bytenr, u64 parent,
1265 u64 root_objectid, u64 owner,
1266 u64 offset, int refs_to_add)
1267{
1268 struct btrfs_key key;
1269 struct extent_buffer *leaf;
1270 u32 size;
1271 u32 num_refs;
1272 int ret;
1273
1274 key.objectid = bytenr;
1275 if (parent) {
1276 key.type = BTRFS_SHARED_DATA_REF_KEY;
1277 key.offset = parent;
1278 size = sizeof(struct btrfs_shared_data_ref);
1279 } else {
1280 key.type = BTRFS_EXTENT_DATA_REF_KEY;
1281 key.offset = hash_extent_data_ref(root_objectid,
1282 owner, offset);
1283 size = sizeof(struct btrfs_extent_data_ref);
1284 }
1285
1286 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
1287 if (ret && ret != -EEXIST)
1288 goto fail;
1289
1290 leaf = path->nodes[0];
1291 if (parent) {
1292 struct btrfs_shared_data_ref *ref;
1293 ref = btrfs_item_ptr(leaf, path->slots[0],
1294 struct btrfs_shared_data_ref);
1295 if (ret == 0) {
1296 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
1297 } else {
1298 num_refs = btrfs_shared_data_ref_count(leaf, ref);
1299 num_refs += refs_to_add;
1300 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
1301 }
1302 } else {
1303 struct btrfs_extent_data_ref *ref;
1304 while (ret == -EEXIST) {
1305 ref = btrfs_item_ptr(leaf, path->slots[0],
1306 struct btrfs_extent_data_ref);
1307 if (match_extent_data_ref(leaf, ref, root_objectid,
1308 owner, offset))
1309 break;
1310 btrfs_release_path(path);
1311 key.offset++;
1312 ret = btrfs_insert_empty_item(trans, root, path, &key,
1313 size);
1314 if (ret && ret != -EEXIST)
1315 goto fail;
1316
1317 leaf = path->nodes[0];
1318 }
1319 ref = btrfs_item_ptr(leaf, path->slots[0],
1320 struct btrfs_extent_data_ref);
1321 if (ret == 0) {
1322 btrfs_set_extent_data_ref_root(leaf, ref,
1323 root_objectid);
1324 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
1325 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
1326 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
1327 } else {
1328 num_refs = btrfs_extent_data_ref_count(leaf, ref);
1329 num_refs += refs_to_add;
1330 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
1331 }
1332 }
1333 btrfs_mark_buffer_dirty(leaf);
1334 ret = 0;
1335fail:
1336 btrfs_release_path(path);
1337 return ret;
1338}
1339
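/*
 * Drop refs_to_drop references from the keyed data back ref item the path
 * points at, deleting the item (and setting *last_ref) when the count
 * reaches zero.
 */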
1340static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1341 struct btrfs_root *root,
1342 struct btrfs_path *path,
1343 int refs_to_drop, int *last_ref)
1344{
1345 struct btrfs_key key;
1346 struct btrfs_extent_data_ref *ref1 = NULL;
1347 struct btrfs_shared_data_ref *ref2 = NULL;
1348 struct extent_buffer *leaf;
1349 u32 num_refs = 0;
1350 int ret = 0;
1351
1352 leaf = path->nodes[0];
1353 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1354
1355 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1356 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1357 struct btrfs_extent_data_ref);
1358 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1359 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1360 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1361 struct btrfs_shared_data_ref);
1362 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1363#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1364 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1365 struct btrfs_extent_ref_v0 *ref0;
1366 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1367 struct btrfs_extent_ref_v0);
1368 num_refs = btrfs_ref_count_v0(leaf, ref0);
1369#endif
1370 } else {
1371 BUG();
1372 }
1373
1374 BUG_ON(num_refs < refs_to_drop);
1375 num_refs -= refs_to_drop;
1376
1377 if (num_refs == 0) {
1378 ret = btrfs_del_item(trans, root, path);
1379 *last_ref = 1;
1380 } else {
1381 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1382 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1383 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1384 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
1385#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1386 else {
1387 struct btrfs_extent_ref_v0 *ref0;
1388 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1389 struct btrfs_extent_ref_v0);
1390 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
1391 }
1392#endif
1393 btrfs_mark_buffer_dirty(leaf);
1394 }
1395 return ret;
1396}
1397
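/*
 * Return the reference count stored in the data back ref the path (or the
 * given inline ref) points at, handling both the shared and the indexed
 * reference formats.
 */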
1398static noinline u32 extent_data_ref_count(struct btrfs_path *path,
1399 struct btrfs_extent_inline_ref *iref)
1400{
1401 struct btrfs_key key;
1402 struct extent_buffer *leaf;
1403 struct btrfs_extent_data_ref *ref1;
1404 struct btrfs_shared_data_ref *ref2;
1405 u32 num_refs = 0;
1406
1407 leaf = path->nodes[0];
1408 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1409 if (iref) {
1410 if (btrfs_extent_inline_ref_type(leaf, iref) ==
1411 BTRFS_EXTENT_DATA_REF_KEY) {
1412 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
1413 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1414 } else {
1415 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
1416 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1417 }
1418 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1419 ref1 = btrfs_item_ptr(leaf, path->slots[0],
1420 struct btrfs_extent_data_ref);
1421 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1422 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1423 ref2 = btrfs_item_ptr(leaf, path->slots[0],
1424 struct btrfs_shared_data_ref);
1425 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1426#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1427 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1428 struct btrfs_extent_ref_v0 *ref0;
1429 ref0 = btrfs_item_ptr(leaf, path->slots[0],
1430 struct btrfs_extent_ref_v0);
1431 num_refs = btrfs_ref_count_v0(leaf, ref0);
1432#endif
1433 } else {
1434 WARN_ON(1);
1435 }
1436 return num_refs;
1437}
1438
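/*
 * Look up a keyed back reference for a tree block: SHARED_BLOCK_REF keyed by
 * the parent block, or TREE_BLOCK_REF keyed by the owner root objectid.
 */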
1439static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1440 struct btrfs_root *root,
1441 struct btrfs_path *path,
1442 u64 bytenr, u64 parent,
1443 u64 root_objectid)
1444{
1445 struct btrfs_key key;
1446 int ret;
1447
1448 key.objectid = bytenr;
1449 if (parent) {
1450 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1451 key.offset = parent;
1452 } else {
1453 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1454 key.offset = root_objectid;
1455 }
1456
1457 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1458 if (ret > 0)
1459 ret = -ENOENT;
1460#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1461 if (ret == -ENOENT && parent) {
1462 btrfs_release_path(path);
1463 key.type = BTRFS_EXTENT_REF_V0_KEY;
1464 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1465 if (ret > 0)
1466 ret = -ENOENT;
1467 }
1468#endif
1469 return ret;
1470}
1471
1472static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1473 struct btrfs_root *root,
1474 struct btrfs_path *path,
1475 u64 bytenr, u64 parent,
1476 u64 root_objectid)
1477{
1478 struct btrfs_key key;
1479 int ret;
1480
1481 key.objectid = bytenr;
1482 if (parent) {
1483 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1484 key.offset = parent;
1485 } else {
1486 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1487 key.offset = root_objectid;
1488 }
1489
1490 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1491 btrfs_release_path(path);
1492 return ret;
1493}
1494
1495static inline int extent_ref_type(u64 parent, u64 owner)
1496{
1497 int type;
1498 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1499 if (parent > 0)
1500 type = BTRFS_SHARED_BLOCK_REF_KEY;
1501 else
1502 type = BTRFS_TREE_BLOCK_REF_KEY;
1503 } else {
1504 if (parent > 0)
1505 type = BTRFS_SHARED_DATA_REF_KEY;
1506 else
1507 type = BTRFS_EXTENT_DATA_REF_KEY;
1508 }
1509 return type;
1510}
1511
1512static int find_next_key(struct btrfs_path *path, int level,
1513 struct btrfs_key *key)
1514
1515{
1516 for (; level < BTRFS_MAX_LEVEL; level++) {
1517 if (!path->nodes[level])
1518 break;
1519 if (path->slots[level] + 1 >=
1520 btrfs_header_nritems(path->nodes[level]))
1521 continue;
1522 if (level == 0)
1523 btrfs_item_key_to_cpu(path->nodes[level], key,
1524 path->slots[level] + 1);
1525 else
1526 btrfs_node_key_to_cpu(path->nodes[level], key,
1527 path->slots[level] + 1);
1528 return 0;
1529 }
1530 return 1;
1531}
1532
/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
1546static noinline_for_stack
1547int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1548 struct btrfs_root *root,
1549 struct btrfs_path *path,
1550 struct btrfs_extent_inline_ref **ref_ret,
1551 u64 bytenr, u64 num_bytes,
1552 u64 parent, u64 root_objectid,
1553 u64 owner, u64 offset, int insert)
1554{
1555 struct btrfs_key key;
1556 struct extent_buffer *leaf;
1557 struct btrfs_extent_item *ei;
1558 struct btrfs_extent_inline_ref *iref;
1559 u64 flags;
1560 u64 item_size;
1561 unsigned long ptr;
1562 unsigned long end;
1563 int extra_size;
1564 int type;
1565 int want;
1566 int ret;
1567 int err = 0;
1568 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
1569 SKINNY_METADATA);
1570
1571 key.objectid = bytenr;
1572 key.type = BTRFS_EXTENT_ITEM_KEY;
1573 key.offset = num_bytes;
1574
1575 want = extent_ref_type(parent, owner);
1576 if (insert) {
1577 extra_size = btrfs_extent_inline_ref_size(want);
1578 path->keep_locks = 1;
1579 } else
1580 extra_size = -1;
1581
	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */
1586 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1587 key.type = BTRFS_METADATA_ITEM_KEY;
1588 key.offset = owner;
1589 }
1590
1591again:
1592 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1593 if (ret < 0) {
1594 err = ret;
1595 goto out;
1596 }
1597
	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
1602 if (ret > 0 && skinny_metadata) {
1603 skinny_metadata = false;
1604 if (path->slots[0]) {
1605 path->slots[0]--;
1606 btrfs_item_key_to_cpu(path->nodes[0], &key,
1607 path->slots[0]);
1608 if (key.objectid == bytenr &&
1609 key.type == BTRFS_EXTENT_ITEM_KEY &&
1610 key.offset == num_bytes)
1611 ret = 0;
1612 }
1613 if (ret) {
1614 key.objectid = bytenr;
1615 key.type = BTRFS_EXTENT_ITEM_KEY;
1616 key.offset = num_bytes;
1617 btrfs_release_path(path);
1618 goto again;
1619 }
1620 }
1621
1622 if (ret && !insert) {
1623 err = -ENOENT;
1624 goto out;
1625 } else if (WARN_ON(ret)) {
1626 err = -EIO;
1627 goto out;
1628 }
1629
1630 leaf = path->nodes[0];
1631 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1632#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1633 if (item_size < sizeof(*ei)) {
1634 if (!insert) {
1635 err = -ENOENT;
1636 goto out;
1637 }
1638 ret = convert_extent_item_v0(trans, root, path, owner,
1639 extra_size);
1640 if (ret < 0) {
1641 err = ret;
1642 goto out;
1643 }
1644 leaf = path->nodes[0];
1645 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1646 }
1647#endif
1648 BUG_ON(item_size < sizeof(*ei));
1649
1650 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1651 flags = btrfs_extent_flags(leaf, ei);
1652
1653 ptr = (unsigned long)(ei + 1);
1654 end = (unsigned long)ei + item_size;
1655
1656 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1657 ptr += sizeof(struct btrfs_tree_block_info);
1658 BUG_ON(ptr > end);
1659 }
1660
1661 err = -ENOENT;
1662 while (1) {
1663 if (ptr >= end) {
1664 WARN_ON(ptr > end);
1665 break;
1666 }
1667 iref = (struct btrfs_extent_inline_ref *)ptr;
1668 type = btrfs_extent_inline_ref_type(leaf, iref);
1669 if (want < type)
1670 break;
1671 if (want > type) {
1672 ptr += btrfs_extent_inline_ref_size(type);
1673 continue;
1674 }
1675
1676 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1677 struct btrfs_extent_data_ref *dref;
1678 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1679 if (match_extent_data_ref(leaf, dref, root_objectid,
1680 owner, offset)) {
1681 err = 0;
1682 break;
1683 }
1684 if (hash_extent_data_ref_item(leaf, dref) <
1685 hash_extent_data_ref(root_objectid, owner, offset))
1686 break;
1687 } else {
1688 u64 ref_offset;
1689 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1690 if (parent > 0) {
1691 if (parent == ref_offset) {
1692 err = 0;
1693 break;
1694 }
1695 if (ref_offset < parent)
1696 break;
1697 } else {
1698 if (root_objectid == ref_offset) {
1699 err = 0;
1700 break;
1701 }
1702 if (ref_offset < root_objectid)
1703 break;
1704 }
1705 }
1706 ptr += btrfs_extent_inline_ref_size(type);
1707 }
1708 if (err == -ENOENT && insert) {
1709 if (item_size + extra_size >=
1710 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1711 err = -EAGAIN;
1712 goto out;
1713 }
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
1720 if (find_next_key(path, 0, &key) == 0 &&
1721 key.objectid == bytenr &&
1722 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1723 err = -EAGAIN;
1724 goto out;
1725 }
1726 }
1727 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1728out:
1729 if (insert) {
1730 path->keep_locks = 0;
1731 btrfs_unlock_up_safe(path, 1);
1732 }
1733 return err;
1734}
1735
/*
 * helper to add new inline back ref
 */
1739static noinline_for_stack
1740void setup_inline_extent_backref(struct btrfs_root *root,
1741 struct btrfs_path *path,
1742 struct btrfs_extent_inline_ref *iref,
1743 u64 parent, u64 root_objectid,
1744 u64 owner, u64 offset, int refs_to_add,
1745 struct btrfs_delayed_extent_op *extent_op)
1746{
1747 struct extent_buffer *leaf;
1748 struct btrfs_extent_item *ei;
1749 unsigned long ptr;
1750 unsigned long end;
1751 unsigned long item_offset;
1752 u64 refs;
1753 int size;
1754 int type;
1755
1756 leaf = path->nodes[0];
1757 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1758 item_offset = (unsigned long)iref - (unsigned long)ei;
1759
1760 type = extent_ref_type(parent, owner);
1761 size = btrfs_extent_inline_ref_size(type);
1762
1763 btrfs_extend_item(root, path, size);
1764
1765 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1766 refs = btrfs_extent_refs(leaf, ei);
1767 refs += refs_to_add;
1768 btrfs_set_extent_refs(leaf, ei, refs);
1769 if (extent_op)
1770 __run_delayed_extent_op(extent_op, leaf, ei);
1771
1772 ptr = (unsigned long)ei + item_offset;
1773 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1774 if (ptr < end - size)
1775 memmove_extent_buffer(leaf, ptr + size, ptr,
1776 end - size - ptr);
1777
1778 iref = (struct btrfs_extent_inline_ref *)ptr;
1779 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1780 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1781 struct btrfs_extent_data_ref *dref;
1782 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1783 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1784 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1785 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1786 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1787 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1788 struct btrfs_shared_data_ref *sref;
1789 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1790 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1791 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1792 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1793 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1794 } else {
1795 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1796 }
1797 btrfs_mark_buffer_dirty(leaf);
1798}
1799
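/*
 * Find an existing back reference for the extent, preferring the inline form;
 * if no inline ref exists, fall back to looking up a separate keyed ref item.
 */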
1800static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1801 struct btrfs_root *root,
1802 struct btrfs_path *path,
1803 struct btrfs_extent_inline_ref **ref_ret,
1804 u64 bytenr, u64 num_bytes, u64 parent,
1805 u64 root_objectid, u64 owner, u64 offset)
1806{
1807 int ret;
1808
1809 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1810 bytenr, num_bytes, parent,
1811 root_objectid, owner, offset, 0);
1812 if (ret != -ENOENT)
1813 return ret;
1814
1815 btrfs_release_path(path);
1816 *ref_ret = NULL;
1817
1818 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1819 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1820 root_objectid);
1821 } else {
1822 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1823 root_objectid, owner, offset);
1824 }
1825 return ret;
1826}
1827
/*
 * helper to update/remove inline back ref
 */
1831static noinline_for_stack
1832void update_inline_extent_backref(struct btrfs_root *root,
1833 struct btrfs_path *path,
1834 struct btrfs_extent_inline_ref *iref,
1835 int refs_to_mod,
1836 struct btrfs_delayed_extent_op *extent_op,
1837 int *last_ref)
1838{
1839 struct extent_buffer *leaf;
1840 struct btrfs_extent_item *ei;
1841 struct btrfs_extent_data_ref *dref = NULL;
1842 struct btrfs_shared_data_ref *sref = NULL;
1843 unsigned long ptr;
1844 unsigned long end;
1845 u32 item_size;
1846 int size;
1847 int type;
1848 u64 refs;
1849
1850 leaf = path->nodes[0];
1851 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1852 refs = btrfs_extent_refs(leaf, ei);
1853 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1854 refs += refs_to_mod;
1855 btrfs_set_extent_refs(leaf, ei, refs);
1856 if (extent_op)
1857 __run_delayed_extent_op(extent_op, leaf, ei);
1858
1859 type = btrfs_extent_inline_ref_type(leaf, iref);
1860
1861 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1862 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1863 refs = btrfs_extent_data_ref_count(leaf, dref);
1864 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1865 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1866 refs = btrfs_shared_data_ref_count(leaf, sref);
1867 } else {
1868 refs = 1;
1869 BUG_ON(refs_to_mod != -1);
1870 }
1871
1872 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1873 refs += refs_to_mod;
1874
1875 if (refs > 0) {
1876 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1877 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1878 else
1879 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1880 } else {
1881 *last_ref = 1;
1882 size = btrfs_extent_inline_ref_size(type);
1883 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1884 ptr = (unsigned long)iref;
1885 end = (unsigned long)ei + item_size;
1886 if (ptr + size < end)
1887 memmove_extent_buffer(leaf, ptr, ptr + size,
1888 end - ptr - size);
1889 item_size -= size;
1890 btrfs_truncate_item(root, path, item_size, 1);
1891 }
1892 btrfs_mark_buffer_dirty(leaf);
1893}
1894
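/*
 * Add refs_to_add references for this extent as an inline back ref, either by
 * bumping an existing inline ref or by inserting a new one.  Other errors from
 * the lookup (such as -EAGAIN when the extent item has no room left) are
 * passed back to the caller.
 */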
1895static noinline_for_stack
1896int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1897 struct btrfs_root *root,
1898 struct btrfs_path *path,
1899 u64 bytenr, u64 num_bytes, u64 parent,
1900 u64 root_objectid, u64 owner,
1901 u64 offset, int refs_to_add,
1902 struct btrfs_delayed_extent_op *extent_op)
1903{
1904 struct btrfs_extent_inline_ref *iref;
1905 int ret;
1906
1907 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1908 bytenr, num_bytes, parent,
1909 root_objectid, owner, offset, 1);
1910 if (ret == 0) {
1911 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1912 update_inline_extent_backref(root, path, iref,
1913 refs_to_add, extent_op, NULL);
1914 } else if (ret == -ENOENT) {
1915 setup_inline_extent_backref(root, path, iref, parent,
1916 root_objectid, owner, offset,
1917 refs_to_add, extent_op);
1918 ret = 0;
1919 }
1920 return ret;
1921}
1922
1923static int insert_extent_backref(struct btrfs_trans_handle *trans,
1924 struct btrfs_root *root,
1925 struct btrfs_path *path,
1926 u64 bytenr, u64 parent, u64 root_objectid,
1927 u64 owner, u64 offset, int refs_to_add)
1928{
1929 int ret;
1930 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1931 BUG_ON(refs_to_add != 1);
1932 ret = insert_tree_block_ref(trans, root, path, bytenr,
1933 parent, root_objectid);
1934 } else {
1935 ret = insert_extent_data_ref(trans, root, path, bytenr,
1936 parent, root_objectid,
1937 owner, offset, refs_to_add);
1938 }
1939 return ret;
1940}
1941
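/*
 * Drop references from whichever back ref representation the caller found:
 * an inline ref, a keyed data ref item, or a keyed tree block ref item.
 */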
1942static int remove_extent_backref(struct btrfs_trans_handle *trans,
1943 struct btrfs_root *root,
1944 struct btrfs_path *path,
1945 struct btrfs_extent_inline_ref *iref,
1946 int refs_to_drop, int is_data, int *last_ref)
1947{
1948 int ret = 0;
1949
1950 BUG_ON(!is_data && refs_to_drop != 1);
1951 if (iref) {
1952 update_inline_extent_backref(root, path, iref,
1953 -refs_to_drop, NULL, last_ref);
1954 } else if (is_data) {
1955 ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
1956 last_ref);
1957 } else {
1958 *last_ref = 1;
1959 ret = btrfs_del_item(trans, root, path);
1960 }
1961 return ret;
1962}
1963
1964#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
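/*
 * Issue discards for [start, start + len), stepping around any superblock
 * mirrors that live inside that range, and report how many bytes were
 * actually discarded.
 */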
1965static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
1966 u64 *discarded_bytes)
1967{
1968 int j, ret = 0;
1969 u64 bytes_left, end;
1970 u64 aligned_start = ALIGN(start, 1 << 9);
1971
1972 if (WARN_ON(start != aligned_start)) {
1973 len -= aligned_start - start;
1974 len = round_down(len, 1 << 9);
1975 start = aligned_start;
1976 }
1977
1978 *discarded_bytes = 0;
1979
1980 if (!len)
1981 return 0;
1982
1983 end = start + len;
1984 bytes_left = len;
1985
	/* Skip any superblocks on this device. */
1987 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
1988 u64 sb_start = btrfs_sb_offset(j);
1989 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
1990 u64 size = sb_start - start;
1991
1992 if (!in_range(sb_start, start, bytes_left) &&
1993 !in_range(sb_end, start, bytes_left) &&
1994 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
1995 continue;
1996
		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * submit.
		 */
2001 if (sb_start <= start) {
2002 start += sb_end - start;
2003 if (start > end) {
2004 bytes_left = 0;
2005 break;
2006 }
2007 bytes_left = end - start;
2008 continue;
2009 }
2010
2011 if (size) {
2012 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
2013 GFP_NOFS, 0);
2014 if (!ret)
2015 *discarded_bytes += size;
2016 else if (ret != -EOPNOTSUPP)
2017 return ret;
2018 }
2019
2020 start = sb_end;
2021 if (start > end) {
2022 bytes_left = 0;
2023 break;
2024 }
2025 bytes_left = end - start;
2026 }
2027
2028 if (bytes_left) {
2029 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
2030 GFP_NOFS, 0);
2031 if (!ret)
2032 *discarded_bytes += bytes_left;
2033 }
2034 return ret;
2035}
2036
2037int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
2038 u64 num_bytes, u64 *actual_bytes)
2039{
2040 int ret;
2041 u64 discarded_bytes = 0;
2042 struct btrfs_bio *bbio = NULL;
2043
2044
	/* Tell the block device(s) that the sectors can be discarded */
2046 ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
2047 bytenr, &num_bytes, &bbio, 0);
2048
2049 if (!ret) {
2050 struct btrfs_bio_stripe *stripe = bbio->stripes;
2051 int i;
2052
2053
2054 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
2055 u64 bytes;
2056 if (!stripe->dev->can_discard)
2057 continue;
2058
2059 ret = btrfs_issue_discard(stripe->dev->bdev,
2060 stripe->physical,
2061 stripe->length,
2062 &bytes);
2063 if (!ret)
2064 discarded_bytes += bytes;
2065 else if (ret != -EOPNOTSUPP)
2066 break;

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
2073 ret = 0;
2074 }
2075 btrfs_put_bbio(bbio);
2076 }
2077
2078 if (actual_bytes)
2079 *actual_bytes = discarded_bytes;
2080
2081
2082 if (ret == -EOPNOTSUPP)
2083 ret = 0;
2084 return ret;
2085}
2086
/* Can return -ENOMEM */
2088int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2089 struct btrfs_root *root,
2090 u64 bytenr, u64 num_bytes, u64 parent,
2091 u64 root_objectid, u64 owner, u64 offset)
2092{
2093 int ret;
2094 struct btrfs_fs_info *fs_info = root->fs_info;
2095
2096 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
2097 root_objectid == BTRFS_TREE_LOG_OBJECTID);
2098
2099 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2100 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
2101 num_bytes,
2102 parent, root_objectid, (int)owner,
2103 BTRFS_ADD_DELAYED_REF, NULL);
2104 } else {
2105 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
2106 num_bytes, parent, root_objectid,
2107 owner, offset, 0,
2108 BTRFS_ADD_DELAYED_REF, NULL);
2109 }
2110 return ret;
2111}
2112
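/*
 * Apply an "add reference" delayed ref to the extent tree.  First try to add
 * the reference inline in the extent item; if there is no room (-EAGAIN),
 * just bump the extent item's ref count and insert a separate keyed back ref.
 */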
2113static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2114 struct btrfs_root *root,
2115 struct btrfs_delayed_ref_node *node,
2116 u64 parent, u64 root_objectid,
2117 u64 owner, u64 offset, int refs_to_add,
2118 struct btrfs_delayed_extent_op *extent_op)
2119{
2120 struct btrfs_fs_info *fs_info = root->fs_info;
2121 struct btrfs_path *path;
2122 struct extent_buffer *leaf;
2123 struct btrfs_extent_item *item;
2124 struct btrfs_key key;
2125 u64 bytenr = node->bytenr;
2126 u64 num_bytes = node->num_bytes;
2127 u64 refs;
2128 int ret;
2129
2130 path = btrfs_alloc_path();
2131 if (!path)
2132 return -ENOMEM;
2133
2134 path->reada = READA_FORWARD;
2135 path->leave_spinning = 1;
2136
2137 ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
2138 bytenr, num_bytes, parent,
2139 root_objectid, owner, offset,
2140 refs_to_add, extent_op);
2141 if ((ret < 0 && ret != -EAGAIN) || !ret)
2142 goto out;
2143
	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
2149 leaf = path->nodes[0];
2150 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2151 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2152 refs = btrfs_extent_refs(leaf, item);
2153 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2154 if (extent_op)
2155 __run_delayed_extent_op(extent_op, leaf, item);
2156
2157 btrfs_mark_buffer_dirty(leaf);
2158 btrfs_release_path(path);
2159
2160 path->reada = READA_FORWARD;
2161 path->leave_spinning = 1;
2162
2163 ret = insert_extent_backref(trans, root->fs_info->extent_root,
2164 path, bytenr, parent, root_objectid,
2165 owner, offset, refs_to_add);
2166 if (ret)
2167 btrfs_abort_transaction(trans, root, ret);
2168out:
2169 btrfs_free_path(path);
2170 return ret;
2171}
2172
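/*
 * Process one delayed ref node for a data extent: insert the extent item for
 * a freshly reserved allocation, or add/drop a back reference for an existing
 * extent.
 */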
2173static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2174 struct btrfs_root *root,
2175 struct btrfs_delayed_ref_node *node,
2176 struct btrfs_delayed_extent_op *extent_op,
2177 int insert_reserved)
2178{
2179 int ret = 0;
2180 struct btrfs_delayed_data_ref *ref;
2181 struct btrfs_key ins;
2182 u64 parent = 0;
2183 u64 ref_root = 0;
2184 u64 flags = 0;
2185
2186 ins.objectid = node->bytenr;
2187 ins.offset = node->num_bytes;
2188 ins.type = BTRFS_EXTENT_ITEM_KEY;
2189
2190 ref = btrfs_delayed_node_to_data_ref(node);
2191 trace_run_delayed_data_ref(node, ref, node->action);
2192
2193 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2194 parent = ref->parent;
2195 ref_root = ref->root;
2196
2197 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2198 if (extent_op)
2199 flags |= extent_op->flags_to_set;
2200 ret = alloc_reserved_file_extent(trans, root,
2201 parent, ref_root, flags,
2202 ref->objectid, ref->offset,
2203 &ins, node->ref_mod);
2204 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2205 ret = __btrfs_inc_extent_ref(trans, root, node, parent,
2206 ref_root, ref->objectid,
2207 ref->offset, node->ref_mod,
2208 extent_op);
2209 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2210 ret = __btrfs_free_extent(trans, root, node, parent,
2211 ref_root, ref->objectid,
2212 ref->offset, node->ref_mod,
2213 extent_op);
2214 } else {
2215 BUG();
2216 }
2217 return ret;
2218}
2219
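/*
 * Apply a delayed extent op (flags and/or key update) directly to an extent
 * item in the given leaf.
 */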
2220static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2221 struct extent_buffer *leaf,
2222 struct btrfs_extent_item *ei)
2223{
2224 u64 flags = btrfs_extent_flags(leaf, ei);
2225 if (extent_op->update_flags) {
2226 flags |= extent_op->flags_to_set;
2227 btrfs_set_extent_flags(leaf, ei, flags);
2228 }
2229
2230 if (extent_op->update_key) {
2231 struct btrfs_tree_block_info *bi;
2232 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2233 bi = (struct btrfs_tree_block_info *)(ei + 1);
2234 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2235 }
2236}
2237
2238static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2239 struct btrfs_root *root,
2240 struct btrfs_delayed_ref_node *node,
2241 struct btrfs_delayed_extent_op *extent_op)
2242{
2243 struct btrfs_key key;
2244 struct btrfs_path *path;
2245 struct btrfs_extent_item *ei;
2246 struct extent_buffer *leaf;
2247 u32 item_size;
2248 int ret;
2249 int err = 0;
2250 int metadata = !extent_op->is_data;
2251
2252 if (trans->aborted)
2253 return 0;
2254
2255 if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2256 metadata = 0;
2257
2258 path = btrfs_alloc_path();
2259 if (!path)
2260 return -ENOMEM;
2261
2262 key.objectid = node->bytenr;
2263
2264 if (metadata) {
2265 key.type = BTRFS_METADATA_ITEM_KEY;
2266 key.offset = extent_op->level;
2267 } else {
2268 key.type = BTRFS_EXTENT_ITEM_KEY;
2269 key.offset = node->num_bytes;
2270 }
2271
2272again:
2273 path->reada = READA_FORWARD;
2274 path->leave_spinning = 1;
2275 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
2276 path, 0, 1);
2277 if (ret < 0) {
2278 err = ret;
2279 goto out;
2280 }
2281 if (ret > 0) {
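		/*
		 * A skinny METADATA_ITEM may not exist if the extent predates
		 * the feature; check the previous slot for an old style
		 * EXTENT_ITEM and, failing that, redo the search with the
		 * full extent item key.
		 */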
2282 if (metadata) {
2283 if (path->slots[0] > 0) {
2284 path->slots[0]--;
2285 btrfs_item_key_to_cpu(path->nodes[0], &key,
2286 path->slots[0]);
2287 if (key.objectid == node->bytenr &&
2288 key.type == BTRFS_EXTENT_ITEM_KEY &&
2289 key.offset == node->num_bytes)
2290 ret = 0;
2291 }
2292 if (ret > 0) {
2293 btrfs_release_path(path);
2294 metadata = 0;
2295
2296 key.objectid = node->bytenr;
2297 key.offset = node->num_bytes;
2298 key.type = BTRFS_EXTENT_ITEM_KEY;
2299 goto again;
2300 }
2301 } else {
2302 err = -EIO;
2303 goto out;
2304 }
2305 }
2306
2307 leaf = path->nodes[0];
2308 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2309#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2310 if (item_size < sizeof(*ei)) {
2311 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
2312 path, (u64)-1, 0);
2313 if (ret < 0) {
2314 err = ret;
2315 goto out;
2316 }
2317 leaf = path->nodes[0];
2318 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2319 }
2320#endif
2321 BUG_ON(item_size < sizeof(*ei));
2322 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2323 __run_delayed_extent_op(extent_op, leaf, ei);
2324
2325 btrfs_mark_buffer_dirty(leaf);
2326out:
2327 btrfs_free_path(path);
2328 return err;
2329}
2330
2331static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2332 struct btrfs_root *root,
2333 struct btrfs_delayed_ref_node *node,
2334 struct btrfs_delayed_extent_op *extent_op,
2335 int insert_reserved)
2336{
2337 int ret = 0;
2338 struct btrfs_delayed_tree_ref *ref;
2339 struct btrfs_key ins;
2340 u64 parent = 0;
2341 u64 ref_root = 0;
2342 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
2343 SKINNY_METADATA);
2344
2345 ref = btrfs_delayed_node_to_tree_ref(node);
2346 trace_run_delayed_tree_ref(node, ref, node->action);
2347
2348 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2349 parent = ref->parent;
2350 ref_root = ref->root;
2351
2352 ins.objectid = node->bytenr;
2353 if (skinny_metadata) {
2354 ins.offset = ref->level;
2355 ins.type = BTRFS_METADATA_ITEM_KEY;
2356 } else {
2357 ins.offset = node->num_bytes;
2358 ins.type = BTRFS_EXTENT_ITEM_KEY;
2359 }
2360
2361 BUG_ON(node->ref_mod != 1);
2362 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2363 BUG_ON(!extent_op || !extent_op->update_flags);
2364 ret = alloc_reserved_tree_block(trans, root,
2365 parent, ref_root,
2366 extent_op->flags_to_set,
2367 &extent_op->key,
2368 ref->level, &ins);
2369 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2370 ret = __btrfs_inc_extent_ref(trans, root, node,
2371 parent, ref_root,
2372 ref->level, 0, 1,
2373 extent_op);
2374 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2375 ret = __btrfs_free_extent(trans, root, node,
2376 parent, ref_root,
2377 ref->level, 0, 1, extent_op);
2378 } else {
2379 BUG();
2380 }
2381 return ret;
2382}
2383
2384
2385static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2386 struct btrfs_root *root,
2387 struct btrfs_delayed_ref_node *node,
2388 struct btrfs_delayed_extent_op *extent_op,
2389 int insert_reserved)
2390{
2391 int ret = 0;
2392
2393 if (trans->aborted) {
2394 if (insert_reserved)
2395 btrfs_pin_extent(root, node->bytenr,
2396 node->num_bytes, 1);
2397 return 0;
2398 }
2399
2400 if (btrfs_delayed_ref_is_head(node)) {
2401 struct btrfs_delayed_ref_head *head;

		/*
		 * We've hit the end of the chain and were supposed to insert
		 * this extent into the tree, but it was deleted before we
		 * ever needed to insert it, so all we have to do is clean up
		 * the accounting.
		 */
2408 BUG_ON(extent_op);
2409 head = btrfs_delayed_node_to_head(node);
2410 trace_run_delayed_ref_head(node, head, node->action);
2411
2412 if (insert_reserved) {
2413 btrfs_pin_extent(root, node->bytenr,
2414 node->num_bytes, 1);
2415 if (head->is_data) {
2416 ret = btrfs_del_csums(trans, root,
2417 node->bytenr,
2418 node->num_bytes);
2419 }
2420 }
2421
		/* Also free its reserved qgroup space */
2423 btrfs_qgroup_free_delayed_ref(root->fs_info,
2424 head->qgroup_ref_root,
2425 head->qgroup_reserved);
2426 return ret;
2427 }
2428
2429 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2430 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2431 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2432 insert_reserved);
2433 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2434 node->type == BTRFS_SHARED_DATA_REF_KEY)
2435 ret = run_delayed_data_ref(trans, root, node, extent_op,
2436 insert_reserved);
2437 else
2438 BUG();
2439 return ret;
2440}
2441
2442static inline struct btrfs_delayed_ref_node *
2443select_delayed_ref(struct btrfs_delayed_ref_head *head)
2444{
2445 struct btrfs_delayed_ref_node *ref;
2446
2447 if (list_empty(&head->ref_list))
2448 return NULL;
2449
	/*
	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.  This
	 * keeps the ref count from hitting zero (which would delete the
	 * extent item) while there are still references waiting to be added.
	 */
2456 list_for_each_entry(ref, &head->ref_list, list) {
2457 if (ref->action == BTRFS_ADD_DELAYED_REF)
2458 return ref;
2459 }
2460
2461 return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
2462 list);
2463}
2464
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2469static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2470 struct btrfs_root *root,
2471 unsigned long nr)
2472{
2473 struct btrfs_delayed_ref_root *delayed_refs;
2474 struct btrfs_delayed_ref_node *ref;
2475 struct btrfs_delayed_ref_head *locked_ref = NULL;
2476 struct btrfs_delayed_extent_op *extent_op;
2477 struct btrfs_fs_info *fs_info = root->fs_info;
2478 ktime_t start = ktime_get();
2479 int ret;
2480 unsigned long count = 0;
2481 unsigned long actual_count = 0;
2482 int must_insert_reserved = 0;
2483
2484 delayed_refs = &trans->transaction->delayed_refs;
2485 while (1) {
2486 if (!locked_ref) {
2487 if (count >= nr)
2488 break;
2489
2490 spin_lock(&delayed_refs->lock);
2491 locked_ref = btrfs_select_ref_head(trans);
2492 if (!locked_ref) {
2493 spin_unlock(&delayed_refs->lock);
2494 break;
2495 }
2496
2497
2498
2499 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2500 spin_unlock(&delayed_refs->lock);
2501
			/*
			 * We may have dropped the spin lock to get the head
			 * mutex lock, which might have given someone else
			 * time to free the head.  If so it has been removed
			 * from our list and we can move on.
			 */
2507 if (ret == -EAGAIN) {
2508 locked_ref = NULL;
2509 count++;
2510 continue;
2511 }
2512 }
2513
		/*
		 * We need to try and merge add/drops of the same ref, since
		 * relocation can drop the implicit ref and re-add it before
		 * the drop finishes, and refs of the same type can be left
		 * unmerged because of bumps in the tree mod seq.  Merge them
		 * now so we don't process a stale or duplicate ref.
		 */
2526 spin_lock(&locked_ref->lock);
2527 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2528 locked_ref);
2529
		/*
		 * locked_ref is the head node, so we have to go one node back
		 * for any delayed ref updates.
		 */
2534 ref = select_delayed_ref(locked_ref);
2535
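		/*
		 * If the selected ref is still blocked by a pending tree mod
		 * log sequence number, put the head back and move on to the
		 * next one.
		 */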
2536 if (ref && ref->seq &&
2537 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2538 spin_unlock(&locked_ref->lock);
2539 btrfs_delayed_ref_unlock(locked_ref);
2540 spin_lock(&delayed_refs->lock);
2541 locked_ref->processing = 0;
2542 delayed_refs->num_heads_ready++;
2543 spin_unlock(&delayed_refs->lock);
2544 locked_ref = NULL;
2545 cond_resched();
2546 count++;
2547 continue;
2548 }
2549
		/*
		 * Record the must_insert_reserved flag before we drop the
		 * spin lock.
		 */
2554 must_insert_reserved = locked_ref->must_insert_reserved;
2555 locked_ref->must_insert_reserved = 0;
2556
2557 extent_op = locked_ref->extent_op;
2558 locked_ref->extent_op = NULL;
2559
2560 if (!ref) {
			/*
			 * All the delayed refs have been processed; send the
			 * head node itself to run_one_delayed_ref so any
			 * accounting fixes can happen.
			 */
2567 ref = &locked_ref->node;
2568
2569 if (extent_op && must_insert_reserved) {
2570 btrfs_free_delayed_extent_op(extent_op);
2571 extent_op = NULL;
2572 }
2573
2574 if (extent_op) {
2575 spin_unlock(&locked_ref->lock);
2576 ret = run_delayed_extent_op(trans, root,
2577 ref, extent_op);
2578 btrfs_free_delayed_extent_op(extent_op);
2579
2580 if (ret) {
				/*
				 * Reset must_insert_reserved if there was an
				 * error so the abort code can clean up the
				 * reserved space properly.
				 */
2587 if (must_insert_reserved)
2588 locked_ref->must_insert_reserved = 1;
2589 locked_ref->processing = 0;
2590 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2591 btrfs_delayed_ref_unlock(locked_ref);
2592 return ret;
2593 }
2594 continue;
2595 }
2596
			/*
			 * Need to drop our head ref lock and re-acquire the
			 * delayed ref lock and then re-check to make sure
			 * nobody got added.
			 */
2602 spin_unlock(&locked_ref->lock);
2603 spin_lock(&delayed_refs->lock);
2604 spin_lock(&locked_ref->lock);
2605 if (!list_empty(&locked_ref->ref_list) ||
2606 locked_ref->extent_op) {
2607 spin_unlock(&locked_ref->lock);
2608 spin_unlock(&delayed_refs->lock);
2609 continue;
2610 }
2611 ref->in_tree = 0;
2612 delayed_refs->num_heads--;
2613 rb_erase(&locked_ref->href_node,
2614 &delayed_refs->href_root);
2615 spin_unlock(&delayed_refs->lock);
2616 } else {
2617 actual_count++;
2618 ref->in_tree = 0;
2619 list_del(&ref->list);
2620 }
2621 atomic_dec(&delayed_refs->num_entries);
2622
2623 if (!btrfs_delayed_ref_is_head(ref)) {
			/*
			 * When we play the delayed ref, also correct the
			 * ref_mod on the head.
			 */
2628 switch (ref->action) {
2629 case BTRFS_ADD_DELAYED_REF:
2630 case BTRFS_ADD_DELAYED_EXTENT:
2631 locked_ref->node.ref_mod -= ref->ref_mod;
2632 break;
2633 case BTRFS_DROP_DELAYED_REF:
2634 locked_ref->node.ref_mod += ref->ref_mod;
2635 break;
2636 default:
2637 WARN_ON(1);
2638 }
2639 }
2640 spin_unlock(&locked_ref->lock);
2641
2642 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2643 must_insert_reserved);
2644
2645 btrfs_free_delayed_extent_op(extent_op);
2646 if (ret) {
2647 locked_ref->processing = 0;
2648 btrfs_delayed_ref_unlock(locked_ref);
2649 btrfs_put_delayed_ref(ref);
2650 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2651 return ret;
2652 }
2653
		/*
		 * If this node is a head, all the refs in the head have been
		 * dealt with: drop the pending csum accounting for data
		 * extents with a negative net ref_mod and unlock the head
		 * before picking the next one.
		 */
2660 if (btrfs_delayed_ref_is_head(ref)) {
2661 if (locked_ref->is_data &&
2662 locked_ref->total_ref_mod < 0) {
2663 spin_lock(&delayed_refs->lock);
2664 delayed_refs->pending_csums -= ref->num_bytes;
2665 spin_unlock(&delayed_refs->lock);
2666 }
2667 btrfs_delayed_ref_unlock(locked_ref);
2668 locked_ref = NULL;
2669 }
2670 btrfs_put_delayed_ref(ref);
2671 count++;
2672 cond_resched();
2673 }
2674
	/*
	 * We don't want to include ref heads since we can have empty ref
	 * heads and those would drastically skew our runtime down, since we
	 * just do accounting for them, not actual extent tree updates.
	 */
2680 if (actual_count > 0) {
2681 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2682 u64 avg;
2683
		/*
		 * Weigh the existing average higher than the current runtime
		 * to avoid large swings in the average.
		 */
2688 spin_lock(&delayed_refs->lock);
2689 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2690 fs_info->avg_delayed_ref_runtime = avg >> 2;
2691 spin_unlock(&delayed_refs->lock);
2692 }
2693 return 0;
2694}
2695
2696#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order, which in
 * most cases matches the order they were added.  To expose dependencies on
 * this order, we start processing the tree from the middle instead of the
 * beginning.
 */
2702static u64 find_middle(struct rb_root *root)
2703{
2704 struct rb_node *n = root->rb_node;
2705 struct btrfs_delayed_ref_node *entry;
2706 int alt = 1;
2707 u64 middle;
2708 u64 first = 0, last = 0;
2709
2710 n = rb_first(root);
2711 if (n) {
2712 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2713 first = entry->bytenr;
2714 }
2715 n = rb_last(root);
2716 if (n) {
2717 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2718 last = entry->bytenr;
2719 }
2720 n = root->rb_node;
2721
2722 while (n) {
2723 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2724 WARN_ON(!entry->in_tree);
2725
2726 middle = entry->bytenr;
2727
2728 if (alt)
2729 n = n->rb_left;
2730 else
2731 n = n->rb_right;
2732
2733 alt = 1 - alt;
2734 }
2735 return middle;
2736}
2737#endif
2738
2739static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2740{
2741 u64 num_bytes;
2742
2743 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2744 sizeof(struct btrfs_extent_inline_ref));
2745 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2746 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2747
2748
2749
2750
2751
2752 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2753}
2754
/*
 * Takes the number of bytes to be checksummed and figures out how many
 * leaves it would require to store the csums for that many bytes.
 */
2759u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
2760{
2761 u64 csum_size;
2762 u64 num_csums_per_leaf;
2763 u64 num_csums;
2764
2765 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2766 num_csums_per_leaf = div64_u64(csum_size,
2767 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2768 num_csums = div64_u64(csum_bytes, root->sectorsize);
2769 num_csums += num_csums_per_leaf - 1;
2770 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2771 return num_csums;
2772}
2773
2774int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2775 struct btrfs_root *root)
2776{
2777 struct btrfs_block_rsv *global_rsv;
2778 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2779 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2780 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2781 u64 num_bytes, num_dirty_bgs_bytes;
2782 int ret = 0;
2783
2784 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
2785 num_heads = heads_to_leaves(root, num_heads);
2786 if (num_heads > 1)
2787 num_bytes += (num_heads - 1) * root->nodesize;
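	/*
	 * Double the estimate as a rough worst-case fudge for splits and
	 * COW of the extent tree blocks themselves.
	 */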
2788 num_bytes <<= 1;
2789 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
2790 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2791 num_dirty_bgs);
2792 global_rsv = &root->fs_info->global_block_rsv;

	/*
	 * If we can't allocate any more chunks, make sure we have lots of
	 * wiggle room, since running delayed refs can create more delayed
	 * refs.
	 */
2798 if (global_rsv->space_info->full) {
2799 num_dirty_bgs_bytes <<= 1;
2800 num_bytes <<= 1;
2801 }
2802
2803 spin_lock(&global_rsv->lock);
2804 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2805 ret = 1;
2806 spin_unlock(&global_rsv->lock);
2807 return ret;
2808}
2809
2810int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2811 struct btrfs_root *root)
2812{
2813 struct btrfs_fs_info *fs_info = root->fs_info;
2814 u64 num_entries =
2815 atomic_read(&trans->transaction->delayed_refs.num_entries);
2816 u64 avg_runtime;
2817 u64 val;
2818
2819 smp_mb();
2820 avg_runtime = fs_info->avg_delayed_ref_runtime;
	val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
2823 return 1;
2824 if (val >= NSEC_PER_SEC / 2)
2825 return 2;
2826
2827 return btrfs_check_space_for_delayed_refs(trans, root);
2828}
2829
2830struct async_delayed_refs {
2831 struct btrfs_root *root;
2832 int count;
2833 int error;
2834 int sync;
2835 struct completion wait;
2836 struct btrfs_work work;
2837};
2838
2839static void delayed_ref_async_start(struct btrfs_work *work)
2840{
2841 struct async_delayed_refs *async;
2842 struct btrfs_trans_handle *trans;
2843 int ret;
2844
2845 async = container_of(work, struct async_delayed_refs, work);
2846
2847 trans = btrfs_join_transaction(async->root);
2848 if (IS_ERR(trans)) {
2849 async->error = PTR_ERR(trans);
2850 goto done;
2851 }
2852
	/*
	 * trans->sync means that when we call end_transaction, we won't wait
	 * on delayed refs.
	 */
2857 trans->sync = true;
2858 ret = btrfs_run_delayed_refs(trans, async->root, async->count);
2859 if (ret)
2860 async->error = ret;
2861
2862 ret = btrfs_end_transaction(trans, async->root);
2863 if (ret && !async->error)
2864 async->error = ret;
2865done:
2866 if (async->sync)
2867 complete(&async->wait);
2868 else
2869 kfree(async);
2870}
2871
2872int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2873 unsigned long count, int wait)
2874{
2875 struct async_delayed_refs *async;
2876 int ret;
2877
2878 async = kmalloc(sizeof(*async), GFP_NOFS);
2879 if (!async)
2880 return -ENOMEM;
2881
2882 async->root = root->fs_info->tree_root;
2883 async->count = count;
2884 async->error = 0;
2885 if (wait)
2886 async->sync = 1;
2887 else
2888 async->sync = 0;
2889 init_completion(&async->wait);
2890
2891 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2892 delayed_ref_async_start, NULL, NULL);
2893
2894 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2895
2896 if (wait) {
2897 wait_for_completion(&async->wait);
2898 ret = async->error;
2899 kfree(async);
2900 return ret;
2901 }
2902 return 0;
2903}
2904
/*
 * This starts processing the delayed reference count updates and extent
 * insertions we have queued up so far.  count can be 0, which means to
 * process everything in the tree at the start of the run (but not newly
 * added entries), or it can be some target number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction.
 * Returns <0 on error and aborts the transaction.
 */
2915int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2916 struct btrfs_root *root, unsigned long count)
2917{
2918 struct rb_node *node;
2919 struct btrfs_delayed_ref_root *delayed_refs;
2920 struct btrfs_delayed_ref_head *head;
2921 int ret;
2922 int run_all = count == (unsigned long)-1;
2923 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
2924
	/* We'll clean this up in btrfs_cleanup_transaction */
2926 if (trans->aborted)
2927 return 0;
2928
2929 if (root->fs_info->creating_free_space_tree)
2930 return 0;
2931
2932 if (root == root->fs_info->extent_root)
2933 root = root->fs_info->tree_root;
2934
2935 delayed_refs = &trans->transaction->delayed_refs;
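	/*
	 * count == 0: process roughly everything queued right now; the x2
	 * leaves headroom for refs added while we run.
	 */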
2936 if (count == 0)
2937 count = atomic_read(&delayed_refs->num_entries) * 2;
2938
2939again:
2940#ifdef SCRAMBLE_DELAYED_REFS
2941 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2942#endif
2943 trans->can_flush_pending_bgs = false;
2944 ret = __btrfs_run_delayed_refs(trans, root, count);
2945 if (ret < 0) {
2946 btrfs_abort_transaction(trans, root, ret);
2947 return ret;
2948 }
2949
2950 if (run_all) {
2951 if (!list_empty(&trans->new_bgs))
2952 btrfs_create_pending_block_groups(trans, root);
2953
2954 spin_lock(&delayed_refs->lock);
2955 node = rb_first(&delayed_refs->href_root);
2956 if (!node) {
2957 spin_unlock(&delayed_refs->lock);
2958 goto out;
2959 }
2960 count = (unsigned long)-1;
2961
2962 while (node) {
2963 head = rb_entry(node, struct btrfs_delayed_ref_head,
2964 href_node);
2965 if (btrfs_delayed_ref_is_head(&head->node)) {
2966 struct btrfs_delayed_ref_node *ref;
2967
2968 ref = &head->node;
2969 atomic_inc(&ref->refs);
2970
2971 spin_unlock(&delayed_refs->lock);
2972
				/*
				 * Mutex was contended, block until it's
				 * released and try again.
				 */
2976 mutex_lock(&head->mutex);
2977 mutex_unlock(&head->mutex);
2978
2979 btrfs_put_delayed_ref(ref);
2980 cond_resched();
2981 goto again;
2982 } else {
2983 WARN_ON(1);
2984 }
2985 node = rb_next(node);
2986 }
2987 spin_unlock(&delayed_refs->lock);
2988 cond_resched();
2989 goto again;
2990 }
2991out:
2992 assert_qgroups_uptodate(trans);
2993 trans->can_flush_pending_bgs = can_flush_pending_bgs;
2994 return 0;
2995}
2996
2997int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2998 struct btrfs_root *root,
2999 u64 bytenr, u64 num_bytes, u64 flags,
3000 int level, int is_data)
3001{
3002 struct btrfs_delayed_extent_op *extent_op;
3003 int ret;
3004
3005 extent_op = btrfs_alloc_delayed_extent_op();
3006 if (!extent_op)
3007 return -ENOMEM;
3008
3009 extent_op->flags_to_set = flags;
3010 extent_op->update_flags = true;
3011 extent_op->update_key = false;
3012 extent_op->is_data = is_data ? true : false;
3013 extent_op->level = level;
3014
3015 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
3016 num_bytes, extent_op);
3017 if (ret)
3018 btrfs_free_delayed_extent_op(extent_op);
3019 return ret;
3020}
3021
3022static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
3023 struct btrfs_root *root,
3024 struct btrfs_path *path,
3025 u64 objectid, u64 offset, u64 bytenr)
3026{
3027 struct btrfs_delayed_ref_head *head;
3028 struct btrfs_delayed_ref_node *ref;
3029 struct btrfs_delayed_data_ref *data_ref;
3030 struct btrfs_delayed_ref_root *delayed_refs;
3031 int ret = 0;
3032
3033 delayed_refs = &trans->transaction->delayed_refs;
3034 spin_lock(&delayed_refs->lock);
3035 head = btrfs_find_delayed_ref_head(trans, bytenr);
3036 if (!head) {
3037 spin_unlock(&delayed_refs->lock);
3038 return 0;
3039 }
3040
3041 if (!mutex_trylock(&head->mutex)) {
3042 atomic_inc(&head->node.refs);
3043 spin_unlock(&delayed_refs->lock);
3044
3045 btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let the
		 * caller try again.
		 */
3051 mutex_lock(&head->mutex);
3052 mutex_unlock(&head->mutex);
3053 btrfs_put_delayed_ref(&head->node);
3054 return -EAGAIN;
3055 }
3056 spin_unlock(&delayed_refs->lock);
3057
3058 spin_lock(&head->lock);
3059 list_for_each_entry(ref, &head->ref_list, list) {
		/* If it's a shared ref we know a cross reference exists */
3061 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3062 ret = 1;
3063 break;
3064 }
3065
3066 data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
3072 if (data_ref->root != root->root_key.objectid ||
3073 data_ref->objectid != objectid ||
3074 data_ref->offset != offset) {
3075 ret = 1;
3076 break;
3077 }
3078 }
3079 spin_unlock(&head->lock);
3080 mutex_unlock(&head->mutex);
3081 return ret;
3082}
3083
3084static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
3085 struct btrfs_root *root,
3086 struct btrfs_path *path,
3087 u64 objectid, u64 offset, u64 bytenr)
3088{
3089 struct btrfs_root *extent_root = root->fs_info->extent_root;
3090 struct extent_buffer *leaf;
3091 struct btrfs_extent_data_ref *ref;
3092 struct btrfs_extent_inline_ref *iref;
3093 struct btrfs_extent_item *ei;
3094 struct btrfs_key key;
3095 u32 item_size;
3096 int ret;
3097
3098 key.objectid = bytenr;
3099 key.offset = (u64)-1;
3100 key.type = BTRFS_EXTENT_ITEM_KEY;
3101
3102 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3103 if (ret < 0)
3104 goto out;
	BUG_ON(ret == 0); /* Corruption: an exact match on offset (u64)-1 is impossible */
3106
3107 ret = -ENOENT;
3108 if (path->slots[0] == 0)
3109 goto out;
3110
3111 path->slots[0]--;
3112 leaf = path->nodes[0];
3113 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3114
3115 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
3116 goto out;
3117
3118 ret = 1;
3119 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3120#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3121 if (item_size < sizeof(*ei)) {
3122 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3123 goto out;
3124 }
3125#endif
3126 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3127
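	/*
	 * A non-shared data extent carries exactly one inline
	 * EXTENT_DATA_REF; anything larger means another root, objectid or
	 * offset also references it.
	 */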
3128 if (item_size != sizeof(*ei) +
3129 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3130 goto out;
3131
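	/*
	 * If the extent is older than the last snapshot of this root, the
	 * snapshot may still reference it.
	 */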
3132 if (btrfs_extent_generation(leaf, ei) <=
3133 btrfs_root_last_snapshot(&root->root_item))
3134 goto out;
3135
3136 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3137 if (btrfs_extent_inline_ref_type(leaf, iref) !=
3138 BTRFS_EXTENT_DATA_REF_KEY)
3139 goto out;
3140
3141 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3142 if (btrfs_extent_refs(leaf, ei) !=
3143 btrfs_extent_data_ref_count(leaf, ref) ||
3144 btrfs_extent_data_ref_root(leaf, ref) !=
3145 root->root_key.objectid ||
3146 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3147 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3148 goto out;
3149
3150 ret = 0;
3151out:
3152 return ret;
3153}
3154
3155int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
3156 struct btrfs_root *root,
3157 u64 objectid, u64 offset, u64 bytenr)
3158{
3159 struct btrfs_path *path;
3160 int ret;
3161 int ret2;
3162
3163 path = btrfs_alloc_path();
3164 if (!path)
3165 return -ENOENT;
3166
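	/*
	 * Check the committed extent tree first, then any delayed refs;
	 * retry while the delayed ref head mutex was contended (-EAGAIN).
	 */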
3167 do {
3168 ret = check_committed_ref(trans, root, path, objectid,
3169 offset, bytenr);
3170 if (ret && ret != -ENOENT)
3171 goto out;
3172
3173 ret2 = check_delayed_ref(trans, root, path, objectid,
3174 offset, bytenr);
3175 } while (ret2 == -EAGAIN);
3176
3177 if (ret2 && ret2 != -ENOENT) {
3178 ret = ret2;
3179 goto out;
3180 }
3181
3182 if (ret != -ENOENT || ret2 != -ENOENT)
3183 ret = 0;
3184out:
3185 btrfs_free_path(path);
3186 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3187 WARN_ON(ret > 0);
3188 return ret;
3189}
3190
3191static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3192 struct btrfs_root *root,
3193 struct extent_buffer *buf,
3194 int full_backref, int inc)
3195{
3196 u64 bytenr;
3197 u64 num_bytes;
3198 u64 parent;
3199 u64 ref_root;
3200 u32 nritems;
3201 struct btrfs_key key;
3202 struct btrfs_file_extent_item *fi;
3203 int i;
3204 int level;
3205 int ret = 0;
3206 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
3207 u64, u64, u64, u64, u64, u64);
3208
3209
3210 if (btrfs_test_is_dummy_root(root))
3211 return 0;
3212
3213 ref_root = btrfs_header_owner(buf);
3214 nritems = btrfs_header_nritems(buf);
3215 level = btrfs_header_level(buf);
3216
3217 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3218 return 0;
3219
3220 if (inc)
3221 process_func = btrfs_inc_extent_ref;
3222 else
3223 process_func = btrfs_free_extent;
3224
3225 if (full_backref)
3226 parent = buf->start;
3227 else
3228 parent = 0;
3229
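	/*
	 * Walk every item in the buffer: file extent items in leaves get
	 * their data extent references updated, node pointers get tree block
	 * references updated.
	 */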
3230 for (i = 0; i < nritems; i++) {
3231 if (level == 0) {
3232 btrfs_item_key_to_cpu(buf, &key, i);
3233 if (key.type != BTRFS_EXTENT_DATA_KEY)
3234 continue;
3235 fi = btrfs_item_ptr(buf, i,
3236 struct btrfs_file_extent_item);
3237 if (btrfs_file_extent_type(buf, fi) ==
3238 BTRFS_FILE_EXTENT_INLINE)
3239 continue;
3240 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3241 if (bytenr == 0)
3242 continue;
3243
3244 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3245 key.offset -= btrfs_file_extent_offset(buf, fi);
3246 ret = process_func(trans, root, bytenr, num_bytes,
3247 parent, ref_root, key.objectid,
3248 key.offset);
3249 if (ret)
3250 goto fail;
3251 } else {
3252 bytenr = btrfs_node_blockptr(buf, i);
3253 num_bytes = root->nodesize;
3254 ret = process_func(trans, root, bytenr, num_bytes,
3255 parent, ref_root, level - 1, 0);
3256 if (ret)
3257 goto fail;
3258 }
3259 }
3260 return 0;
3261fail:
3262 return ret;
3263}
3264
3265int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3266 struct extent_buffer *buf, int full_backref)
3267{
3268 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3269}
3270
3271int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3272 struct extent_buffer *buf, int full_backref)
3273{
3274 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3275}
3276
3277static int write_one_cache_group(struct btrfs_trans_handle *trans,
3278 struct btrfs_root *root,
3279 struct btrfs_path *path,
3280 struct btrfs_block_group_cache *cache)
3281{
3282 int ret;
3283 struct btrfs_root *extent_root = root->fs_info->extent_root;
3284 unsigned long bi;
3285 struct extent_buffer *leaf;
3286
3287 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3288 if (ret) {
3289 if (ret > 0)
3290 ret = -ENOENT;
3291 goto fail;
3292 }
3293
3294 leaf = path->nodes[0];
3295 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3296 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3297 btrfs_mark_buffer_dirty(leaf);
3298fail:
3299 btrfs_release_path(path);
3300 return ret;
3301
3302}
3303
3304static struct btrfs_block_group_cache *
3305next_block_group(struct btrfs_root *root,
3306 struct btrfs_block_group_cache *cache)
3307{
3308 struct rb_node *node;
3309
3310 spin_lock(&root->fs_info->block_group_cache_lock);
3311
3312
3313 if (RB_EMPTY_NODE(&cache->cache_node)) {
3314 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3315
3316 spin_unlock(&root->fs_info->block_group_cache_lock);
3317 btrfs_put_block_group(cache);
3318 cache = btrfs_lookup_first_block_group(root->fs_info,
3319 next_bytenr);
3320 return cache;
3321 }
3322 node = rb_next(&cache->cache_node);
3323 btrfs_put_block_group(cache);
3324 if (node) {
3325 cache = rb_entry(node, struct btrfs_block_group_cache,
3326 cache_node);
3327 btrfs_get_block_group(cache);
3328 } else
3329 cache = NULL;
3330 spin_unlock(&root->fs_info->block_group_cache_lock);
3331 return cache;
3332}
3333
3334static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3335 struct btrfs_trans_handle *trans,
3336 struct btrfs_path *path)
3337{
3338 struct btrfs_root *root = block_group->fs_info->tree_root;
3339 struct inode *inode = NULL;
3340 u64 alloc_hint = 0;
3341 int dcs = BTRFS_DC_ERROR;
3342 u64 num_pages = 0;
3343 int retries = 0;
3344 int ret = 0;
3345
	/*
	 * If this block group is smaller than 100 megs, don't bother caching
	 * it.
	 */
3350 if (block_group->key.offset < (100 * SZ_1M)) {
3351 spin_lock(&block_group->lock);
3352 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3353 spin_unlock(&block_group->lock);
3354 return 0;
3355 }
3356
3357 if (trans->aborted)
3358 return 0;
3359again:
3360 inode = lookup_free_space_inode(root, block_group, path);
3361 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3362 ret = PTR_ERR(inode);
3363 btrfs_release_path(path);
3364 goto out;
3365 }
3366
3367 if (IS_ERR(inode)) {
3368 BUG_ON(retries);
3369 retries++;
3370
3371 if (block_group->ro)
3372 goto out_free;
3373
3374 ret = create_free_space_inode(root, trans, block_group, path);
3375 if (ret)
3376 goto out_free;
3377 goto again;
3378 }
3379
	/* We've already setup this transaction, go ahead and exit */
3381 if (block_group->cache_generation == trans->transid &&
3382 i_size_read(inode)) {
3383 dcs = BTRFS_DC_SETUP;
3384 goto out_put;
3385 }
3386
	/*
	 * Set the generation to 0, so if anything goes wrong from here on
	 * out we know not to trust this cache when we load up next time.
	 */
3392 BTRFS_I(inode)->generation = 0;
3393 ret = btrfs_update_inode(trans, root, inode);
3394 if (ret) {
		/*
		 * We could in theory recover by forcing the cache generation
		 * to 0, but then we'd have to track every block group that
		 * fails this way and reset its cache before the next commit
		 * or risk reading a stale cache.  To limit exposure to these
		 * edge cases just abort the transaction; this only happens
		 * in really bad situations anyway.
		 */
3405 btrfs_abort_transaction(trans, root, ret);
3406 goto out_put;
3407 }
3408 WARN_ON(ret);
3409
3410 if (i_size_read(inode) > 0) {
3411 ret = btrfs_check_trunc_cache_free_space(root,
3412 &root->fs_info->global_block_rsv);
3413 if (ret)
3414 goto out_put;
3415
3416 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
3417 if (ret)
3418 goto out_put;
3419 }
3420
3421 spin_lock(&block_group->lock);
3422 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3423 !btrfs_test_opt(root, SPACE_CACHE)) {
		/*
		 * Don't bother trying to write the cache out if
		 * a) we're not cached, or
		 * b) we're mounted with nospace_cache.
		 */
3429 dcs = BTRFS_DC_WRITTEN;
3430 spin_unlock(&block_group->lock);
3431 goto out_put;
3432 }
3433 spin_unlock(&block_group->lock);
3434
3435
3436
3437
3438
3439 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3440 ret = -ENOSPC;
3441 goto out_put;
3442 }
3443
	/*
	 * Try to preallocate enough space based on how big the block group
	 * is.  Keep in mind this has to include any pinned space which could
	 * end up taking up quite a bit since it's not folded into the other
	 * space cache.
	 */
3450 num_pages = div_u64(block_group->key.offset, SZ_256M);
3451 if (!num_pages)
3452 num_pages = 1;
3453
3454 num_pages *= 16;
3455 num_pages *= PAGE_SIZE;
3456
3457 ret = btrfs_check_data_free_space(inode, 0, num_pages);
3458 if (ret)
3459 goto out_put;
3460
3461 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3462 num_pages, num_pages,
3463 &alloc_hint);

	/*
	 * Our cache requires contiguous chunks so that we don't modify a
	 * bunch of metadata or split extents when writing the cache out.
	 * That means we can hit ENOSPC when heavily fragmented, so on ENOSPC
	 * skip setting up caches for the rest of this transaction; maybe
	 * enough space gets unpinned next time around.
	 */
3472 if (!ret)
3473 dcs = BTRFS_DC_SETUP;
3474 else if (ret == -ENOSPC)
3475 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3476 btrfs_free_reserved_data_space(inode, 0, num_pages);
3477
3478out_put:
3479 iput(inode);
3480out_free:
3481 btrfs_release_path(path);
3482out:
3483 spin_lock(&block_group->lock);
3484 if (!ret && dcs == BTRFS_DC_SETUP)
3485 block_group->cache_generation = trans->transid;
3486 block_group->disk_cache_state = dcs;
3487 spin_unlock(&block_group->lock);
3488
3489 return ret;
3490}
3491
3492int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3493 struct btrfs_root *root)
3494{
3495 struct btrfs_block_group_cache *cache, *tmp;
3496 struct btrfs_transaction *cur_trans = trans->transaction;
3497 struct btrfs_path *path;
3498
3499 if (list_empty(&cur_trans->dirty_bgs) ||
3500 !btrfs_test_opt(root, SPACE_CACHE))
3501 return 0;
3502
3503 path = btrfs_alloc_path();
3504 if (!path)
3505 return -ENOMEM;
3506
	/* Could add new block groups, use _safe just in case */
3508 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3509 dirty_list) {
3510 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3511 cache_save_setup(cache, trans, path);
3512 }
3513
3514 btrfs_free_path(path);
3515 return 0;
3516}
3517
/*
 * Transaction commit does final block group cache writeback during a
 * critical section where nothing is allowed to change the FS.  This is
 * required in order for the cache to actually match the block group, but
 * can introduce a lot of latency into the commit.
 *
 * So btrfs_start_dirty_block_groups is here to kick off block group cache
 * IO early.  There's a chance we'll have to redo some of it if the block
 * group changes again during the commit, but it greatly reduces the commit
 * latency while still letting others join the commit.
 */
3530int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3531 struct btrfs_root *root)
3532{
3533 struct btrfs_block_group_cache *cache;
3534 struct btrfs_transaction *cur_trans = trans->transaction;
3535 int ret = 0;
3536 int should_put;
3537 struct btrfs_path *path = NULL;
3538 LIST_HEAD(dirty);
3539 struct list_head *io = &cur_trans->io_bgs;
3540 int num_started = 0;
3541 int loops = 0;
3542
3543 spin_lock(&cur_trans->dirty_bgs_lock);
3544 if (list_empty(&cur_trans->dirty_bgs)) {
3545 spin_unlock(&cur_trans->dirty_bgs_lock);
3546 return 0;
3547 }
3548 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3549 spin_unlock(&cur_trans->dirty_bgs_lock);
3550
3551again:
	/*
	 * Make sure all the block groups on our dirty list actually exist.
	 */
3556 btrfs_create_pending_block_groups(trans, root);
3557
3558 if (!path) {
3559 path = btrfs_alloc_path();
3560 if (!path)
3561 return -ENOMEM;
3562 }
3563
	/*
	 * cache_write_mutex is here only to save us from balance or
	 * automatic removal of empty block groups, which is possible while
	 * we're running the dirty block groups.
	 */
3569 mutex_lock(&trans->transaction->cache_write_mutex);
3570 while (!list_empty(&dirty)) {
3571 cache = list_first_entry(&dirty,
3572 struct btrfs_block_group_cache,
3573 dirty_list);
3574
3575
3576
3577
3578
3579 if (!list_empty(&cache->io_list)) {
3580 list_del_init(&cache->io_list);
3581 btrfs_wait_cache_io(root, trans, cache,
3582 &cache->io_ctl, path,
3583 cache->key.objectid);
3584 btrfs_put_block_group(cache);
3585 }
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596 spin_lock(&cur_trans->dirty_bgs_lock);
3597 list_del_init(&cache->dirty_list);
3598 spin_unlock(&cur_trans->dirty_bgs_lock);
3599
3600 should_put = 1;
3601
3602 cache_save_setup(cache, trans, path);
3603
3604 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3605 cache->io_ctl.inode = NULL;
3606 ret = btrfs_write_out_cache(root, trans, cache, path);
3607 if (ret == 0 && cache->io_ctl.inode) {
3608 num_started++;
3609 should_put = 0;
3610
3611
3612
3613
3614
3615 list_add_tail(&cache->io_list, io);
3616 } else {
3617
3618
3619
3620
3621 ret = 0;
3622 }
3623 }
3624 if (!ret) {
3625 ret = write_one_cache_group(trans, root, path, cache);

			/*
			 * Our block group might still be attached to some
			 * other task's list of new block groups
			 * (trans_handle->new_bgs), so its block group item
			 * isn't in the extent tree yet.  Ignore the error and
			 * retry later in the critical section of the commit.
			 */
3635 if (ret == -ENOENT) {
3636 ret = 0;
3637 spin_lock(&cur_trans->dirty_bgs_lock);
3638 if (list_empty(&cache->dirty_list)) {
3639 list_add_tail(&cache->dirty_list,
3640 &cur_trans->dirty_bgs);
3641 btrfs_get_block_group(cache);
3642 }
3643 spin_unlock(&cur_trans->dirty_bgs_lock);
3644 } else if (ret) {
3645 btrfs_abort_transaction(trans, root, ret);
3646 }
3647 }
3648
		/* If it's not on the io list, we need to put the block group */
3650 if (should_put)
3651 btrfs_put_block_group(cache);
3652
3653 if (ret)
3654 break;
3655
3656
3657
3658
3659
3660
3661 mutex_unlock(&trans->transaction->cache_write_mutex);
3662 mutex_lock(&trans->transaction->cache_write_mutex);
3663 }
3664 mutex_unlock(&trans->transaction->cache_write_mutex);
3665
	/*
	 * Go through the delayed refs for everything we've just kicked off
	 * and then loop back (just once).
	 */
3670 ret = btrfs_run_delayed_refs(trans, root, 0);
3671 if (!ret && loops == 0) {
3672 loops++;
3673 spin_lock(&cur_trans->dirty_bgs_lock);
3674 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3675
3676
3677
3678
3679 if (!list_empty(&dirty)) {
3680 spin_unlock(&cur_trans->dirty_bgs_lock);
3681 goto again;
3682 }
3683 spin_unlock(&cur_trans->dirty_bgs_lock);
3684 }
3685
3686 btrfs_free_path(path);
3687 return ret;
3688}
3689
3690int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3691 struct btrfs_root *root)
3692{
3693 struct btrfs_block_group_cache *cache;
3694 struct btrfs_transaction *cur_trans = trans->transaction;
3695 int ret = 0;
3696 int should_put;
3697 struct btrfs_path *path;
3698 struct list_head *io = &cur_trans->io_bgs;
3699 int num_started = 0;
3700
3701 path = btrfs_alloc_path();
3702 if (!path)
3703 return -ENOMEM;
3704
	/*
	 * Even in the critical section of the transaction commit, concurrent
	 * tasks may still add elements to this transaction's list of dirty
	 * block groups: endio free space workers triggered by the space
	 * cache writeback (started by btrfs_start_dirty_block_groups()) can
	 * allocate new block groups while updating a free space inode.
	 * Also do the cache_save_setup first and then run the delayed refs
	 * so we have the best chance of doing this all in one shot.
	 */
3720 spin_lock(&cur_trans->dirty_bgs_lock);
3721 while (!list_empty(&cur_trans->dirty_bgs)) {
3722 cache = list_first_entry(&cur_trans->dirty_bgs,
3723 struct btrfs_block_group_cache,
3724 dirty_list);
3725
		/*
		 * This can happen if cache_save_setup re-dirties a block
		 * group that is already under IO.  Just wait for it to finish
		 * and then do it all again.
		 */
3731 if (!list_empty(&cache->io_list)) {
3732 spin_unlock(&cur_trans->dirty_bgs_lock);
3733 list_del_init(&cache->io_list);
3734 btrfs_wait_cache_io(root, trans, cache,
3735 &cache->io_ctl, path,
3736 cache->key.objectid);
3737 btrfs_put_block_group(cache);
3738 spin_lock(&cur_trans->dirty_bgs_lock);
3739 }
3740
		/*
		 * Don't remove from the dirty list until after we've waited
		 * on any pending IO.
		 */
3745 list_del_init(&cache->dirty_list);
3746 spin_unlock(&cur_trans->dirty_bgs_lock);
3747 should_put = 1;
3748
3749 cache_save_setup(cache, trans, path);
3750
3751 if (!ret)
3752 ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
3753
3754 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3755 cache->io_ctl.inode = NULL;
3756 ret = btrfs_write_out_cache(root, trans, cache, path);
3757 if (ret == 0 && cache->io_ctl.inode) {
3758 num_started++;
3759 should_put = 0;
3760 list_add_tail(&cache->io_list, io);
3761 } else {
3762
3763
3764
3765
3766 ret = 0;
3767 }
3768 }
3769 if (!ret) {
3770 ret = write_one_cache_group(trans, root, path, cache);

			/*
			 * A free space endio worker may have created a new
			 * block group while updating a free space cache's
			 * inode and not yet released its transaction handle,
			 * so its block group item is not in the extent tree
			 * yet.  In that rare case wait for all writers to
			 * finish and retry.
			 */
3784 if (ret == -ENOENT) {
3785 wait_event(cur_trans->writer_wait,
3786 atomic_read(&cur_trans->num_writers) == 1);
3787 ret = write_one_cache_group(trans, root, path,
3788 cache);
3789 }
3790 if (ret)
3791 btrfs_abort_transaction(trans, root, ret);
3792 }
3793
		/* If it's not on the io list, we need to put the block group */
3795 if (should_put)
3796 btrfs_put_block_group(cache);
3797 spin_lock(&cur_trans->dirty_bgs_lock);
3798 }
3799 spin_unlock(&cur_trans->dirty_bgs_lock);
3800
3801 while (!list_empty(io)) {
3802 cache = list_first_entry(io, struct btrfs_block_group_cache,
3803 io_list);
3804 list_del_init(&cache->io_list);
3805 btrfs_wait_cache_io(root, trans, cache,
3806 &cache->io_ctl, path, cache->key.objectid);
3807 btrfs_put_block_group(cache);
3808 }
3809
3810 btrfs_free_path(path);
3811 return ret;
3812}
3813
3814int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3815{
3816 struct btrfs_block_group_cache *block_group;
3817 int readonly = 0;
3818
3819 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3820 if (!block_group || block_group->ro)
3821 readonly = 1;
3822 if (block_group)
3823 btrfs_put_block_group(block_group);
3824 return readonly;
3825}
3826
3827static const char *alloc_name(u64 flags)
3828{
3829 switch (flags) {
3830 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3831 return "mixed";
3832 case BTRFS_BLOCK_GROUP_METADATA:
3833 return "metadata";
3834 case BTRFS_BLOCK_GROUP_DATA:
3835 return "data";
3836 case BTRFS_BLOCK_GROUP_SYSTEM:
3837 return "system";
3838 default:
3839 WARN_ON(1);
3840 return "invalid-combination";
	}
3842}
3843
3844static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3845 u64 total_bytes, u64 bytes_used,
3846 struct btrfs_space_info **space_info)
3847{
3848 struct btrfs_space_info *found;
3849 int i;
3850 int factor;
3851 int ret;
3852
3853 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3854 BTRFS_BLOCK_GROUP_RAID10))
3855 factor = 2;
3856 else
3857 factor = 1;
3858
3859 found = __find_space_info(info, flags);
3860 if (found) {
3861 spin_lock(&found->lock);
3862 found->total_bytes += total_bytes;
3863 found->disk_total += total_bytes * factor;
3864 found->bytes_used += bytes_used;
3865 found->disk_used += bytes_used * factor;
3866 if (total_bytes > 0)
3867 found->full = 0;
3868 spin_unlock(&found->lock);
3869 *space_info = found;
3870 return 0;
3871 }
3872 found = kzalloc(sizeof(*found), GFP_NOFS);
3873 if (!found)
3874 return -ENOMEM;
3875
3876 ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
3877 if (ret) {
3878 kfree(found);
3879 return ret;
3880 }
3881
3882 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3883 INIT_LIST_HEAD(&found->block_groups[i]);
3884 init_rwsem(&found->groups_sem);
3885 spin_lock_init(&found->lock);
3886 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3887 found->total_bytes = total_bytes;
3888 found->disk_total = total_bytes * factor;
3889 found->bytes_used = bytes_used;
3890 found->disk_used = bytes_used * factor;
3891 found->bytes_pinned = 0;
3892 found->bytes_reserved = 0;
3893 found->bytes_readonly = 0;
3894 found->bytes_may_use = 0;
3895 found->full = 0;
3896 found->max_extent_size = 0;
3897 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3898 found->chunk_alloc = 0;
3899 found->flush = 0;
3900 init_waitqueue_head(&found->wait);
3901 INIT_LIST_HEAD(&found->ro_bgs);
3902
3903 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3904 info->space_info_kobj, "%s",
3905 alloc_name(found->flags));
3906 if (ret) {
3907 kfree(found);
3908 return ret;
3909 }
3910
3911 *space_info = found;
3912 list_add_rcu(&found->list, &info->space_info);
3913 if (flags & BTRFS_BLOCK_GROUP_DATA)
3914 info->data_sinfo = found;
3915
3916 return ret;
3917}
3918
3919static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3920{
3921 u64 extra_flags = chunk_to_extended(flags) &
3922 BTRFS_EXTENDED_PROFILE_MASK;
3923
3924 write_seqlock(&fs_info->profiles_lock);
3925 if (flags & BTRFS_BLOCK_GROUP_DATA)
3926 fs_info->avail_data_alloc_bits |= extra_flags;
3927 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3928 fs_info->avail_metadata_alloc_bits |= extra_flags;
3929 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3930 fs_info->avail_system_alloc_bits |= extra_flags;
3931 write_sequnlock(&fs_info->profiles_lock);
3932}
3933
/*
 * Returns the target flags in extended format, or 0 if no restripe is in
 * progress for this chunk type.
 *
 * Should be called with either volume_mutex or balance_lock held.
 */
3940static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3941{
3942 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3943 u64 target = 0;
3944
3945 if (!bctl)
3946 return 0;
3947
3948 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3949 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3950 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3951 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3952 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3953 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3954 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3955 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3956 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3957 }
3958
3959 return target;
3960}
3961
/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns the reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) pick the target profile (if it is
 * already available), otherwise fall back to plain reducing.
 */
3969static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3970{
3971 u64 num_devices = root->fs_info->fs_devices->rw_devices;
3972 u64 target;
3973 u64 raid_type;
3974 u64 allowed = 0;
3975
	/*
	 * See if restripe for this chunk type is in progress; if so, try to
	 * reduce to the target profile.
	 */
3980 spin_lock(&root->fs_info->balance_lock);
3981 target = get_restripe_target(root->fs_info, flags);
3982 if (target) {
		/* Pick the target profile only if it's already available */
3984 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3985 spin_unlock(&root->fs_info->balance_lock);
3986 return extended_to_chunk(target);
3987 }
3988 }
3989 spin_unlock(&root->fs_info->balance_lock);
3990
	/* First, mask out the RAID levels which aren't possible */
3992 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
3993 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
3994 allowed |= btrfs_raid_group[raid_type];
3995 }
3996 allowed &= flags;
3997
3998 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
3999 allowed = BTRFS_BLOCK_GROUP_RAID6;
4000 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4001 allowed = BTRFS_BLOCK_GROUP_RAID5;
4002 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4003 allowed = BTRFS_BLOCK_GROUP_RAID10;
4004 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4005 allowed = BTRFS_BLOCK_GROUP_RAID1;
4006 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4007 allowed = BTRFS_BLOCK_GROUP_RAID0;
4008
4009 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
4010
4011 return extended_to_chunk(flags | allowed);
4012}
4013
4014static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
4015{
4016 unsigned seq;
4017 u64 flags;
4018
4019 do {
4020 flags = orig_flags;
4021 seq = read_seqbegin(&root->fs_info->profiles_lock);
4022
4023 if (flags & BTRFS_BLOCK_GROUP_DATA)
4024 flags |= root->fs_info->avail_data_alloc_bits;
4025 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4026 flags |= root->fs_info->avail_system_alloc_bits;
4027 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
4028 flags |= root->fs_info->avail_metadata_alloc_bits;
4029 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
4030
4031 return btrfs_reduce_alloc_profile(root, flags);
4032}
4033
4034u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
4035{
4036 u64 flags;
4037 u64 ret;
4038
4039 if (data)
4040 flags = BTRFS_BLOCK_GROUP_DATA;
4041 else if (root == root->fs_info->chunk_root)
4042 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4043 else
4044 flags = BTRFS_BLOCK_GROUP_METADATA;
4045
4046 ret = get_alloc_profile(root, flags);
4047 return ret;
4048}
4049
4050int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
4051{
4052 struct btrfs_space_info *data_sinfo;
4053 struct btrfs_root *root = BTRFS_I(inode)->root;
4054 struct btrfs_fs_info *fs_info = root->fs_info;
4055 u64 used;
4056 int ret = 0;
4057 int need_commit = 2;
4058 int have_pinned_space;
4059
	/* Make sure bytes are sectorsize aligned */
4061 bytes = ALIGN(bytes, root->sectorsize);
4062
4063 if (btrfs_is_free_space_inode(inode)) {
4064 need_commit = 0;
4065 ASSERT(current->journal_info);
4066 }
4067
4068 data_sinfo = fs_info->data_sinfo;
4069 if (!data_sinfo)
4070 goto alloc;
4071
4072again:
	/* Make sure we have enough space to handle the data first */
4074 spin_lock(&data_sinfo->lock);
4075 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
4076 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
4077 data_sinfo->bytes_may_use;
4078
4079 if (used + bytes > data_sinfo->total_bytes) {
4080 struct btrfs_trans_handle *trans;
4081
		/*
		 * If we don't have enough free bytes in this space then we
		 * need to alloc a new chunk.
		 */
4086 if (!data_sinfo->full) {
4087 u64 alloc_target;
4088
4089 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4090 spin_unlock(&data_sinfo->lock);
4091alloc:
4092 alloc_target = btrfs_get_alloc_profile(root, 1);

			/*
			 * It is ugly that we don't call a nolock join
			 * transaction for the free space inode case here, but
			 * it is safe: data space for the free space cache is
			 * only reserved from within a transaction context, so
			 * the plain join only bumps the current transaction's
			 * use count and never takes the fs trans_lock.
			 */
4103 trans = btrfs_join_transaction(root);
4104 if (IS_ERR(trans))
4105 return PTR_ERR(trans);
4106
4107 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4108 alloc_target,
4109 CHUNK_ALLOC_NO_FORCE);
4110 btrfs_end_transaction(trans, root);
4111 if (ret < 0) {
4112 if (ret != -ENOSPC)
4113 return ret;
4114 else {
4115 have_pinned_space = 1;
4116 goto commit_trans;
4117 }
4118 }
4119
4120 if (!data_sinfo)
4121 data_sinfo = fs_info->data_sinfo;
4122
4123 goto again;
4124 }
4125
		/*
		 * If we don't have enough pinned space to cover this
		 * allocation, and no chunk was removed in the current
		 * transaction, don't bother committing the transaction.
		 */
4131 have_pinned_space = percpu_counter_compare(
4132 &data_sinfo->total_bytes_pinned,
4133 used + bytes - data_sinfo->total_bytes);
4134 spin_unlock(&data_sinfo->lock);
4135
		/* Commit the current transaction and try again */
4137commit_trans:
4138 if (need_commit &&
4139 !atomic_read(&root->fs_info->open_ioctl_trans)) {
4140 need_commit--;
4141
4142 if (need_commit > 0) {
4143 btrfs_start_delalloc_roots(fs_info, 0, -1);
4144 btrfs_wait_ordered_roots(fs_info, -1);
4145 }
4146
4147 trans = btrfs_join_transaction(root);
4148 if (IS_ERR(trans))
4149 return PTR_ERR(trans);
4150 if (have_pinned_space >= 0 ||
4151 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4152 &trans->transaction->flags) ||
4153 need_commit > 0) {
4154 ret = btrfs_commit_transaction(trans, root);
4155 if (ret)
4156 return ret;
4157
4158
4159
4160
4161
4162 mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
4163 mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
4164 goto again;
4165 } else {
4166 btrfs_end_transaction(trans, root);
4167 }
4168 }
4169
4170 trace_btrfs_space_reservation(root->fs_info,
4171 "space_info:enospc",
4172 data_sinfo->flags, bytes, 1);
4173 return -ENOSPC;
4174 }
4175 data_sinfo->bytes_may_use += bytes;
4176 trace_btrfs_space_reservation(root->fs_info, "space_info",
4177 data_sinfo->flags, bytes, 1);
4178 spin_unlock(&data_sinfo->lock);
4179
4180 return ret;
4181}
4182
/*
 * Reserve data space for a write into [start, start + len).  The range is
 * rounded out to sectorsize and the qgroup reservation is taken for the
 * exact (rounded) range.
 */
4188int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
4189{
4190 struct btrfs_root *root = BTRFS_I(inode)->root;
4191 int ret;
4192
	/* Align the range to sectorsize */
4194 len = round_up(start + len, root->sectorsize) -
4195 round_down(start, root->sectorsize);
4196 start = round_down(start, root->sectorsize);
4197
4198 ret = btrfs_alloc_data_chunk_ondemand(inode, len);
4199 if (ret < 0)
4200 return ret;
4201
4202
4203
4204
4205
4206
4207
4208 ret = btrfs_qgroup_reserve_data(inode, start, len);
4209 return ret;
4210}
4211
/*
 * Free a data space reservation for this inode, normally in an error path.
 *
 * This variant does *not* touch the per-inode qgroup reservation; it is for
 * callers that know the qgroup accounting is handled elsewhere.
 */
4220void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4221 u64 len)
4222{
4223 struct btrfs_root *root = BTRFS_I(inode)->root;
4224 struct btrfs_space_info *data_sinfo;
4225
	/* Make sure the range is aligned to sectorsize */
4227 len = round_up(start + len, root->sectorsize) -
4228 round_down(start, root->sectorsize);
4229 start = round_down(start, root->sectorsize);
4230
4231 data_sinfo = root->fs_info->data_sinfo;
4232 spin_lock(&data_sinfo->lock);
4233 if (WARN_ON(data_sinfo->bytes_may_use < len))
4234 data_sinfo->bytes_may_use = 0;
4235 else
4236 data_sinfo->bytes_may_use -= len;
4237 trace_btrfs_space_reservation(root->fs_info, "space_info",
4238 data_sinfo->flags, len, 0);
4239 spin_unlock(&data_sinfo->lock);
4240}
4241
/*
 * Free a data space reservation for this inode, normally in an error path.
 * This variant also releases the qgroup reservation for the range.
 */
4249void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
4250{
4251 btrfs_free_reserved_data_space_noquota(inode, start, len);
4252 btrfs_qgroup_free_data(inode, start, len);
4253}
4254
4255static void force_metadata_allocation(struct btrfs_fs_info *info)
4256{
4257 struct list_head *head = &info->space_info;
4258 struct btrfs_space_info *found;
4259
4260 rcu_read_lock();
4261 list_for_each_entry_rcu(found, head, list) {
4262 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4263 found->force_alloc = CHUNK_ALLOC_FORCE;
4264 }
4265 rcu_read_unlock();
4266}
4267
4268static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4269{
4270 return (global->size << 1);
4271}
4272
4273static int should_alloc_chunk(struct btrfs_root *root,
4274 struct btrfs_space_info *sinfo, int force)
4275{
4276 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4277 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
4278 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
4279 u64 thresh;
4280
4281 if (force == CHUNK_ALLOC_FORCE)
4282 return 1;
4283
4284
4285
4286
4287
4288
4289 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
4290 num_allocated += calc_global_rsv_need_space(global_rsv);
4291
	/*
	 * In limited mode, we want to keep some free space available, up to
	 * about 1% of the FS size.
	 */
4296 if (force == CHUNK_ALLOC_LIMITED) {
4297 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
4298 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
4299
4300 if (num_bytes - num_allocated < thresh)
4301 return 1;
4302 }
4303
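	/* Otherwise only allocate once about 80% of the existing space is used */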
4304 if (num_allocated + SZ_2M < div_factor(num_bytes, 8))
4305 return 0;
4306 return 1;
4307}
4308
4309static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
4310{
4311 u64 num_dev;
4312
4313 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4314 BTRFS_BLOCK_GROUP_RAID0 |
4315 BTRFS_BLOCK_GROUP_RAID5 |
4316 BTRFS_BLOCK_GROUP_RAID6))
4317 num_dev = root->fs_info->fs_devices->rw_devices;
4318 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4319 num_dev = 2;
4320 else
4321 num_dev = 1;
4322
4323 return num_dev;
4324}
4325
/*
 * Make sure the SYSTEM space info has room for the chunk tree updates
 * (device items plus the chunk item) that allocating or removing a chunk
 * of @type will require.
 */
4331void check_system_chunk(struct btrfs_trans_handle *trans,
4332 struct btrfs_root *root,
4333 u64 type)
4334{
4335 struct btrfs_space_info *info;
4336 u64 left;
4337 u64 thresh;
4338 int ret = 0;
4339 u64 num_devs;
4340
4341
4342
4343
4344
4345 ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
4346
4347 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4348 spin_lock(&info->lock);
4349 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
4350 info->bytes_reserved - info->bytes_readonly -
4351 info->bytes_may_use;
4352 spin_unlock(&info->lock);
4353
4354 num_devs = get_profile_num_devs(root, type);
4355
	/* num_devs device items to update and 1 chunk item to add or remove */
4357 thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
4358 btrfs_calc_trans_metadata_size(root, 1);
4359
4360 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
4361 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
4362 left, thresh, type);
4363 dump_space_info(info, 0, 0);
4364 }
4365
4366 if (left < thresh) {
4367 u64 flags;
4368
4369 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
4370
		/*
		 * Ignore failure to create the system chunk; we might not end
		 * up needing it, as we might not need to COW all the nodes
		 * and leaves on the paths we visit.
		 */
4376 ret = btrfs_alloc_chunk(trans, root, flags);
4377 }
4378
4379 if (!ret) {
4380 ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
4381 &root->fs_info->chunk_block_rsv,
4382 thresh, BTRFS_RESERVE_NO_FLUSH);
4383 if (!ret)
4384 trans->chunk_bytes_reserved += thresh;
4385 }
4386}
4387
4388static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4389 struct btrfs_root *extent_root, u64 flags, int force)
4390{
4391 struct btrfs_space_info *space_info;
4392 struct btrfs_fs_info *fs_info = extent_root->fs_info;
4393 int wait_for_alloc = 0;
4394 int ret = 0;
4395
	/* Don't re-enter if we're already allocating a chunk */
4397 if (trans->allocating_chunk)
4398 return -ENOSPC;
4399
4400 space_info = __find_space_info(extent_root->fs_info, flags);
4401 if (!space_info) {
4402 ret = update_space_info(extent_root->fs_info, flags,
4403 0, 0, &space_info);
4404 BUG_ON(ret);
4405 }
4406 BUG_ON(!space_info);
4407
4408again:
4409 spin_lock(&space_info->lock);
4410 if (force < space_info->force_alloc)
4411 force = space_info->force_alloc;
4412 if (space_info->full) {
4413 if (should_alloc_chunk(extent_root, space_info, force))
4414 ret = -ENOSPC;
4415 else
4416 ret = 0;
4417 spin_unlock(&space_info->lock);
4418 return ret;
4419 }
4420
4421 if (!should_alloc_chunk(extent_root, space_info, force)) {
4422 spin_unlock(&space_info->lock);
4423 return 0;
4424 } else if (space_info->chunk_alloc) {
4425 wait_for_alloc = 1;
4426 } else {
4427 space_info->chunk_alloc = 1;
4428 }
4429
4430 spin_unlock(&space_info->lock);
4431
4432 mutex_lock(&fs_info->chunk_mutex);
4433
4434
4435
4436
4437
4438
4439
4440 if (wait_for_alloc) {
4441 mutex_unlock(&fs_info->chunk_mutex);
4442 wait_for_alloc = 0;
4443 goto again;
4444 }
4445
4446 trans->allocating_chunk = true;
4447
	/*
	 * If we have mixed data/metadata chunks we want to keep allocating
	 * mixed chunks instead of individual chunks.
	 */
4452 if (btrfs_mixed_space_info(space_info))
4453 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4454
4455
4456
4457
4458
4459
4460 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
4461 fs_info->data_chunk_allocations++;
4462 if (!(fs_info->data_chunk_allocations %
4463 fs_info->metadata_ratio))
4464 force_metadata_allocation(fs_info);
4465 }
4466
4467
4468
4469
4470
4471 check_system_chunk(trans, extent_root, flags);
4472
4473 ret = btrfs_alloc_chunk(trans, extent_root, flags);
4474 trans->allocating_chunk = false;
4475
4476 spin_lock(&space_info->lock);
4477 if (ret < 0 && ret != -ENOSPC)
4478 goto out;
4479 if (ret)
4480 space_info->full = 1;
4481 else
4482 ret = 1;
4483
4484 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4485out:
4486 space_info->chunk_alloc = 0;
4487 spin_unlock(&space_info->lock);
4488 mutex_unlock(&fs_info->chunk_mutex);
4489
	/*
	 * When we allocate a new chunk we reserve space in the chunk block
	 * reserve so we can COW nodes/leaves of the chunk tree during the
	 * second phase of chunk allocation (btrfs_finish_chunk_alloc()).
	 * Don't let a single transaction accumulate a huge new_bgs list or
	 * that reserve can be exhausted, so flush pending block groups once
	 * enough chunk metadata has been reserved.
	 */
4503 if (trans->can_flush_pending_bgs &&
4504 trans->chunk_bytes_reserved >= (u64)SZ_2M) {
4505 btrfs_create_pending_block_groups(trans, trans->root);
4506 btrfs_trans_release_chunk_metadata(trans);
4507 }
4508 return ret;
4509}
4510
4511static int can_overcommit(struct btrfs_root *root,
4512 struct btrfs_space_info *space_info, u64 bytes,
4513 enum btrfs_reserve_flush_enum flush)
4514{
4515 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4516 u64 profile = btrfs_get_alloc_profile(root, 0);
4517 u64 space_size;
4518 u64 avail;
4519 u64 used;
4520
4521 used = space_info->bytes_used + space_info->bytes_reserved +
4522 space_info->bytes_pinned + space_info->bytes_readonly;
4523
	/*
	 * We only want to allow overcommitting if we have lots of actual
	 * space free; if there isn't enough space left for the global
	 * reserve we could hit a real ENOSPC when trying to allocate a chunk
	 * or some other important allocation.
	 */
4530 spin_lock(&global_rsv->lock);
4531 space_size = calc_global_rsv_need_space(global_rsv);
4532 spin_unlock(&global_rsv->lock);
4533 if (used + space_size >= space_info->total_bytes)
4534 return 0;
4535
4536 used += space_info->bytes_may_use;
4537
4538 spin_lock(&root->fs_info->free_chunk_lock);
4539 avail = root->fs_info->free_chunk_space;
4540 spin_unlock(&root->fs_info->free_chunk_lock);
4541
	/*
	 * If we have dup, raid1 or raid10 then only half of the free space
	 * is actually usable.  For raid56 the space info doesn't include the
	 * parity drive, so no adjustment is needed there.
	 */
4548 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4549 BTRFS_BLOCK_GROUP_RAID1 |
4550 BTRFS_BLOCK_GROUP_RAID10))
4551 avail >>= 1;
4552
	/*
	 * If we can flush everything (BTRFS_RESERVE_FLUSH_ALL), don't let us
	 * overcommit too much - 1/8th of the free space.  If we can't flush,
	 * allow overcommitting up to 1/2.
	 */
4558 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4559 avail >>= 3;
4560 else
4561 avail >>= 1;
4562
4563 if (used + bytes < space_info->total_bytes + avail)
4564 return 1;
4565 return 0;
4566}
4567
4568static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
4569 unsigned long nr_pages, int nr_items)
4570{
4571 struct super_block *sb = root->fs_info->sb;
4572
4573 if (down_read_trylock(&sb->s_umount)) {
4574 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4575 up_read(&sb->s_umount);
4576 } else {
4577
4578
4579
4580
4581
4582
4583
4584 btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
4585 if (!current->journal_info)
4586 btrfs_wait_ordered_roots(root->fs_info, nr_items);
4587 }
4588}
4589
4590static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
4591{
4592 u64 bytes;
4593 int nr;
4594
4595 bytes = btrfs_calc_trans_metadata_size(root, 1);
4596 nr = (int)div64_u64(to_reclaim, bytes);
4597 if (!nr)
4598 nr = 1;
4599 return nr;
4600}
4601
4602#define EXTENT_SIZE_PER_ITEM SZ_256K
4603
/*
 * Shrink metadata reservations by flushing delalloc for this root.
 */
4607static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4608 bool wait_ordered)
4609{
4610 struct btrfs_block_rsv *block_rsv;
4611 struct btrfs_space_info *space_info;
4612 struct btrfs_trans_handle *trans;
4613 u64 delalloc_bytes;
4614 u64 max_reclaim;
4615 long time_left;
4616 unsigned long nr_pages;
4617 int loops;
4618 int items;
4619 enum btrfs_reserve_flush_enum flush;
4620
	/* Work out how many delalloc items we need to flush for this reservation */
4622 items = calc_reclaim_items_nr(root, to_reclaim);
4623 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4624
4625 trans = (struct btrfs_trans_handle *)current->journal_info;
4626 block_rsv = &root->fs_info->delalloc_block_rsv;
4627 space_info = block_rsv->space_info;
4628
4629 delalloc_bytes = percpu_counter_sum_positive(
4630 &root->fs_info->delalloc_bytes);
4631 if (delalloc_bytes == 0) {
4632 if (trans)
4633 return;
4634 if (wait_ordered)
4635 btrfs_wait_ordered_roots(root->fs_info, items);
4636 return;
4637 }
4638
4639 loops = 0;
4640 while (delalloc_bytes && loops < 3) {
4641 max_reclaim = min(delalloc_bytes, to_reclaim);
4642 nr_pages = max_reclaim >> PAGE_SHIFT;
4643 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
4644
4645
4646
4647
4648 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4649 if (!max_reclaim)
4650 goto skip_async;
4651
4652 if (max_reclaim <= nr_pages)
4653 max_reclaim = 0;
4654 else
4655 max_reclaim -= nr_pages;
4656
4657 wait_event(root->fs_info->async_submit_wait,
4658 atomic_read(&root->fs_info->async_delalloc_pages) <=
4659 (int)max_reclaim);
4660skip_async:
4661 if (!trans)
4662 flush = BTRFS_RESERVE_FLUSH_ALL;
4663 else
4664 flush = BTRFS_RESERVE_NO_FLUSH;
4665 spin_lock(&space_info->lock);
4666 if (can_overcommit(root, space_info, orig, flush)) {
4667 spin_unlock(&space_info->lock);
4668 break;
4669 }
4670 spin_unlock(&space_info->lock);
4671
4672 loops++;
4673 if (wait_ordered && !trans) {
4674 btrfs_wait_ordered_roots(root->fs_info, items);
4675 } else {
4676 time_left = schedule_timeout_killable(1);
4677 if (time_left)
4678 break;
4679 }
4680 delalloc_bytes = percpu_counter_sum_positive(
4681 &root->fs_info->delalloc_bytes);
4682 }
4683}
4684
/**
 * may_commit_transaction - commit the transaction if it is worthwhile
 * @root: the root we're allocating for
 * @space_info: the space_info we're trying to reserve from
 * @bytes: the number of bytes we want to reserve
 * @force: commit unconditionally
 *
 * Check whether committing the transaction will actually free enough pinned
 * space to satisfy the reservation and, if so, commit it.  Otherwise return
 * -ENOSPC without committing.
 */
4695static int may_commit_transaction(struct btrfs_root *root,
4696 struct btrfs_space_info *space_info,
4697 u64 bytes, int force)
4698{
4699 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4700 struct btrfs_trans_handle *trans;
4701
4702 trans = (struct btrfs_trans_handle *)current->journal_info;
4703 if (trans)
4704 return -EAGAIN;
4705
4706 if (force)
4707 goto commit;
4708
 /* See if there is enough pinned space to satisfy this reservation */
4710 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4711 bytes) >= 0)
4712 goto commit;
4713
 /*
  * See if there is some space in the delayed insertion reserve for this
  * reservation as well.
  */
4718 if (space_info != delayed_rsv->space_info)
4719 return -ENOSPC;
4720
4721 spin_lock(&delayed_rsv->lock);
4722 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4723 bytes - delayed_rsv->size) >= 0) {
4724 spin_unlock(&delayed_rsv->lock);
4725 return -ENOSPC;
4726 }
4727 spin_unlock(&delayed_rsv->lock);
4728
4729commit:
4730 trans = btrfs_join_transaction(root);
4731 if (IS_ERR(trans))
4732 return -ENOSPC;
4733
4734 return btrfs_commit_transaction(trans, root);
4735}
4736
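/*
 * Flushing states for flush_space(), tried by reserve_metadata_bytes() in
 * ascending order of cost until the reservation can be satisfied.
 */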
4737enum flush_state {
4738 FLUSH_DELAYED_ITEMS_NR = 1,
4739 FLUSH_DELAYED_ITEMS = 2,
4740 FLUSH_DELALLOC = 3,
4741 FLUSH_DELALLOC_WAIT = 4,
4742 ALLOC_CHUNK = 5,
4743 COMMIT_TRANS = 6,
4744};
4745
4746static int flush_space(struct btrfs_root *root,
4747 struct btrfs_space_info *space_info, u64 num_bytes,
4748 u64 orig_bytes, int state)
4749{
4750 struct btrfs_trans_handle *trans;
4751 int nr;
4752 int ret = 0;
4753
4754 switch (state) {
4755 case FLUSH_DELAYED_ITEMS_NR:
4756 case FLUSH_DELAYED_ITEMS:
4757 if (state == FLUSH_DELAYED_ITEMS_NR)
4758 nr = calc_reclaim_items_nr(root, num_bytes) * 2;
4759 else
4760 nr = -1;
4761
4762 trans = btrfs_join_transaction(root);
4763 if (IS_ERR(trans)) {
4764 ret = PTR_ERR(trans);
4765 break;
4766 }
4767 ret = btrfs_run_delayed_items_nr(trans, root, nr);
4768 btrfs_end_transaction(trans, root);
4769 break;
4770 case FLUSH_DELALLOC:
4771 case FLUSH_DELALLOC_WAIT:
4772 shrink_delalloc(root, num_bytes * 2, orig_bytes,
4773 state == FLUSH_DELALLOC_WAIT);
4774 break;
4775 case ALLOC_CHUNK:
4776 trans = btrfs_join_transaction(root);
4777 if (IS_ERR(trans)) {
4778 ret = PTR_ERR(trans);
4779 break;
4780 }
4781 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4782 btrfs_get_alloc_profile(root, 0),
4783 CHUNK_ALLOC_NO_FORCE);
4784 btrfs_end_transaction(trans, root);
4785 if (ret == -ENOSPC)
4786 ret = 0;
4787 break;
4788 case COMMIT_TRANS:
4789 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
4790 break;
4791 default:
4792 ret = -ENOSPC;
4793 break;
4794 }
4795
4796 return ret;
4797}
4798
4799static inline u64
4800btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4801 struct btrfs_space_info *space_info)
4802{
4803 u64 used;
4804 u64 expected;
4805 u64 to_reclaim;
4806
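 /* Default reclaim target: 1MiB per online CPU, capped at 16MiB */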
4807 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4808 spin_lock(&space_info->lock);
4809 if (can_overcommit(root, space_info, to_reclaim,
4810 BTRFS_RESERVE_FLUSH_ALL)) {
4811 to_reclaim = 0;
4812 goto out;
4813 }
4814
4815 used = space_info->bytes_used + space_info->bytes_reserved +
4816 space_info->bytes_pinned + space_info->bytes_readonly +
4817 space_info->bytes_may_use;
4818 if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
4819 expected = div_factor_fine(space_info->total_bytes, 95);
4820 else
4821 expected = div_factor_fine(space_info->total_bytes, 90);
4822
4823 if (used > expected)
4824 to_reclaim = used - expected;
4825 else
4826 to_reclaim = 0;
4827 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4828 space_info->bytes_reserved);
4829out:
4830 spin_unlock(&space_info->lock);
4831
4832 return to_reclaim;
4833}
4834
4835static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4836 struct btrfs_fs_info *fs_info, u64 used)
4837{
4838 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4839
 /* If we're basically full then async reclaim just slows us down */
4841 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4842 return 0;
4843
4844 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4845 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4846}
4847
4848static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
4849 struct btrfs_fs_info *fs_info,
4850 int flush_state)
4851{
4852 u64 used;
4853
4854 spin_lock(&space_info->lock);
 /*
  * We have run out of space and flushing has not freed anything, so
  * don't bother with async reclaim.
  */
4859 if (flush_state > COMMIT_TRANS && space_info->full) {
4860 spin_unlock(&space_info->lock);
4861 return 0;
4862 }
4863
4864 used = space_info->bytes_used + space_info->bytes_reserved +
4865 space_info->bytes_pinned + space_info->bytes_readonly +
4866 space_info->bytes_may_use;
4867 if (need_do_async_reclaim(space_info, fs_info, used)) {
4868 spin_unlock(&space_info->lock);
4869 return 1;
4870 }
4871 spin_unlock(&space_info->lock);
4872
4873 return 0;
4874}
4875
4876static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4877{
4878 struct btrfs_fs_info *fs_info;
4879 struct btrfs_space_info *space_info;
4880 u64 to_reclaim;
4881 int flush_state;
4882
4883 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4884 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4885
4886 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
4887 space_info);
4888 if (!to_reclaim)
4889 return;
4890
4891 flush_state = FLUSH_DELAYED_ITEMS_NR;
4892 do {
4893 flush_space(fs_info->fs_root, space_info, to_reclaim,
4894 to_reclaim, flush_state);
4895 flush_state++;
4896 if (!btrfs_need_do_async_reclaim(space_info, fs_info,
4897 flush_state))
4898 return;
4899 } while (flush_state < COMMIT_TRANS);
4900}
4901
4902void btrfs_init_async_reclaim_work(struct work_struct *work)
4903{
4904 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
4905}
4906
/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root: the root we're allocating for
 * @block_rsv: the block_rsv we're allocating for
 * @orig_bytes: the number of bytes we want
 * @flush: whether or not we can flush to make our reservation
 *
 * This reserves orig_bytes from the space_info associated with the block_rsv.
 * If there is not enough space it will try to flush out space to make room,
 * by flushing delalloc or committing the transaction depending on @flush.
 * With BTRFS_RESERVE_NO_FLUSH no attempt is made to regain reservations and
 * the call fails if the space is not already available.
 */
4921static int reserve_metadata_bytes(struct btrfs_root *root,
4922 struct btrfs_block_rsv *block_rsv,
4923 u64 orig_bytes,
4924 enum btrfs_reserve_flush_enum flush)
4925{
4926 struct btrfs_space_info *space_info = block_rsv->space_info;
4927 u64 used;
4928 u64 num_bytes = orig_bytes;
4929 int flush_state = FLUSH_DELAYED_ITEMS_NR;
4930 int ret = 0;
4931 bool flushing = false;
4932
4933again:
4934 ret = 0;
4935 spin_lock(&space_info->lock);
 /*
  * We only want to wait if somebody other than us is flushing and we are
  * actually allowed to flush all things.
  */
4940 while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
4941 space_info->flush) {
4942 spin_unlock(&space_info->lock);
4943
 /*
  * If we have a transaction handle we can't wait here because the
  * flusher may have to commit the transaction, which would mean we
  * would deadlock on ourselves.
  */
4949 if (current->journal_info)
4950 return -EAGAIN;
4951 ret = wait_event_killable(space_info->wait, !space_info->flush);
4952
4953 if (ret)
4954 return -EINTR;
4955
4956 spin_lock(&space_info->lock);
4957 }
4958
4959 ret = -ENOSPC;
4960 used = space_info->bytes_used + space_info->bytes_reserved +
4961 space_info->bytes_pinned + space_info->bytes_readonly +
4962 space_info->bytes_may_use;
4963
 /*
  * If we haven't already over-reserved this space_info, save our
  * reservation first and only then start flushing if we have to.
  * Otherwise, if we are already overcommitted, start flushing first and
  * come back afterwards to try to make the reservation.
  */
4971 if (used <= space_info->total_bytes) {
4972 if (used + orig_bytes <= space_info->total_bytes) {
4973 space_info->bytes_may_use += orig_bytes;
4974 trace_btrfs_space_reservation(root->fs_info,
4975 "space_info", space_info->flags, orig_bytes, 1);
4976 ret = 0;
4977 } else {
 /*
  * Set num_bytes to orig_bytes since we aren't overcommitted; this way
  * we only try to reclaim what we actually need.
  */
4983 num_bytes = orig_bytes;
4984 }
4985 } else {
 /*
  * We are overcommitted: set num_bytes to the overcommitted amount plus
  * twice the amount we are asking for.
  */
4991 num_bytes = used - space_info->total_bytes +
4992 (orig_bytes * 2);
4993 }
4994
4995 if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
4996 space_info->bytes_may_use += orig_bytes;
4997 trace_btrfs_space_reservation(root->fs_info, "space_info",
4998 space_info->flags, orig_bytes,
4999 1);
5000 ret = 0;
5001 }
5002
 /*
  * Couldn't make our reservation.  Save our place so that while we are
  * trying to reclaim space we can actually use it instead of somebody
  * else stealing it from us.
  *
  * We only make the other tasks wait for the flush when we can flush
  * all things.
  */
5011 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5012 flushing = true;
5013 space_info->flush = 1;
5014 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5015 used += orig_bytes;
 /*
  * The space reservation dance also happens during log replay, when
  * fs_info->fs_root is not yet set, so don't kick off async reclaim
  * in that case.
  */
5021 if (!root->fs_info->log_root_recovering &&
5022 need_do_async_reclaim(space_info, root->fs_info, used) &&
5023 !work_busy(&root->fs_info->async_reclaim_work))
5024 queue_work(system_unbound_wq,
5025 &root->fs_info->async_reclaim_work);
5026 }
5027 spin_unlock(&space_info->lock);
5028
5029 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5030 goto out;
5031
5032 ret = flush_space(root, space_info, num_bytes, orig_bytes,
5033 flush_state);
5034 flush_state++;
5035
 /*
  * With BTRFS_RESERVE_FLUSH_LIMIT we cannot flush delalloc without
  * risking a deadlock, so skip the delalloc flushing states.
  */
5040 if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
5041 (flush_state == FLUSH_DELALLOC ||
5042 flush_state == FLUSH_DELALLOC_WAIT))
5043 flush_state = ALLOC_CHUNK;
5044
5045 if (!ret)
5046 goto again;
5047 else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
5048 flush_state < COMMIT_TRANS)
5049 goto again;
5050 else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
5051 flush_state <= COMMIT_TRANS)
5052 goto again;
5053
5054out:
5055 if (ret == -ENOSPC &&
5056 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5057 struct btrfs_block_rsv *global_rsv =
5058 &root->fs_info->global_block_rsv;
5059
5060 if (block_rsv != global_rsv &&
5061 !block_rsv_use_bytes(global_rsv, orig_bytes))
5062 ret = 0;
5063 }
5064 if (ret == -ENOSPC)
5065 trace_btrfs_space_reservation(root->fs_info,
5066 "space_info:enospc",
5067 space_info->flags, orig_bytes, 1);
5068 if (flushing) {
5069 spin_lock(&space_info->lock);
5070 space_info->flush = 0;
5071 wake_up_all(&space_info->wait);
5072 spin_unlock(&space_info->lock);
5073 }
5074 return ret;
5075}
5076
5077static struct btrfs_block_rsv *get_block_rsv(
5078 const struct btrfs_trans_handle *trans,
5079 const struct btrfs_root *root)
5080{
5081 struct btrfs_block_rsv *block_rsv = NULL;
5082
5083 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5084 (root == root->fs_info->csum_root && trans->adding_csums) ||
5085 (root == root->fs_info->uuid_root))
5086 block_rsv = trans->block_rsv;
5087
5088 if (!block_rsv)
5089 block_rsv = root->block_rsv;
5090
5091 if (!block_rsv)
5092 block_rsv = &root->fs_info->empty_block_rsv;
5093
5094 return block_rsv;
5095}
5096
5097static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5098 u64 num_bytes)
5099{
5100 int ret = -ENOSPC;
5101 spin_lock(&block_rsv->lock);
5102 if (block_rsv->reserved >= num_bytes) {
5103 block_rsv->reserved -= num_bytes;
5104 if (block_rsv->reserved < block_rsv->size)
5105 block_rsv->full = 0;
5106 ret = 0;
5107 }
5108 spin_unlock(&block_rsv->lock);
5109 return ret;
5110}
5111
5112static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5113 u64 num_bytes, int update_size)
5114{
5115 spin_lock(&block_rsv->lock);
5116 block_rsv->reserved += num_bytes;
5117 if (update_size)
5118 block_rsv->size += num_bytes;
5119 else if (block_rsv->reserved >= block_rsv->size)
5120 block_rsv->full = 1;
5121 spin_unlock(&block_rsv->lock);
5122}
5123
5124int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5125 struct btrfs_block_rsv *dest, u64 num_bytes,
5126 int min_factor)
5127{
5128 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5129 u64 min_bytes;
5130
5131 if (global_rsv->space_info != dest->space_info)
5132 return -ENOSPC;
5133
5134 spin_lock(&global_rsv->lock);
5135 min_bytes = div_factor(global_rsv->size, min_factor);
5136 if (global_rsv->reserved < min_bytes + num_bytes) {
5137 spin_unlock(&global_rsv->lock);
5138 return -ENOSPC;
5139 }
5140 global_rsv->reserved -= num_bytes;
5141 if (global_rsv->reserved < global_rsv->size)
5142 global_rsv->full = 0;
5143 spin_unlock(&global_rsv->lock);
5144
5145 block_rsv_add_bytes(dest, num_bytes, 1);
5146 return 0;
5147}
5148
5149static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5150 struct btrfs_block_rsv *block_rsv,
5151 struct btrfs_block_rsv *dest, u64 num_bytes)
5152{
5153 struct btrfs_space_info *space_info = block_rsv->space_info;
5154
5155 spin_lock(&block_rsv->lock);
5156 if (num_bytes == (u64)-1)
5157 num_bytes = block_rsv->size;
5158 block_rsv->size -= num_bytes;
5159 if (block_rsv->reserved >= block_rsv->size) {
5160 num_bytes = block_rsv->reserved - block_rsv->size;
5161 block_rsv->reserved = block_rsv->size;
5162 block_rsv->full = 1;
5163 } else {
5164 num_bytes = 0;
5165 }
5166 spin_unlock(&block_rsv->lock);
5167
5168 if (num_bytes > 0) {
5169 if (dest) {
5170 spin_lock(&dest->lock);
5171 if (!dest->full) {
5172 u64 bytes_to_add;
5173
5174 bytes_to_add = dest->size - dest->reserved;
5175 bytes_to_add = min(num_bytes, bytes_to_add);
5176 dest->reserved += bytes_to_add;
5177 if (dest->reserved >= dest->size)
5178 dest->full = 1;
5179 num_bytes -= bytes_to_add;
5180 }
5181 spin_unlock(&dest->lock);
5182 }
5183 if (num_bytes) {
5184 spin_lock(&space_info->lock);
5185 space_info->bytes_may_use -= num_bytes;
5186 trace_btrfs_space_reservation(fs_info, "space_info",
5187 space_info->flags, num_bytes, 0);
5188 spin_unlock(&space_info->lock);
5189 }
5190 }
5191}
5192
5193static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
5194 struct btrfs_block_rsv *dst, u64 num_bytes)
5195{
5196 int ret;
5197
5198 ret = block_rsv_use_bytes(src, num_bytes);
5199 if (ret)
5200 return ret;
5201
5202 block_rsv_add_bytes(dst, num_bytes, 1);
5203 return 0;
5204}
5205
5206void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5207{
5208 memset(rsv, 0, sizeof(*rsv));
5209 spin_lock_init(&rsv->lock);
5210 rsv->type = type;
5211}
5212
5213struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
5214 unsigned short type)
5215{
5216 struct btrfs_block_rsv *block_rsv;
5217 struct btrfs_fs_info *fs_info = root->fs_info;
5218
5219 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5220 if (!block_rsv)
5221 return NULL;
5222
5223 btrfs_init_block_rsv(block_rsv, type);
5224 block_rsv->space_info = __find_space_info(fs_info,
5225 BTRFS_BLOCK_GROUP_METADATA);
5226 return block_rsv;
5227}
5228
5229void btrfs_free_block_rsv(struct btrfs_root *root,
5230 struct btrfs_block_rsv *rsv)
5231{
5232 if (!rsv)
5233 return;
5234 btrfs_block_rsv_release(root, rsv, (u64)-1);
5235 kfree(rsv);
5236}
5237
5238void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
5239{
5240 kfree(rsv);
5241}
5242
5243int btrfs_block_rsv_add(struct btrfs_root *root,
5244 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5245 enum btrfs_reserve_flush_enum flush)
5246{
5247 int ret;
5248
5249 if (num_bytes == 0)
5250 return 0;
5251
5252 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5253 if (!ret) {
5254 block_rsv_add_bytes(block_rsv, num_bytes, 1);
5255 return 0;
5256 }
5257
5258 return ret;
5259}
5260
5261int btrfs_block_rsv_check(struct btrfs_root *root,
5262 struct btrfs_block_rsv *block_rsv, int min_factor)
5263{
5264 u64 num_bytes = 0;
5265 int ret = -ENOSPC;
5266
5267 if (!block_rsv)
5268 return 0;
5269
5270 spin_lock(&block_rsv->lock);
5271 num_bytes = div_factor(block_rsv->size, min_factor);
5272 if (block_rsv->reserved >= num_bytes)
5273 ret = 0;
5274 spin_unlock(&block_rsv->lock);
5275
5276 return ret;
5277}
5278
5279int btrfs_block_rsv_refill(struct btrfs_root *root,
5280 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5281 enum btrfs_reserve_flush_enum flush)
5282{
5283 u64 num_bytes = 0;
5284 int ret = -ENOSPC;
5285
5286 if (!block_rsv)
5287 return 0;
5288
5289 spin_lock(&block_rsv->lock);
5290 num_bytes = min_reserved;
5291 if (block_rsv->reserved >= num_bytes)
5292 ret = 0;
5293 else
5294 num_bytes -= block_rsv->reserved;
5295 spin_unlock(&block_rsv->lock);
5296
5297 if (!ret)
5298 return 0;
5299
5300 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5301 if (!ret) {
5302 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5303 return 0;
5304 }
5305
5306 return ret;
5307}
5308
5309int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
5310 struct btrfs_block_rsv *dst_rsv,
5311 u64 num_bytes)
5312{
5313 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5314}
5315
5316void btrfs_block_rsv_release(struct btrfs_root *root,
5317 struct btrfs_block_rsv *block_rsv,
5318 u64 num_bytes)
5319{
5320 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5321 if (global_rsv == block_rsv ||
5322 block_rsv->space_info != global_rsv->space_info)
5323 global_rsv = NULL;
5324 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
5325 num_bytes);
5326}
5327
/*
 * Helper to calculate the size of the global block reservation.  The desired
 * value approximates the space needed by the extent tree, the checksum tree
 * and the root tree.
 */
5333static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
5334{
5335 struct btrfs_space_info *sinfo;
5336 u64 num_bytes;
5337 u64 meta_used;
5338 u64 data_used;
5339 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
5340
5341 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
5342 spin_lock(&sinfo->lock);
5343 data_used = sinfo->bytes_used;
5344 spin_unlock(&sinfo->lock);
5345
5346 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5347 spin_lock(&sinfo->lock);
5348 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
5349 data_used = 0;
5350 meta_used = sinfo->bytes_used;
5351 spin_unlock(&sinfo->lock);
5352
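 /*
  * Rough estimate: room for two copies of the csums covering all data,
  * plus 2% of the data and metadata in use, capped at a third of the
  * metadata currently used.
  */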
5353 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
5354 csum_size * 2;
5355 num_bytes += div_u64(data_used + meta_used, 50);
5356
5357 if (num_bytes * 3 > meta_used)
5358 num_bytes = div_u64(meta_used, 3);
5359
5360 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
5361}
5362
5363static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5364{
5365 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5366 struct btrfs_space_info *sinfo = block_rsv->space_info;
5367 u64 num_bytes;
5368
5369 num_bytes = calc_global_metadata_size(fs_info);
5370
5371 spin_lock(&sinfo->lock);
5372 spin_lock(&block_rsv->lock);
5373
5374 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5375
5376 if (block_rsv->reserved < block_rsv->size) {
5377 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
5378 sinfo->bytes_reserved + sinfo->bytes_readonly +
5379 sinfo->bytes_may_use;
5380 if (sinfo->total_bytes > num_bytes) {
5381 num_bytes = sinfo->total_bytes - num_bytes;
5382 num_bytes = min(num_bytes,
5383 block_rsv->size - block_rsv->reserved);
5384 block_rsv->reserved += num_bytes;
5385 sinfo->bytes_may_use += num_bytes;
5386 trace_btrfs_space_reservation(fs_info, "space_info",
5387 sinfo->flags, num_bytes,
5388 1);
5389 }
5390 } else if (block_rsv->reserved > block_rsv->size) {
5391 num_bytes = block_rsv->reserved - block_rsv->size;
5392 sinfo->bytes_may_use -= num_bytes;
5393 trace_btrfs_space_reservation(fs_info, "space_info",
5394 sinfo->flags, num_bytes, 0);
5395 block_rsv->reserved = block_rsv->size;
5396 }
5397
5398 if (block_rsv->reserved == block_rsv->size)
5399 block_rsv->full = 1;
5400 else
5401 block_rsv->full = 0;
5402
5403 spin_unlock(&block_rsv->lock);
5404 spin_unlock(&sinfo->lock);
5405}
5406
5407static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5408{
5409 struct btrfs_space_info *space_info;
5410
5411 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5412 fs_info->chunk_block_rsv.space_info = space_info;
5413
5414 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5415 fs_info->global_block_rsv.space_info = space_info;
5416 fs_info->delalloc_block_rsv.space_info = space_info;
5417 fs_info->trans_block_rsv.space_info = space_info;
5418 fs_info->empty_block_rsv.space_info = space_info;
5419 fs_info->delayed_block_rsv.space_info = space_info;
5420
5421 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
5422 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
5423 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5424 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5425 if (fs_info->quota_root)
5426 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5427 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5428
5429 update_global_block_rsv(fs_info);
5430}
5431
5432static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5433{
5434 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5435 (u64)-1);
5436 WARN_ON(fs_info->delalloc_block_rsv.size > 0);
5437 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
5438 WARN_ON(fs_info->trans_block_rsv.size > 0);
5439 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5440 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5441 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5442 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5443 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5444}
5445
5446void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
5447 struct btrfs_root *root)
5448{
5449 if (!trans->block_rsv)
5450 return;
5451
5452 if (!trans->bytes_reserved)
5453 return;
5454
5455 trace_btrfs_space_reservation(root->fs_info, "transaction",
5456 trans->transid, trans->bytes_reserved, 0);
5457 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
5458 trans->bytes_reserved = 0;
5459}
5460
/*
 * To be called after all the new block groups attached to the transaction
 * handle have been created (btrfs_create_pending_block_groups()).
 */
5465void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5466{
5467 struct btrfs_fs_info *fs_info = trans->root->fs_info;
5468
5469 if (!trans->chunk_bytes_reserved)
5470 return;
5471
5472 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5473
5474 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5475 trans->chunk_bytes_reserved);
5476 trans->chunk_bytes_reserved = 0;
5477}
5478
/* Can only return 0 or -ENOSPC */
5480int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5481 struct inode *inode)
5482{
5483 struct btrfs_root *root = BTRFS_I(inode)->root;
5484 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
5485 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
5486
 /*
  * We need to hold space in order to delete our orphan item once we've
  * added it, so this takes the reservation so we can release it later
  * when we are truly done with the orphan item.
  */
5492 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
5493 trace_btrfs_space_reservation(root->fs_info, "orphan",
5494 btrfs_ino(inode), num_bytes, 1);
5495 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5496}
5497
5498void btrfs_orphan_release_metadata(struct inode *inode)
5499{
5500 struct btrfs_root *root = BTRFS_I(inode)->root;
5501 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
5502 trace_btrfs_space_reservation(root->fs_info, "orphan",
5503 btrfs_ino(inode), num_bytes, 0);
5504 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
5505}
5506
/*
 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operations
 * @root: the root of the parent directory
 * @rsv: block rsv to reserve the space in
 * @items: the number of items we need to reserve space for
 * @qgroup_reserved: used to return the size reserved from the qgroup
 * @use_global_rsv: allow falling back to the global block reserve
 *
 * Reserve the space needed for snapshot/subvolume creation and deletion.
 * Because these operations are accounted by qgroups, the qgroup space is
 * reserved first and then the block reserve; if the qgroup does not have
 * enough space this returns -ENOSPC before any other work is done.
 */
5521int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5522 struct btrfs_block_rsv *rsv,
5523 int items,
5524 u64 *qgroup_reserved,
5525 bool use_global_rsv)
5526{
5527 u64 num_bytes;
5528 int ret;
5529 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5530
5531 if (root->fs_info->quota_enabled) {
 /* One for the parent inode, two for the dir entries */
5533 num_bytes = 3 * root->nodesize;
5534 ret = btrfs_qgroup_reserve_meta(root, num_bytes);
5535 if (ret)
5536 return ret;
5537 } else {
5538 num_bytes = 0;
5539 }
5540
5541 *qgroup_reserved = num_bytes;
5542
5543 num_bytes = btrfs_calc_trans_metadata_size(root, items);
5544 rsv->space_info = __find_space_info(root->fs_info,
5545 BTRFS_BLOCK_GROUP_METADATA);
5546 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5547 BTRFS_RESERVE_FLUSH_ALL);
5548
5549 if (ret == -ENOSPC && use_global_rsv)
5550 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
5551
5552 if (ret && *qgroup_reserved)
5553 btrfs_qgroup_free_meta(root, *qgroup_reserved);
5554
5555 return ret;
5556}
5557
5558void btrfs_subvolume_release_metadata(struct btrfs_root *root,
5559 struct btrfs_block_rsv *rsv,
5560 u64 qgroup_reserved)
5561{
5562 btrfs_block_rsv_release(root, rsv, (u64)-1);
5563}
5564
/**
 * drop_outstanding_extent - drop an outstanding extent
 * @inode: the inode we're dropping the extent for
 * @num_bytes: the number of bytes we're releasing
 *
 * Called when we are freeing up an outstanding extent, either after an error
 * or after the extent has been written.  Returns the number of reserved
 * extents that need to be freed.  Must be called with BTRFS_I(inode)->lock
 * held.
 */
5575static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
5576{
5577 unsigned drop_inode_space = 0;
5578 unsigned dropped_extents = 0;
5579 unsigned num_extents = 0;
5580
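 /* Round up: one outstanding extent per BTRFS_MAX_EXTENT_SIZE of the range */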
5581 num_extents = (unsigned)div64_u64(num_bytes +
5582 BTRFS_MAX_EXTENT_SIZE - 1,
5583 BTRFS_MAX_EXTENT_SIZE);
5584 ASSERT(num_extents);
5585 ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
5586 BTRFS_I(inode)->outstanding_extents -= num_extents;
5587
5588 if (BTRFS_I(inode)->outstanding_extents == 0 &&
5589 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5590 &BTRFS_I(inode)->runtime_flags))
5591 drop_inode_space = 1;
5592
 /*
  * If we have more or the same amount of outstanding extents as reserved
  * extents, leave the reserved extent count alone.
  */
5597 if (BTRFS_I(inode)->outstanding_extents >=
5598 BTRFS_I(inode)->reserved_extents)
5599 return drop_inode_space;
5600
5601 dropped_extents = BTRFS_I(inode)->reserved_extents -
5602 BTRFS_I(inode)->outstanding_extents;
5603 BTRFS_I(inode)->reserved_extents -= dropped_extents;
5604 return dropped_extents + drop_inode_space;
5605}
5606
/**
 * calc_csum_metadata_size - metadata space needed or freed for checksums
 * @inode: the inode we're manipulating
 * @num_bytes: the number of bytes in question
 * @reserve: 1 if we are reserving space, 0 if we are freeing space
 *
 * This adjusts ->csum_bytes of the inode and returns the amount of metadata
 * that must be reserved or freed as a result.  We calculate how many checksum
 * leaves are needed for the inode's csum_bytes before and after the change;
 * if the leaf count goes up, the return value is the additional metadata that
 * must be reserved, and if it goes down, it is the metadata that can be
 * released.
 *
 * Must be called with BTRFS_I(inode)->lock held.
 */
5625static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
5626 int reserve)
5627{
5628 struct btrfs_root *root = BTRFS_I(inode)->root;
5629 u64 old_csums, num_csums;
5630
5631 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
5632 BTRFS_I(inode)->csum_bytes == 0)
5633 return 0;
5634
5635 old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5636 if (reserve)
5637 BTRFS_I(inode)->csum_bytes += num_bytes;
5638 else
5639 BTRFS_I(inode)->csum_bytes -= num_bytes;
5640 num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5641
 /* No change in the number of csum leaves, nothing to reserve or free */
5643 if (old_csums == num_csums)
5644 return 0;
5645
5646 if (reserve)
5647 return btrfs_calc_trans_metadata_size(root,
5648 num_csums - old_csums);
5649
5650 return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
5651}
5652
5653int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5654{
5655 struct btrfs_root *root = BTRFS_I(inode)->root;
5656 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
5657 u64 to_reserve = 0;
5658 u64 csum_bytes;
5659 unsigned nr_extents = 0;
5660 int extra_reserve = 0;
5661 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
5662 int ret = 0;
5663 bool delalloc_lock = true;
5664 u64 to_free = 0;
5665 unsigned dropped;
5666
 /*
  * If we are a free space inode we must not flush, since we may be in
  * the middle of a transaction commit.  We also don't need the delalloc
  * mutex, because we won't race with anybody; taking it here would only
  * upset lockdep.
  */
5672 if (btrfs_is_free_space_inode(inode)) {
5673 flush = BTRFS_RESERVE_NO_FLUSH;
5674 delalloc_lock = false;
5675 }
5676
5677 if (flush != BTRFS_RESERVE_NO_FLUSH &&
5678 btrfs_transaction_in_commit(root->fs_info))
5679 schedule_timeout(1);
5680
5681 if (delalloc_lock)
5682 mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
5683
5684 num_bytes = ALIGN(num_bytes, root->sectorsize);
5685
5686 spin_lock(&BTRFS_I(inode)->lock);
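 /* One outstanding extent per BTRFS_MAX_EXTENT_SIZE chunk, rounded up */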
5687 nr_extents = (unsigned)div64_u64(num_bytes +
5688 BTRFS_MAX_EXTENT_SIZE - 1,
5689 BTRFS_MAX_EXTENT_SIZE);
5690 BTRFS_I(inode)->outstanding_extents += nr_extents;
5691 nr_extents = 0;
5692
5693 if (BTRFS_I(inode)->outstanding_extents >
5694 BTRFS_I(inode)->reserved_extents)
5695 nr_extents = BTRFS_I(inode)->outstanding_extents -
5696 BTRFS_I(inode)->reserved_extents;
5697
 /*
  * Add one item to the reservation for updating the inode when the
  * delalloc IO completes.
  */
5702 if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5703 &BTRFS_I(inode)->runtime_flags)) {
5704 nr_extents++;
5705 extra_reserve = 1;
5706 }
5707
5708 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
5709 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
5710 csum_bytes = BTRFS_I(inode)->csum_bytes;
5711 spin_unlock(&BTRFS_I(inode)->lock);
5712
5713 if (root->fs_info->quota_enabled) {
5714 ret = btrfs_qgroup_reserve_meta(root,
5715 nr_extents * root->nodesize);
5716 if (ret)
5717 goto out_fail;
5718 }
5719
5720 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
5721 if (unlikely(ret)) {
5722 btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
5723 goto out_fail;
5724 }
5725
5726 spin_lock(&BTRFS_I(inode)->lock);
5727 if (extra_reserve) {
5728 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5729 &BTRFS_I(inode)->runtime_flags);
5730 nr_extents--;
5731 }
5732 BTRFS_I(inode)->reserved_extents += nr_extents;
5733 spin_unlock(&BTRFS_I(inode)->lock);
5734
5735 if (delalloc_lock)
5736 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
5737
5738 if (to_reserve)
5739 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5740 btrfs_ino(inode), to_reserve, 1);
5741 block_rsv_add_bytes(block_rsv, to_reserve, 1);
5742
5743 return 0;
5744
5745out_fail:
5746 spin_lock(&BTRFS_I(inode)->lock);
5747 dropped = drop_outstanding_extent(inode, num_bytes);
5748
 /*
  * If the inode's csum_bytes is still the value we sampled above, we
  * know we haven't raced with any free()ers, so we can simply undo our
  * csum accounting and release that space.
  */
5753 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
5754 calc_csum_metadata_size(inode, num_bytes, 0);
5755 } else {
5756 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
5757 u64 bytes;
5758
 /*
  * This is tricky.  First figure out how much was freed by any free()ers
  * that ran during this reservation: reset ->csum_bytes to the value we
  * sampled before dropping the lock and compute the csum space for the
  * bytes freed in the meantime.
  */
5767 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
5768 BTRFS_I(inode)->csum_bytes = csum_bytes;
5769 to_free = calc_csum_metadata_size(inode, bytes, 0);
5770
 /*
  * Now work out how much would have been freed had we not been making
  * this reservation, i.e. without our artificially inflated
  * ->csum_bytes.
  */
5777 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
5778 bytes = csum_bytes - orig_csum_bytes;
5779 bytes = calc_csum_metadata_size(inode, bytes, 0);
5780
 /*
  * Finally reset ->csum_bytes to what it should be.  If "bytes" is
  * larger than "to_free", more space would have been freed without our
  * artificially high ->csum_bytes, so release the remainder ourselves.
  * Otherwise the concurrent free()ers already did the right thing and
  * there is nothing left to release.
  */
5789 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
5790 if (bytes > to_free)
5791 to_free = bytes - to_free;
5792 else
5793 to_free = 0;
5794 }
5795 spin_unlock(&BTRFS_I(inode)->lock);
5796 if (dropped)
5797 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5798
5799 if (to_free) {
5800 btrfs_block_rsv_release(root, block_rsv, to_free);
5801 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5802 btrfs_ino(inode), to_free, 0);
5803 }
5804 if (delalloc_lock)
5805 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
5806 return ret;
5807}
5808
/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for
 * @num_bytes: the number of bytes we're releasing
 *
 * This releases the metadata reservation for an inode.  It can be called once
 * we complete IO for a given set of bytes to release their metadata
 * reservation.
 */
5818void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5819{
5820 struct btrfs_root *root = BTRFS_I(inode)->root;
5821 u64 to_free = 0;
5822 unsigned dropped;
5823
5824 num_bytes = ALIGN(num_bytes, root->sectorsize);
5825 spin_lock(&BTRFS_I(inode)->lock);
5826 dropped = drop_outstanding_extent(inode, num_bytes);
5827
5828 if (num_bytes)
5829 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
5830 spin_unlock(&BTRFS_I(inode)->lock);
5831 if (dropped > 0)
5832 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5833
5834 if (btrfs_test_is_dummy_root(root))
5835 return;
5836
5837 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5838 btrfs_ino(inode), to_free, 0);
5839
5840 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
5841 to_free);
5842}
5843
/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
 * @inode: inode we're writing to
 * @start: start of the range we are writing to
 * @len: length of the range we are writing to
 *
 * This does the following:
 *
 * o reserves space in the data space_info for @len bytes and the
 *   corresponding qgroup space (done in btrfs_check_data_free_space)
 *
 * o reserves metadata space based on the number of outstanding extents and
 *   the checksum space they will need, adds the range to the inode's
 *   delalloc_bytes and puts the inode on the fs_info delalloc list
 *   (done in btrfs_delalloc_reserve_metadata)
 *
 * Returns 0 on success, a negative errno (-ENOSPC or -EDQUOT) on failure.
 */
5869int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
5870{
5871 int ret;
5872
5873 ret = btrfs_check_data_free_space(inode, start, len);
5874 if (ret < 0)
5875 return ret;
5876 ret = btrfs_delalloc_reserve_metadata(inode, len);
5877 if (ret < 0)
5878 btrfs_free_reserved_data_space(inode, start, len);
5879 return ret;
5880}
5881
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @start: start position of the space that was reserved
 * @len: length of the space that was reserved
 *
 * Must be matched with a call to btrfs_delalloc_reserve_space.  This is used
 * when both the metadata and the data reservation are no longer needed, for
 * example on error or when an inline extent was inserted instead.
 *
 * This releases the unused metadata reservation, decrements the inode's
 * delalloc_bytes (removing it from the fs_info delalloc list if it reaches
 * zero) and hands back the reserved data and qgroup space.
 */
5897void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len)
5898{
5899 btrfs_delalloc_release_metadata(inode, len);
5900 btrfs_free_reserved_data_space(inode, start, len);
5901}
5902
5903static int update_block_group(struct btrfs_trans_handle *trans,
5904 struct btrfs_root *root, u64 bytenr,
5905 u64 num_bytes, int alloc)
5906{
5907 struct btrfs_block_group_cache *cache = NULL;
5908 struct btrfs_fs_info *info = root->fs_info;
5909 u64 total = num_bytes;
5910 u64 old_val;
5911 u64 byte_in_group;
5912 int factor;
5913
 /* Block accounting for the superblock */
5915 spin_lock(&info->delalloc_root_lock);
5916 old_val = btrfs_super_bytes_used(info->super_copy);
5917 if (alloc)
5918 old_val += num_bytes;
5919 else
5920 old_val -= num_bytes;
5921 btrfs_set_super_bytes_used(info->super_copy, old_val);
5922 spin_unlock(&info->delalloc_root_lock);
5923
5924 while (total) {
5925 cache = btrfs_lookup_block_group(info, bytenr);
5926 if (!cache)
5927 return -ENOENT;
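 /*
  * Mirrored profiles (DUP/RAID1/RAID10) consume two bytes of raw disk
  * space for every byte of data, so account disk_used accordingly.
  */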
5928 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
5929 BTRFS_BLOCK_GROUP_RAID1 |
5930 BTRFS_BLOCK_GROUP_RAID10))
5931 factor = 2;
5932 else
5933 factor = 1;
5934
 /*
  * If this block group has a free space cache written out we need to
  * load it when removing space, because the unpinning stage must add
  * the space back to the block group or we would leak it.
  */
5940 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5941 cache_block_group(cache, 1);
5942
5943 byte_in_group = bytenr - cache->key.objectid;
5944 WARN_ON(byte_in_group > cache->key.offset);
5945
5946 spin_lock(&cache->space_info->lock);
5947 spin_lock(&cache->lock);
5948
5949 if (btrfs_test_opt(root, SPACE_CACHE) &&
5950 cache->disk_cache_state < BTRFS_DC_CLEAR)
5951 cache->disk_cache_state = BTRFS_DC_CLEAR;
5952
5953 old_val = btrfs_block_group_used(&cache->item);
5954 num_bytes = min(total, cache->key.offset - byte_in_group);
5955 if (alloc) {
5956 old_val += num_bytes;
5957 btrfs_set_block_group_used(&cache->item, old_val);
5958 cache->reserved -= num_bytes;
5959 cache->space_info->bytes_reserved -= num_bytes;
5960 cache->space_info->bytes_used += num_bytes;
5961 cache->space_info->disk_used += num_bytes * factor;
5962 spin_unlock(&cache->lock);
5963 spin_unlock(&cache->space_info->lock);
5964 } else {
5965 old_val -= num_bytes;
5966 btrfs_set_block_group_used(&cache->item, old_val);
5967 cache->pinned += num_bytes;
5968 cache->space_info->bytes_pinned += num_bytes;
5969 cache->space_info->bytes_used -= num_bytes;
5970 cache->space_info->disk_used -= num_bytes * factor;
5971 spin_unlock(&cache->lock);
5972 spin_unlock(&cache->space_info->lock);
5973
5974 set_extent_dirty(info->pinned_extents,
5975 bytenr, bytenr + num_bytes - 1,
5976 GFP_NOFS | __GFP_NOFAIL);
5977 }
5978
5979 spin_lock(&trans->transaction->dirty_bgs_lock);
5980 if (list_empty(&cache->dirty_list)) {
5981 list_add_tail(&cache->dirty_list,
5982 &trans->transaction->dirty_bgs);
5983 trans->transaction->num_dirty_bgs++;
5984 btrfs_get_block_group(cache);
5985 }
5986 spin_unlock(&trans->transaction->dirty_bgs_lock);
5987
 /*
  * No more used bytes in this block group: queue it for deletion.  We
  * do this after adding it to the dirty list to avoid races between the
  * cleaner kthread and space cache writeout.
  */
5994 if (!alloc && old_val == 0) {
5995 spin_lock(&info->unused_bgs_lock);
5996 if (list_empty(&cache->bg_list)) {
5997 btrfs_get_block_group(cache);
5998 list_add_tail(&cache->bg_list,
5999 &info->unused_bgs);
6000 }
6001 spin_unlock(&info->unused_bgs_lock);
6002 }
6003
6004 btrfs_put_block_group(cache);
6005 total -= num_bytes;
6006 bytenr += num_bytes;
6007 }
6008 return 0;
6009}
6010
6011static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
6012{
6013 struct btrfs_block_group_cache *cache;
6014 u64 bytenr;
6015
6016 spin_lock(&root->fs_info->block_group_cache_lock);
6017 bytenr = root->fs_info->first_logical_byte;
6018 spin_unlock(&root->fs_info->block_group_cache_lock);
6019
6020 if (bytenr < (u64)-1)
6021 return bytenr;
6022
6023 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
6024 if (!cache)
6025 return 0;
6026
6027 bytenr = cache->key.objectid;
6028 btrfs_put_block_group(cache);
6029
6030 return bytenr;
6031}
6032
6033static int pin_down_extent(struct btrfs_root *root,
6034 struct btrfs_block_group_cache *cache,
6035 u64 bytenr, u64 num_bytes, int reserved)
6036{
6037 spin_lock(&cache->space_info->lock);
6038 spin_lock(&cache->lock);
6039 cache->pinned += num_bytes;
6040 cache->space_info->bytes_pinned += num_bytes;
6041 if (reserved) {
6042 cache->reserved -= num_bytes;
6043 cache->space_info->bytes_reserved -= num_bytes;
6044 }
6045 spin_unlock(&cache->lock);
6046 spin_unlock(&cache->space_info->lock);
6047
6048 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
6049 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6050 if (reserved)
6051 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
6052 return 0;
6053}
6054
/*
 * This function must be called within a transaction.
 */
6058int btrfs_pin_extent(struct btrfs_root *root,
6059 u64 bytenr, u64 num_bytes, int reserved)
6060{
6061 struct btrfs_block_group_cache *cache;
6062
6063 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
6064 BUG_ON(!cache);
6065
6066 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
6067
6068 btrfs_put_block_group(cache);
6069 return 0;
6070}
6071
/*
 * This function must be called within a transaction.
 */
6075int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
6076 u64 bytenr, u64 num_bytes)
6077{
6078 struct btrfs_block_group_cache *cache;
6079 int ret;
6080
6081 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
6082 if (!cache)
6083 return -EINVAL;
6084
 /*
  * Pull in the free space cache (if any) so that our pin removes the
  * free space from the cache.  load_only is set because the slow
  * caching code checks the pinned extents itself.
  */
6091 cache_block_group(cache, 1);
6092
6093 pin_down_extent(root, cache, bytenr, num_bytes, 0);
6094
 /* Remove us from the free space cache if we're there */
6096 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
6097 btrfs_put_block_group(cache);
6098 return ret;
6099}
6100
6101static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
6102{
6103 int ret;
6104 struct btrfs_block_group_cache *block_group;
6105 struct btrfs_caching_control *caching_ctl;
6106
6107 block_group = btrfs_lookup_block_group(root->fs_info, start);
6108 if (!block_group)
6109 return -EINVAL;
6110
6111 cache_block_group(block_group, 0);
6112 caching_ctl = get_caching_control(block_group);
6113
6114 if (!caching_ctl) {
 /* Logic error: caching must already have finished */
6116 BUG_ON(!block_group_cache_done(block_group));
6117 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6118 } else {
6119 mutex_lock(&caching_ctl->mutex);
6120
6121 if (start >= caching_ctl->progress) {
6122 ret = add_excluded_extent(root, start, num_bytes);
6123 } else if (start + num_bytes <= caching_ctl->progress) {
6124 ret = btrfs_remove_free_space(block_group,
6125 start, num_bytes);
6126 } else {
6127 num_bytes = caching_ctl->progress - start;
6128 ret = btrfs_remove_free_space(block_group,
6129 start, num_bytes);
6130 if (ret)
6131 goto out_lock;
6132
6133 num_bytes = (start + num_bytes) -
6134 caching_ctl->progress;
6135 start = caching_ctl->progress;
6136 ret = add_excluded_extent(root, start, num_bytes);
6137 }
6138out_lock:
6139 mutex_unlock(&caching_ctl->mutex);
6140 put_caching_control(caching_ctl);
6141 }
6142 btrfs_put_block_group(block_group);
6143 return ret;
6144}
6145
6146int btrfs_exclude_logged_extents(struct btrfs_root *log,
6147 struct extent_buffer *eb)
6148{
6149 struct btrfs_file_extent_item *item;
6150 struct btrfs_key key;
6151 int found_type;
6152 int i;
6153
6154 if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
6155 return 0;
6156
6157 for (i = 0; i < btrfs_header_nritems(eb); i++) {
6158 btrfs_item_key_to_cpu(eb, &key, i);
6159 if (key.type != BTRFS_EXTENT_DATA_KEY)
6160 continue;
6161 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
6162 found_type = btrfs_file_extent_type(eb, item);
6163 if (found_type == BTRFS_FILE_EXTENT_INLINE)
6164 continue;
6165 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
6166 continue;
6167 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
6168 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
6169 __exclude_logged_extent(log, key.objectid, key.offset);
6170 }
6171
6172 return 0;
6173}
6174
/**
 * btrfs_update_reserved_bytes - update the block_group and space_info counters
 * @cache: the block group we are manipulating
 * @num_bytes: the number of bytes in question
 * @reserve: one of the reservation enums
 * @delalloc: the blocks are allocated for a delalloc write
 *
 * This is called by the allocator when it reserves space, or by somebody who
 * is freeing space that was never actually used on disk.  For example, if you
 * reserve some space for a new leaf in transaction A and free that leaf
 * before A commits, you call this with RESERVE_FREE to clear the reservation.
 *
 * Metadata reservations should be called with RESERVE_ALLOC so the proper
 * ENOSPC accounting is done.  Data reservations are handled by clearing the
 * delalloc bits in the io_tree instead, since compression may end up
 * allocating less disk space than the amount of data reserved.
 *
 * If this is a reservation and the block group has become read only we
 * cannot make the reservation and -EAGAIN is returned; otherwise this
 * function always succeeds.
 */
6198static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
6199 u64 num_bytes, int reserve, int delalloc)
6200{
6201 struct btrfs_space_info *space_info = cache->space_info;
6202 int ret = 0;
6203
6204 spin_lock(&space_info->lock);
6205 spin_lock(&cache->lock);
6206 if (reserve != RESERVE_FREE) {
6207 if (cache->ro) {
6208 ret = -EAGAIN;
6209 } else {
6210 cache->reserved += num_bytes;
6211 space_info->bytes_reserved += num_bytes;
6212 if (reserve == RESERVE_ALLOC) {
6213 trace_btrfs_space_reservation(cache->fs_info,
6214 "space_info", space_info->flags,
6215 num_bytes, 0);
6216 space_info->bytes_may_use -= num_bytes;
6217 }
6218
6219 if (delalloc)
6220 cache->delalloc_bytes += num_bytes;
6221 }
6222 } else {
6223 if (cache->ro)
6224 space_info->bytes_readonly += num_bytes;
6225 cache->reserved -= num_bytes;
6226 space_info->bytes_reserved -= num_bytes;
6227
6228 if (delalloc)
6229 cache->delalloc_bytes -= num_bytes;
6230 }
6231 spin_unlock(&cache->lock);
6232 spin_unlock(&space_info->lock);
6233 return ret;
6234}
6235
6236void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
6237 struct btrfs_root *root)
6238{
6239 struct btrfs_fs_info *fs_info = root->fs_info;
6240 struct btrfs_caching_control *next;
6241 struct btrfs_caching_control *caching_ctl;
6242 struct btrfs_block_group_cache *cache;
6243
6244 down_write(&fs_info->commit_root_sem);
6245
6246 list_for_each_entry_safe(caching_ctl, next,
6247 &fs_info->caching_block_groups, list) {
6248 cache = caching_ctl->block_group;
6249 if (block_group_cache_done(cache)) {
6250 cache->last_byte_to_unpin = (u64)-1;
6251 list_del_init(&caching_ctl->list);
6252 put_caching_control(caching_ctl);
6253 } else {
6254 cache->last_byte_to_unpin = caching_ctl->progress;
6255 }
6256 }
6257
6258 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6259 fs_info->pinned_extents = &fs_info->freed_extents[1];
6260 else
6261 fs_info->pinned_extents = &fs_info->freed_extents[0];
6262
6263 up_write(&fs_info->commit_root_sem);
6264
6265 update_global_block_rsv(fs_info);
6266}
6267
/*
 * Return the free cluster for the given space_info and set empty_cluster to
 * what it should be based on the mount options.
 */
6272static struct btrfs_free_cluster *
6273fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
6274 u64 *empty_cluster)
6275{
6276 struct btrfs_free_cluster *ret = NULL;
6277 bool ssd = btrfs_test_opt(root, SSD);
6278
6279 *empty_cluster = 0;
6280 if (btrfs_mixed_space_info(space_info))
6281 return ret;
6282
6283 if (ssd)
6284 *empty_cluster = SZ_2M;
6285 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
6286 ret = &root->fs_info->meta_alloc_cluster;
6287 if (!ssd)
6288 *empty_cluster = SZ_64K;
6289 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
6290 ret = &root->fs_info->data_alloc_cluster;
6291 }
6292
6293 return ret;
6294}
6295
6296static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
6297 const bool return_free_space)
6298{
6299 struct btrfs_fs_info *fs_info = root->fs_info;
6300 struct btrfs_block_group_cache *cache = NULL;
6301 struct btrfs_space_info *space_info;
6302 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6303 struct btrfs_free_cluster *cluster = NULL;
6304 u64 len;
6305 u64 total_unpinned = 0;
6306 u64 empty_cluster = 0;
6307 bool readonly;
6308
6309 while (start <= end) {
6310 readonly = false;
6311 if (!cache ||
6312 start >= cache->key.objectid + cache->key.offset) {
6313 if (cache)
6314 btrfs_put_block_group(cache);
6315 total_unpinned = 0;
6316 cache = btrfs_lookup_block_group(fs_info, start);
6317 BUG_ON(!cache);
6318
6319 cluster = fetch_cluster_info(root,
6320 cache->space_info,
6321 &empty_cluster);
6322 empty_cluster <<= 1;
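 /*
  * Require at least twice the cluster size to be unpinned in this
  * block group before clearing the fragmented flag below.
  */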
6323 }
6324
6325 len = cache->key.objectid + cache->key.offset - start;
6326 len = min(len, end + 1 - start);
6327
6328 if (start < cache->last_byte_to_unpin) {
6329 len = min(len, cache->last_byte_to_unpin - start);
6330 if (return_free_space)
6331 btrfs_add_free_space(cache, start, len);
6332 }
6333
6334 start += len;
6335 total_unpinned += len;
6336 space_info = cache->space_info;
6337
 /*
  * If this space cluster was marked as fragmented and we have unpinned
  * enough in this block group to potentially allow a cluster to be
  * created inside of it, go ahead and clear the fragmented flag.
  */
6344 if (cluster && cluster->fragmented &&
6345 total_unpinned > empty_cluster) {
6346 spin_lock(&cluster->lock);
6347 cluster->fragmented = 0;
6348 spin_unlock(&cluster->lock);
6349 }
6350
6351 spin_lock(&space_info->lock);
6352 spin_lock(&cache->lock);
6353 cache->pinned -= len;
6354 space_info->bytes_pinned -= len;
6355 space_info->max_extent_size = 0;
6356 percpu_counter_add(&space_info->total_bytes_pinned, -len);
6357 if (cache->ro) {
6358 space_info->bytes_readonly += len;
6359 readonly = true;
6360 }
6361 spin_unlock(&cache->lock);
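 /* Top off the global reserve with freshly unpinned space while it is not full */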
6362 if (!readonly && global_rsv->space_info == space_info) {
6363 spin_lock(&global_rsv->lock);
6364 if (!global_rsv->full) {
6365 len = min(len, global_rsv->size -
6366 global_rsv->reserved);
6367 global_rsv->reserved += len;
6368 space_info->bytes_may_use += len;
6369 if (global_rsv->reserved >= global_rsv->size)
6370 global_rsv->full = 1;
6371 }
6372 spin_unlock(&global_rsv->lock);
6373 }
6374 spin_unlock(&space_info->lock);
6375 }
6376
6377 if (cache)
6378 btrfs_put_block_group(cache);
6379 return 0;
6380}
6381
6382int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
6383 struct btrfs_root *root)
6384{
6385 struct btrfs_fs_info *fs_info = root->fs_info;
6386 struct btrfs_block_group_cache *block_group, *tmp;
6387 struct list_head *deleted_bgs;
6388 struct extent_io_tree *unpin;
6389 u64 start;
6390 u64 end;
6391 int ret;
6392
6393 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6394 unpin = &fs_info->freed_extents[1];
6395 else
6396 unpin = &fs_info->freed_extents[0];
6397
6398 while (!trans->aborted) {
6399 mutex_lock(&fs_info->unused_bg_unpin_mutex);
6400 ret = find_first_extent_bit(unpin, 0, &start, &end,
6401 EXTENT_DIRTY, NULL);
6402 if (ret) {
6403 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6404 break;
6405 }
6406
6407 if (btrfs_test_opt(root, DISCARD))
6408 ret = btrfs_discard_extent(root, start,
6409 end + 1 - start, NULL);
6410
6411 clear_extent_dirty(unpin, start, end, GFP_NOFS);
6412 unpin_extent_range(root, start, end, true);
6413 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6414 cond_resched();
6415 }
6416
 /*
  * The transaction is finished, so we don't need the unpin lock anymore.
  * We still have to clean up the block groups deleted in this
  * transaction, even in case of a transaction abort.
  */
6422 deleted_bgs = &trans->transaction->deleted_bgs;
6423 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6424 u64 trimmed = 0;
6425
6426 ret = -EROFS;
6427 if (!trans->aborted)
6428 ret = btrfs_discard_extent(root,
6429 block_group->key.objectid,
6430 block_group->key.offset,
6431 &trimmed);
6432
6433 list_del_init(&block_group->bg_list);
6434 btrfs_put_block_group_trimming(block_group);
6435 btrfs_put_block_group(block_group);
6436
6437 if (ret) {
6438 const char *errstr = btrfs_decode_error(ret);
6439 btrfs_warn(fs_info,
 "discard failed while removing block group: errno=%d %s",
6441 ret, errstr);
6442 }
6443 }
6444
6445 return 0;
6446}
6447
6448static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
6449 u64 owner, u64 root_objectid)
6450{
6451 struct btrfs_space_info *space_info;
6452 u64 flags;
6453
6454 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6455 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
6456 flags = BTRFS_BLOCK_GROUP_SYSTEM;
6457 else
6458 flags = BTRFS_BLOCK_GROUP_METADATA;
6459 } else {
6460 flags = BTRFS_BLOCK_GROUP_DATA;
6461 }
6462
6463 space_info = __find_space_info(fs_info, flags);
6464 BUG_ON(!space_info);
6465 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
6466}
6467
6468
6469static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6470 struct btrfs_root *root,
6471 struct btrfs_delayed_ref_node *node, u64 parent,
6472 u64 root_objectid, u64 owner_objectid,
6473 u64 owner_offset, int refs_to_drop,
6474 struct btrfs_delayed_extent_op *extent_op)
6475{
6476 struct btrfs_key key;
6477 struct btrfs_path *path;
6478 struct btrfs_fs_info *info = root->fs_info;
6479 struct btrfs_root *extent_root = info->extent_root;
6480 struct extent_buffer *leaf;
6481 struct btrfs_extent_item *ei;
6482 struct btrfs_extent_inline_ref *iref;
6483 int ret;
6484 int is_data;
6485 int extent_slot = 0;
6486 int found_extent = 0;
6487 int num_to_del = 1;
6488 u32 item_size;
6489 u64 refs;
6490 u64 bytenr = node->bytenr;
6491 u64 num_bytes = node->num_bytes;
6492 int last_ref = 0;
6493 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6494 SKINNY_METADATA);
6495
6496 path = btrfs_alloc_path();
6497 if (!path)
6498 return -ENOMEM;
6499
6500 path->reada = READA_FORWARD;
6501 path->leave_spinning = 1;
6502
6503 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6504 BUG_ON(!is_data && refs_to_drop != 1);
6505
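 /* Skinny metadata items are only used for tree blocks, never for data */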
6506 if (is_data)
6507 skinny_metadata = 0;
6508
6509 ret = lookup_extent_backref(trans, extent_root, path, &iref,
6510 bytenr, num_bytes, parent,
6511 root_objectid, owner_objectid,
6512 owner_offset);
6513 if (ret == 0) {
6514 extent_slot = path->slots[0];
6515 while (extent_slot >= 0) {
6516 btrfs_item_key_to_cpu(path->nodes[0], &key,
6517 extent_slot);
6518 if (key.objectid != bytenr)
6519 break;
6520 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6521 key.offset == num_bytes) {
6522 found_extent = 1;
6523 break;
6524 }
6525 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6526 key.offset == owner_objectid) {
6527 found_extent = 1;
6528 break;
6529 }
6530 if (path->slots[0] - extent_slot > 5)
6531 break;
6532 extent_slot--;
6533 }
6534#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6535 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
6536 if (found_extent && item_size < sizeof(*ei))
6537 found_extent = 0;
6538#endif
6539 if (!found_extent) {
6540 BUG_ON(iref);
6541 ret = remove_extent_backref(trans, extent_root, path,
6542 NULL, refs_to_drop,
6543 is_data, &last_ref);
6544 if (ret) {
6545 btrfs_abort_transaction(trans, extent_root, ret);
6546 goto out;
6547 }
6548 btrfs_release_path(path);
6549 path->leave_spinning = 1;
6550
6551 key.objectid = bytenr;
6552 key.type = BTRFS_EXTENT_ITEM_KEY;
6553 key.offset = num_bytes;
6554
6555 if (!is_data && skinny_metadata) {
6556 key.type = BTRFS_METADATA_ITEM_KEY;
6557 key.offset = owner_objectid;
6558 }
6559
6560 ret = btrfs_search_slot(trans, extent_root,
6561 &key, path, -1, 1);
6562 if (ret > 0 && skinny_metadata && path->slots[0]) {
 /*
  * Couldn't find our skinny metadata item; see if we have an old-style
  * extent item instead.
  */
6567 path->slots[0]--;
6568 btrfs_item_key_to_cpu(path->nodes[0], &key,
6569 path->slots[0]);
6570 if (key.objectid == bytenr &&
6571 key.type == BTRFS_EXTENT_ITEM_KEY &&
6572 key.offset == num_bytes)
6573 ret = 0;
6574 }
6575
6576 if (ret > 0 && skinny_metadata) {
6577 skinny_metadata = false;
6578 key.objectid = bytenr;
6579 key.type = BTRFS_EXTENT_ITEM_KEY;
6580 key.offset = num_bytes;
6581 btrfs_release_path(path);
6582 ret = btrfs_search_slot(trans, extent_root,
6583 &key, path, -1, 1);
6584 }
6585
6586 if (ret) {
6587 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
6588 ret, bytenr);
6589 if (ret > 0)
6590 btrfs_print_leaf(extent_root,
6591 path->nodes[0]);
6592 }
6593 if (ret < 0) {
6594 btrfs_abort_transaction(trans, extent_root, ret);
6595 goto out;
6596 }
6597 extent_slot = path->slots[0];
6598 }
6599 } else if (WARN_ON(ret == -ENOENT)) {
6600 btrfs_print_leaf(extent_root, path->nodes[0]);
6601 btrfs_err(info,
6602 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
6603 bytenr, parent, root_objectid, owner_objectid,
6604 owner_offset);
6605 btrfs_abort_transaction(trans, extent_root, ret);
6606 goto out;
6607 } else {
6608 btrfs_abort_transaction(trans, extent_root, ret);
6609 goto out;
6610 }
6611
6612 leaf = path->nodes[0];
6613 item_size = btrfs_item_size_nr(leaf, extent_slot);
6614#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6615 if (item_size < sizeof(*ei)) {
6616 BUG_ON(found_extent || extent_slot != path->slots[0]);
6617 ret = convert_extent_item_v0(trans, extent_root, path,
6618 owner_objectid, 0);
6619 if (ret < 0) {
6620 btrfs_abort_transaction(trans, extent_root, ret);
6621 goto out;
6622 }
6623
6624 btrfs_release_path(path);
6625 path->leave_spinning = 1;
6626
6627 key.objectid = bytenr;
6628 key.type = BTRFS_EXTENT_ITEM_KEY;
6629 key.offset = num_bytes;
6630
6631 ret = btrfs_search_slot(trans, extent_root, &key, path,
6632 -1, 1);
6633 if (ret) {
6634 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
6635 ret, bytenr);
6636 btrfs_print_leaf(extent_root, path->nodes[0]);
6637 }
6638 if (ret < 0) {
6639 btrfs_abort_transaction(trans, extent_root, ret);
6640 goto out;
6641 }
6642
6643 extent_slot = path->slots[0];
6644 leaf = path->nodes[0];
6645 item_size = btrfs_item_size_nr(leaf, extent_slot);
6646 }
6647#endif
6648 BUG_ON(item_size < sizeof(*ei));
6649 ei = btrfs_item_ptr(leaf, extent_slot,
6650 struct btrfs_extent_item);
6651 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
6652 key.type == BTRFS_EXTENT_ITEM_KEY) {
6653 struct btrfs_tree_block_info *bi;
6654 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
6655 bi = (struct btrfs_tree_block_info *)(ei + 1);
6656 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
6657 }
6658
6659 refs = btrfs_extent_refs(leaf, ei);
6660 if (refs < refs_to_drop) {
6661 btrfs_err(info, "trying to drop %d refs but we only have %Lu "
6662 "for bytenr %Lu", refs_to_drop, refs, bytenr);
6663 ret = -EINVAL;
6664 btrfs_abort_transaction(trans, extent_root, ret);
6665 goto out;
6666 }
6667 refs -= refs_to_drop;
6668
6669 if (refs > 0) {
6670 if (extent_op)
6671 __run_delayed_extent_op(extent_op, leaf, ei);
6672
 /*
  * For an inline back reference the reference count is updated by
  * remove_extent_backref() below.
  */
6676 if (iref) {
6677 BUG_ON(!found_extent);
6678 } else {
6679 btrfs_set_extent_refs(leaf, ei, refs);
6680 btrfs_mark_buffer_dirty(leaf);
6681 }
6682 if (found_extent) {
6683 ret = remove_extent_backref(trans, extent_root, path,
6684 iref, refs_to_drop,
6685 is_data, &last_ref);
6686 if (ret) {
6687 btrfs_abort_transaction(trans, extent_root, ret);
6688 goto out;
6689 }
6690 }
6691 add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
6692 root_objectid);
6693 } else {
6694 if (found_extent) {
6695 BUG_ON(is_data && refs_to_drop !=
6696 extent_data_ref_count(path, iref));
6697 if (iref) {
6698 BUG_ON(path->slots[0] != extent_slot);
6699 } else {
6700 BUG_ON(path->slots[0] != extent_slot + 1);
6701 path->slots[0] = extent_slot;
6702 num_to_del = 2;
6703 }
6704 }
6705
6706 last_ref = 1;
6707 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
6708 num_to_del);
6709 if (ret) {
6710 btrfs_abort_transaction(trans, extent_root, ret);
6711 goto out;
6712 }
6713 btrfs_release_path(path);
6714
6715 if (is_data) {
6716 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
6717 if (ret) {
6718 btrfs_abort_transaction(trans, extent_root, ret);
6719 goto out;
6720 }
6721 }
6722
6723 ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
6724 num_bytes);
6725 if (ret) {
6726 btrfs_abort_transaction(trans, extent_root, ret);
6727 goto out;
6728 }
6729
6730 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
6731 if (ret) {
6732 btrfs_abort_transaction(trans, extent_root, ret);
6733 goto out;
6734 }
6735 }
6736 btrfs_release_path(path);
6737
6738out:
6739 btrfs_free_path(path);
6740 return ret;
6741}
6742
/*
 * When we free a block it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree
 * for the given extent and, if there are no other delayed refs left to
 * process, removes the head from the tree.
 */
6749static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
6750 struct btrfs_root *root, u64 bytenr)
6751{
6752 struct btrfs_delayed_ref_head *head;
6753 struct btrfs_delayed_ref_root *delayed_refs;
6754 int ret = 0;
6755
6756 delayed_refs = &trans->transaction->delayed_refs;
6757 spin_lock(&delayed_refs->lock);
6758 head = btrfs_find_delayed_ref_head(trans, bytenr);
6759 if (!head)
6760 goto out_delayed_unlock;
6761
6762 spin_lock(&head->lock);
6763 if (!list_empty(&head->ref_list))
6764 goto out;
6765
6766 if (head->extent_op) {
6767 if (!head->must_insert_reserved)
6768 goto out;
6769 btrfs_free_delayed_extent_op(head->extent_op);
6770 head->extent_op = NULL;
6771 }
6772
 /*
  * Waiting for the mutex here would deadlock.  If someone else holds it
  * they are already in the process of dropping it anyway.
  */
6777 if (!mutex_trylock(&head->mutex))
6778 goto out;
6779
 /*
  * At this point we have a head with no other entries, go ahead and
  * process it.
  */
6784 head->node.in_tree = 0;
6785 rb_erase(&head->href_node, &delayed_refs->href_root);
6786
6787 atomic_dec(&delayed_refs->num_entries);
6788
 /*
  * We don't take a new reference on the head because we're removing it
  * from the tree, so we just steal the reference the tree was holding.
  */
6793 delayed_refs->num_heads--;
6794 if (head->processing == 0)
6795 delayed_refs->num_heads_ready--;
6796 head->processing = 0;
6797 spin_unlock(&head->lock);
6798 spin_unlock(&delayed_refs->lock);
6799
6800 BUG_ON(head->extent_op);
6801 if (head->must_insert_reserved)
6802 ret = 1;
6803
6804 mutex_unlock(&head->mutex);
6805 btrfs_put_delayed_ref(&head->node);
6806 return ret;
6807out:
6808 spin_unlock(&head->lock);
6809
6810out_delayed_unlock:
6811 spin_unlock(&delayed_refs->lock);
6812 return 0;
6813}
6814
6815void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6816 struct btrfs_root *root,
6817 struct extent_buffer *buf,
6818 u64 parent, int last_ref)
6819{
6820 int pin = 1;
6821 int ret;
6822
6823 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
6824 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
6825 buf->start, buf->len,
6826 parent, root->root_key.objectid,
6827 btrfs_header_level(buf),
6828 BTRFS_DROP_DELAYED_REF, NULL);
6829 BUG_ON(ret);
6830 }
6831
6832 if (!last_ref)
6833 return;
6834
6835 if (btrfs_header_generation(buf) == trans->transid) {
6836 struct btrfs_block_group_cache *cache;
6837
6838 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
6839 ret = check_ref_cleanup(trans, root, buf->start);
6840 if (!ret)
6841 goto out;
6842 }
6843
6844 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
6845
6846 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
6847 pin_down_extent(root, cache, buf->start, buf->len, 1);
6848 btrfs_put_block_group(cache);
6849 goto out;
6850 }
6851
6852 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
6853
6854 btrfs_add_free_space(cache, buf->start, buf->len);
6855 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
6856 btrfs_put_block_group(cache);
6857 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
6858 pin = 0;
6859 }
6860out:
6861 if (pin)
6862 add_pinned_bytes(root->fs_info, buf->len,
6863 btrfs_header_level(buf),
6864 root->root_key.objectid);
6865
	/*
	 * The buffer is being thrown away, so whether it was marked corrupt
	 * no longer matters; clear the flag.
	 */
6870 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
6871}
6872
/* Can return -ENOMEM */
6874int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6875 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
6876 u64 owner, u64 offset)
6877{
6878 int ret;
6879 struct btrfs_fs_info *fs_info = root->fs_info;
6880
6881 if (btrfs_test_is_dummy_root(root))
6882 return 0;
6883
6884 add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
6885
	/*
	 * Tree log blocks are never added to the extent tree; just pin them
	 * so the space can be reclaimed once the log tree goes away.
	 */
6890 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
6891 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
6892
6893 btrfs_pin_extent(root, bytenr, num_bytes, 1);
6894 ret = 0;
6895 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6896 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
6897 num_bytes,
6898 parent, root_objectid, (int)owner,
6899 BTRFS_DROP_DELAYED_REF, NULL);
6900 } else {
6901 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
6902 num_bytes,
6903 parent, root_objectid, owner,
6904 offset, 0,
6905 BTRFS_DROP_DELAYED_REF, NULL);
6906 }
6907 return ret;
6908}
6909
/*
 * When we wait for progress in block group caching it's because our
 * allocation attempt failed at least once.  Sleep until either the caching
 * thread is done with this block group or enough free space has shown up to
 * satisfy the allocation, then let the caller try again.
 *
 * Callers must check cache->cached for BTRFS_CACHE_ERROR before trusting
 * anything else in the block group.
 */
6924static noinline void
6925wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6926 u64 num_bytes)
6927{
6928 struct btrfs_caching_control *caching_ctl;
6929
6930 caching_ctl = get_caching_control(cache);
6931 if (!caching_ctl)
6932 return;
6933
6934 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
6935 (cache->free_space_ctl->free_space >= num_bytes));
6936
6937 put_caching_control(caching_ctl);
6938}
6939
6940static noinline int
6941wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6942{
6943 struct btrfs_caching_control *caching_ctl;
6944 int ret = 0;
6945
6946 caching_ctl = get_caching_control(cache);
6947 if (!caching_ctl)
6948 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
6949
6950 wait_event(caching_ctl->wait, block_group_cache_done(cache));
6951 if (cache->cached == BTRFS_CACHE_ERROR)
6952 ret = -EIO;
6953 put_caching_control(caching_ctl);
6954 return ret;
6955}
6956
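/*
 * Map a block group's allocation profile bits to the btrfs_raid_types index
 * used for the per-profile block group lists and the raid name table below.
 */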
6957int __get_raid_index(u64 flags)
6958{
6959 if (flags & BTRFS_BLOCK_GROUP_RAID10)
6960 return BTRFS_RAID_RAID10;
6961 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
6962 return BTRFS_RAID_RAID1;
6963 else if (flags & BTRFS_BLOCK_GROUP_DUP)
6964 return BTRFS_RAID_DUP;
6965 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
6966 return BTRFS_RAID_RAID0;
6967 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
6968 return BTRFS_RAID_RAID5;
6969 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
6970 return BTRFS_RAID_RAID6;
6971
6972 return BTRFS_RAID_SINGLE;
6973}
6974
6975int get_block_group_index(struct btrfs_block_group_cache *cache)
6976{
6977 return __get_raid_index(cache->flags);
6978}
6979
6980static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
6981 [BTRFS_RAID_RAID10] = "raid10",
6982 [BTRFS_RAID_RAID1] = "raid1",
6983 [BTRFS_RAID_DUP] = "dup",
6984 [BTRFS_RAID_RAID0] = "raid0",
6985 [BTRFS_RAID_SINGLE] = "single",
6986 [BTRFS_RAID_RAID5] = "raid5",
6987 [BTRFS_RAID_RAID6] = "raid6",
6988};
6989
6990static const char *get_raid_name(enum btrfs_raid_types type)
6991{
6992 if (type >= BTRFS_NR_RAID_TYPES)
6993 return NULL;
6994
6995 return btrfs_raid_type_names[type];
6996}
6997
6998enum btrfs_loop_type {
6999 LOOP_CACHING_NOWAIT = 0,
7000 LOOP_CACHING_WAIT = 1,
7001 LOOP_ALLOC_CHUNK = 2,
7002 LOOP_NO_EMPTY_SIZE = 3,
7003};
7004
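/*
 * Helpers for holding a block group across an allocation.  When the
 * allocation is for delalloc we also take cache->data_rwsem for read; it is
 * dropped again in btrfs_release_block_group().
 */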
7005static inline void
7006btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
7007 int delalloc)
7008{
7009 if (delalloc)
7010 down_read(&cache->data_rwsem);
7011}
7012
7013static inline void
7014btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
7015 int delalloc)
7016{
7017 btrfs_get_block_group(cache);
7018 if (delalloc)
7019 down_read(&cache->data_rwsem);
7020}
7021
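/*
 * Grab the block group currently backing a free space cluster.  We must not
 * block on data_rwsem while holding the cluster's refill_lock, so if the
 * trylock fails we drop the refill lock, take the rwsem, and retry, checking
 * that the cluster still points at the same block group.
 */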
7022static struct btrfs_block_group_cache *
7023btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7024 struct btrfs_free_cluster *cluster,
7025 int delalloc)
7026{
7027 struct btrfs_block_group_cache *used_bg = NULL;
7028 bool locked = false;
7029again:
7030 spin_lock(&cluster->refill_lock);
7031 if (locked) {
7032 if (used_bg == cluster->block_group)
7033 return used_bg;
7034
7035 up_read(&used_bg->data_rwsem);
7036 btrfs_put_block_group(used_bg);
7037 }
7038
7039 used_bg = cluster->block_group;
7040 if (!used_bg)
7041 return NULL;
7042
7043 if (used_bg == block_group)
7044 return used_bg;
7045
7046 btrfs_get_block_group(used_bg);
7047
7048 if (!delalloc)
7049 return used_bg;
7050
7051 if (down_read_trylock(&used_bg->data_rwsem))
7052 return used_bg;
7053
7054 spin_unlock(&cluster->refill_lock);
7055 down_read(&used_bg->data_rwsem);
7056 locked = true;
7057 goto again;
7058}
7059
7060static inline void
7061btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7062 int delalloc)
7063{
7064 if (delalloc)
7065 up_read(&cache->data_rwsem);
7066 btrfs_put_block_group(cache);
7067}
7068
/*
 * Walk all block groups of the requested type looking for a free extent of
 * @num_bytes.  On success the hole is recorded in @ins:
 *   ins->objectid == start of the allocation
 *   ins->offset   == number of bytes allocated
 *   ins->type     == BTRFS_EXTENT_ITEM_KEY
 *
 * If nothing suitable is found, -ENOSPC is returned and ins->offset is set
 * to the largest contiguous free extent we saw, so callers can retry with a
 * smaller size.
 */
7080static noinline int find_free_extent(struct btrfs_root *orig_root,
7081 u64 num_bytes, u64 empty_size,
7082 u64 hint_byte, struct btrfs_key *ins,
7083 u64 flags, int delalloc)
7084{
7085 int ret = 0;
7086 struct btrfs_root *root = orig_root->fs_info->extent_root;
7087 struct btrfs_free_cluster *last_ptr = NULL;
7088 struct btrfs_block_group_cache *block_group = NULL;
7089 u64 search_start = 0;
7090 u64 max_extent_size = 0;
7091 u64 empty_cluster = 0;
7092 struct btrfs_space_info *space_info;
7093 int loop = 0;
7094 int index = __get_raid_index(flags);
7095 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
7096 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
7097 bool failed_cluster_refill = false;
7098 bool failed_alloc = false;
7099 bool use_cluster = true;
7100 bool have_caching_bg = false;
7101 bool orig_have_caching_bg = false;
7102 bool full_search = false;
7103
7104 WARN_ON(num_bytes < root->sectorsize);
7105 ins->type = BTRFS_EXTENT_ITEM_KEY;
7106 ins->objectid = 0;
7107 ins->offset = 0;
7108
7109 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
7110
7111 space_info = __find_space_info(root->fs_info, flags);
7112 if (!space_info) {
7113 btrfs_err(root->fs_info, "No space info for %llu", flags);
7114 return -ENOSPC;
7115 }
7116
	/*
	 * If a previous search recorded the largest free extent seen
	 * (max_extent_size) and our request is bigger than that, the space is
	 * too fragmented to satisfy us; return -ENOSPC right away and report
	 * that size.  If a max_extent_size is recorded at all, skip the
	 * cluster allocator, since building a cluster is unlikely to succeed.
	 */
7127 if (unlikely(space_info->max_extent_size)) {
7128 spin_lock(&space_info->lock);
7129 if (space_info->max_extent_size &&
7130 num_bytes > space_info->max_extent_size) {
7131 ins->offset = space_info->max_extent_size;
7132 spin_unlock(&space_info->lock);
7133 return -ENOSPC;
7134 } else if (space_info->max_extent_size) {
7135 use_cluster = false;
7136 }
7137 spin_unlock(&space_info->lock);
7138 }
7139
7140 last_ptr = fetch_cluster_info(orig_root, space_info, &empty_cluster);
7141 if (last_ptr) {
7142 spin_lock(&last_ptr->lock);
7143 if (last_ptr->block_group)
7144 hint_byte = last_ptr->window_start;
7145 if (last_ptr->fragmented) {
7146
7147
7148
7149
7150
7151 hint_byte = last_ptr->window_start;
7152 use_cluster = false;
7153 }
7154 spin_unlock(&last_ptr->lock);
7155 }
7156
7157 search_start = max(search_start, first_logical_byte(root, 0));
7158 search_start = max(search_start, hint_byte);
7159 if (search_start == hint_byte) {
7160 block_group = btrfs_lookup_block_group(root->fs_info,
7161 search_start);
7162
7163
7164
7165
7166
7167
7168
7169 if (block_group && block_group_bits(block_group, flags) &&
7170 block_group->cached != BTRFS_CACHE_NO) {
7171 down_read(&space_info->groups_sem);
7172 if (list_empty(&block_group->list) ||
7173 block_group->ro) {
				/*
				 * This block group is being removed or has
				 * gone read-only, so we can't jump to
				 * have_block_group: our list pointers would
				 * not be valid for the search loop below.
				 */
7180 btrfs_put_block_group(block_group);
7181 up_read(&space_info->groups_sem);
7182 } else {
7183 index = get_block_group_index(block_group);
7184 btrfs_lock_block_group(block_group, delalloc);
7185 goto have_block_group;
7186 }
7187 } else if (block_group) {
7188 btrfs_put_block_group(block_group);
7189 }
7190 }
7191search:
7192 have_caching_bg = false;
7193 if (index == 0 || index == __get_raid_index(flags))
7194 full_search = true;
7195 down_read(&space_info->groups_sem);
7196 list_for_each_entry(block_group, &space_info->block_groups[index],
7197 list) {
7198 u64 offset;
7199 int cached;
7200
7201 btrfs_grab_block_group(block_group, delalloc);
7202 search_start = block_group->key.objectid;
7203
7204
7205
7206
7207
7208
7209 if (!block_group_bits(block_group, flags)) {
7210 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7211 BTRFS_BLOCK_GROUP_RAID1 |
7212 BTRFS_BLOCK_GROUP_RAID5 |
7213 BTRFS_BLOCK_GROUP_RAID6 |
7214 BTRFS_BLOCK_GROUP_RAID10;
7215
7216
7217
7218
7219
7220
7221 if ((flags & extra) && !(block_group->flags & extra))
7222 goto loop;
7223 }
7224
7225have_block_group:
7226 cached = block_group_cache_done(block_group);
7227 if (unlikely(!cached)) {
7228 have_caching_bg = true;
7229 ret = cache_block_group(block_group, 0);
7230 BUG_ON(ret < 0);
7231 ret = 0;
7232 }
7233
7234 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
7235 goto loop;
7236 if (unlikely(block_group->ro))
7237 goto loop;
7238
7239
7240
7241
7242
7243 if (last_ptr && use_cluster) {
7244 struct btrfs_block_group_cache *used_block_group;
7245 unsigned long aligned_cluster;
7246
7247
7248
7249
7250 used_block_group = btrfs_lock_cluster(block_group,
7251 last_ptr,
7252 delalloc);
7253 if (!used_block_group)
7254 goto refill_cluster;
7255
7256 if (used_block_group != block_group &&
7257 (used_block_group->ro ||
7258 !block_group_bits(used_block_group, flags)))
7259 goto release_cluster;
7260
7261 offset = btrfs_alloc_from_cluster(used_block_group,
7262 last_ptr,
7263 num_bytes,
7264 used_block_group->key.objectid,
7265 &max_extent_size);
7266 if (offset) {
7267
7268 spin_unlock(&last_ptr->refill_lock);
7269 trace_btrfs_reserve_extent_cluster(root,
7270 used_block_group,
7271 search_start, num_bytes);
7272 if (used_block_group != block_group) {
7273 btrfs_release_block_group(block_group,
7274 delalloc);
7275 block_group = used_block_group;
7276 }
7277 goto checks;
7278 }
7279
7280 WARN_ON(last_ptr->block_group != used_block_group);
7281release_cluster:
			/*
			 * If we are at LOOP_NO_EMPTY_SIZE we can't set up a
			 * new cluster, so skip straight to the unclustered
			 * allocator.  Reaching this point means the cluster
			 * allocator was tried plenty of times without luck,
			 * so we are probably too fragmented for clustering
			 * to help anyway.
			 *
			 * However, if the cluster was taken from the current
			 * block group, release the cluster lock before we
			 * unlock the block group to avoid a deadlock.
			 */
7297 if (loop >= LOOP_NO_EMPTY_SIZE &&
7298 used_block_group != block_group) {
7299 spin_unlock(&last_ptr->refill_lock);
7300 btrfs_release_block_group(used_block_group,
7301 delalloc);
7302 goto unclustered_alloc;
7303 }
7304
7305
7306
7307
7308
7309 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7310
7311 if (used_block_group != block_group)
7312 btrfs_release_block_group(used_block_group,
7313 delalloc);
7314refill_cluster:
7315 if (loop >= LOOP_NO_EMPTY_SIZE) {
7316 spin_unlock(&last_ptr->refill_lock);
7317 goto unclustered_alloc;
7318 }
7319
7320 aligned_cluster = max_t(unsigned long,
7321 empty_cluster + empty_size,
7322 block_group->full_stripe_len);
7323
7324
7325 ret = btrfs_find_space_cluster(root, block_group,
7326 last_ptr, search_start,
7327 num_bytes,
7328 aligned_cluster);
7329 if (ret == 0) {
7330
7331
7332
7333
7334 offset = btrfs_alloc_from_cluster(block_group,
7335 last_ptr,
7336 num_bytes,
7337 search_start,
7338 &max_extent_size);
7339 if (offset) {
7340
7341 spin_unlock(&last_ptr->refill_lock);
7342 trace_btrfs_reserve_extent_cluster(root,
7343 block_group, search_start,
7344 num_bytes);
7345 goto checks;
7346 }
7347 } else if (!cached && loop > LOOP_CACHING_NOWAIT
7348 && !failed_cluster_refill) {
7349 spin_unlock(&last_ptr->refill_lock);
7350
7351 failed_cluster_refill = true;
7352 wait_block_group_cache_progress(block_group,
7353 num_bytes + empty_cluster + empty_size);
7354 goto have_block_group;
7355 }
7356
			/*
			 * At this point we either didn't find a cluster or we
			 * weren't able to allocate from it.  Free the cluster
			 * we've been trying to use and go on to the next
			 * block group.
			 */
7363 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7364 spin_unlock(&last_ptr->refill_lock);
7365 goto loop;
7366 }
7367
7368unclustered_alloc:
		/*
		 * We are doing an unclustered allocation; set the fragmented
		 * flag on the cluster so we don't bother trying to set it up
		 * again until more space shows up.
		 */
7374 if (unlikely(last_ptr)) {
7375 spin_lock(&last_ptr->lock);
7376 last_ptr->fragmented = 1;
7377 spin_unlock(&last_ptr->lock);
7378 }
7379 spin_lock(&block_group->free_space_ctl->tree_lock);
7380 if (cached &&
7381 block_group->free_space_ctl->free_space <
7382 num_bytes + empty_cluster + empty_size) {
7383 if (block_group->free_space_ctl->free_space >
7384 max_extent_size)
7385 max_extent_size =
7386 block_group->free_space_ctl->free_space;
7387 spin_unlock(&block_group->free_space_ctl->tree_lock);
7388 goto loop;
7389 }
7390 spin_unlock(&block_group->free_space_ctl->tree_lock);
7391
7392 offset = btrfs_find_space_for_alloc(block_group, search_start,
7393 num_bytes, empty_size,
7394 &max_extent_size);
7395
		/*
		 * If we didn't find an extent, and we haven't failed on this
		 * block group before, and the block group is still caching
		 * and we are willing to wait, wait for progress and then
		 * retry with failed_alloc set.
		 *
		 * If failed_alloc is already true we have waited here once
		 * and should move on to the next block group.
		 */
7404 if (!offset && !failed_alloc && !cached &&
7405 loop > LOOP_CACHING_NOWAIT) {
7406 wait_block_group_cache_progress(block_group,
7407 num_bytes + empty_size);
7408 failed_alloc = true;
7409 goto have_block_group;
7410 } else if (!offset) {
7411 goto loop;
7412 }
7413checks:
7414 search_start = ALIGN(offset, root->stripesize);
7415
7416
7417 if (search_start + num_bytes >
7418 block_group->key.objectid + block_group->key.offset) {
7419 btrfs_add_free_space(block_group, offset, num_bytes);
7420 goto loop;
7421 }
7422
7423 if (offset < search_start)
7424 btrfs_add_free_space(block_group, offset,
7425 search_start - offset);
7426 BUG_ON(offset > search_start);
7427
7428 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
7429 alloc_type, delalloc);
7430 if (ret == -EAGAIN) {
7431 btrfs_add_free_space(block_group, offset, num_bytes);
7432 goto loop;
7433 }
7434
7435
7436 ins->objectid = search_start;
7437 ins->offset = num_bytes;
7438
7439 trace_btrfs_reserve_extent(orig_root, block_group,
7440 search_start, num_bytes);
7441 btrfs_release_block_group(block_group, delalloc);
7442 break;
7443loop:
7444 failed_cluster_refill = false;
7445 failed_alloc = false;
7446 BUG_ON(index != get_block_group_index(block_group));
7447 btrfs_release_block_group(block_group, delalloc);
7448 }
7449 up_read(&space_info->groups_sem);
7450
7451 if ((loop == LOOP_CACHING_NOWAIT) && have_caching_bg
7452 && !orig_have_caching_bg)
7453 orig_have_caching_bg = true;
7454
7455 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
7456 goto search;
7457
7458 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
7459 goto search;
7460
	/*
	 * LOOP_CACHING_NOWAIT: search partially cached block groups, kicking
	 *	off caching kthreads as we go.
	 * LOOP_CACHING_WAIT: search everything, waiting for caching to finish.
	 * LOOP_ALLOC_CHUNK: force a chunk allocation and try again.
	 * LOOP_NO_EMPTY_SIZE: drop empty_size and empty_cluster and try one
	 *	last time.
	 */
7469 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
7470 index = 0;
7471 if (loop == LOOP_CACHING_NOWAIT) {
7472
7473
7474
7475
7476
7477 if (orig_have_caching_bg || !full_search)
7478 loop = LOOP_CACHING_WAIT;
7479 else
7480 loop = LOOP_ALLOC_CHUNK;
7481 } else {
7482 loop++;
7483 }
7484
7485 if (loop == LOOP_ALLOC_CHUNK) {
7486 struct btrfs_trans_handle *trans;
7487 int exist = 0;
7488
7489 trans = current->journal_info;
7490 if (trans)
7491 exist = 1;
7492 else
7493 trans = btrfs_join_transaction(root);
7494
7495 if (IS_ERR(trans)) {
7496 ret = PTR_ERR(trans);
7497 goto out;
7498 }
7499
7500 ret = do_chunk_alloc(trans, root, flags,
7501 CHUNK_ALLOC_FORCE);
7502
			/*
			 * If we can't allocate a new chunk we've already been
			 * through the block groups at least once, so move
			 * straight on to the NO_EMPTY_SIZE pass.
			 */
7508 if (ret == -ENOSPC)
7509 loop = LOOP_NO_EMPTY_SIZE;
7510
7511
7512
7513
7514
7515 if (ret < 0 && ret != -ENOSPC)
7516 btrfs_abort_transaction(trans,
7517 root, ret);
7518 else
7519 ret = 0;
7520 if (!exist)
7521 btrfs_end_transaction(trans, root);
7522 if (ret)
7523 goto out;
7524 }
7525
7526 if (loop == LOOP_NO_EMPTY_SIZE) {
			/*
			 * Don't loop again if we already have no empty_size
			 * and no empty_cluster.
			 */
7531 if (empty_size == 0 &&
7532 empty_cluster == 0) {
7533 ret = -ENOSPC;
7534 goto out;
7535 }
7536 empty_size = 0;
7537 empty_cluster = 0;
7538 }
7539
7540 goto search;
7541 } else if (!ins->objectid) {
7542 ret = -ENOSPC;
7543 } else if (ins->objectid) {
7544 if (!use_cluster && last_ptr) {
7545 spin_lock(&last_ptr->lock);
7546 last_ptr->window_start = ins->objectid;
7547 spin_unlock(&last_ptr->lock);
7548 }
7549 ret = 0;
7550 }
7551out:
7552 if (ret == -ENOSPC) {
7553 spin_lock(&space_info->lock);
7554 space_info->max_extent_size = max_extent_size;
7555 spin_unlock(&space_info->lock);
7556 ins->offset = max_extent_size;
7557 }
7558 return ret;
7559}
7560
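/*
 * Print the counters of a space_info, and optionally of every block group in
 * it, to the kernel log.  Used for ENOSPC debugging.
 */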
7561static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
7562 int dump_block_groups)
7563{
7564 struct btrfs_block_group_cache *cache;
7565 int index = 0;
7566
7567 spin_lock(&info->lock);
7568 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
7569 info->flags,
7570 info->total_bytes - info->bytes_used - info->bytes_pinned -
7571 info->bytes_reserved - info->bytes_readonly,
7572 (info->full) ? "" : "not ");
7573 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
7574 "reserved=%llu, may_use=%llu, readonly=%llu\n",
7575 info->total_bytes, info->bytes_used, info->bytes_pinned,
7576 info->bytes_reserved, info->bytes_may_use,
7577 info->bytes_readonly);
7578 spin_unlock(&info->lock);
7579
7580 if (!dump_block_groups)
7581 return;
7582
7583 down_read(&info->groups_sem);
7584again:
7585 list_for_each_entry(cache, &info->block_groups[index], list) {
7586 spin_lock(&cache->lock);
7587 printk(KERN_INFO "BTRFS: "
7588 "block group %llu has %llu bytes, "
7589 "%llu used %llu pinned %llu reserved %s\n",
7590 cache->key.objectid, cache->key.offset,
7591 btrfs_block_group_used(&cache->item), cache->pinned,
7592 cache->reserved, cache->ro ? "[readonly]" : "");
7593 btrfs_dump_free_space(cache, bytes);
7594 spin_unlock(&cache->lock);
7595 }
7596 if (++index < BTRFS_NR_RAID_TYPES)
7597 goto again;
7598 up_read(&info->groups_sem);
7599}
7600
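/*
 * Reserve an extent of at least min_alloc_size and at most num_bytes.  On
 * -ENOSPC the request is halved (rounded down to the sector size, but never
 * below min_alloc_size) and retried before giving up.
 */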
7601int btrfs_reserve_extent(struct btrfs_root *root,
7602 u64 num_bytes, u64 min_alloc_size,
7603 u64 empty_size, u64 hint_byte,
7604 struct btrfs_key *ins, int is_data, int delalloc)
7605{
7606 bool final_tried = num_bytes == min_alloc_size;
7607 u64 flags;
7608 int ret;
7609
7610 flags = btrfs_get_alloc_profile(root, is_data);
7611again:
7612 WARN_ON(num_bytes < root->sectorsize);
7613 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
7614 flags, delalloc);
7615
7616 if (ret == -ENOSPC) {
7617 if (!final_tried && ins->offset) {
7618 num_bytes = min(num_bytes >> 1, ins->offset);
7619 num_bytes = round_down(num_bytes, root->sectorsize);
7620 num_bytes = max(num_bytes, min_alloc_size);
7621 if (num_bytes == min_alloc_size)
7622 final_tried = true;
7623 goto again;
7624 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
7625 struct btrfs_space_info *sinfo;
7626
7627 sinfo = __find_space_info(root->fs_info, flags);
7628 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
7629 flags, num_bytes);
7630 if (sinfo)
7631 dump_space_info(sinfo, num_bytes, 1);
7632 }
7633 }
7634
7635 return ret;
7636}
7637
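/*
 * Hand a reserved but unused extent back to the allocator: either pin it
 * until the transaction commits, or return it to the free space cache right
 * away (discarding it first when the discard mount option is enabled).
 */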
7638static int __btrfs_free_reserved_extent(struct btrfs_root *root,
7639 u64 start, u64 len,
7640 int pin, int delalloc)
7641{
7642 struct btrfs_block_group_cache *cache;
7643 int ret = 0;
7644
7645 cache = btrfs_lookup_block_group(root->fs_info, start);
7646 if (!cache) {
7647 btrfs_err(root->fs_info, "Unable to find block group for %llu",
7648 start);
7649 return -ENOSPC;
7650 }
7651
7652 if (pin)
7653 pin_down_extent(root, cache, start, len, 1);
7654 else {
7655 if (btrfs_test_opt(root, DISCARD))
7656 ret = btrfs_discard_extent(root, start, len, NULL);
7657 btrfs_add_free_space(cache, start, len);
7658 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
7659 }
7660
7661 btrfs_put_block_group(cache);
7662
7663 trace_btrfs_reserved_extent_free(root, start, len);
7664
7665 return ret;
7666}
7667
7668int btrfs_free_reserved_extent(struct btrfs_root *root,
7669 u64 start, u64 len, int delalloc)
7670{
7671 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
7672}
7673
7674int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
7675 u64 start, u64 len)
7676{
7677 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
7678}
7679
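/*
 * Insert the extent item plus an inline data backref (shared or keyed,
 * depending on whether a parent was given) for a newly allocated data
 * extent, then update the free space tree and the block group accounting.
 */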
7680static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7681 struct btrfs_root *root,
7682 u64 parent, u64 root_objectid,
7683 u64 flags, u64 owner, u64 offset,
7684 struct btrfs_key *ins, int ref_mod)
7685{
7686 int ret;
7687 struct btrfs_fs_info *fs_info = root->fs_info;
7688 struct btrfs_extent_item *extent_item;
7689 struct btrfs_extent_inline_ref *iref;
7690 struct btrfs_path *path;
7691 struct extent_buffer *leaf;
7692 int type;
7693 u32 size;
7694
7695 if (parent > 0)
7696 type = BTRFS_SHARED_DATA_REF_KEY;
7697 else
7698 type = BTRFS_EXTENT_DATA_REF_KEY;
7699
7700 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
7701
7702 path = btrfs_alloc_path();
7703 if (!path)
7704 return -ENOMEM;
7705
7706 path->leave_spinning = 1;
7707 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7708 ins, size);
7709 if (ret) {
7710 btrfs_free_path(path);
7711 return ret;
7712 }
7713
7714 leaf = path->nodes[0];
7715 extent_item = btrfs_item_ptr(leaf, path->slots[0],
7716 struct btrfs_extent_item);
7717 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
7718 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
7719 btrfs_set_extent_flags(leaf, extent_item,
7720 flags | BTRFS_EXTENT_FLAG_DATA);
7721
7722 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
7723 btrfs_set_extent_inline_ref_type(leaf, iref, type);
7724 if (parent > 0) {
7725 struct btrfs_shared_data_ref *ref;
7726 ref = (struct btrfs_shared_data_ref *)(iref + 1);
7727 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
7728 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
7729 } else {
7730 struct btrfs_extent_data_ref *ref;
7731 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
7732 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
7733 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
7734 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
7735 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
7736 }
7737
7738 btrfs_mark_buffer_dirty(path->nodes[0]);
7739 btrfs_free_path(path);
7740
7741 ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
7742 ins->offset);
7743 if (ret)
7744 return ret;
7745
7746 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
7747 if (ret) {
7748 btrfs_err(fs_info, "update block group failed for %llu %llu",
7749 ins->objectid, ins->offset);
7750 BUG();
7751 }
7752 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
7753 return ret;
7754}
7755
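/*
 * Like alloc_reserved_file_extent() but for tree blocks: insert the extent
 * item with an inline tree block backref.  The btrfs_tree_block_info part is
 * omitted when the skinny metadata incompat feature is enabled.
 */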
7756static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7757 struct btrfs_root *root,
7758 u64 parent, u64 root_objectid,
7759 u64 flags, struct btrfs_disk_key *key,
7760 int level, struct btrfs_key *ins)
7761{
7762 int ret;
7763 struct btrfs_fs_info *fs_info = root->fs_info;
7764 struct btrfs_extent_item *extent_item;
7765 struct btrfs_tree_block_info *block_info;
7766 struct btrfs_extent_inline_ref *iref;
7767 struct btrfs_path *path;
7768 struct extent_buffer *leaf;
7769 u32 size = sizeof(*extent_item) + sizeof(*iref);
7770 u64 num_bytes = ins->offset;
7771 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
7772 SKINNY_METADATA);
7773
7774 if (!skinny_metadata)
7775 size += sizeof(*block_info);
7776
7777 path = btrfs_alloc_path();
7778 if (!path) {
7779 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7780 root->nodesize);
7781 return -ENOMEM;
7782 }
7783
7784 path->leave_spinning = 1;
7785 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7786 ins, size);
7787 if (ret) {
7788 btrfs_free_path(path);
7789 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7790 root->nodesize);
7791 return ret;
7792 }
7793
7794 leaf = path->nodes[0];
7795 extent_item = btrfs_item_ptr(leaf, path->slots[0],
7796 struct btrfs_extent_item);
7797 btrfs_set_extent_refs(leaf, extent_item, 1);
7798 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
7799 btrfs_set_extent_flags(leaf, extent_item,
7800 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
7801
7802 if (skinny_metadata) {
7803 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
7804 num_bytes = root->nodesize;
7805 } else {
7806 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
7807 btrfs_set_tree_block_key(leaf, block_info, key);
7808 btrfs_set_tree_block_level(leaf, block_info, level);
7809 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
7810 }
7811
7812 if (parent > 0) {
7813 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
7814 btrfs_set_extent_inline_ref_type(leaf, iref,
7815 BTRFS_SHARED_BLOCK_REF_KEY);
7816 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
7817 } else {
7818 btrfs_set_extent_inline_ref_type(leaf, iref,
7819 BTRFS_TREE_BLOCK_REF_KEY);
7820 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
7821 }
7822
7823 btrfs_mark_buffer_dirty(leaf);
7824 btrfs_free_path(path);
7825
7826 ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
7827 num_bytes);
7828 if (ret)
7829 return ret;
7830
7831 ret = update_block_group(trans, root, ins->objectid, root->nodesize,
7832 1);
7833 if (ret) {
7834 btrfs_err(fs_info, "update block group failed for %llu %llu",
7835 ins->objectid, ins->offset);
7836 BUG();
7837 }
7838
7839 trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize);
7840 return ret;
7841}
7842
7843int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7844 struct btrfs_root *root,
7845 u64 root_objectid, u64 owner,
7846 u64 offset, u64 ram_bytes,
7847 struct btrfs_key *ins)
7848{
7849 int ret;
7850
7851 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
7852
7853 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
7854 ins->offset, 0,
7855 root_objectid, owner, offset,
7856 ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
7857 NULL);
7858 return ret;
7859}
7860
/*
 * Used by the tree log recovery code.  It re-reserves space for an extent
 * recorded in the log and inserts the extent item and backref for it, as if
 * it had just been allocated.
 */
7866int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7867 struct btrfs_root *root,
7868 u64 root_objectid, u64 owner, u64 offset,
7869 struct btrfs_key *ins)
7870{
7871 int ret;
7872 struct btrfs_block_group_cache *block_group;
7873
	/*
	 * Mixed block groups exclude logged extents before processing the
	 * log, so the exclude dance is only needed on non-mixed filesystems.
	 */
7878 if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
7879 ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
7880 if (ret)
7881 return ret;
7882 }
7883
7884 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
7885 if (!block_group)
7886 return -EINVAL;
7887
7888 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
7889 RESERVE_ALLOC_NO_ACCOUNT, 0);
7890 BUG_ON(ret);
7891 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
7892 0, owner, offset, ins, 1);
7893 btrfs_put_block_group(block_group);
7894 return ret;
7895}
7896
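/*
 * Initialise a freshly allocated tree block: find or create the extent
 * buffer, lock it, stamp the transid and mark it dirty in the appropriate io
 * tree (dirty_log_pages for log trees, the transaction's dirty_pages
 * otherwise).
 */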
7897static struct extent_buffer *
7898btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7899 u64 bytenr, int level)
7900{
7901 struct extent_buffer *buf;
7902
7903 buf = btrfs_find_create_tree_block(root, bytenr);
7904 if (!buf)
7905 return ERR_PTR(-ENOMEM);
7906 btrfs_set_header_generation(buf, trans->transid);
7907 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
7908 btrfs_tree_lock(buf);
7909 clean_tree_block(trans, root->fs_info, buf);
7910 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
7911
7912 btrfs_set_lock_blocking(buf);
7913 set_extent_buffer_uptodate(buf);
7914
7915 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
7916 buf->log_index = root->log_transid % 2;
		/*
		 * We allow two log transactions at a time; use a different
		 * extent bit (DIRTY vs. NEW) for each so the dirty pages of
		 * the two logs can be told apart.
		 */
7921 if (buf->log_index == 0)
7922 set_extent_dirty(&root->dirty_log_pages, buf->start,
7923 buf->start + buf->len - 1, GFP_NOFS);
7924 else
7925 set_extent_new(&root->dirty_log_pages, buf->start,
7926 buf->start + buf->len - 1, GFP_NOFS);
7927 } else {
7928 buf->log_index = -1;
7929 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
7930 buf->start + buf->len - 1, GFP_NOFS);
7931 }
7932 trans->blocks_used++;
7933
7934 return buf;
7935}
7936
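/*
 * Pick the block reserve a new tree block should be charged to: try the
 * root's reserve (refreshing the global reserve once if that is the one in
 * use), then fall back to an explicit no-flush reservation, and finally
 * steal from the global reserve when the space_info matches.
 */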
7937static struct btrfs_block_rsv *
7938use_block_rsv(struct btrfs_trans_handle *trans,
7939 struct btrfs_root *root, u32 blocksize)
7940{
7941 struct btrfs_block_rsv *block_rsv;
7942 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
7943 int ret;
7944 bool global_updated = false;
7945
7946 block_rsv = get_block_rsv(trans, root);
7947
7948 if (unlikely(block_rsv->size == 0))
7949 goto try_reserve;
7950again:
7951 ret = block_rsv_use_bytes(block_rsv, blocksize);
7952 if (!ret)
7953 return block_rsv;
7954
7955 if (block_rsv->failfast)
7956 return ERR_PTR(ret);
7957
7958 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
7959 global_updated = true;
7960 update_global_block_rsv(root->fs_info);
7961 goto again;
7962 }
7963
7964 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
7965 static DEFINE_RATELIMIT_STATE(_rs,
7966 DEFAULT_RATELIMIT_INTERVAL * 10,
7967 1);
7968 if (__ratelimit(&_rs))
7969 WARN(1, KERN_DEBUG
7970 "BTRFS: block rsv returned %d\n", ret);
7971 }
7972try_reserve:
7973 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
7974 BTRFS_RESERVE_NO_FLUSH);
7975 if (!ret)
7976 return block_rsv;
7977
	/*
	 * If we couldn't reserve metadata bytes, try to use some from the
	 * global reserve, provided its space_info matches ours.
	 */
7982 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
7983 block_rsv->space_info == global_rsv->space_info) {
7984 ret = block_rsv_use_bytes(global_rsv, blocksize);
7985 if (!ret)
7986 return global_rsv;
7987 }
7988 return ERR_PTR(ret);
7989}
7990
7991static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
7992 struct btrfs_block_rsv *block_rsv, u32 blocksize)
7993{
7994 block_rsv_add_bytes(block_rsv, blocksize, 0);
7995 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
7996}
7997
/*
 * Find a free extent and do all the dirty work required for the allocation;
 * returns the locked tree buffer or an ERR_PTR on error.
 */
8002struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8003 struct btrfs_root *root,
8004 u64 parent, u64 root_objectid,
8005 struct btrfs_disk_key *key, int level,
8006 u64 hint, u64 empty_size)
8007{
8008 struct btrfs_key ins;
8009 struct btrfs_block_rsv *block_rsv;
8010 struct extent_buffer *buf;
8011 struct btrfs_delayed_extent_op *extent_op;
8012 u64 flags = 0;
8013 int ret;
8014 u32 blocksize = root->nodesize;
8015 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
8016 SKINNY_METADATA);
8017
8018 if (btrfs_test_is_dummy_root(root)) {
8019 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
8020 level);
8021 if (!IS_ERR(buf))
8022 root->alloc_bytenr += blocksize;
8023 return buf;
8024 }
8025
8026 block_rsv = use_block_rsv(trans, root, blocksize);
8027 if (IS_ERR(block_rsv))
8028 return ERR_CAST(block_rsv);
8029
8030 ret = btrfs_reserve_extent(root, blocksize, blocksize,
8031 empty_size, hint, &ins, 0, 0);
8032 if (ret)
8033 goto out_unuse;
8034
8035 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
8036 if (IS_ERR(buf)) {
8037 ret = PTR_ERR(buf);
8038 goto out_free_reserved;
8039 }
8040
8041 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
8042 if (parent == 0)
8043 parent = ins.objectid;
8044 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8045 } else
8046 BUG_ON(parent > 0);
8047
8048 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
8049 extent_op = btrfs_alloc_delayed_extent_op();
8050 if (!extent_op) {
8051 ret = -ENOMEM;
8052 goto out_free_buf;
8053 }
8054 if (key)
8055 memcpy(&extent_op->key, key, sizeof(extent_op->key));
8056 else
8057 memset(&extent_op->key, 0, sizeof(extent_op->key));
8058 extent_op->flags_to_set = flags;
8059 extent_op->update_key = skinny_metadata ? false : true;
8060 extent_op->update_flags = true;
8061 extent_op->is_data = false;
8062 extent_op->level = level;
8063
8064 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
8065 ins.objectid, ins.offset,
8066 parent, root_objectid, level,
8067 BTRFS_ADD_DELAYED_EXTENT,
8068 extent_op);
8069 if (ret)
8070 goto out_free_delayed;
8071 }
8072 return buf;
8073
8074out_free_delayed:
8075 btrfs_free_delayed_extent_op(extent_op);
8076out_free_buf:
8077 free_extent_buffer(buf);
8078out_free_reserved:
8079 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
8080out_unuse:
8081 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
8082 return ERR_PTR(ret);
8083}
8084
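/*
 * State carried across the snapshot deletion walk.  refs[] and flags[] cache
 * the extent reference count and flags for every level of the path, and
 * stage flips between DROP_REFERENCE and UPDATE_BACKREF as shared subtrees
 * are encountered.
 */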
8085struct walk_control {
8086 u64 refs[BTRFS_MAX_LEVEL];
8087 u64 flags[BTRFS_MAX_LEVEL];
8088 struct btrfs_key update_progress;
8089 int stage;
8090 int level;
8091 int shared_level;
8092 int update_ref;
8093 int keep_locks;
8094 int reada_slot;
8095 int reada_count;
8096 int for_reloc;
8097};
8098
8099#define DROP_REFERENCE 1
8100#define UPDATE_BACKREF 2
8101
8102static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
8103 struct btrfs_root *root,
8104 struct walk_control *wc,
8105 struct btrfs_path *path)
8106{
8107 u64 bytenr;
8108 u64 generation;
8109 u64 refs;
8110 u64 flags;
8111 u32 nritems;
8112 u32 blocksize;
8113 struct btrfs_key key;
8114 struct extent_buffer *eb;
8115 int ret;
8116 int slot;
8117 int nread = 0;
8118
8119 if (path->slots[wc->level] < wc->reada_slot) {
8120 wc->reada_count = wc->reada_count * 2 / 3;
8121 wc->reada_count = max(wc->reada_count, 2);
8122 } else {
8123 wc->reada_count = wc->reada_count * 3 / 2;
8124 wc->reada_count = min_t(int, wc->reada_count,
8125 BTRFS_NODEPTRS_PER_BLOCK(root));
8126 }
8127
8128 eb = path->nodes[wc->level];
8129 nritems = btrfs_header_nritems(eb);
8130 blocksize = root->nodesize;
8131
8132 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
8133 if (nread >= wc->reada_count)
8134 break;
8135
8136 cond_resched();
8137 bytenr = btrfs_node_blockptr(eb, slot);
8138 generation = btrfs_node_ptr_generation(eb, slot);
8139
8140 if (slot == path->slots[wc->level])
8141 goto reada;
8142
8143 if (wc->stage == UPDATE_BACKREF &&
8144 generation <= root->root_key.offset)
8145 continue;
8146
		/* We don't lock the tree block, so this lookup can be racy */
8148 ret = btrfs_lookup_extent_info(trans, root, bytenr,
8149 wc->level - 1, 1, &refs,
8150 &flags);
8151
8152 if (ret < 0)
8153 continue;
8154 BUG_ON(refs == 0);
8155
8156 if (wc->stage == DROP_REFERENCE) {
8157 if (refs == 1)
8158 goto reada;
8159
8160 if (wc->level == 1 &&
8161 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8162 continue;
8163 if (!wc->update_ref ||
8164 generation <= root->root_key.offset)
8165 continue;
8166 btrfs_node_key_to_cpu(eb, &key, slot);
8167 ret = btrfs_comp_cpu_keys(&key,
8168 &wc->update_progress);
8169 if (ret < 0)
8170 continue;
8171 } else {
8172 if (wc->level == 1 &&
8173 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8174 continue;
8175 }
8176reada:
8177 readahead_tree_block(root, bytenr);
8178 nread++;
8179 }
8180 wc->reada_slot = slot;
8181}
8182
/*
 * Extents touched while walking a shared subtree may never be seen by the
 * usual inc/dec ref code, so queue qgroup extent records for them here.
 */
8187static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
8188 struct btrfs_root *root, u64 bytenr,
8189 u64 num_bytes)
8190{
8191 struct btrfs_qgroup_extent_record *qrecord;
8192 struct btrfs_delayed_ref_root *delayed_refs;
8193
8194 qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
8195 if (!qrecord)
8196 return -ENOMEM;
8197
8198 qrecord->bytenr = bytenr;
8199 qrecord->num_bytes = num_bytes;
8200 qrecord->old_roots = NULL;
8201
8202 delayed_refs = &trans->transaction->delayed_refs;
8203 spin_lock(&delayed_refs->lock);
8204 if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
8205 kfree(qrecord);
8206 spin_unlock(&delayed_refs->lock);
8207
8208 return 0;
8209}
8210
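/*
 * Queue a qgroup extent record for every data extent referenced from this
 * leaf so the qgroup code can account them when delayed refs are run.
 */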
8211static int account_leaf_items(struct btrfs_trans_handle *trans,
8212 struct btrfs_root *root,
8213 struct extent_buffer *eb)
8214{
8215 int nr = btrfs_header_nritems(eb);
8216 int i, extent_type, ret;
8217 struct btrfs_key key;
8218 struct btrfs_file_extent_item *fi;
8219 u64 bytenr, num_bytes;
8220
8221
8222 if (!root->fs_info->quota_enabled)
8223 return 0;
8224
8225 for (i = 0; i < nr; i++) {
8226 btrfs_item_key_to_cpu(eb, &key, i);
8227
8228 if (key.type != BTRFS_EXTENT_DATA_KEY)
8229 continue;
8230
8231 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
8232
8233 extent_type = btrfs_file_extent_type(eb, fi);
8234
8235 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
8236 continue;
8237
8238 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8239 if (!bytenr)
8240 continue;
8241
8242 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8243
8244 ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
8245 if (ret)
8246 return ret;
8247 }
8248 return 0;
8249}
8250
/*
 * Walk up the tree from the bottom, freeing leaves and any interior nodes
 * which have had all of their slots visited.  If a node (leaf or interior)
 * is freed, the node above it has its slot incremented.  The root node is
 * never freed.
 *
 * At the end of this function the path has every slot incremented to the
 * next position for a search.  If we need to read a new node it will be
 * NULL and the node above it will have the correct slot selected for a
 * later read.
 *
 * If we increment the root node's slot counter past the number of elements,
 * 1 is returned to signal completion of the search.
 */
8265static int adjust_slots_upwards(struct btrfs_root *root,
8266 struct btrfs_path *path, int root_level)
8267{
8268 int level = 0;
8269 int nr, slot;
8270 struct extent_buffer *eb;
8271
8272 if (root_level == 0)
8273 return 1;
8274
8275 while (level <= root_level) {
8276 eb = path->nodes[level];
8277 nr = btrfs_header_nritems(eb);
8278 path->slots[level]++;
8279 slot = path->slots[level];
8280 if (slot >= nr || level == 0) {
			/*
			 * Don't free the root; we detect that condition after
			 * the loop and return a positive value so the caller
			 * stops walking the tree.
			 */
8286 if (level != root_level) {
8287 btrfs_tree_unlock_rw(eb, path->locks[level]);
8288 path->locks[level] = 0;
8289
8290 free_extent_buffer(eb);
8291 path->nodes[level] = NULL;
8292 path->slots[level] = 0;
8293 }
8294 } else {
			/*
			 * We have a valid slot to walk back down from; stop
			 * here so the caller can process these new nodes.
			 */
8300 break;
8301 }
8302
8303 level++;
8304 }
8305
8306 eb = path->nodes[root_level];
8307 if (path->slots[root_level] >= btrfs_header_nritems(eb))
8308 return 1;
8309
8310 return 0;
8311}
8312
/*
 * root_eb is the subtree root and is locked before this function is called.
 */
8316static int account_shared_subtree(struct btrfs_trans_handle *trans,
8317 struct btrfs_root *root,
8318 struct extent_buffer *root_eb,
8319 u64 root_gen,
8320 int root_level)
8321{
8322 int ret = 0;
8323 int level;
8324 struct extent_buffer *eb = root_eb;
8325 struct btrfs_path *path = NULL;
8326
8327 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
8328 BUG_ON(root_eb == NULL);
8329
8330 if (!root->fs_info->quota_enabled)
8331 return 0;
8332
8333 if (!extent_buffer_uptodate(root_eb)) {
8334 ret = btrfs_read_buffer(root_eb, root_gen);
8335 if (ret)
8336 goto out;
8337 }
8338
8339 if (root_level == 0) {
8340 ret = account_leaf_items(trans, root, root_eb);
8341 goto out;
8342 }
8343
8344 path = btrfs_alloc_path();
8345 if (!path)
8346 return -ENOMEM;
8347
	/*
	 * Walk down the tree.  Missing extent buffers are read as we go and
	 * each newly read node is accounted via record_one_subtree_extent().
	 *
	 * When we reach a leaf, account the file extent items in it, then
	 * walk back up the tree (adjusting slot pointers as we go) and
	 * restart the search from the new position.
	 */
8357 extent_buffer_get(root_eb);
8358 path->nodes[root_level] = root_eb;
8359 path->slots[root_level] = 0;
8360 path->locks[root_level] = 0;
8361walk_down:
8362 level = root_level;
8363 while (level >= 0) {
8364 if (path->nodes[level] == NULL) {
8365 int parent_slot;
8366 u64 child_gen;
8367 u64 child_bytenr;
8368
8369
8370
8371 eb = path->nodes[level + 1];
8372 parent_slot = path->slots[level + 1];
8373 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
8374 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
8375
8376 eb = read_tree_block(root, child_bytenr, child_gen);
8377 if (IS_ERR(eb)) {
8378 ret = PTR_ERR(eb);
8379 goto out;
8380 } else if (!extent_buffer_uptodate(eb)) {
8381 free_extent_buffer(eb);
8382 ret = -EIO;
8383 goto out;
8384 }
8385
8386 path->nodes[level] = eb;
8387 path->slots[level] = 0;
8388
8389 btrfs_tree_read_lock(eb);
8390 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
8391 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
8392
8393 ret = record_one_subtree_extent(trans, root, child_bytenr,
8394 root->nodesize);
8395 if (ret)
8396 goto out;
8397 }
8398
8399 if (level == 0) {
8400 ret = account_leaf_items(trans, root, path->nodes[level]);
8401 if (ret)
8402 goto out;
8403
8404
8405 ret = adjust_slots_upwards(root, path, root_level);
8406 if (ret)
8407 break;
8408
8409
8410 goto walk_down;
8411 }
8412
8413 level--;
8414 }
8415
8416 ret = 0;
8417out:
8418 btrfs_free_path(path);
8419
8420 return ret;
8421}
8422
/*
 * Helper to process a tree block while walking down the tree.
 *
 * When wc->stage == UPDATE_BACKREF, this function updates back refs for
 * pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8431static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8432 struct btrfs_root *root,
8433 struct btrfs_path *path,
8434 struct walk_control *wc, int lookup_info)
8435{
8436 int level = wc->level;
8437 struct extent_buffer *eb = path->nodes[level];
8438 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8439 int ret;
8440
8441 if (wc->stage == UPDATE_BACKREF &&
8442 btrfs_header_owner(eb) != root->root_key.objectid)
8443 return 1;
8444
	/*
	 * When the reference count of a tree block is 1 it cannot increase
	 * again.  Once the full backref flag is set we never clear it.
	 */
8449 if (lookup_info &&
8450 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
8451 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
8452 BUG_ON(!path->locks[level]);
8453 ret = btrfs_lookup_extent_info(trans, root,
8454 eb->start, level, 1,
8455 &wc->refs[level],
8456 &wc->flags[level]);
8457 BUG_ON(ret == -ENOMEM);
8458 if (ret)
8459 return ret;
8460 BUG_ON(wc->refs[level] == 0);
8461 }
8462
8463 if (wc->stage == DROP_REFERENCE) {
8464 if (wc->refs[level] > 1)
8465 return 1;
8466
8467 if (path->locks[level] && !wc->keep_locks) {
8468 btrfs_tree_unlock_rw(eb, path->locks[level]);
8469 path->locks[level] = 0;
8470 }
8471 return 0;
8472 }
8473
	/* wc->stage == UPDATE_BACKREF */
8475 if (!(wc->flags[level] & flag)) {
8476 BUG_ON(!path->locks[level]);
8477 ret = btrfs_inc_ref(trans, root, eb, 1);
8478 BUG_ON(ret);
8479 ret = btrfs_dec_ref(trans, root, eb, 0);
8480 BUG_ON(ret);
8481 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
8482 eb->len, flag,
8483 btrfs_header_level(eb), 0);
8484 BUG_ON(ret);
8485 wc->flags[level] |= flag;
8486 }
8487
	/*
	 * The block is shared by multiple trees, so it's not good to keep
	 * the tree lock while we walk further down.
	 */
8492 if (path->locks[level] && level > 0) {
8493 btrfs_tree_unlock_rw(eb, path->locks[level]);
8494 path->locks[level] = 0;
8495 }
8496 return 0;
8497}
8498
/*
 * Helper to process a tree block pointer while walking down the tree.
 *
 * When wc->stage == DROP_REFERENCE this function checks the reference count
 * of the block pointed to.  If the block is shared and we need to update
 * back refs for the subtree rooted at the block, wc->stage is changed to
 * UPDATE_BACKREF.  If the block is shared and there is no need to update
 * back refs for that subtree, the function simply bails out.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8512static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8513 struct btrfs_root *root,
8514 struct btrfs_path *path,
8515 struct walk_control *wc, int *lookup_info)
8516{
8517 u64 bytenr;
8518 u64 generation;
8519 u64 parent;
8520 u32 blocksize;
8521 struct btrfs_key key;
8522 struct extent_buffer *next;
8523 int level = wc->level;
8524 int reada = 0;
8525 int ret = 0;
8526 bool need_account = false;
8527
8528 generation = btrfs_node_ptr_generation(path->nodes[level],
8529 path->slots[level]);
8530
	/*
	 * If the lower level block was created before the snapshot was taken,
	 * we know there is no need to update back refs for the subtree.
	 */
8535 if (wc->stage == UPDATE_BACKREF &&
8536 generation <= root->root_key.offset) {
8537 *lookup_info = 1;
8538 return 1;
8539 }
8540
8541 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8542 blocksize = root->nodesize;
8543
8544 next = btrfs_find_tree_block(root->fs_info, bytenr);
8545 if (!next) {
8546 next = btrfs_find_create_tree_block(root, bytenr);
8547 if (!next)
8548 return -ENOMEM;
8549 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
8550 level - 1);
8551 reada = 1;
8552 }
8553 btrfs_tree_lock(next);
8554 btrfs_set_lock_blocking(next);
8555
8556 ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
8557 &wc->refs[level - 1],
8558 &wc->flags[level - 1]);
8559 if (ret < 0) {
8560 btrfs_tree_unlock(next);
8561 return ret;
8562 }
8563
8564 if (unlikely(wc->refs[level - 1] == 0)) {
8565 btrfs_err(root->fs_info, "Missing references.");
8566 BUG();
8567 }
8568 *lookup_info = 0;
8569
8570 if (wc->stage == DROP_REFERENCE) {
8571 if (wc->refs[level - 1] > 1) {
8572 need_account = true;
8573 if (level == 1 &&
8574 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8575 goto skip;
8576
8577 if (!wc->update_ref ||
8578 generation <= root->root_key.offset)
8579 goto skip;
8580
8581 btrfs_node_key_to_cpu(path->nodes[level], &key,
8582 path->slots[level]);
8583 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
8584 if (ret < 0)
8585 goto skip;
8586
8587 wc->stage = UPDATE_BACKREF;
8588 wc->shared_level = level - 1;
8589 }
8590 } else {
8591 if (level == 1 &&
8592 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8593 goto skip;
8594 }
8595
8596 if (!btrfs_buffer_uptodate(next, generation, 0)) {
8597 btrfs_tree_unlock(next);
8598 free_extent_buffer(next);
8599 next = NULL;
8600 *lookup_info = 1;
8601 }
8602
8603 if (!next) {
8604 if (reada && level == 1)
8605 reada_walk_down(trans, root, wc, path);
8606 next = read_tree_block(root, bytenr, generation);
8607 if (IS_ERR(next)) {
8608 return PTR_ERR(next);
8609 } else if (!extent_buffer_uptodate(next)) {
8610 free_extent_buffer(next);
8611 return -EIO;
8612 }
8613 btrfs_tree_lock(next);
8614 btrfs_set_lock_blocking(next);
8615 }
8616
8617 level--;
8618 BUG_ON(level != btrfs_header_level(next));
8619 path->nodes[level] = next;
8620 path->slots[level] = 0;
8621 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8622 wc->level = level;
8623 if (wc->level == 1)
8624 wc->reada_slot = 0;
8625 return 0;
8626skip:
8627 wc->refs[level - 1] = 0;
8628 wc->flags[level - 1] = 0;
8629 if (wc->stage == DROP_REFERENCE) {
8630 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8631 parent = path->nodes[level]->start;
8632 } else {
8633 BUG_ON(root->root_key.objectid !=
8634 btrfs_header_owner(path->nodes[level]));
8635 parent = 0;
8636 }
8637
8638 if (need_account) {
8639 ret = account_shared_subtree(trans, root, next,
8640 generation, level - 1);
8641 if (ret) {
8642 btrfs_err_rl(root->fs_info,
8643 "Error "
8644 "%d accounting shared subtree. Quota "
8645 "is out of sync, rescan required.",
8646 ret);
8647 }
8648 }
8649 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
8650 root->root_key.objectid, level - 1, 0);
8651 BUG_ON(ret);
8652 }
8653 btrfs_tree_unlock(next);
8654 free_extent_buffer(next);
8655 *lookup_info = 1;
8656 return 1;
8657}
8658
/*
 * Helper to process a tree block while walking up the tree.
 *
 * When wc->stage == DROP_REFERENCE this function drops the reference count
 * on the block.
 *
 * When wc->stage == UPDATE_BACKREF this function changes wc->stage back to
 * DROP_REFERENCE if we switched to UPDATE_BACKREF while processing this
 * block earlier.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
8671static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
8672 struct btrfs_root *root,
8673 struct btrfs_path *path,
8674 struct walk_control *wc)
8675{
8676 int ret;
8677 int level = wc->level;
8678 struct extent_buffer *eb = path->nodes[level];
8679 u64 parent = 0;
8680
8681 if (wc->stage == UPDATE_BACKREF) {
8682 BUG_ON(wc->shared_level < level);
8683 if (level < wc->shared_level)
8684 goto out;
8685
8686 ret = find_next_key(path, level + 1, &wc->update_progress);
8687 if (ret > 0)
8688 wc->update_ref = 0;
8689
8690 wc->stage = DROP_REFERENCE;
8691 wc->shared_level = -1;
8692 path->slots[level] = 0;
8693
		/*
		 * If the block isn't locked, look up its reference count
		 * again; if it has dropped to one we should start walking
		 * down the tree again instead of freeing it here.
		 */
8699 if (!path->locks[level]) {
8700 BUG_ON(level == 0);
8701 btrfs_tree_lock(eb);
8702 btrfs_set_lock_blocking(eb);
8703 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8704
8705 ret = btrfs_lookup_extent_info(trans, root,
8706 eb->start, level, 1,
8707 &wc->refs[level],
8708 &wc->flags[level]);
8709 if (ret < 0) {
8710 btrfs_tree_unlock_rw(eb, path->locks[level]);
8711 path->locks[level] = 0;
8712 return ret;
8713 }
8714 BUG_ON(wc->refs[level] == 0);
8715 if (wc->refs[level] == 1) {
8716 btrfs_tree_unlock_rw(eb, path->locks[level]);
8717 path->locks[level] = 0;
8718 return 1;
8719 }
8720 }
8721 }
8722
	/* wc->stage == DROP_REFERENCE */
8724 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
8725
8726 if (wc->refs[level] == 1) {
8727 if (level == 0) {
8728 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8729 ret = btrfs_dec_ref(trans, root, eb, 1);
8730 else
8731 ret = btrfs_dec_ref(trans, root, eb, 0);
8732 BUG_ON(ret);
8733 ret = account_leaf_items(trans, root, eb);
8734 if (ret) {
8735 btrfs_err_rl(root->fs_info,
8736 "error "
8737 "%d accounting leaf items. Quota "
8738 "is out of sync, rescan required.",
8739 ret);
8740 }
8741 }
8742
8743 if (!path->locks[level] &&
8744 btrfs_header_generation(eb) == trans->transid) {
8745 btrfs_tree_lock(eb);
8746 btrfs_set_lock_blocking(eb);
8747 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8748 }
8749 clean_tree_block(trans, root->fs_info, eb);
8750 }
8751
8752 if (eb == root->node) {
8753 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8754 parent = eb->start;
8755 else
8756 BUG_ON(root->root_key.objectid !=
8757 btrfs_header_owner(eb));
8758 } else {
8759 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8760 parent = path->nodes[level + 1]->start;
8761 else
8762 BUG_ON(root->root_key.objectid !=
8763 btrfs_header_owner(path->nodes[level + 1]));
8764 }
8765
8766 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
8767out:
8768 wc->refs[level] = 0;
8769 wc->flags[level] = 0;
8770 return 0;
8771}
8772
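/*
 * Walk from wc->level towards the leaves, calling walk_down_proc() for the
 * current block and do_walk_down() for each child pointer, until we hit a
 * leaf or a block that should not be descended into.
 */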
8773static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
8774 struct btrfs_root *root,
8775 struct btrfs_path *path,
8776 struct walk_control *wc)
8777{
8778 int level = wc->level;
8779 int lookup_info = 1;
8780 int ret;
8781
8782 while (level >= 0) {
8783 ret = walk_down_proc(trans, root, path, wc, lookup_info);
8784 if (ret > 0)
8785 break;
8786
8787 if (level == 0)
8788 break;
8789
8790 if (path->slots[level] >=
8791 btrfs_header_nritems(path->nodes[level]))
8792 break;
8793
8794 ret = do_walk_down(trans, root, path, wc, &lookup_info);
8795 if (ret > 0) {
8796 path->slots[level]++;
8797 continue;
8798 } else if (ret < 0)
8799 return ret;
8800 level = wc->level;
8801 }
8802 return 0;
8803}
8804
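/*
 * Walk back up the tree, calling walk_up_proc() to drop our reference on
 * each block once all of its slots have been visited.  Returns 0 when there
 * is another slot to walk down from, and 1 when every level below max_level
 * has been finished.
 */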
8805static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
8806 struct btrfs_root *root,
8807 struct btrfs_path *path,
8808 struct walk_control *wc, int max_level)
8809{
8810 int level = wc->level;
8811 int ret;
8812
8813 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
8814 while (level < max_level && path->nodes[level]) {
8815 wc->level = level;
8816 if (path->slots[level] + 1 <
8817 btrfs_header_nritems(path->nodes[level])) {
8818 path->slots[level]++;
8819 return 0;
8820 } else {
8821 ret = walk_up_proc(trans, root, path, wc);
8822 if (ret > 0)
8823 return 0;
8824
8825 if (path->locks[level]) {
8826 btrfs_tree_unlock_rw(path->nodes[level],
8827 path->locks[level]);
8828 path->locks[level] = 0;
8829 }
8830 free_extent_buffer(path->nodes[level]);
8831 path->nodes[level] = NULL;
8832 level++;
8833 }
8834 }
8835 return 1;
8836}
8837
/*
 * Drop a subvolume tree.
 *
 * This function traverses the tree freeing any blocks that are only
 * referenced by the tree being dropped.
 *
 * When a shared tree block is found, this function decreases its reference
 * count by one.  If update_ref is true it also makes sure that back refs
 * for the shared block and all lower level blocks are properly updated.
 *
 * If called with for_reloc == 0, this may exit early with -EAGAIN.
 */
8851int btrfs_drop_snapshot(struct btrfs_root *root,
8852 struct btrfs_block_rsv *block_rsv, int update_ref,
8853 int for_reloc)
8854{
8855 struct btrfs_path *path;
8856 struct btrfs_trans_handle *trans;
8857 struct btrfs_root *tree_root = root->fs_info->tree_root;
8858 struct btrfs_root_item *root_item = &root->root_item;
8859 struct walk_control *wc;
8860 struct btrfs_key key;
8861 int err = 0;
8862 int ret;
8863 int level;
8864 bool root_dropped = false;
8865
8866 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8867
8868 path = btrfs_alloc_path();
8869 if (!path) {
8870 err = -ENOMEM;
8871 goto out;
8872 }
8873
8874 wc = kzalloc(sizeof(*wc), GFP_NOFS);
8875 if (!wc) {
8876 btrfs_free_path(path);
8877 err = -ENOMEM;
8878 goto out;
8879 }
8880
8881 trans = btrfs_start_transaction(tree_root, 0);
8882 if (IS_ERR(trans)) {
8883 err = PTR_ERR(trans);
8884 goto out_free;
8885 }
8886
8887 if (block_rsv)
8888 trans->block_rsv = block_rsv;
8889
8890 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
8891 level = btrfs_header_level(root->node);
8892 path->nodes[level] = btrfs_lock_root_node(root);
8893 btrfs_set_lock_blocking(path->nodes[level]);
8894 path->slots[level] = 0;
8895 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8896 memset(&wc->update_progress, 0,
8897 sizeof(wc->update_progress));
8898 } else {
8899 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
8900 memcpy(&wc->update_progress, &key,
8901 sizeof(wc->update_progress));
8902
8903 level = root_item->drop_level;
8904 BUG_ON(level == 0);
8905 path->lowest_level = level;
8906 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8907 path->lowest_level = 0;
8908 if (ret < 0) {
8909 err = ret;
8910 goto out_end_trans;
8911 }
8912 WARN_ON(ret > 0);
8913
		/*
		 * Unlock our path; this is safe because only this function
		 * is allowed to delete this snapshot.
		 */
8918 btrfs_unlock_up_safe(path, 0);
8919
8920 level = btrfs_header_level(root->node);
8921 while (1) {
8922 btrfs_tree_lock(path->nodes[level]);
8923 btrfs_set_lock_blocking(path->nodes[level]);
8924 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8925
8926 ret = btrfs_lookup_extent_info(trans, root,
8927 path->nodes[level]->start,
8928 level, 1, &wc->refs[level],
8929 &wc->flags[level]);
8930 if (ret < 0) {
8931 err = ret;
8932 goto out_end_trans;
8933 }
8934 BUG_ON(wc->refs[level] == 0);
8935
8936 if (level == root_item->drop_level)
8937 break;
8938
8939 btrfs_tree_unlock(path->nodes[level]);
8940 path->locks[level] = 0;
8941 WARN_ON(wc->refs[level] != 1);
8942 level--;
8943 }
8944 }
8945
8946 wc->level = level;
8947 wc->shared_level = -1;
8948 wc->stage = DROP_REFERENCE;
8949 wc->update_ref = update_ref;
8950 wc->keep_locks = 0;
8951 wc->for_reloc = for_reloc;
8952 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
8953
8954 while (1) {
8955
8956 ret = walk_down_tree(trans, root, path, wc);
8957 if (ret < 0) {
8958 err = ret;
8959 break;
8960 }
8961
8962 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
8963 if (ret < 0) {
8964 err = ret;
8965 break;
8966 }
8967
8968 if (ret > 0) {
8969 BUG_ON(wc->stage != DROP_REFERENCE);
8970 break;
8971 }
8972
8973 if (wc->stage == DROP_REFERENCE) {
8974 level = wc->level;
8975 btrfs_node_key(path->nodes[level],
8976 &root_item->drop_progress,
8977 path->slots[level]);
8978 root_item->drop_level = level;
8979 }
8980
8981 BUG_ON(wc->level == 0);
8982 if (btrfs_should_end_transaction(trans, tree_root) ||
8983 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
8984 ret = btrfs_update_root(trans, tree_root,
8985 &root->root_key,
8986 root_item);
8987 if (ret) {
8988 btrfs_abort_transaction(trans, tree_root, ret);
8989 err = ret;
8990 goto out_end_trans;
8991 }
8992
8993 btrfs_end_transaction_throttle(trans, tree_root);
8994 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8995 pr_debug("BTRFS: drop snapshot early exit\n");
8996 err = -EAGAIN;
8997 goto out_free;
8998 }
8999
9000 trans = btrfs_start_transaction(tree_root, 0);
9001 if (IS_ERR(trans)) {
9002 err = PTR_ERR(trans);
9003 goto out_free;
9004 }
9005 if (block_rsv)
9006 trans->block_rsv = block_rsv;
9007 }
9008 }
9009 btrfs_release_path(path);
9010 if (err)
9011 goto out_end_trans;
9012
9013 ret = btrfs_del_root(trans, tree_root, &root->root_key);
9014 if (ret) {
9015 btrfs_abort_transaction(trans, tree_root, ret);
9016 goto out_end_trans;
9017 }
9018
9019 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9020 ret = btrfs_find_root(tree_root, &root->root_key, path,
9021 NULL, NULL);
9022 if (ret < 0) {
9023 btrfs_abort_transaction(trans, tree_root, ret);
9024 err = ret;
9025 goto out_end_trans;
9026 } else if (ret > 0) {
			/*
			 * The root item is already gone, so just try to
			 * delete the orphan item; the return value is ignored
			 * since the most common failure here is simply a
			 * missing orphan item (-ENOENT).
			 */
9032 btrfs_del_orphan_item(trans, tree_root,
9033 root->root_key.objectid);
9034 }
9035 }
9036
9037 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9038 btrfs_add_dropped_root(trans, root);
9039 } else {
9040 free_extent_buffer(root->node);
9041 free_extent_buffer(root->commit_root);
9042 btrfs_put_fs_root(root);
9043 }
9044 root_dropped = true;
9045out_end_trans:
9046 btrfs_end_transaction_throttle(trans, tree_root);
9047out_free:
9048 kfree(wc);
9049 btrfs_free_path(path);
9050out:
	/*
	 * If we had to stop dropping the snapshot for whatever reason, make
	 * sure the root goes back on the dead roots list so the cleaner will
	 * finish dropping it later; otherwise the partially deleted
	 * subvolume would be leaked.
	 */
9058 if (!for_reloc && root_dropped == false)
9059 btrfs_add_dead_root(root);
9060 if (err && err != -EAGAIN)
9061 btrfs_std_error(root->fs_info, err, NULL);
9062 return err;
9063}
9064
/*
 * Drop the subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'.  Only used
 * by the relocation code.
 */
9071int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9072 struct btrfs_root *root,
9073 struct extent_buffer *node,
9074 struct extent_buffer *parent)
9075{
9076 struct btrfs_path *path;
9077 struct walk_control *wc;
9078 int level;
9079 int parent_level;
9080 int ret = 0;
9081 int wret;
9082
9083 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9084
9085 path = btrfs_alloc_path();
9086 if (!path)
9087 return -ENOMEM;
9088
9089 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9090 if (!wc) {
9091 btrfs_free_path(path);
9092 return -ENOMEM;
9093 }
9094
9095 btrfs_assert_tree_locked(parent);
9096 parent_level = btrfs_header_level(parent);
9097 extent_buffer_get(parent);
9098 path->nodes[parent_level] = parent;
9099 path->slots[parent_level] = btrfs_header_nritems(parent);
9100
9101 btrfs_assert_tree_locked(node);
9102 level = btrfs_header_level(node);
9103 path->nodes[level] = node;
9104 path->slots[level] = 0;
9105 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9106
9107 wc->refs[parent_level] = 1;
9108 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9109 wc->level = level;
9110 wc->shared_level = -1;
9111 wc->stage = DROP_REFERENCE;
9112 wc->update_ref = 0;
9113 wc->keep_locks = 1;
9114 wc->for_reloc = 1;
9115 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
9116
9117 while (1) {
9118 wret = walk_down_tree(trans, root, path, wc);
9119 if (wret < 0) {
9120 ret = wret;
9121 break;
9122 }
9123
9124 wret = walk_up_tree(trans, root, path, wc, parent_level);
9125 if (wret < 0)
9126 ret = wret;
9127 if (wret != 0)
9128 break;
9129 }
9130
9131 kfree(wc);
9132 btrfs_free_path(path);
9133 return ret;
9134}
9135
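/*
 * Work out which profile a block group's chunks should be converted to when
 * it is relocated: honour a restripe target if one is set, otherwise pick a
 * profile that the current number of writable devices can support.
 */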
9136static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
9137{
9138 u64 num_devices;
9139 u64 stripped;
9140
9141	/*
9142	 * if restripe for this chunk_type is on, pick the target profile and
9143	 * return it, otherwise do the usual balance
9144	 */
9145 stripped = get_restripe_target(root->fs_info, flags);
9146 if (stripped)
9147 return extended_to_chunk(stripped);
9148
9149 num_devices = root->fs_info->fs_devices->rw_devices;
9150
9151 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9152 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9153 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9154
9155 if (num_devices == 1) {
9156 stripped |= BTRFS_BLOCK_GROUP_DUP;
9157 stripped = flags & ~stripped;
9158
9159
9160 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9161 return stripped;
9162
9163
9164 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9165 BTRFS_BLOCK_GROUP_RAID10))
9166 return stripped | BTRFS_BLOCK_GROUP_DUP;
9167 } else {
9168
9169 if (flags & stripped)
9170 return flags;
9171
9172 stripped |= BTRFS_BLOCK_GROUP_DUP;
9173 stripped = flags & ~stripped;
9174
9175
9176 if (flags & BTRFS_BLOCK_GROUP_DUP)
9177 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9178
9179
9180 }
9181
9182 return flags;
9183}
9184
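/*
 * Mark a block group read-only, or take another ro reference if it already
 * is.  Unless 'force' is set, metadata and system space must keep at least
 * 1MB allocatable after the block group's unused bytes are moved to
 * bytes_readonly, otherwise -ENOSPC is returned.
 */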
9185static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9186{
9187 struct btrfs_space_info *sinfo = cache->space_info;
9188 u64 num_bytes;
9189 u64 min_allocable_bytes;
9190 int ret = -ENOSPC;
9191
9192	/*
9193	 * We need some metadata space and system metadata space for
9194	 * allocating chunks in some corner cases until we force to set
9195	 * it to be readonly.
9196	 */
9197 if ((sinfo->flags &
9198 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9199 !force)
9200 min_allocable_bytes = SZ_1M;
9201 else
9202 min_allocable_bytes = 0;
9203
9204 spin_lock(&sinfo->lock);
9205 spin_lock(&cache->lock);
9206
9207 if (cache->ro) {
9208 cache->ro++;
9209 ret = 0;
9210 goto out;
9211 }
9212
9213 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9214 cache->bytes_super - btrfs_block_group_used(&cache->item);
9215
9216 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
9217 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
9218 min_allocable_bytes <= sinfo->total_bytes) {
9219 sinfo->bytes_readonly += num_bytes;
9220 cache->ro++;
9221 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9222 ret = 0;
9223 }
9224out:
9225 spin_unlock(&cache->lock);
9226 spin_unlock(&sinfo->lock);
9227 return ret;
9228}
9229
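/*
 * Set a block group read-only for relocation or scrub.  If needed this first
 * allocates a new chunk (possibly at a different raid level) so that writes
 * still have somewhere to go, then flips the block group to ro and reserves
 * system chunk space if required.
 */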
9230int btrfs_inc_block_group_ro(struct btrfs_root *root,
9231 struct btrfs_block_group_cache *cache)
9232
9233{
9234 struct btrfs_trans_handle *trans;
9235 u64 alloc_flags;
9236 int ret;
9237
9238again:
9239 trans = btrfs_join_transaction(root);
9240 if (IS_ERR(trans))
9241 return PTR_ERR(trans);
9242
9243	/*
9244	 * we're not allowed to set block groups readonly after the dirty
9245	 * block groups cache has started writing.  If it already started,
9246	 * back off and let this transaction commit.
9247	 */
9248 mutex_lock(&root->fs_info->ro_block_group_mutex);
9249 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9250 u64 transid = trans->transid;
9251
9252 mutex_unlock(&root->fs_info->ro_block_group_mutex);
9253 btrfs_end_transaction(trans, root);
9254
9255 ret = btrfs_wait_for_commit(root, transid);
9256 if (ret)
9257 return ret;
9258 goto again;
9259 }
9260
9261	/*
9262	 * if we are changing raid levels, try to allocate a corresponding
9263	 * block group with the new raid level.
9264	 */
9265 alloc_flags = update_block_group_flags(root, cache->flags);
9266 if (alloc_flags != cache->flags) {
9267 ret = do_chunk_alloc(trans, root, alloc_flags,
9268 CHUNK_ALLOC_FORCE);
9269		/*
9270		 * ENOSPC is allowed here, we may have enough space
9271		 * already allocated at the new raid level to carry on
9272		 * with the read-only flip below.
9273		 */
9274 if (ret == -ENOSPC)
9275 ret = 0;
9276 if (ret < 0)
9277 goto out;
9278 }
9279
9280 ret = inc_block_group_ro(cache, 0);
9281 if (!ret)
9282 goto out;
9283 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
9284 ret = do_chunk_alloc(trans, root, alloc_flags,
9285 CHUNK_ALLOC_FORCE);
9286 if (ret < 0)
9287 goto out;
9288 ret = inc_block_group_ro(cache, 0);
9289out:
9290 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9291 alloc_flags = update_block_group_flags(root, cache->flags);
9292 lock_chunks(root->fs_info->chunk_root);
9293 check_system_chunk(trans, root, alloc_flags);
9294 unlock_chunks(root->fs_info->chunk_root);
9295 }
9296 mutex_unlock(&root->fs_info->ro_block_group_mutex);
9297
9298 btrfs_end_transaction(trans, root);
9299 return ret;
9300}
9301
9302int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
9303 struct btrfs_root *root, u64 type)
9304{
9305 u64 alloc_flags = get_alloc_profile(root, type);
9306 return do_chunk_alloc(trans, root, alloc_flags,
9307 CHUNK_ALLOC_FORCE);
9308}
9309
9310/*
9311 * helper to account the unused space of all the readonly block groups in
9312 * the space_info. takes mirrors into account.
9313 */
9314u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9315{
9316 struct btrfs_block_group_cache *block_group;
9317 u64 free_bytes = 0;
9318 int factor;
9319
9320	/* It's df, we don't care if it's racy */
9321 if (list_empty(&sinfo->ro_bgs))
9322 return 0;
9323
9324 spin_lock(&sinfo->lock);
9325 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9326 spin_lock(&block_group->lock);
9327
9328 if (!block_group->ro) {
9329 spin_unlock(&block_group->lock);
9330 continue;
9331 }
9332
9333 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
9334 BTRFS_BLOCK_GROUP_RAID10 |
9335 BTRFS_BLOCK_GROUP_DUP))
9336 factor = 2;
9337 else
9338 factor = 1;
9339
9340 free_bytes += (block_group->key.offset -
9341 btrfs_block_group_used(&block_group->item)) *
9342 factor;
9343
9344 spin_unlock(&block_group->lock);
9345 }
9346 spin_unlock(&sinfo->lock);
9347
9348 return free_bytes;
9349}
9350
9351void btrfs_dec_block_group_ro(struct btrfs_root *root,
9352 struct btrfs_block_group_cache *cache)
9353{
9354 struct btrfs_space_info *sinfo = cache->space_info;
9355 u64 num_bytes;
9356
9357 BUG_ON(!cache->ro);
9358
9359 spin_lock(&sinfo->lock);
9360 spin_lock(&cache->lock);
9361 if (!--cache->ro) {
9362 num_bytes = cache->key.offset - cache->reserved -
9363 cache->pinned - cache->bytes_super -
9364 btrfs_block_group_used(&cache->item);
9365 sinfo->bytes_readonly -= num_bytes;
9366 list_del_init(&cache->ro_list);
9367 }
9368 spin_unlock(&cache->lock);
9369 spin_unlock(&sinfo->lock);
9370}
9371
9372/*
9373 * checks to see if it's even possible to relocate this block group.
9374 *
9375 * @return - -1 if it's not a good idea to relocate this block group, 0 if
9376 * it's ok to go ahead and try.
9377 */
9378int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
9379{
9380 struct btrfs_block_group_cache *block_group;
9381 struct btrfs_space_info *space_info;
9382 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
9383 struct btrfs_device *device;
9384 struct btrfs_trans_handle *trans;
9385 u64 min_free;
9386 u64 dev_min = 1;
9387 u64 dev_nr = 0;
9388 u64 target;
9389 int debug;
9390 int index;
9391 int full = 0;
9392 int ret = 0;
9393
9394 debug = btrfs_test_opt(root, ENOSPC_DEBUG);
9395
9396 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
9397
9398	/* odd, couldn't find the block group, leave it alone */
9399 if (!block_group) {
9400 if (debug)
9401 btrfs_warn(root->fs_info,
9402 "can't find block group for bytenr %llu",
9403 bytenr);
9404 return -1;
9405 }
9406
9407 min_free = btrfs_block_group_used(&block_group->item);
9408
9409	/* no bytes used, we're good */
9410 if (!min_free)
9411 goto out;
9412
9413 space_info = block_group->space_info;
9414 spin_lock(&space_info->lock);
9415
9416 full = space_info->full;
9417
9418	/*
9419	 * if this is the last block group we have in this space, we can't
9420	 * relocate anything.  The next block groups might be smaller and
9421	 * easier to implode, but there has to be somewhere for it to go.
9422	 * Otherwise, we need to make sure we have room in the space to
9423	 * handle all of the extents from this block group.
9424	 */
9425 if ((space_info->total_bytes != block_group->key.offset) &&
9426 (space_info->bytes_used + space_info->bytes_reserved +
9427 space_info->bytes_pinned + space_info->bytes_readonly +
9428 min_free < space_info->total_bytes)) {
9429 spin_unlock(&space_info->lock);
9430 goto out;
9431 }
9432 spin_unlock(&space_info->lock);
9433
9434	/*
9435	 * ok we don't have enough space, but maybe we have free space on our
9436	 * devices to allocate new chunks for relocation, so loop through our
9437	 * alloc devices and guess if we have enough space.  if this block
9438	 * group is going to be restriped, run checks against the target
9439	 * profile instead of the current one.
9440	 */
9441 ret = -1;
9442
9443	/*
9444	 * index:
9445	 *      0: raid10
9446	 *      1: raid1
9447	 *      2: dup
9448	 *      3: raid0
9449	 *      4: single
9450	 */
9451 target = get_restripe_target(root->fs_info, block_group->flags);
9452 if (target) {
9453 index = __get_raid_index(extended_to_chunk(target));
9454 } else {
9455		/*
9456		 * this is just a balance, so if we were marked as full
9457		 * we know there is no space for a new chunk
9458		 */
9459 if (full) {
9460 if (debug)
9461 btrfs_warn(root->fs_info,
9462 "no space to alloc new chunk for block group %llu",
9463 block_group->key.objectid);
9464 goto out;
9465 }
9466
9467 index = get_block_group_index(block_group);
9468 }
9469
9470 if (index == BTRFS_RAID_RAID10) {
9471 dev_min = 4;
9472		/* Divide by 2 */
9473 min_free >>= 1;
9474 } else if (index == BTRFS_RAID_RAID1) {
9475 dev_min = 2;
9476 } else if (index == BTRFS_RAID_DUP) {
9477		/* Multiply by 2 */
9478 min_free <<= 1;
9479 } else if (index == BTRFS_RAID_RAID0) {
9480 dev_min = fs_devices->rw_devices;
9481 min_free = div64_u64(min_free, dev_min);
9482 }
9483
9484	/* We need to do this so that we can look at pending chunks */
9485 trans = btrfs_join_transaction(root);
9486 if (IS_ERR(trans)) {
9487 ret = PTR_ERR(trans);
9488 goto out;
9489 }
9490
9491 mutex_lock(&root->fs_info->chunk_mutex);
9492 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9493 u64 dev_offset;
9494
9495		/*
9496		 * check to make sure we can actually find a chunk with enough
9497		 * space to fit our block group in.
9498		 */
9499 if (device->total_bytes > device->bytes_used + min_free &&
9500 !device->is_tgtdev_for_dev_replace) {
9501 ret = find_free_dev_extent(trans, device, min_free,
9502 &dev_offset, NULL);
9503 if (!ret)
9504 dev_nr++;
9505
9506 if (dev_nr >= dev_min)
9507 break;
9508
9509 ret = -1;
9510 }
9511 }
9512 if (debug && ret == -1)
9513 btrfs_warn(root->fs_info,
9514 "no space to allocate a new chunk for block group %llu",
9515 block_group->key.objectid);
9516 mutex_unlock(&root->fs_info->chunk_mutex);
9517 btrfs_end_transaction(trans, root);
9518out:
9519 btrfs_put_block_group(block_group);
9520 return ret;
9521}
9522
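/*
 * Find the first block group item in the extent tree whose objectid is
 * greater than or equal to key->objectid.  Returns 0 with the path pointing
 * at the item, > 0 if there is no such item, or < 0 on error.
 */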
9523static int find_first_block_group(struct btrfs_root *root,
9524 struct btrfs_path *path, struct btrfs_key *key)
9525{
9526 int ret = 0;
9527 struct btrfs_key found_key;
9528 struct extent_buffer *leaf;
9529 int slot;
9530
9531 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9532 if (ret < 0)
9533 goto out;
9534
9535 while (1) {
9536 slot = path->slots[0];
9537 leaf = path->nodes[0];
9538 if (slot >= btrfs_header_nritems(leaf)) {
9539 ret = btrfs_next_leaf(root, path);
9540 if (ret == 0)
9541 continue;
9542 if (ret < 0)
9543 goto out;
9544 break;
9545 }
9546 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9547
9548 if (found_key.objectid >= key->objectid &&
9549 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9550 ret = 0;
9551 goto out;
9552 }
9553 path->slots[0]++;
9554 }
9555out:
9556 return ret;
9557}
9558
9559void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9560{
9561 struct btrfs_block_group_cache *block_group;
9562 u64 last = 0;
9563
9564 while (1) {
9565 struct inode *inode;
9566
9567 block_group = btrfs_lookup_first_block_group(info, last);
9568 while (block_group) {
9569 spin_lock(&block_group->lock);
9570 if (block_group->iref)
9571 break;
9572 spin_unlock(&block_group->lock);
9573 block_group = next_block_group(info->tree_root,
9574 block_group);
9575 }
9576 if (!block_group) {
9577 if (last == 0)
9578 break;
9579 last = 0;
9580 continue;
9581 }
9582
9583 inode = block_group->inode;
9584 block_group->iref = 0;
9585 block_group->inode = NULL;
9586 spin_unlock(&block_group->lock);
9587 iput(inode);
9588 last = block_group->key.objectid + block_group->key.offset;
9589 btrfs_put_block_group(block_group);
9590 }
9591}
9592
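/*
 * Tear down all in-memory block group caches and space_info structures.
 * Called during the final stages of unmount.
 */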
9593int btrfs_free_block_groups(struct btrfs_fs_info *info)
9594{
9595 struct btrfs_block_group_cache *block_group;
9596 struct btrfs_space_info *space_info;
9597 struct btrfs_caching_control *caching_ctl;
9598 struct rb_node *n;
9599
9600 down_write(&info->commit_root_sem);
9601 while (!list_empty(&info->caching_block_groups)) {
9602 caching_ctl = list_entry(info->caching_block_groups.next,
9603 struct btrfs_caching_control, list);
9604 list_del(&caching_ctl->list);
9605 put_caching_control(caching_ctl);
9606 }
9607 up_write(&info->commit_root_sem);
9608
9609 spin_lock(&info->unused_bgs_lock);
9610 while (!list_empty(&info->unused_bgs)) {
9611 block_group = list_first_entry(&info->unused_bgs,
9612 struct btrfs_block_group_cache,
9613 bg_list);
9614 list_del_init(&block_group->bg_list);
9615 btrfs_put_block_group(block_group);
9616 }
9617 spin_unlock(&info->unused_bgs_lock);
9618
9619 spin_lock(&info->block_group_cache_lock);
9620 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9621 block_group = rb_entry(n, struct btrfs_block_group_cache,
9622 cache_node);
9623 rb_erase(&block_group->cache_node,
9624 &info->block_group_cache_tree);
9625 RB_CLEAR_NODE(&block_group->cache_node);
9626 spin_unlock(&info->block_group_cache_lock);
9627
9628 down_write(&block_group->space_info->groups_sem);
9629 list_del(&block_group->list);
9630 up_write(&block_group->space_info->groups_sem);
9631
9632 if (block_group->cached == BTRFS_CACHE_STARTED)
9633 wait_block_group_cache_done(block_group);
9634
9635		/*
9636		 * We haven't cached this block group, which means we could
9637		 * possibly have excluded extents on this block group.
9638		 */
9639 if (block_group->cached == BTRFS_CACHE_NO ||
9640 block_group->cached == BTRFS_CACHE_ERROR)
9641 free_excluded_extents(info->extent_root, block_group);
9642
9643 btrfs_remove_free_space_cache(block_group);
9644 btrfs_put_block_group(block_group);
9645
9646 spin_lock(&info->block_group_cache_lock);
9647 }
9648 spin_unlock(&info->block_group_cache_lock);
9649
9650	/*
9651	 * now that all the block groups are freed, go through and free all
9652	 * the space_info structs.  This is only called during the final
9653	 * stages of unmount, and so we know nobody is using them.  We call
9654	 * synchronize_rcu() to let any outstanding RCU readers finish first.
9655	 */
9656 synchronize_rcu();
9657
9658 release_global_block_rsv(info);
9659
9660 while (!list_empty(&info->space_info)) {
9661 int i;
9662
9663 space_info = list_entry(info->space_info.next,
9664 struct btrfs_space_info,
9665 list);
9666 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
9667 if (WARN_ON(space_info->bytes_pinned > 0 ||
9668 space_info->bytes_reserved > 0 ||
9669 space_info->bytes_may_use > 0)) {
9670 dump_space_info(space_info, 0, 0);
9671 }
9672 }
9673 list_del(&space_info->list);
9674 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9675 struct kobject *kobj;
9676 kobj = space_info->block_group_kobjs[i];
9677 space_info->block_group_kobjs[i] = NULL;
9678 if (kobj) {
9679 kobject_del(kobj);
9680 kobject_put(kobj);
9681 }
9682 }
9683 kobject_del(&space_info->kobj);
9684 kobject_put(&space_info->kobj);
9685 }
9686 return 0;
9687}
9688
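/*
 * Add a block group to its space_info's per-raid-type list.  If it is the
 * first block group of that raid type, also create the matching sysfs raid
 * kobject under the space_info directory.
 */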
9689static void __link_block_group(struct btrfs_space_info *space_info,
9690 struct btrfs_block_group_cache *cache)
9691{
9692 int index = get_block_group_index(cache);
9693 bool first = false;
9694
9695 down_write(&space_info->groups_sem);
9696 if (list_empty(&space_info->block_groups[index]))
9697 first = true;
9698 list_add_tail(&cache->list, &space_info->block_groups[index]);
9699 up_write(&space_info->groups_sem);
9700
9701 if (first) {
9702 struct raid_kobject *rkobj;
9703 int ret;
9704
9705 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9706 if (!rkobj)
9707 goto out_err;
9708 rkobj->raid_type = index;
9709 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9710 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9711 "%s", get_raid_name(index));
9712 if (ret) {
9713 kobject_put(&rkobj->kobj);
9714 goto out_err;
9715 }
9716 space_info->block_group_kobjs[index] = &rkobj->kobj;
9717 }
9718
9719 return;
9720out_err:
9721 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
9722}
9723
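/*
 * Allocate and initialize an in-memory block group cache for the block group
 * starting at 'start' with length 'size'.  Returns NULL if the allocations
 * fail.
 */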
9724static struct btrfs_block_group_cache *
9725btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9726{
9727 struct btrfs_block_group_cache *cache;
9728
9729 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9730 if (!cache)
9731 return NULL;
9732
9733 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
9734 GFP_NOFS);
9735 if (!cache->free_space_ctl) {
9736 kfree(cache);
9737 return NULL;
9738 }
9739
9740 cache->key.objectid = start;
9741 cache->key.offset = size;
9742 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9743
9744 cache->sectorsize = root->sectorsize;
9745 cache->fs_info = root->fs_info;
9746 cache->full_stripe_len = btrfs_full_stripe_len(root,
9747 &root->fs_info->mapping_tree,
9748 start);
9749 set_free_space_tree_thresholds(cache);
9750
9751 atomic_set(&cache->count, 1);
9752 spin_lock_init(&cache->lock);
9753 init_rwsem(&cache->data_rwsem);
9754 INIT_LIST_HEAD(&cache->list);
9755 INIT_LIST_HEAD(&cache->cluster_list);
9756 INIT_LIST_HEAD(&cache->bg_list);
9757 INIT_LIST_HEAD(&cache->ro_list);
9758 INIT_LIST_HEAD(&cache->dirty_list);
9759 INIT_LIST_HEAD(&cache->io_list);
9760 btrfs_init_free_space_ctl(cache);
9761 atomic_set(&cache->trimming, 0);
9762 mutex_init(&cache->free_space_lock);
9763
9764 return cache;
9765}
9766
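/*
 * Read all block group items from the extent tree at mount time and build
 * the in-memory block group caches and their space_info accounting.
 */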
9767int btrfs_read_block_groups(struct btrfs_root *root)
9768{
9769 struct btrfs_path *path;
9770 int ret;
9771 struct btrfs_block_group_cache *cache;
9772 struct btrfs_fs_info *info = root->fs_info;
9773 struct btrfs_space_info *space_info;
9774 struct btrfs_key key;
9775 struct btrfs_key found_key;
9776 struct extent_buffer *leaf;
9777 int need_clear = 0;
9778 u64 cache_gen;
9779
9780 root = info->extent_root;
9781 key.objectid = 0;
9782 key.offset = 0;
9783 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9784 path = btrfs_alloc_path();
9785 if (!path)
9786 return -ENOMEM;
9787 path->reada = READA_FORWARD;
9788
9789 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
9790 if (btrfs_test_opt(root, SPACE_CACHE) &&
9791 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
9792 need_clear = 1;
9793 if (btrfs_test_opt(root, CLEAR_CACHE))
9794 need_clear = 1;
9795
9796 while (1) {
9797 ret = find_first_block_group(root, path, &key);
9798 if (ret > 0)
9799 break;
9800 if (ret != 0)
9801 goto error;
9802
9803 leaf = path->nodes[0];
9804 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9805
9806 cache = btrfs_create_block_group_cache(root, found_key.objectid,
9807 found_key.offset);
9808 if (!cache) {
9809 ret = -ENOMEM;
9810 goto error;
9811 }
9812
9813 if (need_clear) {
9814			/*
9815			 * When we mount with an old space cache, we need to
9816			 * set BTRFS_DC_CLEAR and set the dirty flag so that
9817			 * the stale on-disk cache is ignored and rewritten
9818			 * at commit time.
9819			 *
9820			 * If we mount with the new space cache, the cache is
9821			 * cleared since there is no need of it; free space is
9822			 * rebuilt from the extent tree on first use.
9823			 */
9824 if (btrfs_test_opt(root, SPACE_CACHE))
9825 cache->disk_cache_state = BTRFS_DC_CLEAR;
9826 }
9827
9828 read_extent_buffer(leaf, &cache->item,
9829 btrfs_item_ptr_offset(leaf, path->slots[0]),
9830 sizeof(cache->item));
9831 cache->flags = btrfs_block_group_flags(&cache->item);
9832
9833 key.objectid = found_key.objectid + found_key.offset;
9834 btrfs_release_path(path);
9835
9836		/*
9837		 * We need to exclude the super stripes now so that the space
9838		 * info has super bytes accounted for, otherwise we'll think
9839		 * we have more space than we actually do.
9840		 */
9841 ret = exclude_super_stripes(root, cache);
9842 if (ret) {
9843			/*
9844			 * We may have excluded something, so call this just in
9845			 * case.
9846			 */
9847 free_excluded_extents(root, cache);
9848 btrfs_put_block_group(cache);
9849 goto error;
9850 }
9851
9852		/*
9853		 * check for two cases, either we are full, and therefore
9854		 * don't need to bother with the caching work since we won't
9855		 * find any space, or we are empty, and we can just add all
9856		 * the space in and be done with it.  This saves us _a lot_ of
9857		 * time, particularly in the full case.
9858		 */
9859 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
9860 cache->last_byte_to_unpin = (u64)-1;
9861 cache->cached = BTRFS_CACHE_FINISHED;
9862 free_excluded_extents(root, cache);
9863 } else if (btrfs_block_group_used(&cache->item) == 0) {
9864 cache->last_byte_to_unpin = (u64)-1;
9865 cache->cached = BTRFS_CACHE_FINISHED;
9866 add_new_free_space(cache, root->fs_info,
9867 found_key.objectid,
9868 found_key.objectid +
9869 found_key.offset);
9870 free_excluded_extents(root, cache);
9871 }
9872
9873 ret = btrfs_add_block_group_cache(root->fs_info, cache);
9874 if (ret) {
9875 btrfs_remove_free_space_cache(cache);
9876 btrfs_put_block_group(cache);
9877 goto error;
9878 }
9879
9880 ret = update_space_info(info, cache->flags, found_key.offset,
9881 btrfs_block_group_used(&cache->item),
9882 &space_info);
9883 if (ret) {
9884 btrfs_remove_free_space_cache(cache);
9885 spin_lock(&info->block_group_cache_lock);
9886 rb_erase(&cache->cache_node,
9887 &info->block_group_cache_tree);
9888 RB_CLEAR_NODE(&cache->cache_node);
9889 spin_unlock(&info->block_group_cache_lock);
9890 btrfs_put_block_group(cache);
9891 goto error;
9892 }
9893
9894 cache->space_info = space_info;
9895 spin_lock(&cache->space_info->lock);
9896 cache->space_info->bytes_readonly += cache->bytes_super;
9897 spin_unlock(&cache->space_info->lock);
9898
9899 __link_block_group(space_info, cache);
9900
9901 set_avail_alloc_bits(root->fs_info, cache->flags);
9902 if (btrfs_chunk_readonly(root, cache->key.objectid)) {
9903 inc_block_group_ro(cache, 1);
9904 } else if (btrfs_block_group_used(&cache->item) == 0) {
9905 spin_lock(&info->unused_bgs_lock);
9906			/* Should always be true but just in case. */
9907 if (list_empty(&cache->bg_list)) {
9908 btrfs_get_block_group(cache);
9909 list_add_tail(&cache->bg_list,
9910 &info->unused_bgs);
9911 }
9912 spin_unlock(&info->unused_bgs_lock);
9913 }
9914 }
9915
9916 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
9917 if (!(get_alloc_profile(root, space_info->flags) &
9918 (BTRFS_BLOCK_GROUP_RAID10 |
9919 BTRFS_BLOCK_GROUP_RAID1 |
9920 BTRFS_BLOCK_GROUP_RAID5 |
9921 BTRFS_BLOCK_GROUP_RAID6 |
9922 BTRFS_BLOCK_GROUP_DUP)))
9923 continue;
9924		/*
9925		 * avoid allocating from un-mirrored block group if there are
9926		 * mirrored block groups.
9927		 */
9928 list_for_each_entry(cache,
9929 &space_info->block_groups[BTRFS_RAID_RAID0],
9930 list)
9931 inc_block_group_ro(cache, 1);
9932 list_for_each_entry(cache,
9933 &space_info->block_groups[BTRFS_RAID_SINGLE],
9934 list)
9935 inc_block_group_ro(cache, 1);
9936 }
9937
9938 init_global_block_rsv(info);
9939 ret = 0;
9940error:
9941 btrfs_free_path(path);
9942 return ret;
9943}
9944
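/*
 * Insert the block group items for all block groups created during this
 * transaction (trans->new_bgs) and finish the corresponding chunk
 * allocations.
 */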
9945void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
9946 struct btrfs_root *root)
9947{
9948 struct btrfs_block_group_cache *block_group, *tmp;
9949 struct btrfs_root *extent_root = root->fs_info->extent_root;
9950 struct btrfs_block_group_item item;
9951 struct btrfs_key key;
9952 int ret = 0;
9953 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
9954
9955 trans->can_flush_pending_bgs = false;
9956 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
9957 if (ret)
9958 goto next;
9959
9960 spin_lock(&block_group->lock);
9961 memcpy(&item, &block_group->item, sizeof(item));
9962 memcpy(&key, &block_group->key, sizeof(key));
9963 spin_unlock(&block_group->lock);
9964
9965 ret = btrfs_insert_item(trans, extent_root, &key, &item,
9966 sizeof(item));
9967 if (ret)
9968 btrfs_abort_transaction(trans, extent_root, ret);
9969 ret = btrfs_finish_chunk_alloc(trans, extent_root,
9970 key.objectid, key.offset);
9971 if (ret)
9972 btrfs_abort_transaction(trans, extent_root, ret);
9973 add_block_group_free_space(trans, root->fs_info, block_group);
9974
9975next:
9976 list_del_init(&block_group->bg_list);
9977 }
9978 trans->can_flush_pending_bgs = can_flush_pending_bgs;
9979}
9980
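/*
 * Create the in-memory block group cache for a newly allocated chunk and add
 * it to trans->new_bgs so that its item is inserted later by
 * btrfs_create_pending_block_groups().
 */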
9981int btrfs_make_block_group(struct btrfs_trans_handle *trans,
9982 struct btrfs_root *root, u64 bytes_used,
9983 u64 type, u64 chunk_objectid, u64 chunk_offset,
9984 u64 size)
9985{
9986 int ret;
9987 struct btrfs_root *extent_root;
9988 struct btrfs_block_group_cache *cache;
9989
9990 extent_root = root->fs_info->extent_root;
9991
9992 btrfs_set_log_full_commit(root->fs_info, trans);
9993
9994 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
9995 if (!cache)
9996 return -ENOMEM;
9997
9998 btrfs_set_block_group_used(&cache->item, bytes_used);
9999 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
10000 btrfs_set_block_group_flags(&cache->item, type);
10001
10002 cache->flags = type;
10003 cache->last_byte_to_unpin = (u64)-1;
10004 cache->cached = BTRFS_CACHE_FINISHED;
10005 cache->needs_free_space = 1;
10006 ret = exclude_super_stripes(root, cache);
10007 if (ret) {
10008		/*
10009		 * We may have excluded something, so call this just in
10010		 * case.
10011		 */
10012 free_excluded_extents(root, cache);
10013 btrfs_put_block_group(cache);
10014 return ret;
10015 }
10016
10017 add_new_free_space(cache, root->fs_info, chunk_offset,
10018 chunk_offset + size);
10019
10020 free_excluded_extents(root, cache);
10021
10022#ifdef CONFIG_BTRFS_DEBUG
10023 if (btrfs_should_fragment_free_space(root, cache)) {
10024 u64 new_bytes_used = size - bytes_used;
10025
10026 bytes_used += new_bytes_used >> 1;
10027 fragment_free_space(root, cache);
10028 }
10029#endif
10030
10031	/*
10032	 * Ensure the corresponding space_info object is created and assigned
10033	 * to our block group, but don't update its counters just yet.
10034	 */
10035 ret = update_space_info(root->fs_info, cache->flags, 0, 0,
10036 &cache->space_info);
10037 if (ret) {
10038 btrfs_remove_free_space_cache(cache);
10039 btrfs_put_block_group(cache);
10040 return ret;
10041 }
10042
10043 ret = btrfs_add_block_group_cache(root->fs_info, cache);
10044 if (ret) {
10045 btrfs_remove_free_space_cache(cache);
10046 btrfs_put_block_group(cache);
10047 return ret;
10048 }
10049
10050	/*
10051	 * Now that our block group has its ->space_info set and is inserted
10052	 * in the rbtree, update the space info's counters.
10053	 */
10054 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
10055 &cache->space_info);
10056 if (ret) {
10057 btrfs_remove_free_space_cache(cache);
10058 spin_lock(&root->fs_info->block_group_cache_lock);
10059 rb_erase(&cache->cache_node,
10060 &root->fs_info->block_group_cache_tree);
10061 RB_CLEAR_NODE(&cache->cache_node);
10062 spin_unlock(&root->fs_info->block_group_cache_lock);
10063 btrfs_put_block_group(cache);
10064 return ret;
10065 }
10066 update_global_block_rsv(root->fs_info);
10067
10068 spin_lock(&cache->space_info->lock);
10069 cache->space_info->bytes_readonly += cache->bytes_super;
10070 spin_unlock(&cache->space_info->lock);
10071
10072 __link_block_group(cache->space_info, cache);
10073
10074 list_add_tail(&cache->bg_list, &trans->new_bgs);
10075
10076 set_avail_alloc_bits(extent_root->fs_info, type);
10077
10078 return 0;
10079}
10080
10081static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10082{
10083 u64 extra_flags = chunk_to_extended(flags) &
10084 BTRFS_EXTENDED_PROFILE_MASK;
10085
10086 write_seqlock(&fs_info->profiles_lock);
10087 if (flags & BTRFS_BLOCK_GROUP_DATA)
10088 fs_info->avail_data_alloc_bits &= ~extra_flags;
10089 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10090 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10091 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10092 fs_info->avail_system_alloc_bits &= ~extra_flags;
10093 write_sequnlock(&fs_info->profiles_lock);
10094}
10095
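/*
 * Remove an empty, read-only block group: drop its free space cache inode,
 * unlink it from all in-memory structures, delete its items from the tree of
 * tree roots and the extent tree and, unless a trimming task still needs it,
 * drop its chunk mapping as well.
 */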
10096int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10097 struct btrfs_root *root, u64 group_start,
10098 struct extent_map *em)
10099{
10100 struct btrfs_path *path;
10101 struct btrfs_block_group_cache *block_group;
10102 struct btrfs_free_cluster *cluster;
10103 struct btrfs_root *tree_root = root->fs_info->tree_root;
10104 struct btrfs_key key;
10105 struct inode *inode;
10106 struct kobject *kobj = NULL;
10107 int ret;
10108 int index;
10109 int factor;
10110 struct btrfs_caching_control *caching_ctl = NULL;
10111 bool remove_em;
10112
10113 root = root->fs_info->extent_root;
10114
10115 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
10116 BUG_ON(!block_group);
10117 BUG_ON(!block_group->ro);
10118
10119	/*
10120	 * Free the reserved super bytes from this block group before
10121	 * removing it.
10122	 */
10123 free_excluded_extents(root, block_group);
10124
10125 memcpy(&key, &block_group->key, sizeof(key));
10126 index = get_block_group_index(block_group);
10127 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
10128 BTRFS_BLOCK_GROUP_RAID1 |
10129 BTRFS_BLOCK_GROUP_RAID10))
10130 factor = 2;
10131 else
10132 factor = 1;
10133
10134	/* make sure this block group isn't part of an allocation cluster */
10135 cluster = &root->fs_info->data_alloc_cluster;
10136 spin_lock(&cluster->refill_lock);
10137 btrfs_return_cluster_to_free_space(block_group, cluster);
10138 spin_unlock(&cluster->refill_lock);
10139
10140	/*
10141	 * make sure this block group isn't part of a metadata
10142	 * allocation cluster
10143	 */
10144 cluster = &root->fs_info->meta_alloc_cluster;
10145 spin_lock(&cluster->refill_lock);
10146 btrfs_return_cluster_to_free_space(block_group, cluster);
10147 spin_unlock(&cluster->refill_lock);
10148
10149 path = btrfs_alloc_path();
10150 if (!path) {
10151 ret = -ENOMEM;
10152 goto out;
10153 }
10154
10155	/*
10156	 * Look up the free space cache inode (if any) before we start
10157	 * tearing the block group down; it is dropped via an orphan below.
10158	 */
10159 inode = lookup_free_space_inode(tree_root, block_group, path);
10160
10161 mutex_lock(&trans->transaction->cache_write_mutex);
10162	/*
10163	 * Make sure our free space cache IO is done before we remove the
10164	 * free space inode.
10165	 */
10166 spin_lock(&trans->transaction->dirty_bgs_lock);
10167 if (!list_empty(&block_group->io_list)) {
10168 list_del_init(&block_group->io_list);
10169
10170 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10171
10172 spin_unlock(&trans->transaction->dirty_bgs_lock);
10173 btrfs_wait_cache_io(root, trans, block_group,
10174 &block_group->io_ctl, path,
10175 block_group->key.objectid);
10176 btrfs_put_block_group(block_group);
10177 spin_lock(&trans->transaction->dirty_bgs_lock);
10178 }
10179
10180 if (!list_empty(&block_group->dirty_list)) {
10181 list_del_init(&block_group->dirty_list);
10182 btrfs_put_block_group(block_group);
10183 }
10184 spin_unlock(&trans->transaction->dirty_bgs_lock);
10185 mutex_unlock(&trans->transaction->cache_write_mutex);
10186
10187 if (!IS_ERR(inode)) {
10188 ret = btrfs_orphan_add(trans, inode);
10189 if (ret) {
10190 btrfs_add_delayed_iput(inode);
10191 goto out;
10192 }
10193 clear_nlink(inode);
10194
10195 spin_lock(&block_group->lock);
10196 if (block_group->iref) {
10197 block_group->iref = 0;
10198 block_group->inode = NULL;
10199 spin_unlock(&block_group->lock);
10200 iput(inode);
10201 } else {
10202 spin_unlock(&block_group->lock);
10203 }
10204
10205 btrfs_add_delayed_iput(inode);
10206 }
10207
10208 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10209 key.offset = block_group->key.objectid;
10210 key.type = 0;
10211
10212 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10213 if (ret < 0)
10214 goto out;
10215 if (ret > 0)
10216 btrfs_release_path(path);
10217 if (ret == 0) {
10218 ret = btrfs_del_item(trans, tree_root, path);
10219 if (ret)
10220 goto out;
10221 btrfs_release_path(path);
10222 }
10223
10224 spin_lock(&root->fs_info->block_group_cache_lock);
10225 rb_erase(&block_group->cache_node,
10226 &root->fs_info->block_group_cache_tree);
10227 RB_CLEAR_NODE(&block_group->cache_node);
10228
10229 if (root->fs_info->first_logical_byte == block_group->key.objectid)
10230 root->fs_info->first_logical_byte = (u64)-1;
10231 spin_unlock(&root->fs_info->block_group_cache_lock);
10232
10233 down_write(&block_group->space_info->groups_sem);
10234	/*
10235	 * we must use list_del_init so people can check to see if they
10236	 * are still on the list after taking the semaphore
10237	 */
10238 list_del_init(&block_group->list);
10239 if (list_empty(&block_group->space_info->block_groups[index])) {
10240 kobj = block_group->space_info->block_group_kobjs[index];
10241 block_group->space_info->block_group_kobjs[index] = NULL;
10242 clear_avail_alloc_bits(root->fs_info, block_group->flags);
10243 }
10244 up_write(&block_group->space_info->groups_sem);
10245 if (kobj) {
10246 kobject_del(kobj);
10247 kobject_put(kobj);
10248 }
10249
10250 if (block_group->has_caching_ctl)
10251 caching_ctl = get_caching_control(block_group);
10252 if (block_group->cached == BTRFS_CACHE_STARTED)
10253 wait_block_group_cache_done(block_group);
10254 if (block_group->has_caching_ctl) {
10255 down_write(&root->fs_info->commit_root_sem);
10256 if (!caching_ctl) {
10257 struct btrfs_caching_control *ctl;
10258
10259 list_for_each_entry(ctl,
10260 &root->fs_info->caching_block_groups, list)
10261 if (ctl->block_group == block_group) {
10262 caching_ctl = ctl;
10263 atomic_inc(&caching_ctl->count);
10264 break;
10265 }
10266 }
10267 if (caching_ctl)
10268 list_del_init(&caching_ctl->list);
10269 up_write(&root->fs_info->commit_root_sem);
10270 if (caching_ctl) {
10271			/* Once for the caching bgs list and once for us. */
10272 put_caching_control(caching_ctl);
10273 put_caching_control(caching_ctl);
10274 }
10275 }
10276
10277 spin_lock(&trans->transaction->dirty_bgs_lock);
10278	WARN_ON(!list_empty(&block_group->dirty_list));
10281	WARN_ON(!list_empty(&block_group->io_list));
10284 spin_unlock(&trans->transaction->dirty_bgs_lock);
10285 btrfs_remove_free_space_cache(block_group);
10286
10287 spin_lock(&block_group->space_info->lock);
10288 list_del_init(&block_group->ro_list);
10289
10290 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
10291 WARN_ON(block_group->space_info->total_bytes
10292 < block_group->key.offset);
10293 WARN_ON(block_group->space_info->bytes_readonly
10294 < block_group->key.offset);
10295 WARN_ON(block_group->space_info->disk_total
10296 < block_group->key.offset * factor);
10297 }
10298 block_group->space_info->total_bytes -= block_group->key.offset;
10299 block_group->space_info->bytes_readonly -= block_group->key.offset;
10300 block_group->space_info->disk_total -= block_group->key.offset * factor;
10301
10302 spin_unlock(&block_group->space_info->lock);
10303
10304 memcpy(&key, &block_group->key, sizeof(key));
10305
10306 lock_chunks(root);
10307 if (!list_empty(&em->list)) {
10308		/* We're in the transaction->pending_chunks list. */
10309 free_extent_map(em);
10310 }
10311 spin_lock(&block_group->lock);
10312 block_group->removed = 1;
10313	/*
10314	 * At this point trimming can't start on this block group, because we
10315	 * removed the block group from the rbtree
10316	 * fs_info->block_group_cache_tree, so no one can find it anymore and
10317	 * even if someone already got this block group before we removed it
10318	 * from the rbtree, they have already incremented
10319	 * block_group->trimming - if they didn't, they won't find any free
10320	 * space entries because we already removed them all when we called
10321	 * btrfs_remove_free_space_cache().
10322	 *
10323	 * And we must not remove the extent map from the fs_info->mapping_tree
10324	 * to prevent the same logical address range and physical device space
10325	 * ranges from being reused for a new block group. This is because our
10326	 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
10327	 * completely transactionless, so while it is trimming a range the
10328	 * currently running transaction might finish and a new one start,
10329	 * allowing for new block groups to be created that can reuse the same
10330	 * physical device locations unless we take this special care.
10331	 *
10332	 * There may also be an implicit trim operation if the file system
10333	 * is mounted with -odiscard. The same protections must remain
10334	 * for both trimming modes.
10335	 */
10336 remove_em = (atomic_read(&block_group->trimming) == 0);
10337	/*
10338	 * Make sure a trimmer task always sees the em in the pinned_chunks
10339	 * list if it sees block_group->removed == 1 (it needs to take
10340	 * block_group->lock before checking block_group->removed).
10341	 */
10342 if (!remove_em) {
10343		/*
10344		 * Our extent map may still be in the running transaction's
10345		 * pending_chunks list.  Keep it reachable by moving it to
10346		 * the fs_info->pinned_chunks list (we hold the chunk mutex)
10347		 * so that a trimmer task which already saw
10348		 * block_group->removed == 1 can still look up the chunk
10349		 * mapping, and so the underlying device ranges are not
10350		 * reused for a new block group until trimming finishes.
10351		 * The mapping is unpinned and freed when the last trimming
10352		 * reference is dropped in btrfs_put_block_group_trimming().
10353		 */
10354 list_move_tail(&em->list, &root->fs_info->pinned_chunks);
10355 }
10356 spin_unlock(&block_group->lock);
10357
10358 if (remove_em) {
10359 struct extent_map_tree *em_tree;
10360
10361 em_tree = &root->fs_info->mapping_tree.map_tree;
10362 write_lock(&em_tree->lock);
10363		/*
10364		 * The em might be in the pending_chunks list, so make sure the
10365		 * chunk mutex is locked, since remove_extent_mapping() will
10366		 * delete us from that list.
10367		 */
10368 remove_extent_mapping(em_tree, em);
10369 write_unlock(&em_tree->lock);
10370
10371 free_extent_map(em);
10372 }
10373
10374 unlock_chunks(root);
10375
10376 ret = remove_block_group_free_space(trans, root->fs_info, block_group);
10377 if (ret)
10378 goto out;
10379
10380 btrfs_put_block_group(block_group);
10381 btrfs_put_block_group(block_group);
10382
10383 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10384 if (ret > 0)
10385 ret = -EIO;
10386 if (ret < 0)
10387 goto out;
10388
10389 ret = btrfs_del_item(trans, root, path);
10390out:
10391 btrfs_free_path(path);
10392 return ret;
10393}
10394
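/*
 * Start a transaction with enough metadata units reserved to remove the
 * block group and chunk at 'chunk_offset' (see the comment above the
 * num_items computation inside this function).
 */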
10395struct btrfs_trans_handle *
10396btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10397 const u64 chunk_offset)
10398{
10399 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10400 struct extent_map *em;
10401 struct map_lookup *map;
10402 unsigned int num_items;
10403
10404 read_lock(&em_tree->lock);
10405 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10406 read_unlock(&em_tree->lock);
10407 ASSERT(em && em->start == chunk_offset);
10408
10409	/*
10410	 * We need to reserve 3 + N units from the metadata space info in
10411	 * order to remove a block group (done at btrfs_remove_chunk() and
10412	 * at btrfs_remove_block_group()), which are used for:
10413	 *
10414	 * 1 unit for adding the free space inode's orphan (located in the
10415	 * tree of tree roots).
10416	 * 1 unit for deleting the block group item (located in the extent
10417	 * tree).
10418	 * 1 unit for deleting the free space item (located in the tree of
10419	 * tree roots).
10420	 * N units for deleting N device extent items corresponding to each
10421	 * stripe (located in the device tree).
10422	 *
10423	 * In order to remove a block group we also need to reserve units in
10424	 * the system space info in order to update the chunk tree, but that
10425	 * reservation is done later, at btrfs_remove_chunk(), through a call
10426	 * to check_system_chunk().
10427	 */
10428 map = em->map_lookup;
10429 num_items = 3 + map->num_stripes;
10430 free_extent_map(em);
10431
10432 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10433 num_items, 1);
10434}
10435
10436/*
10437 * Process the unused_bgs list and remove any that don't have any allocated
10438 * space inside of them.
10439 */
10440void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10441{
10442 struct btrfs_block_group_cache *block_group;
10443 struct btrfs_space_info *space_info;
10444 struct btrfs_root *root = fs_info->extent_root;
10445 struct btrfs_trans_handle *trans;
10446 int ret = 0;
10447
10448 if (!fs_info->open)
10449 return;
10450
10451 spin_lock(&fs_info->unused_bgs_lock);
10452 while (!list_empty(&fs_info->unused_bgs)) {
10453 u64 start, end;
10454 int trimming;
10455
10456 block_group = list_first_entry(&fs_info->unused_bgs,
10457 struct btrfs_block_group_cache,
10458 bg_list);
10459 list_del_init(&block_group->bg_list);
10460
10461 space_info = block_group->space_info;
10462
10463 if (ret || btrfs_mixed_space_info(space_info)) {
10464 btrfs_put_block_group(block_group);
10465 continue;
10466 }
10467 spin_unlock(&fs_info->unused_bgs_lock);
10468
10469 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10470
10471		/* Don't want to race with allocators so take the groups_sem */
10472 down_write(&space_info->groups_sem);
10473 spin_lock(&block_group->lock);
10474 if (block_group->reserved ||
10475 btrfs_block_group_used(&block_group->item) ||
10476 block_group->ro ||
10477 list_is_singular(&block_group->list)) {
10478			/*
10479			 * We want to bail if we made new allocations or have
10480			 * outstanding allocations in this block group.  We do
10481			 * the ro check in case balance is currently acting on
10482			 * this block group.
10483			 */
10484 spin_unlock(&block_group->lock);
10485 up_write(&space_info->groups_sem);
10486 goto next;
10487 }
10488 spin_unlock(&block_group->lock);
10489
10490		/* We don't want to force the issue, only flip if it's ok. */
10491 ret = inc_block_group_ro(block_group, 0);
10492 up_write(&space_info->groups_sem);
10493 if (ret < 0) {
10494 ret = 0;
10495 goto next;
10496 }
10497
10498		/*
10499		 * Want to do this before we do anything else so we can recover
10500		 * properly if we fail to join the transaction.
10501		 */
10502 trans = btrfs_start_trans_remove_block_group(fs_info,
10503 block_group->key.objectid);
10504 if (IS_ERR(trans)) {
10505 btrfs_dec_block_group_ro(root, block_group);
10506 ret = PTR_ERR(trans);
10507 goto next;
10508 }
10509
10510		/*
10511		 * We could have pending pinned extents for this block group,
10512		 * just delete them, we don't care about them anymore.
10513		 */
10514 start = block_group->key.objectid;
10515 end = start + block_group->key.offset - 1;
10516
10517
10518		/*
10519		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
10520		 * btrfs_finish_extent_commit().  If we are at transaction N,
10521		 * another task might be running finish_extent_commit() for the
10522		 * previous transaction N - 1, and could have seen a range of
10523		 * this block group in freed_extents[] before we clear it here.
10524		 * Clearing both trees under the mutex ensures that task cannot
10525		 * unpin ranges of this block group after we have removed it.
10526		 */
10527 mutex_lock(&fs_info->unused_bg_unpin_mutex);
10528 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
10529 EXTENT_DIRTY, GFP_NOFS);
10530 if (ret) {
10531 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10532 btrfs_dec_block_group_ro(root, block_group);
10533 goto end_trans;
10534 }
10535 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
10536 EXTENT_DIRTY, GFP_NOFS);
10537 if (ret) {
10538 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10539 btrfs_dec_block_group_ro(root, block_group);
10540 goto end_trans;
10541 }
10542 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10543
10544		/* Reset pinned so btrfs_put_block_group() doesn't complain */
10545 spin_lock(&space_info->lock);
10546 spin_lock(&block_group->lock);
10547
10548 space_info->bytes_pinned -= block_group->pinned;
10549 space_info->bytes_readonly += block_group->pinned;
10550 percpu_counter_add(&space_info->total_bytes_pinned,
10551 -block_group->pinned);
10552 block_group->pinned = 0;
10553
10554 spin_unlock(&block_group->lock);
10555 spin_unlock(&space_info->lock);
10556
10557		/* DISCARD can flip during remount */
10558 trimming = btrfs_test_opt(root, DISCARD);
10559
10560		/* Implicit trim during transaction commit. */
10561 if (trimming)
10562 btrfs_get_block_group_trimming(block_group);
10563
10564		/*
10565		 * btrfs_remove_chunk() will abort the transaction if things
10566		 * go horribly wrong.
10567		 */
10568 ret = btrfs_remove_chunk(trans, root,
10569 block_group->key.objectid);
10570
10571 if (ret) {
10572 if (trimming)
10573 btrfs_put_block_group_trimming(block_group);
10574 goto end_trans;
10575 }
10576
10577		/*
10578		 * If we're not mounted with -odiscard, we can just forget
10579		 * about this block group.  Otherwise we have to wait until
10580		 * transaction commit to do the actual discard.
10581		 */
10582 if (trimming) {
10583 spin_lock(&fs_info->unused_bgs_lock);
10584			/*
10585			 * A concurrent scrub might have added us to the list
10586			 * fs_info->unused_bgs, so use a list_move operation
10587			 * to add the block group to the deleted_bgs list.
10588			 */
10589 list_move(&block_group->bg_list,
10590 &trans->transaction->deleted_bgs);
10591 spin_unlock(&fs_info->unused_bgs_lock);
10592 btrfs_get_block_group(block_group);
10593 }
10594end_trans:
10595 btrfs_end_transaction(trans, root);
10596next:
10597 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
10598 btrfs_put_block_group(block_group);
10599 spin_lock(&fs_info->unused_bgs_lock);
10600 }
10601 spin_unlock(&fs_info->unused_bgs_lock);
10602}
10603
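/*
 * Create the initial space_info structures: system, plus either separate
 * metadata and data space infos or a single mixed one, depending on the
 * MIXED_GROUPS incompat feature.
 */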
10604int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10605{
10606 struct btrfs_space_info *space_info;
10607 struct btrfs_super_block *disk_super;
10608 u64 features;
10609 u64 flags;
10610 int mixed = 0;
10611 int ret;
10612
10613 disk_super = fs_info->super_copy;
10614 if (!btrfs_super_root(disk_super))
10615 return -EINVAL;
10616
10617 features = btrfs_super_incompat_flags(disk_super);
10618 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10619 mixed = 1;
10620
10621 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10622 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10623 if (ret)
10624 goto out;
10625
10626 if (mixed) {
10627 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10628 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10629 } else {
10630 flags = BTRFS_BLOCK_GROUP_METADATA;
10631 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10632 if (ret)
10633 goto out;
10634
10635 flags = BTRFS_BLOCK_GROUP_DATA;
10636 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10637 }
10638out:
10639 return ret;
10640}
10641
10642int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
10643{
10644 return unpin_extent_range(root, start, end, false);
10645}
10646
10647/*
10648 * It used to be that old block groups would be left around forever.
10649 * Iterating over them would be enough to trim unused space.  Since we
10650 * now automatically remove them, we also need to iterate over unallocated
10651 * space.
10652 *
10653 * We don't want a transaction for this since the discard may take a
10654 * substantial amount of time.  We don't require that a transaction be
10655 * running, but we do need to take a running transaction into account
10656 * to ensure that we're not discarding chunks that were released in
10657 * the current transaction.
10658 *
10659 * Holding the chunks lock will prevent other threads from allocating
10660 * or releasing chunks, but it won't prevent a running transaction
10661 * from committing and releasing the memory that the pending chunks
10662 * list head uses.  For that, we need to take a reference to the
10663 * transaction.
10664 */
10665static int btrfs_trim_free_extents(struct btrfs_device *device,
10666 u64 minlen, u64 *trimmed)
10667{
10668 u64 start = 0, len = 0;
10669 int ret;
10670
10671 *trimmed = 0;
10672
10673	/* Not writeable = nothing to do. */
10674 if (!device->writeable)
10675 return 0;
10676
10677	/* No free space = nothing to do. */
10678 if (device->total_bytes <= device->bytes_used)
10679 return 0;
10680
10681 ret = 0;
10682
10683 while (1) {
10684 struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
10685 struct btrfs_transaction *trans;
10686 u64 bytes;
10687
10688 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10689 if (ret)
10690 return ret;
10691
10692 down_read(&fs_info->commit_root_sem);
10693
10694 spin_lock(&fs_info->trans_lock);
10695 trans = fs_info->running_transaction;
10696 if (trans)
10697 atomic_inc(&trans->use_count);
10698 spin_unlock(&fs_info->trans_lock);
10699
10700 ret = find_free_dev_extent_start(trans, device, minlen, start,
10701 &start, &len);
10702 if (trans)
10703 btrfs_put_transaction(trans);
10704
10705 if (ret) {
10706 up_read(&fs_info->commit_root_sem);
10707 mutex_unlock(&fs_info->chunk_mutex);
10708 if (ret == -ENOSPC)
10709 ret = 0;
10710 break;
10711 }
10712
10713 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10714 up_read(&fs_info->commit_root_sem);
10715 mutex_unlock(&fs_info->chunk_mutex);
10716
10717 if (ret)
10718 break;
10719
10720 start += len;
10721 *trimmed += bytes;
10722
10723 if (fatal_signal_pending(current)) {
10724 ret = -ERESTARTSYS;
10725 break;
10726 }
10727
10728 cond_resched();
10729 }
10730
10731 return ret;
10732}
10733
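/*
 * FITRIM implementation: trim the free space of every block group that
 * overlaps the requested range, then trim the unallocated space on each
 * writeable device.  The total number of bytes discarded is returned in
 * range->len.
 */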
10734int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10735{
10736 struct btrfs_fs_info *fs_info = root->fs_info;
10737 struct btrfs_block_group_cache *cache = NULL;
10738 struct btrfs_device *device;
10739 struct list_head *devices;
10740 u64 group_trimmed;
10741 u64 start;
10742 u64 end;
10743 u64 trimmed = 0;
10744 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10745 int ret = 0;
10746
10747	/*
10748	 * try to trim all FS space, our block group may start from non-zero.
10749	 */
10750 if (range->len == total_bytes)
10751 cache = btrfs_lookup_first_block_group(fs_info, range->start);
10752 else
10753 cache = btrfs_lookup_block_group(fs_info, range->start);
10754
10755 while (cache) {
10756 if (cache->key.objectid >= (range->start + range->len)) {
10757 btrfs_put_block_group(cache);
10758 break;
10759 }
10760
10761 start = max(range->start, cache->key.objectid);
10762 end = min(range->start + range->len,
10763 cache->key.objectid + cache->key.offset);
10764
10765 if (end - start >= range->minlen) {
10766 if (!block_group_cache_done(cache)) {
10767 ret = cache_block_group(cache, 0);
10768 if (ret) {
10769 btrfs_put_block_group(cache);
10770 break;
10771 }
10772 ret = wait_block_group_cache_done(cache);
10773 if (ret) {
10774 btrfs_put_block_group(cache);
10775 break;
10776 }
10777 }
10778 ret = btrfs_trim_block_group(cache,
10779 &group_trimmed,
10780 start,
10781 end,
10782 range->minlen);
10783
10784 trimmed += group_trimmed;
10785 if (ret) {
10786 btrfs_put_block_group(cache);
10787 break;
10788 }
10789 }
10790
10791 cache = next_block_group(fs_info->tree_root, cache);
10792 }
10793
10794 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
10795 devices = &root->fs_info->fs_devices->alloc_list;
10796 list_for_each_entry(device, devices, dev_alloc_list) {
10797 ret = btrfs_trim_free_extents(device, range->minlen,
10798 &group_trimmed);
10799 if (ret)
10800 break;
10801
10802 trimmed += group_trimmed;
10803 }
10804 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
10805
10806 range->len = trimmed;
10807 return ret;
10808}
10809
10810/*
10811 * btrfs_{start,end}_write_no_snapshoting() are similar to
10812 * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
10813 * data into the page cache through nocow before the subvolume is snapshoted,
10814 * but flush the data into disk after the snapshot creation, or to prevent
10815 * nocow writes through the subvolume being snapshoted while the snapshot is
10816 * being created.
10817 */
10818void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
10819{
10820 percpu_counter_dec(&root->subv_writers->counter);
10821	/*
10822	 * Make sure counter is updated before we wake up waiters.
10823	 */
10824 smp_mb();
10825 if (waitqueue_active(&root->subv_writers->wait))
10826 wake_up(&root->subv_writers->wait);
10827}
10828
10829int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
10830{
10831 if (atomic_read(&root->will_be_snapshoted))
10832 return 0;
10833
10834 percpu_counter_inc(&root->subv_writers->counter);
10835	/*
10836	 * Make sure counter is updated before we check for snapshot creation.
10837	 */
10838 smp_mb();
10839 if (atomic_read(&root->will_be_snapshoted)) {
10840 btrfs_end_write_no_snapshoting(root);
10841 return 0;
10842 }
10843 return 1;
10844}
10845
10846static int wait_snapshoting_atomic_t(atomic_t *a)
10847{
10848 schedule();
10849 return 0;
10850}
10851
10852void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
10853{
10854 while (true) {
10855 int ret;
10856
10857 ret = btrfs_start_write_no_snapshoting(root);
10858 if (ret)
10859 break;
10860 wait_on_atomic_t(&root->will_be_snapshoted,
10861 wait_snapshoting_atomic_t,
10862 TASK_UNINTERRUPTIBLE);
10863 }
10864}
10865