/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"

#undef SCRAMBLE_DELAYED_REFS

/*
 * Control flags for do_chunk_alloc()'s force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

/*
 * Control how reservations are dealt with.
 *
 * RESERVE_FREE - freeing a reservation.
 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
 *   delalloc accounting.
 * RESERVE_ALLOC_NO_ACCOUNT - we are allocating space, but we should not
 *   update bytes_may_use.
 */
enum {
	RESERVE_FREE = 0,
	RESERVE_ALLOC = 1,
	RESERVE_ALLOC_NO_ACCOUNT = 2,
};

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve,
				       int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}
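
/*
 * Illustrative sketch (not part of the kernel build): every lookup that
 * hands out a block group takes a reference via btrfs_get_block_group(),
 * and the caller must drop it with btrfs_put_block_group() once done:
 *
 *	struct btrfs_block_group_cache *bg;
 *
 *	bg = btrfs_lookup_block_group(fs_info, bytenr);
 *	if (bg) {
 *		... use bg->key, bg->flags, etc. ...
 *		btrfs_put_block_group(bg);
 *	}
 *
 * The last put frees the structure, so bg must not be touched afterwards.
 */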

/*
 * This adds the block group to the fs_info rb tree for the block group
 * cache, keyed by the logical start of the block group.
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0,
 * else it will return the block group that contains the bytenr.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
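
/*
 * Illustrative sketch (not part of the kernel build): with block groups
 * starting at logical offsets 0, 1G and 2G (each 1G long, all offsets
 * hypothetical), the two search modes behave as follows:
 *
 *	block_group_cache_tree_search(info, 1G + 42, 1)
 *		-> the group [1G, 2G), since it contains the byte
 *	block_group_cache_tree_search(info, 1G + 42, 0)
 *		-> the group starting at 2G, the first group whose start
 *		   is at or after bytenr
 *
 * Both variants return with a reference held on the result.
 */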

static int add_excluded_extent(struct btrfs_root *root,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&root->fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE);
	set_extent_bits(&root->fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE);
	return 0;
}

static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE);
}

static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	atomic_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (atomic_dec_and_test(&ctl->count))
		kfree(ctl);
}

#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_root *root,
				struct btrfs_block_group_cache *block_group)
{
	u64 start = block_group->key.objectid;
	u64 len = block_group->key.offset;
	u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
		root->nodesize : root->sectorsize;
	u64 step = chunk << 1;

	while (len > chunk) {
		btrfs_remove_free_space(block_group, start, chunk);
		start += step;
		if (len < step)
			len = 0;
		else
			len -= step;
	}
}
#endif

/*
 * This is only called by cache_block_group, since we could have freed
 * extents we need to check the pinned_extents for any extents that can't
 * be used yet since their free space will be released as soon as the
 * transaction commits.
 */
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		       struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret); /* -ENOMEM or logic error */
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret); /* -ENOMEM or logic error */
	}

	return total_added;
}
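
/*
 * Illustrative sketch (not part of the kernel build), with hypothetical
 * offsets: caching the range [0, 100M) while [10M, 12M) and [40M, 44M)
 * are pinned yields three free-space additions, roughly:
 *
 *	add_new_free_space(bg, info, 0, 100M);
 *		-> btrfs_add_free_space(bg,  0M, 10M)	covers [0M, 10M)
 *		-> btrfs_add_free_space(bg, 12M, 28M)	covers [12M, 40M)
 *		-> btrfs_add_free_space(bg, 44M, 56M)	covers [44M, 100M)
 *
 * Pinned ranges are skipped, everything else becomes free space, and the
 * total number of bytes added (here 94M) is returned.
 */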

static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret;
	bool wakeup = true;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

#ifdef CONFIG_BTRFS_DEBUG
	/*
	 * If we're fragmenting we don't want to make anybody think we can
	 * allocate from this block group until we've had a chance to
	 * fragment the free space.
	 */
	if (btrfs_should_fragment_free_space(extent_root, block_group))
		wakeup = false;
#endif

	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it's read-only.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				if (wakeup)
					caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				mutex_lock(&caching_ctl->mutex);
				down_read(&fs_info->commit_root_sem);
				goto next;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			if (wakeup)
				caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->tree_root->nodesize;
			else
				last = key.objectid + key.offset;

			if (total_found > CACHING_CTL_WAKE_UP) {
				total_found = 0;
				if (wakeup)
					wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

out:
	btrfs_free_path(path);
	return ret;
}

static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	int ret;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	mutex_lock(&caching_ctl->mutex);
	down_read(&fs_info->commit_root_sem);

	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		ret = load_free_space_tree(caching_ctl);
	else
		ret = load_extent_tree_free(caching_ctl);

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(extent_root, block_group)) {
		u64 bytes_used;

		spin_lock(&block_group->space_info->lock);
		spin_lock(&block_group->lock);
		bytes_used = block_group->key.offset -
			btrfs_block_group_used(&block_group->item);
		block_group->space_info->bytes_used += bytes_used >> 1;
		spin_unlock(&block_group->lock);
		spin_unlock(&block_group->space_info->lock);
		fragment_free_space(extent_root, block_group);
	}
#endif

	caching_ctl->progress = (u64)-1;

	up_read(&fs_info->commit_root_sem);
	free_excluded_extents(fs_info->extent_root, block_group);
	mutex_unlock(&caching_ctl->mutex);

	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);
	/*
	 * A rare race: another thread may already be doing the fast space
	 * cache load for this block group, e.g. if a transaction commit
	 * needs to do an allocation while the space cache info is still
	 * being loaded.  If we are in the BTRFS_CACHE_FAST state, wait for
	 * the fast load to finish before deciding what to do, otherwise we
	 * could end up allocating from a block group that is in the middle
	 * of caching.
	 */
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
		mutex_lock(&caching_ctl->mutex);
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
		if (ret == 1 &&
		    btrfs_should_fragment_free_space(fs_info->extent_root,
						     cache)) {
			u64 bytes_used;

			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			bytes_used = cache->key.offset -
				btrfs_block_group_used(&cache->item);
			cache->space_info->bytes_used += bytes_used >> 1;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fragment_free_space(fs_info->extent_root, cache);
		}
#endif
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	} else {
		/*
		 * We are not going to do the fast caching, set cached to the
		 * appropriate value and wakeup any waiters.
		 */
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}
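
/*
 * Illustrative sketch (not part of the kernel build) of the block group
 * caching states driven by cache_block_group() above:
 *
 *	BTRFS_CACHE_NO --> BTRFS_CACHE_FAST --> BTRFS_CACHE_FINISHED
 *	                        |    (on-disk space cache loaded, ret == 1)
 *	                        |
 *	                        +--> BTRFS_CACHE_STARTED --> FINISHED/ERROR
 *	                             (caching_thread walks the extent tree
 *	                              or free space tree asynchronously)
 *
 * A caller that only wants the cheap attempt passes load_cache_only == 1,
 * in which case a failed fast load drops the group back to BTRFS_CACHE_NO
 * instead of kicking off the async thread.
 */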

/*
 * Return the block group that starts at or after bytenr.
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * Return the block group that contains the given bytenr.
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * After adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to lookup reference count and flags of a tree block.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the tree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different.
	 */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == root->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and
			 * try again.
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
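
/*
 * Illustrative sketch (not part of the kernel build): the committed extent
 * item may say refs == 2 while the current transaction still has queued,
 * unprocessed delayed refs for the same bytenr.  If the delayed ref head
 * carries ref_mod == +1 (one add queued) then, with hypothetical values:
 *
 *	u64 refs, flags;
 *
 *	btrfs_lookup_extent_info(trans, root, bytenr, level, 1,
 *				 &refs, &flags);
 *	// refs == 3: 2 from the extent tree plus 1 pending delayed ref
 *
 * so callers see the count as it would be after all delayed refs run.
 */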

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs:
 *
 * Implicit back refs describe the owner of the reference: for data extents
 * that is the root objectid, the inode objectid and the file offset; for
 * tree blocks it is the owner root objectid (the TREE_BLOCK_REF and
 * EXTENT_DATA_REF key types).  They are cheap to maintain when trees are
 * COWed, but finding the referencing block requires a tree search.
 *
 * Full back refs record the bytenr of the tree block that holds the
 * pointer to the extent (the SHARED_BLOCK_REF and SHARED_DATA_REF key
 * types).  They are used once the owner relationship is broken, for
 * example after relocation, and the extent item then carries the
 * BTRFS_BLOCK_FLAG_FULL_BACKREF flag.
 *
 * Back refs are stored inline in the extent item while they fit; once the
 * item would grow past BTRFS_MAX_EXTENT_ITEM_SIZE, additional refs are
 * inserted as separate keyed items that sort directly after the extent
 * item.  All reference count changes are funneled through the delayed ref
 * machinery so that modifications can be batched per transaction.
 */
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
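
/*
 * Illustrative sketch (not part of the kernel build): the hash above is
 * used as the key.offset of EXTENT_DATA_REF items, so refs from the same
 * root cluster together (root_objectid feeds the high bits, owner and
 * offset feed the low bits).  A lookup recomputes it the same way:
 *
 *	key.objectid = bytenr;
 *	key.type = BTRFS_EXTENT_DATA_REF_KEY;
 *	key.offset = hash_extent_data_ref(root_objectid, owner, offset);
 *
 * Note the << 31 (rather than << 32) is long-standing on-disk behaviour
 * and must not be "fixed", since existing filesystems were written with
 * it.
 */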

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}
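
/*
 * Illustrative sketch (not part of the kernel build) of the mapping that
 * extent_ref_type() implements:
 *
 *	owner < BTRFS_FIRST_FREE_OBJECTID (tree block):
 *		parent set   -> BTRFS_SHARED_BLOCK_REF_KEY
 *		parent unset -> BTRFS_TREE_BLOCK_REF_KEY
 *	owner >= BTRFS_FIRST_FREE_OBJECTID (file data, owner is an inode):
 *		parent set   -> BTRFS_SHARED_DATA_REF_KEY
 *		parent unset -> BTRFS_EXTENT_DATA_REF_KEY
 *
 * i.e. "shared" (full) back refs are keyed by the parent block's bytenr,
 * while normal back refs are keyed by the owning root/inode.
 */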

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref is not found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * For skinny metadata the key offset is the tree block level rather
	 * than the extent size.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted filesystem which still has the old fat
	 * extent tree items, so fall back to searching for EXTENT_ITEM_KEY.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add a new inline back ref, we have to make sure that
		 * there is no corresponding keyed back ref item.  While
		 * adding new back ref items, if an old back ref is deleted,
		 * the extent item will be shrunk.
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
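
/*
 * Illustrative sketch (not part of the kernel build) of the on-disk item
 * layout that the walk above parses, for a non-skinny tree block:
 *
 *	struct btrfs_extent_item      (refs, generation, flags)
 *	struct btrfs_tree_block_info  (key, level; tree blocks only)
 *	inline ref 1: u8 type + payload
 *	inline ref 2: u8 type + payload
 *	...
 *
 * The payload size depends on the ref type (btrfs_extent_inline_ref_size())
 * and refs are kept sorted by type, then in the same order as keyed back
 * ref items, which is why the walk can stop early once want < type.
 */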

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(root, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(root, path, iref,
					     refs_to_add, extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(root, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(root, path, iref,
					     -refs_to_drop, NULL, last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}
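
/*
 * Illustrative sketch (not part of the kernel build): insert and remove
 * both prefer the inline representation and fall back to keyed items:
 *
 *	insert_inline_extent_backref()
 *		0       -> an inline ref existed, its count was bumped
 *		-ENOENT -> a new inline ref was set up in place
 *		-EAGAIN -> no room inline; the caller
 *			   (__btrfs_inc_extent_ref below) updates the
 *			   refcount and adds a keyed back ref via
 *			   insert_extent_backref()
 *
 * remove_extent_backref() mirrors this: shrink or drop the inline ref if
 * one was found, else update or delete the keyed item.
 */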

#define in_range(b, first, len)	((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}
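
/*
 * Illustrative sketch (not part of the kernel build), with hypothetical
 * offsets: discarding [60M, 80M) on a device whose second superblock
 * mirror lives at btrfs_sb_offset(1) == 64M results in two bios:
 *
 *	btrfs_issue_discard(bdev, 60M, 20M, &bytes);
 *		-> blkdev_issue_discard() for [60M, 64M)
 *		-> blkdev_issue_discard() for [64M + BTRFS_SUPER_INFO_SIZE,
 *					       80M)
 *
 * i.e. the BTRFS_SUPER_INFO_SIZE bytes of each superblock mirror in the
 * range are carved out, and *bytes reports only what was really discarded
 * (here 20M - BTRFS_SUPER_INFO_SIZE).
 */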

int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are
	 * discarding.
	 */
	btrfs_bio_counter_inc_blocked(root->fs_info);
	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);
	/* Error condition is -ENOMEM */
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break; /* Logic errors or -ENOMEM */

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}
	btrfs_bio_counter_dec(root->fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
					num_bytes, parent, root_objectid,
					owner, offset, 0,
					BTRFS_ADD_DELAYED_REF, NULL);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}
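
/*
 * Illustrative sketch (not part of the kernel build): a delayed extent op
 * is a side order attached to a delayed ref, e.g. setting the full-backref
 * flag on a tree block without touching its refcount:
 *
 *	struct btrfs_delayed_extent_op op = {
 *		.flags_to_set = BTRFS_BLOCK_FLAG_FULL_BACKREF,
 *		.update_flags = 1,
 *	};
 *
 * __run_delayed_extent_op() then ORs flags_to_set into the extent item's
 * flags and, for tree blocks with update_key set, rewrites the first key
 * stored in the btrfs_tree_block_info that follows the item.
 */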

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == node->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == node->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = node->bytenr;
				key.offset = node->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}
2336
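/*
 * Process a single delayed tree block ref.  Tree blocks always carry a
 * ref_mod of 1; the insertion key is built in skinny (level based) or old
 * (size based) form depending on the filesystem's incompat flags.
 */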
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node,
					     parent, ref_root,
					     ref->level, 0, 1,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node,
					  parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}

/*
 * helper function to actually process a single delayed ref entry
 */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret = 0;

	if (trans->aborted) {
		if (insert_reserved)
			btrfs_pin_extent(root, node->bytenr,
					 node->num_bytes, 1);
		return 0;
	}

	if (btrfs_delayed_ref_is_head(node)) {
		struct btrfs_delayed_ref_head *head;
		/*
		 * we've hit the end of the chain and we were supposed
		 * to insert this extent into the tree.  But, it got
		 * deleted before we ever needed to insert it, so all
		 * we have to do is clean up the accounting
		 */
		BUG_ON(extent_op);
		head = btrfs_delayed_node_to_head(node);
		trace_run_delayed_ref_head(node, head, node->action);

		if (insert_reserved) {
			btrfs_pin_extent(root, node->bytenr,
					 node->num_bytes, 1);
			if (head->is_data) {
				ret = btrfs_del_csums(trans, root,
						      node->bytenr,
						      node->num_bytes);
			}
		}

		/* Also free its reserved qgroup space */
		btrfs_qgroup_free_delayed_ref(root->fs_info,
					      head->qgroup_ref_root,
					      head->qgroup_reserved);
		return ret;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, root, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, root, node, extent_op,
					   insert_reserved);
	else
		BUG();
	return ret;
}

2447
2448static inline struct btrfs_delayed_ref_node *
2449select_delayed_ref(struct btrfs_delayed_ref_head *head)
2450{
2451 struct btrfs_delayed_ref_node *ref;
2452
2453 if (list_empty(&head->ref_list))
2454 return NULL;
2455
2456
2457
2458
2459
2460
2461
2462 list_for_each_entry(ref, &head->ref_list, list) {
2463 if (ref->action == BTRFS_ADD_DELAYED_REF)
2464 return ref;
2465 }
2466
2467 return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
2468 list);
2469}

/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     unsigned long nr)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_fs_info *fs_info = root->fs_info;
	ktime_t start = ktime_get();
	int ret;
	unsigned long count = 0;
	unsigned long actual_count = 0;
	int must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	while (1) {
		if (!locked_ref) {
			if (count >= nr)
				break;

			spin_lock(&delayed_refs->lock);
			locked_ref = btrfs_select_ref_head(trans);
			if (!locked_ref) {
				spin_unlock(&delayed_refs->lock);
				break;
			}

			/* grab the lock that says we are going to process
			 * all the refs for this head */
			ret = btrfs_delayed_ref_lock(trans, locked_ref);
			spin_unlock(&delayed_refs->lock);
			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
			if (ret == -EAGAIN) {
				locked_ref = NULL;
				count++;
				continue;
			}
		}

		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 * Or we can get node references of the same type that weren't
		 * merged when created due to bumps in the tree mod seq, and
		 * we need to merge them to prevent adding an inline extent
		 * backref before dropping it (triggering a BUG_ON at
		 * insert_inline_extent_backref()).
		 */
		spin_lock(&locked_ref->lock);
		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
					 locked_ref);

		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
		ref = select_delayed_ref(locked_ref);

		if (ref && ref->seq &&
		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
			spin_unlock(&locked_ref->lock);
			btrfs_delayed_ref_unlock(locked_ref);
			spin_lock(&delayed_refs->lock);
			locked_ref->processing = 0;
			delayed_refs->num_heads_ready++;
			spin_unlock(&delayed_refs->lock);
			locked_ref = NULL;
			cond_resched();
			count++;
			continue;
		}

		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
		 */
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;

		if (!ref) {
			/* All delayed refs have been processed, Go ahead
			 * and send the head node to run_one_delayed_ref,
			 * so that any accounting fixes can happen
			 */
			ref = &locked_ref->node;

			if (extent_op && must_insert_reserved) {
				btrfs_free_delayed_extent_op(extent_op);
				extent_op = NULL;
			}

			if (extent_op) {
				spin_unlock(&locked_ref->lock);
				ret = run_delayed_extent_op(trans, root,
							    ref, extent_op);
				btrfs_free_delayed_extent_op(extent_op);

				if (ret) {
					/*
					 * Need to reset must_insert_reserved if
					 * there was an error so the abort stuff
					 * can cleanup the reserved space
					 * properly.
					 */
					if (must_insert_reserved)
						locked_ref->must_insert_reserved = 1;
					locked_ref->processing = 0;
					btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
					btrfs_delayed_ref_unlock(locked_ref);
					return ret;
				}
				continue;
			}

			/*
			 * Need to drop our head ref lock and re-acquire the
			 * delayed ref lock and then re-check to make sure
			 * nobody got added.
			 */
			spin_unlock(&locked_ref->lock);
			spin_lock(&delayed_refs->lock);
			spin_lock(&locked_ref->lock);
			if (!list_empty(&locked_ref->ref_list) ||
			    locked_ref->extent_op) {
				spin_unlock(&locked_ref->lock);
				spin_unlock(&delayed_refs->lock);
				continue;
			}
			ref->in_tree = 0;
			delayed_refs->num_heads--;
			rb_erase(&locked_ref->href_node,
				 &delayed_refs->href_root);
			spin_unlock(&delayed_refs->lock);
		} else {
			actual_count++;
			ref->in_tree = 0;
			list_del(&ref->list);
		}
		atomic_dec(&delayed_refs->num_entries);

		if (!btrfs_delayed_ref_is_head(ref)) {
			/*
			 * when we play the delayed ref, also correct the
			 * ref_mod on head
			 */
			switch (ref->action) {
			case BTRFS_ADD_DELAYED_REF:
			case BTRFS_ADD_DELAYED_EXTENT:
				locked_ref->node.ref_mod -= ref->ref_mod;
				break;
			case BTRFS_DROP_DELAYED_REF:
				locked_ref->node.ref_mod += ref->ref_mod;
				break;
			default:
				WARN_ON(1);
			}
		}
		spin_unlock(&locked_ref->lock);

		ret = run_one_delayed_ref(trans, root, ref, extent_op,
					  must_insert_reserved);

		btrfs_free_delayed_extent_op(extent_op);
		if (ret) {
			locked_ref->processing = 0;
			btrfs_delayed_ref_unlock(locked_ref);
			btrfs_put_delayed_ref(ref);
			btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
			return ret;
		}

		/*
		 * If this node is a head, that means all the refs in this head
		 * have been dealt with, and we will pick the next head to deal
		 * with, so we should release the delayed ref head's lock before
		 * we move on.
		 */
		if (btrfs_delayed_ref_is_head(ref)) {
			if (locked_ref->is_data &&
			    locked_ref->total_ref_mod < 0) {
				spin_lock(&delayed_refs->lock);
				delayed_refs->pending_csums -= ref->num_bytes;
				spin_unlock(&delayed_refs->lock);
			}
			btrfs_delayed_ref_unlock(locked_ref);
			locked_ref = NULL;
		}
		btrfs_put_delayed_ref(ref);
		count++;
		cond_resched();
	}

	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */
	if (actual_count > 0) {
		u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
		u64 avg;

		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
		spin_lock(&delayed_refs->lock);
		avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
		fs_info->avg_delayed_ref_runtime = avg >> 2;	/* div by 4 */
		spin_unlock(&delayed_refs->lock);
	}
	return 0;
}

#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	u64 middle;
	u64 first = 0, last = 0;

	n = rb_first(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		first = entry->bytenr;
	}
	n = rb_last(root);
	if (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		last = entry->bytenr;
	}
	n = root->rb_node;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		if (alt)
			n = n->rb_left;
		else
			n = n->rb_right;

		alt = 1 - alt;
	}
	return middle;
}
#endif

2744
2745static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2746{
2747 u64 num_bytes;
2748
2749 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2750 sizeof(struct btrfs_extent_inline_ref));
2751 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2752 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2753
2754
2755
2756
2757
2758 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2759}
2760
2761
2762
2763
2764
u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
{
	u64 csum_size;
	u64 num_csums_per_leaf;
	u64 num_csums;

	csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
	num_csums_per_leaf = div64_u64(csum_size,
			(u64)btrfs_super_csum_size(root->fs_info->super_copy));
	num_csums = div64_u64(csum_bytes, root->sectorsize);
	num_csums += num_csums_per_leaf - 1;
	num_csums = div64_u64(num_csums, num_csums_per_leaf);
	return num_csums;
}
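
/*
 * Illustrative example (the numbers below are not from the original source):
 * with 16KiB nodes, a 4KiB sectorsize and 4 byte crc32c checksums, a leaf
 * holds roughly (16KiB - headers) / 4 ~= 4000 csums, each covering one 4KiB
 * block, so a single leaf covers about 16MiB of checksummed data.
 */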

int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_block_rsv *global_rsv;
	u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
	u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
	u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
	u64 num_bytes, num_dirty_bgs_bytes;
	int ret = 0;

	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	num_heads = heads_to_leaves(root, num_heads);
	if (num_heads > 1)
		num_bytes += (num_heads - 1) * root->nodesize;
	num_bytes <<= 1;
	num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
	num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
							     num_dirty_bgs);
	global_rsv = &root->fs_info->global_block_rsv;

	/*
	 * If we can't allocate any more chunks lets make sure we have _lots_ of
	 * wiggle room since running delayed refs can create more delayed refs.
	 */
	if (global_rsv->space_info->full) {
		num_dirty_bgs_bytes <<= 1;
		num_bytes <<= 1;
	}

	spin_lock(&global_rsv->lock);
	if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
		ret = 1;
	spin_unlock(&global_rsv->lock);
	return ret;
}

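/*
 * Estimate how long running the currently queued delayed refs would take:
 * returns 1 if it exceeds a full second, 2 if it exceeds half a second, and
 * otherwise defers to btrfs_check_space_for_delayed_refs() above.
 */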
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 num_entries =
		atomic_read(&trans->transaction->delayed_refs.num_entries);
	u64 avg_runtime;
	u64 val;

	smp_mb();
	avg_runtime = fs_info->avg_delayed_ref_runtime;
	val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
		return 1;
	if (val >= NSEC_PER_SEC / 2)
		return 2;

	return btrfs_check_space_for_delayed_refs(trans, root);
}

struct async_delayed_refs {
	struct btrfs_root *root;
	u64 transid;
	int count;
	int error;
	int sync;
	struct completion wait;
	struct btrfs_work work;
};

static void delayed_ref_async_start(struct btrfs_work *work)
{
	struct async_delayed_refs *async;
	struct btrfs_trans_handle *trans;
	int ret;

	async = container_of(work, struct async_delayed_refs, work);

	/* if the commit is already started, we don't need to wait here */
	if (btrfs_transaction_blocked(async->root->fs_info))
		goto done;

	trans = btrfs_join_transaction(async->root);
	if (IS_ERR(trans)) {
		async->error = PTR_ERR(trans);
		goto done;
	}

	/*
	 * trans->sync means that when we call end_transaction, we won't
	 * wait on delayed refs
	 */
	trans->sync = true;

	/* Don't bother flushing if we got into a different transaction */
	if (trans->transid > async->transid)
		goto end;

	ret = btrfs_run_delayed_refs(trans, async->root, async->count);
	if (ret)
		async->error = ret;
end:
	ret = btrfs_end_transaction(trans, async->root);
	if (ret && !async->error)
		async->error = ret;
done:
	if (async->sync)
		complete(&async->wait);
	else
		kfree(async);
}

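/*
 * Kick off a worker that runs up to @count delayed refs on behalf of the
 * tree root.  With @wait set this blocks until the worker finishes and
 * returns its error; otherwise the async struct is freed by the worker.
 */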
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
				 unsigned long count, u64 transid, int wait)
{
	struct async_delayed_refs *async;
	int ret;

	async = kmalloc(sizeof(*async), GFP_NOFS);
	if (!async)
		return -ENOMEM;

	async->root = root->fs_info->tree_root;
	async->count = count;
	async->error = 0;
	async->transid = transid;
	if (wait)
		async->sync = 1;
	else
		async->sync = 0;
	init_completion(&async->wait);

	btrfs_init_work(&async->work, btrfs_extent_refs_helper,
			delayed_ref_async_start, NULL, NULL);

	btrfs_queue_work(root->fs_info->extent_workers, &async->work);

	if (wait) {
		wait_for_completion(&async->wait);
		ret = async->error;
		kfree(async);
		return ret;
	}
	return 0;
}

/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, unsigned long count)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	int ret;
	int run_all = count == (unsigned long)-1;
	bool can_flush_pending_bgs = trans->can_flush_pending_bgs;

	/* We'll clean this up in btrfs_cleanup_transaction */
	if (trans->aborted)
		return 0;

	if (root->fs_info->creating_free_space_tree)
		return 0;

	if (root == root->fs_info->extent_root)
		root = root->fs_info->tree_root;

	delayed_refs = &trans->transaction->delayed_refs;
	if (count == 0)
		count = atomic_read(&delayed_refs->num_entries) * 2;

again:
#ifdef SCRAMBLE_DELAYED_REFS
	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
	trans->can_flush_pending_bgs = false;
	ret = __btrfs_run_delayed_refs(trans, root, count);
	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	if (run_all) {
		if (!list_empty(&trans->new_bgs))
			btrfs_create_pending_block_groups(trans, root);

		spin_lock(&delayed_refs->lock);
		node = rb_first(&delayed_refs->href_root);
		if (!node) {
			spin_unlock(&delayed_refs->lock);
			goto out;
		}
		count = (unsigned long)-1;

		while (node) {
			head = rb_entry(node, struct btrfs_delayed_ref_head,
					href_node);
			if (btrfs_delayed_ref_is_head(&head->node)) {
				struct btrfs_delayed_ref_node *ref;

				ref = &head->node;
				atomic_inc(&ref->refs);

				spin_unlock(&delayed_refs->lock);
				/*
				 * Mutex was contended, block until it's
				 * released and try to run delayed refs
				 * again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);

				btrfs_put_delayed_ref(ref);
				cond_resched();
				goto again;
			} else {
				WARN_ON(1);
			}
			node = rb_next(node);
		}
		spin_unlock(&delayed_refs->lock);
		cond_resched();
		goto again;
	}
out:
	assert_qgroups_uptodate(trans);
	trans->can_flush_pending_bgs = can_flush_pending_bgs;
	return 0;
}

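/*
 * Queue a delayed extent op that sets @flags on the extent item for
 * (bytenr, num_bytes); it takes effect when the delayed refs are run.
 */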
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				u64 bytenr, u64 num_bytes, u64 flags,
				int level, int is_data)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = btrfs_alloc_delayed_extent_op();
	if (!extent_op)
		return -ENOMEM;

	extent_op->flags_to_set = flags;
	extent_op->update_flags = true;
	extent_op->update_key = false;
	extent_op->is_data = is_data ? true : false;
	extent_op->level = level;

	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
					  num_bytes, extent_op);
	if (ret)
		btrfs_free_delayed_extent_op(extent_op);
	return ret;
}

static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head) {
		spin_unlock(&delayed_refs->lock);
		return 0;
	}

	if (!mutex_trylock(&head->mutex)) {
		atomic_inc(&head->node.refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	spin_unlock(&delayed_refs->lock);

	spin_lock(&head->lock);
	list_for_each_entry(ref, &head->ref_list, list) {
		/* If it's a shared ref we know a cross reference exists */
		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
			ret = 1;
			break;
		}

		data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
		if (data_ref->root != root->root_key.objectid ||
		    data_ref->objectid != objectid ||
		    data_ref->offset != offset) {
			ret = 1;
			break;
		}
	}
	spin_unlock(&head->lock);
	mutex_unlock(&head->mutex);
	return ret;
}

static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 item_size;
	int ret;

	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0);

	ret = -ENOENT;
	if (path->slots[0] == 0)
		goto out;

	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
		goto out;

	ret = 1;
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		goto out;
	}
#endif
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);

	if (item_size != sizeof(*ei) +
	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
		goto out;

	if (btrfs_extent_generation(leaf, ei) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	if (btrfs_extent_inline_ref_type(leaf, iref) !=
	    BTRFS_EXTENT_DATA_REF_KEY)
		goto out;

	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
	if (btrfs_extent_refs(leaf, ei) !=
	    btrfs_extent_data_ref_count(leaf, ref) ||
	    btrfs_extent_data_ref_root(leaf, ref) !=
	    root->root_key.objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		goto out;

	ret = 0;
out:
	return ret;
}

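/*
 * Check whether (objectid, offset) in @root is the only holder of a
 * reference on the extent at @bytenr.  Both the committed extent tree and
 * the pending delayed refs are consulted; -EAGAIN from check_delayed_ref()
 * (head mutex contended) restarts the committed check as well.  Returns 0
 * when the extent is referenced only by us; any other result means a cross
 * reference may exist and COW must be forced.
 */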
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_path *path;
	int ret;
	int ret2;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	do {
		ret = check_committed_ref(trans, root, path, objectid,
					  offset, bytenr);
		if (ret && ret != -ENOENT)
			goto out;

		ret2 = check_delayed_ref(trans, root, path, objectid,
					 offset, bytenr);
	} while (ret2 == -EAGAIN);

	if (ret2 && ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	if (ret != -ENOENT || ret2 != -ENOENT)
		ret = 0;
out:
	btrfs_free_path(path);
	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		WARN_ON(ret > 0);
	return ret;
}

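/*
 * Walk all items in @buf and update (inc or dec) the reference counts of
 * every data extent and child tree block it points to, so the extents a
 * COWed or freed node/leaf references stay correctly counted.
 */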
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int full_backref, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
			    u64, u64, u64, u64, u64, u64);

	if (btrfs_test_is_dummy_root(root))
		return 0;

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	if (full_backref)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset);
			if (ret)
				goto fail;
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = root->nodesize;
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	return ret;
}

int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
}

int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
}

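/*
 * Write the in-memory block group item for @cache back to its slot in the
 * extent tree.  Returns -ENOENT if the item has gone missing.
 */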
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		goto fail;
	}

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
fail:
	btrfs_release_path(path);
	return ret;
}

static struct btrfs_block_group_cache *
next_block_group(struct btrfs_root *root,
		 struct btrfs_block_group_cache *cache)
{
	struct rb_node *node;

	spin_lock(&root->fs_info->block_group_cache_lock);

	/* If our block group was removed, we need a full search. */
	if (RB_EMPTY_NODE(&cache->cache_node)) {
		const u64 next_bytenr = cache->key.objectid + cache->key.offset;

		spin_unlock(&root->fs_info->block_group_cache_lock);
		btrfs_put_block_group(cache);
		cache = btrfs_lookup_first_block_group(root->fs_info,
						       next_bytenr);
		return cache;
	}
	node = rb_next(&cache->cache_node);
	btrfs_put_block_group(cache);
	if (node) {
		cache = rb_entry(node, struct btrfs_block_group_cache,
				 cache_node);
		btrfs_get_block_group(cache);
	} else
		cache = NULL;
	spin_unlock(&root->fs_info->block_group_cache_lock);
	return cache;
}

static int cache_save_setup(struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path)
{
	struct btrfs_root *root = block_group->fs_info->tree_root;
	struct inode *inode = NULL;
	u64 alloc_hint = 0;
	int dcs = BTRFS_DC_ERROR;
	u64 num_pages = 0;
	int retries = 0;
	int ret = 0;

	/*
	 * If this block group is smaller than 100 megs don't bother caching the
	 * block group.
	 */
	if (block_group->key.offset < (100 * SZ_1M)) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		return 0;
	}

	if (trans->aborted)
		return 0;
again:
	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
		ret = PTR_ERR(inode);
		btrfs_release_path(path);
		goto out;
	}

	if (IS_ERR(inode)) {
		BUG_ON(retries);
		retries++;

		if (block_group->ro)
			goto out_free;

		ret = create_free_space_inode(root, trans, block_group, path);
		if (ret)
			goto out_free;
		goto again;
	}

	/* We've already setup this transaction, go ahead and exit */
	if (block_group->cache_generation == trans->transid &&
	    i_size_read(inode)) {
		dcs = BTRFS_DC_SETUP;
		goto out_put;
	}

	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up next
	 * time.
	 */
	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache.  So to
		 * limit our chances of screwing things up, just skip setting
		 * the super and abort here so we know something bad happened.
		 */
		btrfs_abort_transaction(trans, root, ret);
		goto out_put;
	}
	WARN_ON(ret);

	if (i_size_read(inode) > 0) {
		ret = btrfs_check_trunc_cache_free_space(root,
					&root->fs_info->global_block_rsv);
		if (ret)
			goto out_put;

		ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
		if (ret)
			goto out_put;
	}

	spin_lock(&block_group->lock);
	if (block_group->cached != BTRFS_CACHE_FINISHED ||
	    !btrfs_test_opt(root, SPACE_CACHE)) {
		/*
		 * don't bother trying to write stuff out _if_
		 * a) we're not cached,
		 * b) we're with nospace_cache mount option.
		 */
		dcs = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		goto out_put;
	}
	spin_unlock(&block_group->lock);

	/*
	 * We hit an ENOSPC when setting up the cache in this transaction, just
	 * skip doing the setup, we've already cleared the cache so we're safe.
	 */
	if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
		ret = -ENOSPC;
		goto out_put;
	}

	/*
	 * Try to preallocate enough space based on how big the block group is.
	 * Keep in mind this has to include any pinned space which could end up
	 * taking up quite a bit since it's not folded into the other space
	 * cache.
	 */
	num_pages = div_u64(block_group->key.offset, SZ_256M);
	if (!num_pages)
		num_pages = 1;

	num_pages *= 16;
	num_pages *= PAGE_SIZE;

	ret = btrfs_check_data_free_space(inode, 0, num_pages);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
					      num_pages, num_pages,
					      &alloc_hint);
	/*
	 * Our cache requires contiguous chunks so that we don't modify a bunch
	 * of metadata or split extents when writing the cache out, which means
	 * we can enospc if we are heavily fragmented in addition to just normal
	 * out of space conditions.  So if we hit this just skip setting up any
	 * other block groups for this transaction, maybe we'll unpin enough
	 * space the next time around.
	 */
	if (!ret)
		dcs = BTRFS_DC_SETUP;
	else if (ret == -ENOSPC)
		set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
	btrfs_free_reserved_data_space(inode, 0, num_pages);

out_put:
	iput(inode);
out_free:
	btrfs_release_path(path);
out:
	spin_lock(&block_group->lock);
	if (!ret && dcs == BTRFS_DC_SETUP)
		block_group->cache_generation = trans->transid;
	block_group->disk_cache_state = dcs;
	spin_unlock(&block_group->lock);

	return ret;
}

int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache, *tmp;
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_path *path;

	if (list_empty(&cur_trans->dirty_bgs) ||
	    !btrfs_test_opt(root, SPACE_CACHE))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Could add new block groups, use _safe just in case */
	list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
				 dirty_list) {
		if (cache->disk_cache_state == BTRFS_DC_CLEAR)
			cache_save_setup(cache, trans, path);
	}

	btrfs_free_path(path);
	return 0;
}

/*
 * transaction commit does final block group cache writeback during a
 * critical section where nothing is allowed to change the FS.  This is
 * required in order for the cache to actually match the block group,
 * but can introduce a lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group
 * cache IO.  There's a chance we'll have to redo some of it if the
 * block group changes again during the commit, but it greatly reduces
 * the commit latency by getting rid of the easy block groups while
 * we're still allowing others to join the commit.
 */
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_transaction *cur_trans = trans->transaction;
	int ret = 0;
	int should_put;
	struct btrfs_path *path = NULL;
	LIST_HEAD(dirty);
	struct list_head *io = &cur_trans->io_bgs;
	int num_started = 0;
	int loops = 0;

	spin_lock(&cur_trans->dirty_bgs_lock);
	if (list_empty(&cur_trans->dirty_bgs)) {
		spin_unlock(&cur_trans->dirty_bgs_lock);
		return 0;
	}
	list_splice_init(&cur_trans->dirty_bgs, &dirty);
	spin_unlock(&cur_trans->dirty_bgs_lock);

again:
	/*
	 * make sure all the block groups on our dirty list actually
	 * exist
	 */
	btrfs_create_pending_block_groups(trans, root);

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	/*
	 * cache_write_mutex is here only to save us from balance or automatic
	 * removal of empty block groups deleting this block group while we are
	 * writing out the cache
	 */
	mutex_lock(&trans->transaction->cache_write_mutex);
	while (!list_empty(&dirty)) {
		cache = list_first_entry(&dirty,
					 struct btrfs_block_group_cache,
					 dirty_list);

		/*
		 * this can happen if something re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
		if (!list_empty(&cache->io_list)) {
			list_del_init(&cache->io_list);
			btrfs_wait_cache_io(root, trans, cache,
					    &cache->io_ctl, path,
					    cache->key.objectid);
			btrfs_put_block_group(cache);
		}

		/*
		 * btrfs_wait_cache_io uses the cache->dirty_list to decide
		 * if it should update the cache_state.  Don't delete
		 * until after we wait.
		 *
		 * Since we're not running in the commit critical section
		 * we need the dirty_bgs_lock to protect from update_block_group
		 */
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_del_init(&cache->dirty_list);
		spin_unlock(&cur_trans->dirty_bgs_lock);

		should_put = 1;

		cache_save_setup(cache, trans, path);

		if (cache->disk_cache_state == BTRFS_DC_SETUP) {
			cache->io_ctl.inode = NULL;
			ret = btrfs_write_out_cache(root, trans, cache, path);
			if (ret == 0 && cache->io_ctl.inode) {
				num_started++;
				should_put = 0;

				/*
				 * the cache_write_mutex is protecting
				 * the io_list
				 */
				list_add_tail(&cache->io_list, io);
			} else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
				ret = 0;
			}
		}
		if (!ret) {
			ret = write_one_cache_group(trans, root, path, cache);
			/*
			 * Our block group might still be attached to the list
			 * of new block groups in the transaction handle of some
			 * other task (struct btrfs_trans_handle->new_bgs). This
			 * means its block group item isn't yet in the extent
			 * tree. If this happens ignore the error, as we will
			 * try again later in the critical section of the
			 * transaction commit.
			 */
			if (ret == -ENOENT) {
				ret = 0;
				spin_lock(&cur_trans->dirty_bgs_lock);
				if (list_empty(&cache->dirty_list)) {
					list_add_tail(&cache->dirty_list,
						      &cur_trans->dirty_bgs);
					btrfs_get_block_group(cache);
				}
				spin_unlock(&cur_trans->dirty_bgs_lock);
			} else if (ret) {
				btrfs_abort_transaction(trans, root, ret);
			}
		}

		/* if its not on the io list, we need to put the block group */
		if (should_put)
			btrfs_put_block_group(cache);

		if (ret)
			break;

		/*
		 * Avoid blocking other tasks for too long. It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
		mutex_unlock(&trans->transaction->cache_write_mutex);
		mutex_lock(&trans->transaction->cache_write_mutex);
	}
	mutex_unlock(&trans->transaction->cache_write_mutex);

	/*
	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (!ret && loops == 0) {
		loops++;
		spin_lock(&cur_trans->dirty_bgs_lock);
		list_splice_init(&cur_trans->dirty_bgs, &dirty);
		/*
		 * dirty_bgs_lock protects us from concurrent block group
		 * deletes too (not just cache_write_mutex).
		 */
		if (!list_empty(&dirty)) {
			spin_unlock(&cur_trans->dirty_bgs_lock);
			goto again;
		}
		spin_unlock(&cur_trans->dirty_bgs_lock);
	}

	btrfs_free_path(path);
	return ret;
}

int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_transaction *cur_trans = trans->transaction;
	int ret = 0;
	int should_put;
	struct btrfs_path *path;
	struct list_head *io = &cur_trans->io_bgs;
	int num_started = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Even though we are in the critical section of the transaction commit,
	 * we can still have concurrent tasks adding elements to this
	 * transaction's list of dirty block groups. These tasks correspond to
	 * endio free space workers started when writeback finishes for a
	 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
	 * allocate new block groups as a result of COWing nodes of the root
	 * tree when updating the free space inode. The writeback for the space
	 * caches is triggered by an earlier call to
	 * btrfs_start_dirty_block_groups() and iterations of the following
	 * loop.
	 * Also we want to do the cache_save_setup first and then run the
	 * delayed refs to make sure we have the best chance at doing this all
	 * in one shot.
	 */
	spin_lock(&cur_trans->dirty_bgs_lock);
	while (!list_empty(&cur_trans->dirty_bgs)) {
		cache = list_first_entry(&cur_trans->dirty_bgs,
					 struct btrfs_block_group_cache,
					 dirty_list);

		/*
		 * this can happen if cache_save_setup re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
		if (!list_empty(&cache->io_list)) {
			spin_unlock(&cur_trans->dirty_bgs_lock);
			list_del_init(&cache->io_list);
			btrfs_wait_cache_io(root, trans, cache,
					    &cache->io_ctl, path,
					    cache->key.objectid);
			btrfs_put_block_group(cache);
			spin_lock(&cur_trans->dirty_bgs_lock);
		}

		/*
		 * don't remove from the dirty list until after we've waited
		 * on any pending IO
		 */
		list_del_init(&cache->dirty_list);
		spin_unlock(&cur_trans->dirty_bgs_lock);
		should_put = 1;

		cache_save_setup(cache, trans, path);

		if (!ret)
			ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);

		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
			cache->io_ctl.inode = NULL;
			ret = btrfs_write_out_cache(root, trans, cache, path);
			if (ret == 0 && cache->io_ctl.inode) {
				num_started++;
				should_put = 0;
				list_add_tail(&cache->io_list, io);
			} else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
				ret = 0;
			}
		}
		if (!ret) {
			ret = write_one_cache_group(trans, root, path, cache);
			/*
			 * One of the free space endio workers might have
			 * created a new block group while updating a free space
			 * cache's inode (at inode.c:btrfs_finish_ordered_io())
			 * and hasn't released its transaction handle yet, in
			 * which case the new block group is still attached to
			 * its transaction handle and its creation has not
			 * finished yet (no block group item in the extent tree
			 * yet, etc). If this is the case, wait until there are
			 * no other writers left in the transaction so the new
			 * block group's item is guaranteed to be in the extent
			 * tree, then retry writing this block group's item.
			 */
			if (ret == -ENOENT) {
				wait_event(cur_trans->writer_wait,
				   atomic_read(&cur_trans->num_writers) == 1);
				ret = write_one_cache_group(trans, root, path,
							    cache);
			}
			if (ret)
				btrfs_abort_transaction(trans, root, ret);
		}

		/* if its not on the io list, we need to put the block group */
		if (should_put)
			btrfs_put_block_group(cache);
		spin_lock(&cur_trans->dirty_bgs_lock);
	}
	spin_unlock(&cur_trans->dirty_bgs_lock);

	while (!list_empty(io)) {
		cache = list_first_entry(io, struct btrfs_block_group_cache,
					 io_list);
		list_del_init(&cache->io_list);
		btrfs_wait_cache_io(root, trans, cache,
				    &cache->io_ctl, path, cache->key.objectid);
		btrfs_put_block_group(cache);
	}

	btrfs_free_path(path);
	return ret;
}

int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	int readonly = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
	if (!block_group || block_group->ro)
		readonly = 1;
	if (block_group)
		btrfs_put_block_group(block_group);
	return readonly;
}

bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group_cache *bg;
	bool ret = true;

	bg = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg)
		return false;

	spin_lock(&bg->lock);
	if (bg->ro)
		ret = false;
	else
		atomic_inc(&bg->nocow_writers);
	spin_unlock(&bg->lock);

	/* no put on block group, done by btrfs_dec_nocow_writers */
	if (!ret)
		btrfs_put_block_group(bg);

	return ret;
}

void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
	struct btrfs_block_group_cache *bg;

	bg = btrfs_lookup_block_group(fs_info, bytenr);
	ASSERT(bg);
	if (atomic_dec_and_test(&bg->nocow_writers))
		wake_up_atomic_t(&bg->nocow_writers);
	/*
	 * Once for our lookup and once for the lookup done by a previous call
	 * to btrfs_inc_nocow_writers()
	 */
	btrfs_put_block_group(bg);
	btrfs_put_block_group(bg);
}

static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
{
	schedule();
	return 0;
}

void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
{
	wait_on_atomic_t(&bg->nocow_writers,
			 btrfs_wait_nocow_writers_atomic_t,
			 TASK_UNINTERRUPTIBLE);
}

static const char *alloc_name(u64 flags)
{
	switch (flags) {
	case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
		return "mixed";
	case BTRFS_BLOCK_GROUP_METADATA:
		return "metadata";
	case BTRFS_BLOCK_GROUP_DATA:
		return "data";
	case BTRFS_BLOCK_GROUP_SYSTEM:
		return "system";
	default:
		WARN_ON(1);
		return "invalid-combination";
	}
}

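/*
 * Find or create the space_info for @flags, folding @total_bytes and
 * @bytes_used into its counters.  DUP/RAID1/RAID10 profiles store two
 * copies, hence the factor of 2 applied to the on-disk totals.
 */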
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;
	int i;
	int factor;
	int ret;

	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
		     BTRFS_BLOCK_GROUP_RAID10))
		factor = 2;
	else
		factor = 1;

	found = __find_space_info(info, flags);
	if (found) {
		spin_lock(&found->lock);
		found->total_bytes += total_bytes;
		found->disk_total += total_bytes * factor;
		found->bytes_used += bytes_used;
		found->disk_used += bytes_used * factor;
		if (total_bytes > 0)
			found->full = 0;
		spin_unlock(&found->lock);
		*space_info = found;
		return 0;
	}
	found = kzalloc(sizeof(*found), GFP_NOFS);
	if (!found)
		return -ENOMEM;

	ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
	if (ret) {
		kfree(found);
		return ret;
	}

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&found->block_groups[i]);
	init_rwsem(&found->groups_sem);
	spin_lock_init(&found->lock);
	found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	found->total_bytes = total_bytes;
	found->disk_total = total_bytes * factor;
	found->bytes_used = bytes_used;
	found->disk_used = bytes_used * factor;
	found->bytes_pinned = 0;
	found->bytes_reserved = 0;
	found->bytes_readonly = 0;
	found->bytes_may_use = 0;
	found->full = 0;
	found->max_extent_size = 0;
	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
	found->chunk_alloc = 0;
	found->flush = 0;
	init_waitqueue_head(&found->wait);
	INIT_LIST_HEAD(&found->ro_bgs);

	ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
				   info->space_info_kobj, "%s",
				   alloc_name(found->flags));
	if (ret) {
		kfree(found);
		return ret;
	}

	*space_info = found;
	list_add_rcu(&found->list, &info->space_info);
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		info->data_sinfo = found;

	return ret;
}

static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 extra_flags = chunk_to_extended(flags) &
				BTRFS_EXTENDED_PROFILE_MASK;

	write_seqlock(&fs_info->profiles_lock);
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits |= extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		fs_info->avail_metadata_alloc_bits |= extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits |= extra_flags;
	write_sequnlock(&fs_info->profiles_lock);
}

/*
 * returns target flags in extended format or 0 if restripe for this
 * chunk_type is not in progress
 *
 * should be called with either volume_mutex or balance_lock held
 */
static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	u64 target = 0;

	if (!bctl)
		return 0;

	if (flags & BTRFS_BLOCK_GROUP_DATA &&
	    bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
	} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
		   bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
	} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
		   bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
		target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
	}

	return target;
}

/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
	u64 num_devices = root->fs_info->fs_devices->rw_devices;
	u64 target;
	u64 raid_type;
	u64 allowed = 0;

	/*
	 * see if restripe for this chunk_type is in progress, if so
	 * try to reduce to the target profile
	 */
	spin_lock(&root->fs_info->balance_lock);
	target = get_restripe_target(root->fs_info, flags);
	if (target) {
		/* pick target profile only if it's already available */
		if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
			spin_unlock(&root->fs_info->balance_lock);
			return extended_to_chunk(target);
		}
	}
	spin_unlock(&root->fs_info->balance_lock);

	/* First, mask out the RAID levels which aren't possible */
	for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
		if (num_devices >= btrfs_raid_array[raid_type].devs_min)
			allowed |= btrfs_raid_group[raid_type];
	}
	allowed &= flags;

	if (allowed & BTRFS_BLOCK_GROUP_RAID6)
		allowed = BTRFS_BLOCK_GROUP_RAID6;
	else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
		allowed = BTRFS_BLOCK_GROUP_RAID5;
	else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
		allowed = BTRFS_BLOCK_GROUP_RAID10;
	else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
		allowed = BTRFS_BLOCK_GROUP_RAID1;
	else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
		allowed = BTRFS_BLOCK_GROUP_RAID0;

	flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;

	return extended_to_chunk(flags | allowed);
}

static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
{
	unsigned seq;
	u64 flags;

	do {
		flags = orig_flags;
		seq = read_seqbegin(&root->fs_info->profiles_lock);

		if (flags & BTRFS_BLOCK_GROUP_DATA)
			flags |= root->fs_info->avail_data_alloc_bits;
		else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
			flags |= root->fs_info->avail_system_alloc_bits;
		else if (flags & BTRFS_BLOCK_GROUP_METADATA)
			flags |= root->fs_info->avail_metadata_alloc_bits;
	} while (read_seqretry(&root->fs_info->profiles_lock, seq));

	return btrfs_reduce_alloc_profile(root, flags);
}

u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
{
	u64 flags;
	u64 ret;

	if (data)
		flags = BTRFS_BLOCK_GROUP_DATA;
	else if (root == root->fs_info->chunk_root)
		flags = BTRFS_BLOCK_GROUP_SYSTEM;
	else
		flags = BTRFS_BLOCK_GROUP_METADATA;

	ret = get_alloc_profile(root, flags);
	return ret;
}

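/*
 * Make sure @bytes of data space (sectorsize aligned) are reserved in the
 * data space_info.  Under pressure this tries, in order: allocating a new
 * data chunk, and then committing the transaction (up to twice, flushing
 * delalloc before the last attempt) to reclaim pinned space.
 */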
int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
{
	struct btrfs_space_info *data_sinfo;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 used;
	int ret = 0;
	int need_commit = 2;
	int have_pinned_space;

	/* make sure bytes are sectorsize aligned */
	bytes = ALIGN(bytes, root->sectorsize);

	if (btrfs_is_free_space_inode(inode)) {
		need_commit = 0;
		ASSERT(current->journal_info);
	}

	data_sinfo = fs_info->data_sinfo;
	if (!data_sinfo)
		goto alloc;

again:
	/* make sure we have enough space to handle the data first */
	spin_lock(&data_sinfo->lock);
	used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
		data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
		data_sinfo->bytes_may_use;

	if (used + bytes > data_sinfo->total_bytes) {
		struct btrfs_trans_handle *trans;

		/*
		 * if we don't have enough free bytes in this space then we need
		 * to alloc a new chunk.
		 */
		if (!data_sinfo->full) {
			u64 alloc_target;

			data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
			spin_unlock(&data_sinfo->lock);
alloc:
			alloc_target = btrfs_get_alloc_profile(root, 1);
			/*
			 * It is ugly that we don't call nolock join
			 * transaction for the free space inode case here.
			 * But it is safe because we only do the data space
			 * reservation for the free space cache in the
			 * transaction context, the common join transaction
			 * just increase the count of the current transaction.
			 */
			trans = btrfs_join_transaction(root);
			if (IS_ERR(trans))
				return PTR_ERR(trans);

			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     alloc_target,
					     CHUNK_ALLOC_NO_FORCE);
			btrfs_end_transaction(trans, root);
			if (ret < 0) {
				if (ret != -ENOSPC)
					return ret;
				else {
					have_pinned_space = 1;
					goto commit_trans;
				}
			}

			if (!data_sinfo)
				data_sinfo = fs_info->data_sinfo;

			goto again;
		}

		/*
		 * If we don't have enough pinned space to deal with this
		 * allocation, and no removed chunk in current transaction,
		 * don't bother committing the transaction.
		 */
		have_pinned_space = percpu_counter_compare(
			&data_sinfo->total_bytes_pinned,
			used + bytes - data_sinfo->total_bytes);
		spin_unlock(&data_sinfo->lock);

		/* commit the current transaction and try again */
commit_trans:
		if (need_commit &&
		    !atomic_read(&root->fs_info->open_ioctl_trans)) {
			need_commit--;

			if (need_commit > 0) {
				btrfs_start_delalloc_roots(fs_info, 0, -1);
				btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
			}

			trans = btrfs_join_transaction(root);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
			if (have_pinned_space >= 0 ||
			    test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
				     &trans->transaction->flags) ||
			    need_commit > 0) {
				ret = btrfs_commit_transaction(trans, root);
				if (ret)
					return ret;
				/*
				 * The cleaner kthread might still be doing iput
				 * operations. Wait for it to finish so that
				 * more space is released.
				 */
				mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
				mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
				goto again;
			} else {
				btrfs_end_transaction(trans, root);
			}
		}

		trace_btrfs_space_reservation(root->fs_info,
					      "space_info:enospc",
					      data_sinfo->flags, bytes, 1);
		return -ENOSPC;
	}
	data_sinfo->bytes_may_use += bytes;
	trace_btrfs_space_reservation(root->fs_info, "space_info",
				      data_sinfo->flags, bytes, 1);
	spin_unlock(&data_sinfo->lock);

	return ret;
}

/*
 * Reserve data space for a write into the given range, rounded out to
 * sectorsize granularity.  This reserves both space in the data space_info
 * and, when quotas are enabled, the matching qgroup space.
 */
int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	/* align the range */
	len = round_up(start + len, root->sectorsize) -
	      round_down(start, root->sectorsize);
	start = round_down(start, root->sectorsize);

	ret = btrfs_alloc_data_chunk_ondemand(inode, len);
	if (ret < 0)
		return ret;

	/*
	 * Use btrfs_qgroup_reserve_data() to reserve precise qgroup data
	 * space for the range.  NOCOW ranges still reserve here, which is
	 * wasteful but keeps the quota-disabled path fast.
	 */
	ret = btrfs_qgroup_reserve_data(inode, start, len);
	return ret;
}

/*
 * Called if we need to clear a data reservation for this inode,
 * normally in an error case.
 *
 * This one will *NOT* use the accurate qgroup reserved space API, just for
 * the case where we can't sleep and are sure it won't affect qgroup
 * reserved space, like clear_bit_hook().
 */
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
					    u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_space_info *data_sinfo;

	/* Make sure the range is aligned to sectorsize */
	len = round_up(start + len, root->sectorsize) -
	      round_down(start, root->sectorsize);
	start = round_down(start, root->sectorsize);

	data_sinfo = root->fs_info->data_sinfo;
	spin_lock(&data_sinfo->lock);
	if (WARN_ON(data_sinfo->bytes_may_use < len))
		data_sinfo->bytes_may_use = 0;
	else
		data_sinfo->bytes_may_use -= len;
	trace_btrfs_space_reservation(root->fs_info, "space_info",
				      data_sinfo->flags, len, 0);
	spin_unlock(&data_sinfo->lock);
}

/*
 * Called if we need to clear a data reservation for this inode,
 * normally in an error case.
 *
 * This one also handles the per-inode data rsv map for the accurate
 * qgroup reserved space framework.
 */
void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
{
	btrfs_free_reserved_data_space_noquota(inode, start, len);
	btrfs_qgroup_free_data(inode, start, len);
}

static void force_metadata_allocation(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
			found->force_alloc = CHUNK_ALLOC_FORCE;
	}
	rcu_read_unlock();
}

static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
{
	return (global->size << 1);
}

static int should_alloc_chunk(struct btrfs_root *root,
			      struct btrfs_space_info *sinfo, int force)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
	u64 thresh;

	if (force == CHUNK_ALLOC_FORCE)
		return 1;

	/*
	 * We need to take into account the global rsv because for all intents
	 * and purposes it's used space.  Don't worry about locking the
	 * global_rsv, it doesn't change except when the transaction commits.
	 */
	if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
		num_allocated += calc_global_rsv_need_space(global_rsv);

	/*
	 * in limited mode, we want to have some free space up to
	 * about 1% of the FS size.
	 */
	if (force == CHUNK_ALLOC_LIMITED) {
		thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
		thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));

		if (num_bytes - num_allocated < thresh)
			return 1;
	}

	if (num_allocated + SZ_2M < div_factor(num_bytes, 8))
		return 0;
	return 1;
}

static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
{
	u64 num_dev;

	if (type & (BTRFS_BLOCK_GROUP_RAID10 |
		    BTRFS_BLOCK_GROUP_RAID0 |
		    BTRFS_BLOCK_GROUP_RAID5 |
		    BTRFS_BLOCK_GROUP_RAID6))
		num_dev = root->fs_info->fs_devices->rw_devices;
	else if (type & BTRFS_BLOCK_GROUP_RAID1)
		num_dev = 2;
	else
		num_dev = 1;

	return num_dev;
}

/*
 * Reserve space in the SYSTEM space_info, if needed, so that the pending
 * allocation of a chunk of @type can update the chunk tree (device items
 * plus one chunk item) without hitting ENOSPC.
 */
void check_system_chunk(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			u64 type)
{
	struct btrfs_space_info *info;
	u64 left;
	u64 thresh;
	int ret = 0;
	u64 num_devs;

	/*
	 * Needed because we can end up allocating a system chunk and for an
	 * atomic and race free space reservation in the chunk block reserve.
	 */
	ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));

	info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	spin_lock(&info->lock);
	left = info->total_bytes - info->bytes_used - info->bytes_pinned -
		info->bytes_reserved - info->bytes_readonly -
		info->bytes_may_use;
	spin_unlock(&info->lock);

	num_devs = get_profile_num_devs(root, type);

	/* num_devs device items to update and 1 chunk item to add or remove */
	thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
		btrfs_calc_trans_metadata_size(root, 1);

	if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
		btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
			left, thresh, type);
		dump_space_info(info, 0, 0);
	}

	if (left < thresh) {
		u64 flags;

		flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
		/*
		 * Ignore failure to create system chunk. We might end up not
		 * needing it, as we might not need to COW all nodes/leafs from
		 * the paths we visit in the chunk tree (they were already COWed
		 * or created in the current transaction for example).
		 */
		ret = btrfs_alloc_chunk(trans, root, flags);
	}

	if (!ret) {
		ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
					  &root->fs_info->chunk_block_rsv,
					  thresh, BTRFS_RESERVE_NO_FLUSH);
		if (!ret)
			trans->chunk_bytes_reserved += thresh;
	}
}

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	int wait_for_alloc = 0;
	int ret = 0;

	/* Don't re-enter if we're already allocating a chunk */
	if (trans->allocating_chunk)
		return -ENOSPC;

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret); /* -ENOMEM */
	}
	BUG_ON(!space_info); /* Logic error */

again:
	spin_lock(&space_info->lock);
	if (force < space_info->force_alloc)
		force = space_info->force_alloc;
	if (space_info->full) {
		if (should_alloc_chunk(extent_root, space_info, force))
			ret = -ENOSPC;
		else
			ret = 0;
		spin_unlock(&space_info->lock);
		return ret;
	}

	if (!should_alloc_chunk(extent_root, space_info, force)) {
		spin_unlock(&space_info->lock);
		return 0;
	} else if (space_info->chunk_alloc) {
		wait_for_alloc = 1;
	} else {
		space_info->chunk_alloc = 1;
	}

	spin_unlock(&space_info->lock);

	mutex_lock(&fs_info->chunk_mutex);

	/*
	 * The chunk_mutex is held throughout the entirety of a chunk
	 * allocation, so once we've acquired the chunk_mutex we know that the
	 * other guy is done and we need to recheck and see if we should
	 * allocate.
	 */
	if (wait_for_alloc) {
		mutex_unlock(&fs_info->chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	trans->allocating_chunk = true;

	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
	if (btrfs_mixed_space_info(space_info))
		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);

	/*
	 * if we're doing a data chunk, go ahead and make sure that
	 * we keep a reasonable number of metadata chunks allocated in the
	 * FS as well.
	 */
	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
		fs_info->data_chunk_allocations++;
		if (!(fs_info->data_chunk_allocations %
		      fs_info->metadata_ratio))
			force_metadata_allocation(fs_info);
	}

	/*
	 * Check if we have enough space in SYSTEM chunk because we may need
	 * to update devices.
	 */
	check_system_chunk(trans, extent_root, flags);

	ret = btrfs_alloc_chunk(trans, extent_root, flags);
	trans->allocating_chunk = false;

	spin_lock(&space_info->lock);
	if (ret < 0 && ret != -ENOSPC)
		goto out;
	if (ret)
		space_info->full = 1;
	else
		ret = 1;

	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
	space_info->chunk_alloc = 0;
	spin_unlock(&space_info->lock);
	mutex_unlock(&fs_info->chunk_mutex);
	/*
	 * When we allocate a new chunk we reserve space in the chunk block
	 * reserve to make sure we can COW nodes/leafs in the chunk tree or
	 * add new nodes/leafs to it if we end up needing to do it when
	 * inserting the chunk item and updating device items as part of the
	 * second phase of chunk allocation, performed by
	 * btrfs_finish_chunk_alloc().  So if we have accumulated enough
	 * reserved chunk bytes in this transaction handle, create the
	 * pending block groups now and release the chunk reservation,
	 * instead of letting them pile up.
	 */
	if (trans->can_flush_pending_bgs &&
	    trans->chunk_bytes_reserved >= (u64)SZ_2M) {
		btrfs_create_pending_block_groups(trans, trans->root);
		btrfs_trans_release_chunk_metadata(trans);
	}
	return ret;
}

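/*
 * Decide whether a reservation of @bytes may overcommit the space_info: we
 * allow it while plenty of unallocated device space remains, scaled down by
 * the profile's redundancy and by how aggressively the caller can flush.
 */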
static int can_overcommit(struct btrfs_root *root,
			  struct btrfs_space_info *space_info, u64 bytes,
			  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	u64 profile = btrfs_get_alloc_profile(root, 0);
	u64 space_size;
	u64 avail;
	u64 used;

	used = space_info->bytes_used + space_info->bytes_reserved +
		space_info->bytes_pinned + space_info->bytes_readonly;

	/*
	 * We only want to allow over committing if we have lots of actual space
	 * free, but if we don't have enough space to handle the global reserve
	 * space then we could end up having a real enospc problem when trying
	 * to allocate a chunk or some other such important allocation.
	 */
	spin_lock(&global_rsv->lock);
	space_size = calc_global_rsv_need_space(global_rsv);
	spin_unlock(&global_rsv->lock);
	if (used + space_size >= space_info->total_bytes)
		return 0;

	used += space_info->bytes_may_use;

	spin_lock(&root->fs_info->free_chunk_lock);
	avail = root->fs_info->free_chunk_space;
	spin_unlock(&root->fs_info->free_chunk_lock);

	/*
	 * If we have dup, raid1 or raid10 then only half of the free
	 * space is actually useable.  For raid56, the space info used
	 * doesn't include the parity drive, so we don't have to
	 * change the math
	 */
	if (profile & (BTRFS_BLOCK_GROUP_DUP |
		       BTRFS_BLOCK_GROUP_RAID1 |
		       BTRFS_BLOCK_GROUP_RAID10))
		avail >>= 1;

	/*
	 * If we can flush everything, don't let us overcommit by more
	 * than 1/8 of the free space; if we can't flush, allow up to
	 * half of it.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		avail >>= 3;
	else
		avail >>= 1;

	if (used + bytes < space_info->total_bytes + avail)
		return 1;
	return 0;
}

static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
					 unsigned long nr_pages, int nr_items)
{
	struct super_block *sb = root->fs_info->sb;

	if (down_read_trylock(&sb->s_umount)) {
		writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
		up_read(&sb->s_umount);
	} else {
		/*
		 * We needn't worry about the filesystem going from r/w to r/o
		 * though we don't acquire ->s_umount mutex, because the
		 * filesystem should guarantee the delalloc inodes list be
		 * empty after the filesystem is read-only (all dirty pages
		 * have been written to disk).
		 */
		btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
		if (!current->journal_info)
			btrfs_wait_ordered_roots(root->fs_info, nr_items,
						 0, (u64)-1);
	}
}

static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
{
	u64 bytes;
	int nr;

	bytes = btrfs_calc_trans_metadata_size(root, 1);
	nr = (int)div64_u64(to_reclaim, bytes);
	if (!nr)
		nr = 1;
	return nr;
}

#define EXTENT_SIZE_PER_ITEM	SZ_256K
4674
4675/*
4676 * Shrink the metadata reservations for delalloc.
4677 */
4678static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4679 bool wait_ordered)
4680{
4681 struct btrfs_block_rsv *block_rsv;
4682 struct btrfs_space_info *space_info;
4683 struct btrfs_trans_handle *trans;
4684 u64 delalloc_bytes;
4685 u64 max_reclaim;
4686 long time_left;
4687 unsigned long nr_pages;
4688 int loops;
4689 int items;
4690 enum btrfs_reserve_flush_enum flush;
4691
4692 /* Calculate the number of items we need to flush for this reservation */
4693 items = calc_reclaim_items_nr(root, to_reclaim);
4694 to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
4695
4696 trans = (struct btrfs_trans_handle *)current->journal_info;
4697 block_rsv = &root->fs_info->delalloc_block_rsv;
4698 space_info = block_rsv->space_info;
4699
4700 delalloc_bytes = percpu_counter_sum_positive(
4701 &root->fs_info->delalloc_bytes);
4702 if (delalloc_bytes == 0) {
4703 if (trans)
4704 return;
4705 if (wait_ordered)
4706 btrfs_wait_ordered_roots(root->fs_info, items,
4707 0, (u64)-1);
4708 return;
4709 }
4710
4711 loops = 0;
4712 while (delalloc_bytes && loops < 3) {
4713 max_reclaim = min(delalloc_bytes, to_reclaim);
4714 nr_pages = max_reclaim >> PAGE_SHIFT;
4715 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
4716 /*
4717 * We need to wait for the async pages to actually start before
4718 * we do anything.
4719 */
4720 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4721 if (!max_reclaim)
4722 goto skip_async;
4723
4724 if (max_reclaim <= nr_pages)
4725 max_reclaim = 0;
4726 else
4727 max_reclaim -= nr_pages;
4728
4729 wait_event(root->fs_info->async_submit_wait,
4730 atomic_read(&root->fs_info->async_delalloc_pages) <=
4731 (int)max_reclaim);
4732skip_async:
4733 if (!trans)
4734 flush = BTRFS_RESERVE_FLUSH_ALL;
4735 else
4736 flush = BTRFS_RESERVE_NO_FLUSH;
4737 spin_lock(&space_info->lock);
4738 if (can_overcommit(root, space_info, orig, flush)) {
4739 spin_unlock(&space_info->lock);
4740 break;
4741 }
4742 spin_unlock(&space_info->lock);
4743
4744 loops++;
4745 if (wait_ordered && !trans) {
4746 btrfs_wait_ordered_roots(root->fs_info, items,
4747 0, (u64)-1);
4748 } else {
4749 time_left = schedule_timeout_killable(1);
4750 if (time_left)
4751 break;
4752 }
4753 delalloc_bytes = percpu_counter_sum_positive(
4754 &root->fs_info->delalloc_bytes);
4755 }
4756}
4757
4758/**
4759 * may_commit_transaction - possibly commit the transaction if it's ok to
4760 * @root - the root we're allocating for
4761 * @space_info - the space_info we're allocating for
4762 * @bytes - the number of bytes we want to reserve
4763 * @force - force the commit
4764 *
4765 * This will check whether committing the transaction will actually free
4766 * enough pinned space to get us somewhere, and commit it if so; else -ENOSPC.
4767 */
4768static int may_commit_transaction(struct btrfs_root *root,
4769 struct btrfs_space_info *space_info,
4770 u64 bytes, int force)
4771{
4772 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4773 struct btrfs_trans_handle *trans;
4774
4775 trans = (struct btrfs_trans_handle *)current->journal_info;
4776 if (trans)
4777 return -EAGAIN;
4778
4779 if (force)
4780 goto commit;
4781
4782 /* See if there is enough pinned space to make this reservation */
4783 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4784 bytes) >= 0)
4785 goto commit;
4786
4787 /*
4788 * See if there is some space in the delayed insertion reserve for
4789 * this reservation; committing would also free that.
4790 */
4791 if (space_info != delayed_rsv->space_info)
4792 return -ENOSPC;
4793
4794 spin_lock(&delayed_rsv->lock);
4795 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4796 bytes - delayed_rsv->size) < 0) {
4797 spin_unlock(&delayed_rsv->lock);
4798 return -ENOSPC;
4799 }
4800 spin_unlock(&delayed_rsv->lock);
4801
4802commit:
4803 trans = btrfs_join_transaction(root);
4804 if (IS_ERR(trans))
4805 return -ENOSPC;
4806
4807 return btrfs_commit_transaction(trans, root);
4808}
4809
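/*
 * The states flush_space() steps through when trying to make a metadata
 * reservation fit, ordered from the cheapest operation to the most
 * expensive one (committing the transaction).
 */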
4810enum flush_state {
4811 FLUSH_DELAYED_ITEMS_NR = 1,
4812 FLUSH_DELAYED_ITEMS = 2,
4813 FLUSH_DELALLOC = 3,
4814 FLUSH_DELALLOC_WAIT = 4,
4815 ALLOC_CHUNK = 5,
4816 COMMIT_TRANS = 6,
4817};
4818
4819static int flush_space(struct btrfs_root *root,
4820 struct btrfs_space_info *space_info, u64 num_bytes,
4821 u64 orig_bytes, int state)
4822{
4823 struct btrfs_trans_handle *trans;
4824 int nr;
4825 int ret = 0;
4826
4827 switch (state) {
4828 case FLUSH_DELAYED_ITEMS_NR:
4829 case FLUSH_DELAYED_ITEMS:
4830 if (state == FLUSH_DELAYED_ITEMS_NR)
4831 nr = calc_reclaim_items_nr(root, num_bytes) * 2;
4832 else
4833 nr = -1;
4834
4835 trans = btrfs_join_transaction(root);
4836 if (IS_ERR(trans)) {
4837 ret = PTR_ERR(trans);
4838 break;
4839 }
4840 ret = btrfs_run_delayed_items_nr(trans, root, nr);
4841 btrfs_end_transaction(trans, root);
4842 break;
4843 case FLUSH_DELALLOC:
4844 case FLUSH_DELALLOC_WAIT:
4845 shrink_delalloc(root, num_bytes * 2, orig_bytes,
4846 state == FLUSH_DELALLOC_WAIT);
4847 break;
4848 case ALLOC_CHUNK:
4849 trans = btrfs_join_transaction(root);
4850 if (IS_ERR(trans)) {
4851 ret = PTR_ERR(trans);
4852 break;
4853 }
4854 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4855 btrfs_get_alloc_profile(root, 0),
4856 CHUNK_ALLOC_NO_FORCE);
4857 btrfs_end_transaction(trans, root);
4858 if (ret == -ENOSPC)
4859 ret = 0;
4860 break;
4861 case COMMIT_TRANS:
4862 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
4863 break;
4864 default:
4865 ret = -ENOSPC;
4866 break;
4867 }
4868
4869 return ret;
4870}
4871
4872static inline u64
4873btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4874 struct btrfs_space_info *space_info)
4875{
4876 u64 used;
4877 u64 expected;
4878 u64 to_reclaim;
4879
4880 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4881 spin_lock(&space_info->lock);
4882 if (can_overcommit(root, space_info, to_reclaim,
4883 BTRFS_RESERVE_FLUSH_ALL)) {
4884 to_reclaim = 0;
4885 goto out;
4886 }
4887
4888 used = space_info->bytes_used + space_info->bytes_reserved +
4889 space_info->bytes_pinned + space_info->bytes_readonly +
4890 space_info->bytes_may_use;
4891 if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
4892 expected = div_factor_fine(space_info->total_bytes, 95);
4893 else
4894 expected = div_factor_fine(space_info->total_bytes, 90);
4895
4896 if (used > expected)
4897 to_reclaim = used - expected;
4898 else
4899 to_reclaim = 0;
4900 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4901 space_info->bytes_reserved);
4902out:
4903 spin_unlock(&space_info->lock);
4904
4905 return to_reclaim;
4906}
4907
4908static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4909 struct btrfs_fs_info *fs_info, u64 used)
4910{
4911 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4912
4913 /* If we're just plain full then async reclaim just slows us down */
4914 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4915 return 0;
4916
4917 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4918 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4919}
4920
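/*
 * Returns 1 if the async reclaim worker should keep going at the given
 * flush state, i.e. the metadata space_info is still under pressure and
 * flushing can still make progress.
 */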
4921static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
4922 struct btrfs_fs_info *fs_info,
4923 int flush_state)
4924{
4925 u64 used;
4926
4927 spin_lock(&space_info->lock);
4928 /*
4929 * We've run out of space and haven't gotten any free space via
4930 * flush_space, so don't bother doing async reclaim.
4931 */
4932 if (flush_state > COMMIT_TRANS && space_info->full) {
4933 spin_unlock(&space_info->lock);
4934 return 0;
4935 }
4936
4937 used = space_info->bytes_used + space_info->bytes_reserved +
4938 space_info->bytes_pinned + space_info->bytes_readonly +
4939 space_info->bytes_may_use;
4940 if (need_do_async_reclaim(space_info, fs_info, used)) {
4941 spin_unlock(&space_info->lock);
4942 return 1;
4943 }
4944 spin_unlock(&space_info->lock);
4945
4946 return 0;
4947}
4948
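/*
 * Worker for fs_info->async_reclaim_work: flushes metadata space in the
 * background, escalating through the flush states for as long as the
 * metadata space_info stays under pressure.
 */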
4949static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4950{
4951 struct btrfs_fs_info *fs_info;
4952 struct btrfs_space_info *space_info;
4953 u64 to_reclaim;
4954 int flush_state;
4955
4956 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4957 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4958
4959 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
4960 space_info);
4961 if (!to_reclaim)
4962 return;
4963
4964 flush_state = FLUSH_DELAYED_ITEMS_NR;
4965 do {
4966 flush_space(fs_info->fs_root, space_info, to_reclaim,
4967 to_reclaim, flush_state);
4968 flush_state++;
4969 if (!btrfs_need_do_async_reclaim(space_info, fs_info,
4970 flush_state))
4971 return;
4972 } while (flush_state < COMMIT_TRANS);
4973}
4974
4975void btrfs_init_async_reclaim_work(struct work_struct *work)
4976{
4977 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
4978}
4979
4980/**
4981 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
4982 * @root - the root we're allocating for
4983 * @block_rsv - the block_rsv we're allocating for
4984 * @orig_bytes - the number of bytes we want
4985 * @flush - whether or not we can flush to make our reservation
4986 *
4987 * This will reserve orig_bytes number of bytes from the space info associated
4988 * with the block_rsv. If there is not enough space it will make an attempt to
4989 * flush out space to make room. It will do this by flushing delalloc if
4990 * possible or committing the transaction. If flush is BTRFS_RESERVE_NO_FLUSH
4991 * then no attempt to regain reservations will be made and this will fail if
4992 * there is not enough space already.
4993 */
4994static int reserve_metadata_bytes(struct btrfs_root *root,
4995 struct btrfs_block_rsv *block_rsv,
4996 u64 orig_bytes,
4997 enum btrfs_reserve_flush_enum flush)
4998{
4999 struct btrfs_space_info *space_info = block_rsv->space_info;
5000 u64 used;
5001 u64 num_bytes = orig_bytes;
5002 int flush_state = FLUSH_DELAYED_ITEMS_NR;
5003 int ret = 0;
5004 bool flushing = false;
5005
5006again:
5007 ret = 0;
5008 spin_lock(&space_info->lock);
5009 /*
5010 * We only want to wait if somebody other than us is flushing and we
5011 * are actually allowed to flush all things.
5012 */
5013 while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
5014 space_info->flush) {
5015 spin_unlock(&space_info->lock);
5016 /*
5017 * If we have a trans handle we can't wait because the flusher
5018 * may have to commit the transaction, which would mean we would
5019 * deadlock since we are waiting for the flusher to finish the
5020 * flushing.
5021 */
5022 if (current->journal_info)
5023 return -EAGAIN;
5024 ret = wait_event_killable(space_info->wait, !space_info->flush);
5025 /* Must have been killed, return */
5026 if (ret)
5027 return -EINTR;
5028
5029 spin_lock(&space_info->lock);
5030 }
5031
5032 ret = -ENOSPC;
5033 used = space_info->bytes_used + space_info->bytes_reserved +
5034 space_info->bytes_pinned + space_info->bytes_readonly +
5035 space_info->bytes_may_use;
5036
5037 /*
5038 * The idea here is that if we haven't already over-reserved the
5039 * space_info, we can go ahead and record our reservation first and
5040 * then start flushing if we need to. Otherwise, if we've already
5041 * overcommitted, start flushing first and then come back and try
5042 * to make our reservation.
5043 */
5044 if (used <= space_info->total_bytes) {
5045 if (used + orig_bytes <= space_info->total_bytes) {
5046 space_info->bytes_may_use += orig_bytes;
5047 trace_btrfs_space_reservation(root->fs_info,
5048 "space_info", space_info->flags, orig_bytes, 1);
5049 ret = 0;
5050 } else {
5051 /*
5052 * Ok, set num_bytes to orig_bytes since we aren't
5053 * over-committed; this will only happen if somebody needs us
5054 * to flush to make room for their reservation.
5055 */
5056 num_bytes = orig_bytes;
5057 }
5058 } else {
5059 /*
5060 * Ok, we're over-committed: set num_bytes to the over-committed
5061 * amount plus twice the amount of bytes that we need for this
5062 * reservation.
5063 */
5064 num_bytes = used - space_info->total_bytes +
5065 (orig_bytes * 2);
5066 }
5067
5068 if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
5069 space_info->bytes_may_use += orig_bytes;
5070 trace_btrfs_space_reservation(root->fs_info, "space_info",
5071 space_info->flags, orig_bytes,
5072 1);
5073 ret = 0;
5074 }
5075
5076 /*
5077 * Couldn't make our reservation, save our place so that while we're
5078 * trying to reclaim space we can actually use it instead of somebody
5079 * else stealing it from us.
5080 *
5081 * We make the other tasks wait for the flush only when we can flush
5082 * all things.
5083 */
5084 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5085 flushing = true;
5086 space_info->flush = 1;
5087 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5088 used += orig_bytes;
5089 /*
5090 * We will do the space reservation dance during log replay,
5091 * which means we won't have fs_info->fs_root set, so don't do
5092 * the async reclaim as we will panic.
5093 */
5094 if (!root->fs_info->log_root_recovering &&
5095 need_do_async_reclaim(space_info, root->fs_info, used) &&
5096 !work_busy(&root->fs_info->async_reclaim_work))
5097 queue_work(system_unbound_wq,
5098 &root->fs_info->async_reclaim_work);
5099 }
5100 spin_unlock(&space_info->lock);
5101
5102 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5103 goto out;
5104
5105 ret = flush_space(root, space_info, num_bytes, orig_bytes,
5106 flush_state);
5107 flush_state++;
5108
5109 /*
5110 * If we are FLUSH_LIMIT, we can not flush delalloc, or a
5111 * deadlock could happen, so skip the delalloc flushing states.
5112 */
5113 if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
5114 (flush_state == FLUSH_DELALLOC ||
5115 flush_state == FLUSH_DELALLOC_WAIT))
5116 flush_state = ALLOC_CHUNK;
5117
5118 if (!ret)
5119 goto again;
5120 else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
5121 flush_state < COMMIT_TRANS)
5122 goto again;
5123 else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
5124 flush_state <= COMMIT_TRANS)
5125 goto again;
5126
5127out:
5128 if (ret == -ENOSPC &&
5129 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5130 struct btrfs_block_rsv *global_rsv =
5131 &root->fs_info->global_block_rsv;
5132
5133 if (block_rsv != global_rsv &&
5134 !block_rsv_use_bytes(global_rsv, orig_bytes))
5135 ret = 0;
5136 }
5137 if (ret == -ENOSPC)
5138 trace_btrfs_space_reservation(root->fs_info,
5139 "space_info:enospc",
5140 space_info->flags, orig_bytes, 1);
5141 if (flushing) {
5142 spin_lock(&space_info->lock);
5143 space_info->flush = 0;
5144 wake_up_all(&space_info->wait);
5145 spin_unlock(&space_info->lock);
5146 }
5147 return ret;
5148}
5149
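/*
 * Pick the block reserve that should back a tree block allocation for the
 * given root: the transaction's reserve for COW-able roots (and a couple of
 * special roots), then the root's own reserve, falling back to the empty
 * reserve.
 */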
5150static struct btrfs_block_rsv *get_block_rsv(
5151 const struct btrfs_trans_handle *trans,
5152 const struct btrfs_root *root)
5153{
5154 struct btrfs_block_rsv *block_rsv = NULL;
5155
5156 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5157 (root == root->fs_info->csum_root && trans->adding_csums) ||
5158 (root == root->fs_info->uuid_root))
5159 block_rsv = trans->block_rsv;
5160
5161 if (!block_rsv)
5162 block_rsv = root->block_rsv;
5163
5164 if (!block_rsv)
5165 block_rsv = &root->fs_info->empty_block_rsv;
5166
5167 return block_rsv;
5168}
5169
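/*
 * Take num_bytes out of the reserve's reserved space, or return -ENOSPC if
 * it does not hold that much.
 */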
5170static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5171 u64 num_bytes)
5172{
5173 int ret = -ENOSPC;
5174 spin_lock(&block_rsv->lock);
5175 if (block_rsv->reserved >= num_bytes) {
5176 block_rsv->reserved -= num_bytes;
5177 if (block_rsv->reserved < block_rsv->size)
5178 block_rsv->full = 0;
5179 ret = 0;
5180 }
5181 spin_unlock(&block_rsv->lock);
5182 return ret;
5183}
5184
5185static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5186 u64 num_bytes, int update_size)
5187{
5188 spin_lock(&block_rsv->lock);
5189 block_rsv->reserved += num_bytes;
5190 if (update_size)
5191 block_rsv->size += num_bytes;
5192 else if (block_rsv->reserved >= block_rsv->size)
5193 block_rsv->full = 1;
5194 spin_unlock(&block_rsv->lock);
5195}
5196
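/*
 * Move num_bytes from the global reserve into dest, but only if that leaves
 * the global reserve with at least min_factor tenths of its size reserved.
 */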
5197int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5198 struct btrfs_block_rsv *dest, u64 num_bytes,
5199 int min_factor)
5200{
5201 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5202 u64 min_bytes;
5203
5204 if (global_rsv->space_info != dest->space_info)
5205 return -ENOSPC;
5206
5207 spin_lock(&global_rsv->lock);
5208 min_bytes = div_factor(global_rsv->size, min_factor);
5209 if (global_rsv->reserved < min_bytes + num_bytes) {
5210 spin_unlock(&global_rsv->lock);
5211 return -ENOSPC;
5212 }
5213 global_rsv->reserved -= num_bytes;
5214 if (global_rsv->reserved < global_rsv->size)
5215 global_rsv->full = 0;
5216 spin_unlock(&global_rsv->lock);
5217
5218 block_rsv_add_bytes(dest, num_bytes, 1);
5219 return 0;
5220}
5221
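/*
 * Shrink block_rsv's size by num_bytes ((u64)-1 means all of it). Reserved
 * space above the new size first refills dest (if given and not full); the
 * remainder is released back to the space_info's bytes_may_use.
 */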
5222static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5223 struct btrfs_block_rsv *block_rsv,
5224 struct btrfs_block_rsv *dest, u64 num_bytes)
5225{
5226 struct btrfs_space_info *space_info = block_rsv->space_info;
5227
5228 spin_lock(&block_rsv->lock);
5229 if (num_bytes == (u64)-1)
5230 num_bytes = block_rsv->size;
5231 block_rsv->size -= num_bytes;
5232 if (block_rsv->reserved >= block_rsv->size) {
5233 num_bytes = block_rsv->reserved - block_rsv->size;
5234 block_rsv->reserved = block_rsv->size;
5235 block_rsv->full = 1;
5236 } else {
5237 num_bytes = 0;
5238 }
5239 spin_unlock(&block_rsv->lock);
5240
5241 if (num_bytes > 0) {
5242 if (dest) {
5243 spin_lock(&dest->lock);
5244 if (!dest->full) {
5245 u64 bytes_to_add;
5246
5247 bytes_to_add = dest->size - dest->reserved;
5248 bytes_to_add = min(num_bytes, bytes_to_add);
5249 dest->reserved += bytes_to_add;
5250 if (dest->reserved >= dest->size)
5251 dest->full = 1;
5252 num_bytes -= bytes_to_add;
5253 }
5254 spin_unlock(&dest->lock);
5255 }
5256 if (num_bytes) {
5257 spin_lock(&space_info->lock);
5258 space_info->bytes_may_use -= num_bytes;
5259 trace_btrfs_space_reservation(fs_info, "space_info",
5260 space_info->flags, num_bytes, 0);
5261 spin_unlock(&space_info->lock);
5262 }
5263 }
5264}
5265
5266static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
5267 struct btrfs_block_rsv *dst, u64 num_bytes)
5268{
5269 int ret;
5270
5271 ret = block_rsv_use_bytes(src, num_bytes);
5272 if (ret)
5273 return ret;
5274
5275 block_rsv_add_bytes(dst, num_bytes, 1);
5276 return 0;
5277}
5278
5279void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5280{
5281 memset(rsv, 0, sizeof(*rsv));
5282 spin_lock_init(&rsv->lock);
5283 rsv->type = type;
5284}
5285
5286struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
5287 unsigned short type)
5288{
5289 struct btrfs_block_rsv *block_rsv;
5290 struct btrfs_fs_info *fs_info = root->fs_info;
5291
5292 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5293 if (!block_rsv)
5294 return NULL;
5295
5296 btrfs_init_block_rsv(block_rsv, type);
5297 block_rsv->space_info = __find_space_info(fs_info,
5298 BTRFS_BLOCK_GROUP_METADATA);
5299 return block_rsv;
5300}
5301
5302void btrfs_free_block_rsv(struct btrfs_root *root,
5303 struct btrfs_block_rsv *rsv)
5304{
5305 if (!rsv)
5306 return;
5307 btrfs_block_rsv_release(root, rsv, (u64)-1);
5308 kfree(rsv);
5309}
5310
5311void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
5312{
5313 kfree(rsv);
5314}
5315
5316int btrfs_block_rsv_add(struct btrfs_root *root,
5317 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5318 enum btrfs_reserve_flush_enum flush)
5319{
5320 int ret;
5321
5322 if (num_bytes == 0)
5323 return 0;
5324
5325 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5326 if (!ret) {
5327 block_rsv_add_bytes(block_rsv, num_bytes, 1);
5328 return 0;
5329 }
5330
5331 return ret;
5332}
5333
5334int btrfs_block_rsv_check(struct btrfs_root *root,
5335 struct btrfs_block_rsv *block_rsv, int min_factor)
5336{
5337 u64 num_bytes = 0;
5338 int ret = -ENOSPC;
5339
5340 if (!block_rsv)
5341 return 0;
5342
5343 spin_lock(&block_rsv->lock);
5344 num_bytes = div_factor(block_rsv->size, min_factor);
5345 if (block_rsv->reserved >= num_bytes)
5346 ret = 0;
5347 spin_unlock(&block_rsv->lock);
5348
5349 return ret;
5350}
5351
5352int btrfs_block_rsv_refill(struct btrfs_root *root,
5353 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5354 enum btrfs_reserve_flush_enum flush)
5355{
5356 u64 num_bytes = 0;
5357 int ret = -ENOSPC;
5358
5359 if (!block_rsv)
5360 return 0;
5361
5362 spin_lock(&block_rsv->lock);
5363 num_bytes = min_reserved;
5364 if (block_rsv->reserved >= num_bytes)
5365 ret = 0;
5366 else
5367 num_bytes -= block_rsv->reserved;
5368 spin_unlock(&block_rsv->lock);
5369
5370 if (!ret)
5371 return 0;
5372
5373 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5374 if (!ret) {
5375 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5376 return 0;
5377 }
5378
5379 return ret;
5380}
5381
5382int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
5383 struct btrfs_block_rsv *dst_rsv,
5384 u64 num_bytes)
5385{
5386 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5387}
5388
5389void btrfs_block_rsv_release(struct btrfs_root *root,
5390 struct btrfs_block_rsv *block_rsv,
5391 u64 num_bytes)
5392{
5393 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5394 if (global_rsv == block_rsv ||
5395 block_rsv->space_info != global_rsv->space_info)
5396 global_rsv = NULL;
5397 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
5398 num_bytes);
5399}
5400
5401/*
5402 * Helper to calculate how much metadata the global block reserve needs:
5403 * room for the csums of all data plus 2% of the used data+metadata space,
5404 * capped at a third of the used metadata space.
5405 */
5406static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
5407{
5408 struct btrfs_space_info *sinfo;
5409 u64 num_bytes;
5410 u64 meta_used;
5411 u64 data_used;
5412 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
5413
5414 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
5415 spin_lock(&sinfo->lock);
5416 data_used = sinfo->bytes_used;
5417 spin_unlock(&sinfo->lock);
5418
5419 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5420 spin_lock(&sinfo->lock);
5421 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
5422 data_used = 0;
5423 meta_used = sinfo->bytes_used;
5424 spin_unlock(&sinfo->lock);
5425
5426 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
5427 csum_size * 2;
5428 num_bytes += div_u64(data_used + meta_used, 50);
5429
5430 if (num_bytes * 3 > meta_used)
5431 num_bytes = div_u64(meta_used, 3);
5432
5433 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
5434}
5435
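/*
 * Recompute the size of the global block reserve (capped at 512M) and bring
 * its reserved bytes in line with it: top it up from unreserved space in the
 * space_info, or hand back any excess.
 */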
5436static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5437{
5438 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5439 struct btrfs_space_info *sinfo = block_rsv->space_info;
5440 u64 num_bytes;
5441
5442 num_bytes = calc_global_metadata_size(fs_info);
5443
5444 spin_lock(&sinfo->lock);
5445 spin_lock(&block_rsv->lock);
5446
5447 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5448
5449 if (block_rsv->reserved < block_rsv->size) {
5450 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
5451 sinfo->bytes_reserved + sinfo->bytes_readonly +
5452 sinfo->bytes_may_use;
5453 if (sinfo->total_bytes > num_bytes) {
5454 num_bytes = sinfo->total_bytes - num_bytes;
5455 num_bytes = min(num_bytes,
5456 block_rsv->size - block_rsv->reserved);
5457 block_rsv->reserved += num_bytes;
5458 sinfo->bytes_may_use += num_bytes;
5459 trace_btrfs_space_reservation(fs_info, "space_info",
5460 sinfo->flags, num_bytes,
5461 1);
5462 }
5463 } else if (block_rsv->reserved > block_rsv->size) {
5464 num_bytes = block_rsv->reserved - block_rsv->size;
5465 sinfo->bytes_may_use -= num_bytes;
5466 trace_btrfs_space_reservation(fs_info, "space_info",
5467 sinfo->flags, num_bytes, 0);
5468 block_rsv->reserved = block_rsv->size;
5469 }
5470
5471 if (block_rsv->reserved == block_rsv->size)
5472 block_rsv->full = 1;
5473 else
5474 block_rsv->full = 0;
5475
5476 spin_unlock(&block_rsv->lock);
5477 spin_unlock(&sinfo->lock);
5478}
5479
5480static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5481{
5482 struct btrfs_space_info *space_info;
5483
5484 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5485 fs_info->chunk_block_rsv.space_info = space_info;
5486
5487 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5488 fs_info->global_block_rsv.space_info = space_info;
5489 fs_info->delalloc_block_rsv.space_info = space_info;
5490 fs_info->trans_block_rsv.space_info = space_info;
5491 fs_info->empty_block_rsv.space_info = space_info;
5492 fs_info->delayed_block_rsv.space_info = space_info;
5493
5494 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
5495 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
5496 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5497 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5498 if (fs_info->quota_root)
5499 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5500 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5501
5502 update_global_block_rsv(fs_info);
5503}
5504
5505static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5506{
5507 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5508 (u64)-1);
5509 WARN_ON(fs_info->delalloc_block_rsv.size > 0);
5510 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
5511 WARN_ON(fs_info->trans_block_rsv.size > 0);
5512 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5513 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5514 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5515 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5516 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5517}
5518
5519void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
5520 struct btrfs_root *root)
5521{
5522 if (!trans->block_rsv)
5523 return;
5524
5525 if (!trans->bytes_reserved)
5526 return;
5527
5528 trace_btrfs_space_reservation(root->fs_info, "transaction",
5529 trans->transid, trans->bytes_reserved, 0);
5530 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
5531 trans->bytes_reserved = 0;
5532}
5533
5534/*
5535 * To be called after all the new block groups attached to the transaction
5536 * handle have been created (btrfs_create_pending_block_groups()).
5537 */
5538void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5539{
5540 struct btrfs_fs_info *fs_info = trans->root->fs_info;
5541
5542 if (!trans->chunk_bytes_reserved)
5543 return;
5544
5545 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5546
5547 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5548 trans->chunk_bytes_reserved);
5549 trans->chunk_bytes_reserved = 0;
5550}
5551
5552/* Can only return 0 or -ENOSPC */
5553int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
5554 struct inode *inode)
5555{
5556 struct btrfs_root *root = BTRFS_I(inode)->root;
5557 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
5558 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
5559
5560 /*
5561 * We need to hold space in order to delete our orphan item once
5562 * we've added it, so this takes the reservation so we can release
5563 * it later when we are truly done with the orphan item.
5564 */
5565 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
5566 trace_btrfs_space_reservation(root->fs_info, "orphan",
5567 btrfs_ino(inode), num_bytes, 1);
5568 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
5569}
5570
5571void btrfs_orphan_release_metadata(struct inode *inode)
5572{
5573 struct btrfs_root *root = BTRFS_I(inode)->root;
5574 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
5575 trace_btrfs_space_reservation(root->fs_info, "orphan",
5576 btrfs_ino(inode), num_bytes, 0);
5577 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
5578}
5579
5580/**
5581 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
5582 * @root: the root of the parent directory
5583 * @rsv: block reservation
5584 * @items: the number of items that we need to do reservation for
5585 * @qgroup_reserved: used to return the reserved size in qgroup
5586 * @use_global_rsv: allow falling back to the global block reservation
5587 *
5588 * This function is used to reserve the space for snapshot/subvolume creation
5589 * and deletion. Those operations are different from common file/directory
5590 * operations in that they can free the reserved space if it ends up unused,
5591 * so we over-reserve here and the caller must return whatever is left to the
5592 * rsv at the end of the operation.
5593 */
5594int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5595 struct btrfs_block_rsv *rsv,
5596 int items,
5597 u64 *qgroup_reserved,
5598 bool use_global_rsv)
5599{
5600 u64 num_bytes;
5601 int ret;
5602 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5603
5604 if (root->fs_info->quota_enabled) {
5605 /* One for parent inode, two for dir entries */
5606 num_bytes = 3 * root->nodesize;
5607 ret = btrfs_qgroup_reserve_meta(root, num_bytes);
5608 if (ret)
5609 return ret;
5610 } else {
5611 num_bytes = 0;
5612 }
5613
5614 *qgroup_reserved = num_bytes;
5615
5616 num_bytes = btrfs_calc_trans_metadata_size(root, items);
5617 rsv->space_info = __find_space_info(root->fs_info,
5618 BTRFS_BLOCK_GROUP_METADATA);
5619 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5620 BTRFS_RESERVE_FLUSH_ALL);
5621
5622 if (ret == -ENOSPC && use_global_rsv)
5623 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
5624
5625 if (ret && *qgroup_reserved)
5626 btrfs_qgroup_free_meta(root, *qgroup_reserved);
5627
5628 return ret;
5629}
5630
5631void btrfs_subvolume_release_metadata(struct btrfs_root *root,
5632 struct btrfs_block_rsv *rsv,
5633 u64 qgroup_reserved)
5634{
5635 btrfs_block_rsv_release(root, rsv, (u64)-1);
5636}
5637
5638/**
5639 * drop_outstanding_extent - drop an outstanding extent
5640 * @inode: the inode we're dropping the extent for
5641 * @num_bytes: the number of bytes we're releasing.
5642 *
5643 * This is called when we are freeing up an outstanding extent, either called
5644 * after an error or after an extent is written. This will return the number
5645 * of reserved extents that need to be freed. This must be called with
5646 * BTRFS_I(inode)->lock held.
5647 */
5648static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
5649{
5650 unsigned drop_inode_space = 0;
5651 unsigned dropped_extents = 0;
5652 unsigned num_extents = 0;
5653
5654 num_extents = (unsigned)div64_u64(num_bytes +
5655 BTRFS_MAX_EXTENT_SIZE - 1,
5656 BTRFS_MAX_EXTENT_SIZE);
5657 ASSERT(num_extents);
5658 ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
5659 BTRFS_I(inode)->outstanding_extents -= num_extents;
5660
5661 if (BTRFS_I(inode)->outstanding_extents == 0 &&
5662 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5663 &BTRFS_I(inode)->runtime_flags))
5664 drop_inode_space = 1;
5665
5666 /*
5667 * If we have more or the same amount of outstanding extents as we have
5668 * reserved then we need to leave the reserved extents count alone.
5669 */
5670 if (BTRFS_I(inode)->outstanding_extents >=
5671 BTRFS_I(inode)->reserved_extents)
5672 return drop_inode_space;
5673
5674 dropped_extents = BTRFS_I(inode)->reserved_extents -
5675 BTRFS_I(inode)->outstanding_extents;
5676 BTRFS_I(inode)->reserved_extents -= dropped_extents;
5677 return dropped_extents + drop_inode_space;
5678}
5679
5680/**
5681 * calc_csum_metadata_size - return the amount of metadata space that must be
5682 * reserved/freed for the given bytes.
5683 * @inode: the inode we're manipulating
5684 * @num_bytes: the number of bytes in question
5685 * @reserve: 1 if we are reserving space, 0 if we are freeing space
5686 *
5687 * This adjusts the number of csum_bytes in the inode and then returns the
5688 * correct amount of metadata that must either be reserved or freed. We
5689 * calculate how many checksums we can fit into one leaf and then divide the
5690 * number of bytes that will need to be checksummed by this value to figure
5691 * out how many checksums will be required. If we are adding bytes then the
5692 * number may go up and if we are removing bytes then the number may go down.
5693 * If the number of csums goes up then the metadata reserved will go up as
5694 * well and vice versa.
5695 *
5696 * This must be called with BTRFS_I(inode)->lock held.
5697 */
5698static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
5699 int reserve)
5700{
5701 struct btrfs_root *root = BTRFS_I(inode)->root;
5702 u64 old_csums, num_csums;
5703
5704 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
5705 BTRFS_I(inode)->csum_bytes == 0)
5706 return 0;
5707
5708 old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5709 if (reserve)
5710 BTRFS_I(inode)->csum_bytes += num_bytes;
5711 else
5712 BTRFS_I(inode)->csum_bytes -= num_bytes;
5713 num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5714
5715 /* No change, no need to reserve more */
5716 if (old_csums == num_csums)
5717 return 0;
5718
5719 if (reserve)
5720 return btrfs_calc_trans_metadata_size(root,
5721 num_csums - old_csums);
5722
5723 return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
5724}
5725
5726int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5727{
5728 struct btrfs_root *root = BTRFS_I(inode)->root;
5729 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
5730 u64 to_reserve = 0;
5731 u64 csum_bytes;
5732 unsigned nr_extents = 0;
5733 int extra_reserve = 0;
5734 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
5735 int ret = 0;
5736 bool delalloc_lock = true;
5737 u64 to_free = 0;
5738 unsigned dropped;
5739
5740 /* If we are a free space inode we need to not flush since we will be
5741 * in the middle of a transaction commit. We also don't need the
5742 * delalloc mutex since we won't race with anybody. We need this
5743 * mostly to keep lockdep happy.
5744 */
5745 if (btrfs_is_free_space_inode(inode)) {
5746 flush = BTRFS_RESERVE_NO_FLUSH;
5747 delalloc_lock = false;
5748 }
5749
5750 if (flush != BTRFS_RESERVE_NO_FLUSH &&
5751 btrfs_transaction_in_commit(root->fs_info))
5752 schedule_timeout(1);
5753
5754 if (delalloc_lock)
5755 mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
5756
5757 num_bytes = ALIGN(num_bytes, root->sectorsize);
5758
5759 spin_lock(&BTRFS_I(inode)->lock);
5760 nr_extents = (unsigned)div64_u64(num_bytes +
5761 BTRFS_MAX_EXTENT_SIZE - 1,
5762 BTRFS_MAX_EXTENT_SIZE);
5763 BTRFS_I(inode)->outstanding_extents += nr_extents;
5764 nr_extents = 0;
5765
5766 if (BTRFS_I(inode)->outstanding_extents >
5767 BTRFS_I(inode)->reserved_extents)
5768 nr_extents = BTRFS_I(inode)->outstanding_extents -
5769 BTRFS_I(inode)->reserved_extents;
5770
5771 /*
5772 * Add an item to reserve for updating the inode when we complete
5773 * the delalloc io.
5774 */
5775 if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5776 &BTRFS_I(inode)->runtime_flags)) {
5777 nr_extents++;
5778 extra_reserve = 1;
5779 }
5780
5781 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
5782 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
5783 csum_bytes = BTRFS_I(inode)->csum_bytes;
5784 spin_unlock(&BTRFS_I(inode)->lock);
5785
5786 if (root->fs_info->quota_enabled) {
5787 ret = btrfs_qgroup_reserve_meta(root,
5788 nr_extents * root->nodesize);
5789 if (ret)
5790 goto out_fail;
5791 }
5792
5793 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
5794 if (unlikely(ret)) {
5795 btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
5796 goto out_fail;
5797 }
5798
5799 spin_lock(&BTRFS_I(inode)->lock);
5800 if (extra_reserve) {
5801 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5802 &BTRFS_I(inode)->runtime_flags);
5803 nr_extents--;
5804 }
5805 BTRFS_I(inode)->reserved_extents += nr_extents;
5806 spin_unlock(&BTRFS_I(inode)->lock);
5807
5808 if (delalloc_lock)
5809 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
5810
5811 if (to_reserve)
5812 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5813 btrfs_ino(inode), to_reserve, 1);
5814 block_rsv_add_bytes(block_rsv, to_reserve, 1);
5815
5816 return 0;
5817
5818out_fail:
5819 spin_lock(&BTRFS_I(inode)->lock);
5820 dropped = drop_outstanding_extent(inode, num_bytes);
5821 /*
5822 * If the inode's csum_bytes is the same as the original
5823 * csum_bytes then we know we haven't raced with any free()ers
5824 * so we can just reduce our inode's csum bytes and carry on.
5825 */
5826 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
5827 calc_csum_metadata_size(inode, num_bytes, 0);
5828 } else {
5829 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
5830 u64 bytes;
5831
5832 /*
5833 * This is tricky, but first we need to figure out how much we
5834 * freed from any free-ers that occurred during this
5835 * reclamation, and then we need to figure out how much we were
5836 * over-reserving. The difference between the snapshotted
5837 * csum_bytes and the current value is what the racing free-ers
5838 * released while we weren't looking.
5839 */
5840 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
5841 BTRFS_I(inode)->csum_bytes = csum_bytes;
5842 to_free = calc_csum_metadata_size(inode, bytes, 0);
5843
5844 /*
5845 * Now we need to see how much we would have freed had we not
5846 * been making this reservation and our ->csum_bytes were not
5847 * artificially inflated. Set ->csum_bytes to what it would have
5848 * been without this reservation and compute that free amount.
5849 */
5850 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
5851 bytes = csum_bytes - orig_csum_bytes;
5852 bytes = calc_csum_metadata_size(inode, bytes, 0);
5853
5854 /*
5855 * Now reset ->csum_bytes to what it should be. If bytes is
5856 * more than to_free then we would have freed more space had we
5857 * not had an artificially high ->csum_bytes, so we need to free
5858 * the remainder. If bytes is the same or less then we don't
5859 * need to do anything, the other free-ers did the correct
5860 * thing.
5861 */
5862 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
5863 if (bytes > to_free)
5864 to_free = bytes - to_free;
5865 else
5866 to_free = 0;
5867 }
5868 spin_unlock(&BTRFS_I(inode)->lock);
5869 if (dropped)
5870 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5871
5872 if (to_free) {
5873 btrfs_block_rsv_release(root, block_rsv, to_free);
5874 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5875 btrfs_ino(inode), to_free, 0);
5876 }
5877 if (delalloc_lock)
5878 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
5879 return ret;
5880}
5881
5882/**
5883 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
5884 * @inode: the inode to release the reservation for
5885 * @num_bytes: the number of bytes we're releasing
5886 *
5887 * This will release the metadata reservation for an inode. This can be called
5888 * once we complete IO for a given set of bytes to release their metadata
5889 * reservations.
5890 */
5891void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5892{
5893 struct btrfs_root *root = BTRFS_I(inode)->root;
5894 u64 to_free = 0;
5895 unsigned dropped;
5896
5897 num_bytes = ALIGN(num_bytes, root->sectorsize);
5898 spin_lock(&BTRFS_I(inode)->lock);
5899 dropped = drop_outstanding_extent(inode, num_bytes);
5900
5901 if (num_bytes)
5902 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
5903 spin_unlock(&BTRFS_I(inode)->lock);
5904 if (dropped > 0)
5905 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5906
5907 if (btrfs_test_is_dummy_root(root))
5908 return;
5909
5910 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5911 btrfs_ino(inode), to_free, 0);
5912
5913 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
5914 to_free);
5915}
5916
5917/**
5918 * btrfs_delalloc_reserve_space - reserve data and metadata space for
5919 * delalloc
5920 * @inode: inode we're writing to
5921 * @start: start of the range we are writing to
5922 * @len: length of the range we are writing to
5923 *
5924 * This is the entry point for callers needing both reservations.
5925 *
5926 * It will do the following things:
5927 *
5928 * o reserve space in the data space info for len bytes
5929 * and reserve the corresponding qgroup space
5930 * (done in btrfs_check_data_free_space)
5931 *
5932 * o reserve space for metadata space, based on the number of outstanding
5933 * extents and how many csums will be needed;
5934 * also reserve metadata space in a per-root over-reserve method
5935 * o add to the inode's ->delalloc_bytes
5936 * o add it to the fs_info's delalloc inodes list
5937 * (the above three are all done in btrfs_delalloc_reserve_metadata)
5938 *
5939 * Return 0 for success
5940 * Return <0 for error (-ENOSPC or -EDQUOT)
5941 */
5942int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
5943{
5944 int ret;
5945
5946 ret = btrfs_check_data_free_space(inode, start, len);
5947 if (ret < 0)
5948 return ret;
5949 ret = btrfs_delalloc_reserve_metadata(inode, len);
5950 if (ret < 0)
5951 btrfs_free_reserved_data_space(inode, start, len);
5952 return ret;
5953}
5954
5955/**
5956 * btrfs_delalloc_release_space - release data and metadata space for delalloc
5957 * @inode: inode we're releasing space for
5958 * @start: start position of the space already reserved
5959 * @len: the length of the space already reserved
5960 *
5961 * This must be matched with a call to btrfs_delalloc_reserve_space. This is
5962 * called in the case that we don't need the metadata AND data reservations
5963 * anymore, such as after an error or when we insert an inline extent.
5964 *
5965 * This function will release the metadata space that was not used and will
5966 * decrement ->delalloc_bytes and remove the inode from the fs_info delalloc
5967 * inodes list if there are no delalloc bytes left.
5968 * It will also handle the qgroup reserved space.
5969 */
5970void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len)
5971{
5972 btrfs_delalloc_release_metadata(inode, len);
5973 btrfs_free_reserved_data_space(inode, start, len);
5974}
5975
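/*
 * Account for num_bytes at bytenr being allocated (alloc != 0) or freed:
 * update the super block's bytes_used, the affected block groups and their
 * space_info, mark those block groups dirty, pin freed space until the
 * transaction commits, and queue now-empty block groups for removal.
 */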
5976static int update_block_group(struct btrfs_trans_handle *trans,
5977 struct btrfs_root *root, u64 bytenr,
5978 u64 num_bytes, int alloc)
5979{
5980 struct btrfs_block_group_cache *cache = NULL;
5981 struct btrfs_fs_info *info = root->fs_info;
5982 u64 total = num_bytes;
5983 u64 old_val;
5984 u64 byte_in_group;
5985 int factor;
5986
5987 /* Block accounting for the super block */
5988 spin_lock(&info->delalloc_root_lock);
5989 old_val = btrfs_super_bytes_used(info->super_copy);
5990 if (alloc)
5991 old_val += num_bytes;
5992 else
5993 old_val -= num_bytes;
5994 btrfs_set_super_bytes_used(info->super_copy, old_val);
5995 spin_unlock(&info->delalloc_root_lock);
5996
5997 while (total) {
5998 cache = btrfs_lookup_block_group(info, bytenr);
5999 if (!cache)
6000 return -ENOENT;
6001 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
6002 BTRFS_BLOCK_GROUP_RAID1 |
6003 BTRFS_BLOCK_GROUP_RAID10))
6004 factor = 2;
6005 else
6006 factor = 1;
6007 /*
6008 * If this block group has free space cache written out, we
6009 * need to make sure to load it if we are removing space. This
6010 * is because we need the unpinning stage to actually add the
6011 * space back to the block group, otherwise we will leak space.
6012 */
6013 if (!alloc && cache->cached == BTRFS_CACHE_NO)
6014 cache_block_group(cache, 1);
6015
6016 byte_in_group = bytenr - cache->key.objectid;
6017 WARN_ON(byte_in_group > cache->key.offset);
6018
6019 spin_lock(&cache->space_info->lock);
6020 spin_lock(&cache->lock);
6021
6022 if (btrfs_test_opt(root, SPACE_CACHE) &&
6023 cache->disk_cache_state < BTRFS_DC_CLEAR)
6024 cache->disk_cache_state = BTRFS_DC_CLEAR;
6025
6026 old_val = btrfs_block_group_used(&cache->item);
6027 num_bytes = min(total, cache->key.offset - byte_in_group);
6028 if (alloc) {
6029 old_val += num_bytes;
6030 btrfs_set_block_group_used(&cache->item, old_val);
6031 cache->reserved -= num_bytes;
6032 cache->space_info->bytes_reserved -= num_bytes;
6033 cache->space_info->bytes_used += num_bytes;
6034 cache->space_info->disk_used += num_bytes * factor;
6035 spin_unlock(&cache->lock);
6036 spin_unlock(&cache->space_info->lock);
6037 } else {
6038 old_val -= num_bytes;
6039 btrfs_set_block_group_used(&cache->item, old_val);
6040 cache->pinned += num_bytes;
6041 cache->space_info->bytes_pinned += num_bytes;
6042 cache->space_info->bytes_used -= num_bytes;
6043 cache->space_info->disk_used -= num_bytes * factor;
6044 spin_unlock(&cache->lock);
6045 spin_unlock(&cache->space_info->lock);
6046
6047 set_extent_dirty(info->pinned_extents,
6048 bytenr, bytenr + num_bytes - 1,
6049 GFP_NOFS | __GFP_NOFAIL);
6050 }
6051
6052 spin_lock(&trans->transaction->dirty_bgs_lock);
6053 if (list_empty(&cache->dirty_list)) {
6054 list_add_tail(&cache->dirty_list,
6055 &trans->transaction->dirty_bgs);
6056 trans->transaction->num_dirty_bgs++;
6057 btrfs_get_block_group(cache);
6058 }
6059 spin_unlock(&trans->transaction->dirty_bgs_lock);
6060
6061 /*
6062 * No longer have used bytes in this block group, queue it for
6063 * deletion. We do this after adding the block group to the
6064 * dirty list to avoid races between the cleaner kthread and the
6065 * space cache writeout.
6066 */
6067 if (!alloc && old_val == 0) {
6068 spin_lock(&info->unused_bgs_lock);
6069 if (list_empty(&cache->bg_list)) {
6070 btrfs_get_block_group(cache);
6071 list_add_tail(&cache->bg_list,
6072 &info->unused_bgs);
6073 }
6074 spin_unlock(&info->unused_bgs_lock);
6075 }
6076
6077 btrfs_put_block_group(cache);
6078 total -= num_bytes;
6079 bytenr += num_bytes;
6080 }
6081 return 0;
6082}
6083
6084static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
6085{
6086 struct btrfs_block_group_cache *cache;
6087 u64 bytenr;
6088
6089 spin_lock(&root->fs_info->block_group_cache_lock);
6090 bytenr = root->fs_info->first_logical_byte;
6091 spin_unlock(&root->fs_info->block_group_cache_lock);
6092
6093 if (bytenr < (u64)-1)
6094 return bytenr;
6095
6096 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
6097 if (!cache)
6098 return 0;
6099
6100 bytenr = cache->key.objectid;
6101 btrfs_put_block_group(cache);
6102
6103 return bytenr;
6104}
6105
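/*
 * Move num_bytes at bytenr in this block group into the pinned counters
 * (taking it out of the reserved space if @reserved says so) and mark the
 * range dirty in the current pinned extents tree so it is given back to the
 * free space cache when the transaction commits.
 */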
6106static int pin_down_extent(struct btrfs_root *root,
6107 struct btrfs_block_group_cache *cache,
6108 u64 bytenr, u64 num_bytes, int reserved)
6109{
6110 spin_lock(&cache->space_info->lock);
6111 spin_lock(&cache->lock);
6112 cache->pinned += num_bytes;
6113 cache->space_info->bytes_pinned += num_bytes;
6114 if (reserved) {
6115 cache->reserved -= num_bytes;
6116 cache->space_info->bytes_reserved -= num_bytes;
6117 }
6118 spin_unlock(&cache->lock);
6119 spin_unlock(&cache->space_info->lock);
6120
6121 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
6122 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6123 if (reserved)
6124 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
6125 return 0;
6126}
6127
6128/*
6129 * This function must be called within a transaction.
6130 */
6131int btrfs_pin_extent(struct btrfs_root *root,
6132 u64 bytenr, u64 num_bytes, int reserved)
6133{
6134 struct btrfs_block_group_cache *cache;
6135
6136 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
6137 BUG_ON(!cache);
6138
6139 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
6140
6141 btrfs_put_block_group(cache);
6142 return 0;
6143}
6144
6145/*
6146 * This function must be called within a transaction.
6147 */
6148int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
6149 u64 bytenr, u64 num_bytes)
6150{
6151 struct btrfs_block_group_cache *cache;
6152 int ret;
6153
6154 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
6155 if (!cache)
6156 return -EINVAL;
6157
6158 /*
6159 * Pull in the free space cache (if any) so that our pin removes
6160 * the free space from the cache. We have load_only set to one
6161 * because the slow code to read in the free extents does check
6162 * the pinned extents.
6163 */
6164 cache_block_group(cache, 1);
6165
6166 pin_down_extent(root, cache, bytenr, num_bytes, 0);
6167
6168 /* Remove us from the free space cache (if we're there at all) */
6169 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
6170 btrfs_put_block_group(cache);
6171 return ret;
6172}
6173
6174static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
6175{
6176 int ret;
6177 struct btrfs_block_group_cache *block_group;
6178 struct btrfs_caching_control *caching_ctl;
6179
6180 block_group = btrfs_lookup_block_group(root->fs_info, start);
6181 if (!block_group)
6182 return -EINVAL;
6183
6184 cache_block_group(block_group, 0);
6185 caching_ctl = get_caching_control(block_group);
6186
6187 if (!caching_ctl) {
6188 /* Logic error: no caching control means caching must have finished */
6189 BUG_ON(!block_group_cache_done(block_group));
6190 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6191 } else {
6192 mutex_lock(&caching_ctl->mutex);
6193
6194 if (start >= caching_ctl->progress) {
6195 ret = add_excluded_extent(root, start, num_bytes);
6196 } else if (start + num_bytes <= caching_ctl->progress) {
6197 ret = btrfs_remove_free_space(block_group,
6198 start, num_bytes);
6199 } else {
6200 num_bytes = caching_ctl->progress - start;
6201 ret = btrfs_remove_free_space(block_group,
6202 start, num_bytes);
6203 if (ret)
6204 goto out_lock;
6205
6206 num_bytes = (start + num_bytes) -
6207 caching_ctl->progress;
6208 start = caching_ctl->progress;
6209 ret = add_excluded_extent(root, start, num_bytes);
6210 }
6211out_lock:
6212 mutex_unlock(&caching_ctl->mutex);
6213 put_caching_control(caching_ctl);
6214 }
6215 btrfs_put_block_group(block_group);
6216 return ret;
6217}
6218
6219int btrfs_exclude_logged_extents(struct btrfs_root *log,
6220 struct extent_buffer *eb)
6221{
6222 struct btrfs_file_extent_item *item;
6223 struct btrfs_key key;
6224 int found_type;
6225 int i;
6226
6227 if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
6228 return 0;
6229
6230 for (i = 0; i < btrfs_header_nritems(eb); i++) {
6231 btrfs_item_key_to_cpu(eb, &key, i);
6232 if (key.type != BTRFS_EXTENT_DATA_KEY)
6233 continue;
6234 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
6235 found_type = btrfs_file_extent_type(eb, item);
6236 if (found_type == BTRFS_FILE_EXTENT_INLINE)
6237 continue;
6238 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
6239 continue;
6240 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
6241 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
6242 __exclude_logged_extent(log, key.objectid, key.offset);
6243 }
6244
6245 return 0;
6246}
6247
6248static void
6249btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
6250{
6251 atomic_inc(&bg->reservations);
6252}
6253
6254void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
6255 const u64 start)
6256{
6257 struct btrfs_block_group_cache *bg;
6258
6259 bg = btrfs_lookup_block_group(fs_info, start);
6260 ASSERT(bg);
6261 if (atomic_dec_and_test(&bg->reservations))
6262 wake_up_atomic_t(&bg->reservations);
6263 btrfs_put_block_group(bg);
6264}
6265
6266static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
6267{
6268 schedule();
6269 return 0;
6270}
6271
6272void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6273{
6274 struct btrfs_space_info *space_info = bg->space_info;
6275
6276 ASSERT(bg->ro);
6277
6278 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
6279 return;
6280
6281 /*
6282 * Our block group is read-only, but before we set it to read-only,
6283 * some task might have allocated an extent from it already, but has
6284 * not yet created a respective ordered extent (and added it to a
6285 * root's list of ordered extents).
6286 * Therefore wait for any task currently allocating extents, since the
6287 * block group's reservations counter is incremented while a read lock
6288 * on the group's semaphore is held and decremented after releasing
6289 * the read access on that semaphore and creating the ordered extent.
6290 */
6291 down_write(&space_info->groups_sem);
6292 up_write(&space_info->groups_sem);
6293
6294 wait_on_atomic_t(&bg->reservations,
6295 btrfs_wait_bg_reservations_atomic_t,
6296 TASK_UNINTERRUPTIBLE);
6297}
6298
6299/**
6300 * btrfs_update_reserved_bytes - update the block_group and space info counters
6301 * @cache: The cache we are manipulating
6302 * @num_bytes: The number of bytes in question
6303 * @reserve: One of the reservation enums
6304 * @delalloc: The blocks are allocated for a delalloc write
6305 *
6306 * This is called by the allocator when it reserves space, or by somebody who
6307 * is freeing space that was never actually used on disk. For example, if you
6308 * reserve some space for a new leaf in transaction A and before transaction A
6309 * commits you free that leaf, you call this with reserve set to RESERVE_FREE
6310 * in order to clear the reservation.
6311 *
6312 * Metadata reservations should be called with RESERVE_ALLOC so we do the
6313 * proper ENOSPC accounting. For data we handle the reservation through
6314 * clearing the delalloc bits in the io_tree. We have to do this since we
6315 * could end up allocating less disk space for the amount of data we have
6316 * reserved in the case of compression.
6317 *
6318 * If this is a reservation and the block group has become read only we cannot
6319 * make the reservation and return -EAGAIN, otherwise this function always
6320 * succeeds.
6321 */
6322static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
6323 u64 num_bytes, int reserve, int delalloc)
6324{
6325 struct btrfs_space_info *space_info = cache->space_info;
6326 int ret = 0;
6327
6328 spin_lock(&space_info->lock);
6329 spin_lock(&cache->lock);
6330 if (reserve != RESERVE_FREE) {
6331 if (cache->ro) {
6332 ret = -EAGAIN;
6333 } else {
6334 cache->reserved += num_bytes;
6335 space_info->bytes_reserved += num_bytes;
6336 if (reserve == RESERVE_ALLOC) {
6337 trace_btrfs_space_reservation(cache->fs_info,
6338 "space_info", space_info->flags,
6339 num_bytes, 0);
6340 space_info->bytes_may_use -= num_bytes;
6341 }
6342
6343 if (delalloc)
6344 cache->delalloc_bytes += num_bytes;
6345 }
6346 } else {
6347 if (cache->ro)
6348 space_info->bytes_readonly += num_bytes;
6349 cache->reserved -= num_bytes;
6350 space_info->bytes_reserved -= num_bytes;
6351
6352 if (delalloc)
6353 cache->delalloc_bytes -= num_bytes;
6354 }
6355 spin_unlock(&cache->lock);
6356 spin_unlock(&space_info->lock);
6357 return ret;
6358}
6359
6360void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
6361 struct btrfs_root *root)
6362{
6363 struct btrfs_fs_info *fs_info = root->fs_info;
6364 struct btrfs_caching_control *next;
6365 struct btrfs_caching_control *caching_ctl;
6366 struct btrfs_block_group_cache *cache;
6367
6368 down_write(&fs_info->commit_root_sem);
6369
6370 list_for_each_entry_safe(caching_ctl, next,
6371 &fs_info->caching_block_groups, list) {
6372 cache = caching_ctl->block_group;
6373 if (block_group_cache_done(cache)) {
6374 cache->last_byte_to_unpin = (u64)-1;
6375 list_del_init(&caching_ctl->list);
6376 put_caching_control(caching_ctl);
6377 } else {
6378 cache->last_byte_to_unpin = caching_ctl->progress;
6379 }
6380 }
6381
6382 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6383 fs_info->pinned_extents = &fs_info->freed_extents[1];
6384 else
6385 fs_info->pinned_extents = &fs_info->freed_extents[0];
6386
6387 up_write(&fs_info->commit_root_sem);
6388
6389 update_global_block_rsv(fs_info);
6390}
6391
6392/*
6393 * Returns the free cluster for the given space info and sets empty_cluster
6394 * to what it should be based on the mount options.
6395 */
6396static struct btrfs_free_cluster *
6397fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
6398 u64 *empty_cluster)
6399{
6400 struct btrfs_free_cluster *ret = NULL;
6401 bool ssd = btrfs_test_opt(root, SSD);
6402
6403 *empty_cluster = 0;
6404 if (btrfs_mixed_space_info(space_info))
6405 return ret;
6406
6407 if (ssd)
6408 *empty_cluster = SZ_2M;
6409 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
6410 ret = &root->fs_info->meta_alloc_cluster;
6411 if (!ssd)
6412 *empty_cluster = SZ_64K;
6413 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
6414 ret = &root->fs_info->data_alloc_cluster;
6415 }
6416
6417 return ret;
6418}
6419
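/*
 * Unpin the range [start, end]: return the space to the free space cache
 * when return_free_space is set, drop the pinned counters, and let unpinned
 * space refill the global block reserve before anything else.
 */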
6420static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
6421 const bool return_free_space)
6422{
6423 struct btrfs_fs_info *fs_info = root->fs_info;
6424 struct btrfs_block_group_cache *cache = NULL;
6425 struct btrfs_space_info *space_info;
6426 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6427 struct btrfs_free_cluster *cluster = NULL;
6428 u64 len;
6429 u64 total_unpinned = 0;
6430 u64 empty_cluster = 0;
6431 bool readonly;
6432
6433 while (start <= end) {
6434 readonly = false;
6435 if (!cache ||
6436 start >= cache->key.objectid + cache->key.offset) {
6437 if (cache)
6438 btrfs_put_block_group(cache);
6439 total_unpinned = 0;
6440 cache = btrfs_lookup_block_group(fs_info, start);
6441 BUG_ON(!cache);
6442
6443 cluster = fetch_cluster_info(root,
6444 cache->space_info,
6445 &empty_cluster);
6446 empty_cluster <<= 1;
6447 }
6448
6449 len = cache->key.objectid + cache->key.offset - start;
6450 len = min(len, end + 1 - start);
6451
6452 if (start < cache->last_byte_to_unpin) {
6453 len = min(len, cache->last_byte_to_unpin - start);
6454 if (return_free_space)
6455 btrfs_add_free_space(cache, start, len);
6456 }
6457
6458 start += len;
6459 total_unpinned += len;
6460 space_info = cache->space_info;
6461
6462 /*
6463 * If this space cluster has been marked as fragmented and we've
6464 * unpinned enough in this block group to potentially allow a
6465 * cluster to be created inside of it, go ahead and clear the
6466 * fragmented flag.
6467 */
6468 if (cluster && cluster->fragmented &&
6469 total_unpinned > empty_cluster) {
6470 spin_lock(&cluster->lock);
6471 cluster->fragmented = 0;
6472 spin_unlock(&cluster->lock);
6473 }
6474
6475 spin_lock(&space_info->lock);
6476 spin_lock(&cache->lock);
6477 cache->pinned -= len;
6478 space_info->bytes_pinned -= len;
6479 space_info->max_extent_size = 0;
6480 percpu_counter_add(&space_info->total_bytes_pinned, -len);
6481 if (cache->ro) {
6482 space_info->bytes_readonly += len;
6483 readonly = true;
6484 }
6485 spin_unlock(&cache->lock);
6486 if (!readonly && global_rsv->space_info == space_info) {
6487 spin_lock(&global_rsv->lock);
6488 if (!global_rsv->full) {
6489 len = min(len, global_rsv->size -
6490 global_rsv->reserved);
6491 global_rsv->reserved += len;
6492 space_info->bytes_may_use += len;
6493 if (global_rsv->reserved >= global_rsv->size)
6494 global_rsv->full = 1;
6495 }
6496 spin_unlock(&global_rsv->lock);
6497 }
6498 spin_unlock(&space_info->lock);
6499 }
6500
6501 if (cache)
6502 btrfs_put_block_group(cache);
6503 return 0;
6504}
6505
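/*
 * Called during transaction commit to unpin all the extents that were freed
 * in this transaction (optionally discarding them) and to discard the space
 * of any block groups that were deleted by the transaction.
 */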
6506int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
6507 struct btrfs_root *root)
6508{
6509 struct btrfs_fs_info *fs_info = root->fs_info;
6510 struct btrfs_block_group_cache *block_group, *tmp;
6511 struct list_head *deleted_bgs;
6512 struct extent_io_tree *unpin;
6513 u64 start;
6514 u64 end;
6515 int ret;
6516
6517 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6518 unpin = &fs_info->freed_extents[1];
6519 else
6520 unpin = &fs_info->freed_extents[0];
6521
6522 while (!trans->aborted) {
6523 mutex_lock(&fs_info->unused_bg_unpin_mutex);
6524 ret = find_first_extent_bit(unpin, 0, &start, &end,
6525 EXTENT_DIRTY, NULL);
6526 if (ret) {
6527 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6528 break;
6529 }
6530
6531 if (btrfs_test_opt(root, DISCARD))
6532 ret = btrfs_discard_extent(root, start,
6533 end + 1 - start, NULL);
6534
6535 clear_extent_dirty(unpin, start, end);
6536 unpin_extent_range(root, start, end, true);
6537 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6538 cond_resched();
6539 }
6540
6541 /*
6542 * Transaction is finished. We don't need the lock anymore. We do
6543 * need to clean up the block groups in case of a transaction
6544 * abort.
6545 */
6546 deleted_bgs = &trans->transaction->deleted_bgs;
6547 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6548 u64 trimmed = 0;
6549
6550 ret = -EROFS;
6551 if (!trans->aborted)
6552 ret = btrfs_discard_extent(root,
6553 block_group->key.objectid,
6554 block_group->key.offset,
6555 &trimmed);
6556
6557 list_del_init(&block_group->bg_list);
6558 btrfs_put_block_group_trimming(block_group);
6559 btrfs_put_block_group(block_group);
6560
6561 if (ret) {
6562 const char *errstr = btrfs_decode_error(ret);
6563 btrfs_warn(fs_info,
6564 "discard failed while removing block group: errno=%d %s",
6565 ret, errstr);
6566 }
6567 }
6568
6569 return 0;
6570}
6571
6572static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
6573 u64 owner, u64 root_objectid)
6574{
6575 struct btrfs_space_info *space_info;
6576 u64 flags;
6577
6578 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6579 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
6580 flags = BTRFS_BLOCK_GROUP_SYSTEM;
6581 else
6582 flags = BTRFS_BLOCK_GROUP_METADATA;
6583 } else {
6584 flags = BTRFS_BLOCK_GROUP_DATA;
6585 }
6586
6587 space_info = __find_space_info(fs_info, flags);
6588 BUG_ON(!space_info);
6589 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
6590}
6591
6592
6593static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6594 struct btrfs_root *root,
6595 struct btrfs_delayed_ref_node *node, u64 parent,
6596 u64 root_objectid, u64 owner_objectid,
6597 u64 owner_offset, int refs_to_drop,
6598 struct btrfs_delayed_extent_op *extent_op)
6599{
6600 struct btrfs_key key;
6601 struct btrfs_path *path;
6602 struct btrfs_fs_info *info = root->fs_info;
6603 struct btrfs_root *extent_root = info->extent_root;
6604 struct extent_buffer *leaf;
6605 struct btrfs_extent_item *ei;
6606 struct btrfs_extent_inline_ref *iref;
6607 int ret;
6608 int is_data;
6609 int extent_slot = 0;
6610 int found_extent = 0;
6611 int num_to_del = 1;
6612 u32 item_size;
6613 u64 refs;
6614 u64 bytenr = node->bytenr;
6615 u64 num_bytes = node->num_bytes;
6616 int last_ref = 0;
6617 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6618 SKINNY_METADATA);
6619
6620 path = btrfs_alloc_path();
6621 if (!path)
6622 return -ENOMEM;
6623
6624 path->reada = READA_FORWARD;
6625 path->leave_spinning = 1;
6626
6627 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6628 BUG_ON(!is_data && refs_to_drop != 1);
6629
6630 if (is_data)
6631 skinny_metadata = 0;
6632
6633 ret = lookup_extent_backref(trans, extent_root, path, &iref,
6634 bytenr, num_bytes, parent,
6635 root_objectid, owner_objectid,
6636 owner_offset);
6637 if (ret == 0) {
6638 extent_slot = path->slots[0];
6639 while (extent_slot >= 0) {
6640 btrfs_item_key_to_cpu(path->nodes[0], &key,
6641 extent_slot);
6642 if (key.objectid != bytenr)
6643 break;
6644 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6645 key.offset == num_bytes) {
6646 found_extent = 1;
6647 break;
6648 }
6649 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6650 key.offset == owner_objectid) {
6651 found_extent = 1;
6652 break;
6653 }
6654 if (path->slots[0] - extent_slot > 5)
6655 break;
6656 extent_slot--;
6657 }
6658#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6659 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
6660 if (found_extent && item_size < sizeof(*ei))
6661 found_extent = 0;
6662#endif
6663 if (!found_extent) {
6664 BUG_ON(iref);
6665 ret = remove_extent_backref(trans, extent_root, path,
6666 NULL, refs_to_drop,
6667 is_data, &last_ref);
6668 if (ret) {
6669 btrfs_abort_transaction(trans, extent_root, ret);
6670 goto out;
6671 }
6672 btrfs_release_path(path);
6673 path->leave_spinning = 1;
6674
6675 key.objectid = bytenr;
6676 key.type = BTRFS_EXTENT_ITEM_KEY;
6677 key.offset = num_bytes;
6678
6679 if (!is_data && skinny_metadata) {
6680 key.type = BTRFS_METADATA_ITEM_KEY;
6681 key.offset = owner_objectid;
6682 }
6683
6684 ret = btrfs_search_slot(trans, extent_root,
6685 &key, path, -1, 1);
6686 if (ret > 0 && skinny_metadata && path->slots[0]) {
6687 /*
6688 * Couldn't find our skinny metadata item,
6689 * see if we have ye olde extent item.
6690 */
6691 path->slots[0]--;
6692 btrfs_item_key_to_cpu(path->nodes[0], &key,
6693 path->slots[0]);
6694 if (key.objectid == bytenr &&
6695 key.type == BTRFS_EXTENT_ITEM_KEY &&
6696 key.offset == num_bytes)
6697 ret = 0;
6698 }
6699
6700 if (ret > 0 && skinny_metadata) {
6701 skinny_metadata = false;
6702 key.objectid = bytenr;
6703 key.type = BTRFS_EXTENT_ITEM_KEY;
6704 key.offset = num_bytes;
6705 btrfs_release_path(path);
6706 ret = btrfs_search_slot(trans, extent_root,
6707 &key, path, -1, 1);
6708 }
6709
6710 if (ret) {
6711 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
6712 ret, bytenr);
6713 if (ret > 0)
6714 btrfs_print_leaf(extent_root,
6715 path->nodes[0]);
6716 }
6717 if (ret < 0) {
6718 btrfs_abort_transaction(trans, extent_root, ret);
6719 goto out;
6720 }
6721 extent_slot = path->slots[0];
6722 }
6723 } else if (WARN_ON(ret == -ENOENT)) {
6724 btrfs_print_leaf(extent_root, path->nodes[0]);
6725 btrfs_err(info,
6726 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
6727 bytenr, parent, root_objectid, owner_objectid,
6728 owner_offset);
6729 btrfs_abort_transaction(trans, extent_root, ret);
6730 goto out;
6731 } else {
6732 btrfs_abort_transaction(trans, extent_root, ret);
6733 goto out;
6734 }
6735
6736 leaf = path->nodes[0];
6737 item_size = btrfs_item_size_nr(leaf, extent_slot);
6738#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6739 if (item_size < sizeof(*ei)) {
6740 BUG_ON(found_extent || extent_slot != path->slots[0]);
6741 ret = convert_extent_item_v0(trans, extent_root, path,
6742 owner_objectid, 0);
6743 if (ret < 0) {
6744 btrfs_abort_transaction(trans, extent_root, ret);
6745 goto out;
6746 }
6747
6748 btrfs_release_path(path);
6749 path->leave_spinning = 1;
6750
6751 key.objectid = bytenr;
6752 key.type = BTRFS_EXTENT_ITEM_KEY;
6753 key.offset = num_bytes;
6754
6755 ret = btrfs_search_slot(trans, extent_root, &key, path,
6756 -1, 1);
6757 if (ret) {
6758 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
6759 ret, bytenr);
6760 btrfs_print_leaf(extent_root, path->nodes[0]);
6761 }
6762 if (ret < 0) {
6763 btrfs_abort_transaction(trans, extent_root, ret);
6764 goto out;
6765 }
6766
6767 extent_slot = path->slots[0];
6768 leaf = path->nodes[0];
6769 item_size = btrfs_item_size_nr(leaf, extent_slot);
6770 }
6771#endif
6772 BUG_ON(item_size < sizeof(*ei));
6773 ei = btrfs_item_ptr(leaf, extent_slot,
6774 struct btrfs_extent_item);
6775 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
6776 key.type == BTRFS_EXTENT_ITEM_KEY) {
6777 struct btrfs_tree_block_info *bi;
6778 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
6779 bi = (struct btrfs_tree_block_info *)(ei + 1);
6780 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
6781 }
6782
6783 refs = btrfs_extent_refs(leaf, ei);
6784 if (refs < refs_to_drop) {
6785 btrfs_err(info, "trying to drop %d refs but we only have %llu for bytenr %llu",
6786 refs_to_drop, refs, bytenr);
6787 ret = -EINVAL;
6788 btrfs_abort_transaction(trans, extent_root, ret);
6789 goto out;
6790 }
6791 refs -= refs_to_drop;
6792
6793 if (refs > 0) {
6794 if (extent_op)
6795 __run_delayed_extent_op(extent_op, leaf, ei);
6796 /*
6797 * In the case of an inline back ref, the reference count will
6798 * be updated by remove_extent_backref.
6799 */
6800 if (iref) {
6801 BUG_ON(!found_extent);
6802 } else {
6803 btrfs_set_extent_refs(leaf, ei, refs);
6804 btrfs_mark_buffer_dirty(leaf);
6805 }
6806 if (found_extent) {
6807 ret = remove_extent_backref(trans, extent_root, path,
6808 iref, refs_to_drop,
6809 is_data, &last_ref);
6810 if (ret) {
6811 btrfs_abort_transaction(trans, extent_root, ret);
6812 goto out;
6813 }
6814 }
6815 add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
6816 root_objectid);
6817 } else {
6818 if (found_extent) {
6819 BUG_ON(is_data && refs_to_drop !=
6820 extent_data_ref_count(path, iref));
6821 if (iref) {
6822 BUG_ON(path->slots[0] != extent_slot);
6823 } else {
6824 BUG_ON(path->slots[0] != extent_slot + 1);
6825 path->slots[0] = extent_slot;
6826 num_to_del = 2;
6827 }
6828 }
6829
6830 last_ref = 1;
6831 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
6832 num_to_del);
6833 if (ret) {
6834 btrfs_abort_transaction(trans, extent_root, ret);
6835 goto out;
6836 }
6837 btrfs_release_path(path);
6838
6839 if (is_data) {
6840 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
6841 if (ret) {
6842 btrfs_abort_transaction(trans, extent_root, ret);
6843 goto out;
6844 }
6845 }
6846
6847 ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
6848 num_bytes);
6849 if (ret) {
6850 btrfs_abort_transaction(trans, extent_root, ret);
6851 goto out;
6852 }
6853
6854 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
6855 if (ret) {
6856 btrfs_abort_transaction(trans, extent_root, ret);
6857 goto out;
6858 }
6859 }
6860 btrfs_release_path(path);
6861
6862out:
6863 btrfs_free_path(path);
6864 return ret;
6865}
6866
6867
6868
6869
6870
6871
6872
6873static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
6874 struct btrfs_root *root, u64 bytenr)
6875{
6876 struct btrfs_delayed_ref_head *head;
6877 struct btrfs_delayed_ref_root *delayed_refs;
6878 int ret = 0;
6879
6880 delayed_refs = &trans->transaction->delayed_refs;
6881 spin_lock(&delayed_refs->lock);
6882 head = btrfs_find_delayed_ref_head(trans, bytenr);
6883 if (!head)
6884 goto out_delayed_unlock;
6885
6886 spin_lock(&head->lock);
6887 if (!list_empty(&head->ref_list))
6888 goto out;
6889
6890 if (head->extent_op) {
6891 if (!head->must_insert_reserved)
6892 goto out;
6893 btrfs_free_delayed_extent_op(head->extent_op);
6894 head->extent_op = NULL;
6895 }
6896
6897
6898
6899
6900
6901 if (!mutex_trylock(&head->mutex))
6902 goto out;
6903
6904
6905
6906
6907
6908 head->node.in_tree = 0;
6909 rb_erase(&head->href_node, &delayed_refs->href_root);
6910
6911 atomic_dec(&delayed_refs->num_entries);
6912
6913
6914
6915
6916
6917 delayed_refs->num_heads--;
6918 if (head->processing == 0)
6919 delayed_refs->num_heads_ready--;
6920 head->processing = 0;
6921 spin_unlock(&head->lock);
6922 spin_unlock(&delayed_refs->lock);
6923
6924 BUG_ON(head->extent_op);
6925 if (head->must_insert_reserved)
6926 ret = 1;
6927
6928 mutex_unlock(&head->mutex);
6929 btrfs_put_delayed_ref(&head->node);
6930 return ret;
6931out:
6932 spin_unlock(&head->lock);
6933
6934out_delayed_unlock:
6935 spin_unlock(&delayed_refs->lock);
6936 return 0;
6937}
6938
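/*
 * btrfs_free_tree_block - drop one reference on a tree block.
 *
 * For non-log trees this queues a delayed ref.  If @last_ref is set and the
 * block was allocated in the running transaction and never written out, the
 * space can be returned to the free space cache right away; otherwise the
 * block is pinned until the transaction commits.
 */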
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	int pin = 1;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
						 buf->start, buf->len,
						 parent, root->root_key.objectid,
						 btrfs_header_level(buf),
						 BTRFS_DROP_DELAYED_REF, NULL);
		BUG_ON(ret); /* -ENOMEM */
	}

	if (!last_ref)
		return;

	if (btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group_cache *cache;

		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, root, buf->start);
			if (!ret)
				goto out;
		}

		cache = btrfs_lookup_block_group(root->fs_info, buf->start);

		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(root, cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
		pin = 0;
	}
out:
	if (pin)
		add_pinned_bytes(root->fs_info, buf->len,
				 btrfs_header_level(buf),
				 root->root_key.objectid);

	/*
	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
	 * anymore.
	 */
	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
}

/* Can return -ENOMEM */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
		      u64 owner, u64 offset)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (btrfs_test_is_dummy_root(root))
		return 0;

	add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);

	/*
	 * tree log blocks never actually go into the extent allocation
	 * tree, just log blocks, so we shouldn't update block accounting
	 * here.
	 */
	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
		/* unlocks the pinned mutex */
		btrfs_pin_extent(root, bytenr, num_bytes, 1);
		ret = 0;
	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
						 num_bytes,
						 parent, root_objectid, (int)owner,
						 BTRFS_DROP_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						 num_bytes,
						 parent, root_objectid, owner,
						 offset, 0,
						 BTRFS_DROP_DELAYED_REF, NULL);
	}
	return ret;
}

/*
 * when we wait for progress in the block group caching, its because
 * our allocation attempt failed at least once.  So, we must sleep
 * and let some progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to
 * show up, and then it will check the block group free space numbers
 * for our min num_bytes.  Another option is to have it go ahead
 * and look in the rbtree for a free extent of a given size, but this
 * is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
 * any of the information in this block group.
 */
static noinline void
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
				u64 num_bytes)
{
	struct btrfs_caching_control *caching_ctl;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return;

	wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
		   (cache->free_space_ctl->free_space >= num_bytes));

	put_caching_control(caching_ctl);
}

static noinline int
wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;

	wait_event(caching_ctl->wait, block_group_cache_done(cache));
	if (cache->cached == BTRFS_CACHE_ERROR)
		ret = -EIO;
	put_caching_control(caching_ctl);
	return ret;
}

int __get_raid_index(u64 flags)
{
	if (flags & BTRFS_BLOCK_GROUP_RAID10)
		return BTRFS_RAID_RAID10;
	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
		return BTRFS_RAID_RAID1;
	else if (flags & BTRFS_BLOCK_GROUP_DUP)
		return BTRFS_RAID_DUP;
	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
		return BTRFS_RAID_RAID0;
	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
		return BTRFS_RAID_RAID5;
	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
		return BTRFS_RAID_RAID6;

	return BTRFS_RAID_SINGLE;
}

int get_block_group_index(struct btrfs_block_group_cache *cache)
{
	return __get_raid_index(cache->flags);
}

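/* human readable names for the raid profiles, indexed by enum btrfs_raid_types */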
static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10]	= "raid10",
	[BTRFS_RAID_RAID1]	= "raid1",
	[BTRFS_RAID_DUP]	= "dup",
	[BTRFS_RAID_RAID0]	= "raid0",
	[BTRFS_RAID_SINGLE]	= "single",
	[BTRFS_RAID_RAID5]	= "raid5",
	[BTRFS_RAID_RAID6]	= "raid6",
};

static const char *get_raid_name(enum btrfs_raid_types type)
{
	if (type >= BTRFS_NR_RAID_TYPES)
		return NULL;

	return btrfs_raid_type_names[type];
}

enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT = 0,
	LOOP_CACHING_WAIT = 1,
	LOOP_ALLOC_CHUNK = 2,
	LOOP_NO_EMPTY_SIZE = 3,
};

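/*
 * Helpers for the allocator below: delalloc allocations additionally hold the
 * block group's data_rwsem for read across the allocation, paired with
 * btrfs_release_block_group().
 */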
static inline void
btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	if (delalloc)
		down_read(&cache->data_rwsem);
}

static inline void
btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	btrfs_get_block_group(cache);
	if (delalloc)
		down_read(&cache->data_rwsem);
}

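/*
 * Grab a reference on the cluster's current block group and, for delalloc,
 * its data_rwsem.  Taking the rwsem can require dropping refill_lock, which
 * lets the cluster switch to another group, so recheck and retry until both
 * are held against the same group.
 */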
static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group_cache *used_bg = NULL;

	spin_lock(&cluster->refill_lock);
	while (1) {
		used_bg = cluster->block_group;
		if (!used_bg)
			return NULL;

		if (used_bg == block_group)
			return used_bg;

		btrfs_get_block_group(used_bg);

		if (!delalloc)
			return used_bg;

		if (down_read_trylock(&used_bg->data_rwsem))
			return used_bg;

		spin_unlock(&cluster->refill_lock);

		down_read(&used_bg->data_rwsem);

		spin_lock(&cluster->refill_lock);
		if (used_bg == cluster->block_group)
			return used_bg;

		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}
}

static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 */
static noinline int find_free_extent(struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 flags, int delalloc)
{
	int ret = 0;
	struct btrfs_root *root = orig_root->fs_info->extent_root;
	struct btrfs_free_cluster *last_ptr = NULL;
	struct btrfs_block_group_cache *block_group = NULL;
	u64 search_start = 0;
	u64 max_extent_size = 0;
	u64 empty_cluster = 0;
	struct btrfs_space_info *space_info;
	int loop = 0;
	int index = __get_raid_index(flags);
	int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
		RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
	bool failed_cluster_refill = false;
	bool failed_alloc = false;
	bool use_cluster = true;
	bool have_caching_bg = false;
	bool orig_have_caching_bg = false;
	bool full_search = false;

	WARN_ON(num_bytes < root->sectorsize);
	ins->type = BTRFS_EXTENT_ITEM_KEY;
	ins->objectid = 0;
	ins->offset = 0;

	trace_find_free_extent(orig_root, num_bytes, empty_size, flags);

	space_info = __find_space_info(root->fs_info, flags);
	if (!space_info) {
		btrfs_err(root->fs_info, "No space info for %llu", flags);
		return -ENOSPC;
	}

	/*
	 * If our free space is heavily fragmented we may not be able to make
	 * big contiguous allocations, so instead of doing the expensive search
	 * for free space, simply return ENOSPC with our max_extent_size so we
	 * can go ahead and search for a smaller chunk.
	 */
	if (unlikely(space_info->max_extent_size)) {
		spin_lock(&space_info->lock);
		if (space_info->max_extent_size &&
		    num_bytes > space_info->max_extent_size) {
			ins->offset = space_info->max_extent_size;
			spin_unlock(&space_info->lock);
			return -ENOSPC;
		} else if (space_info->max_extent_size) {
			use_cluster = false;
		}
		spin_unlock(&space_info->lock);
	}

	last_ptr = fetch_cluster_info(orig_root, space_info, &empty_cluster);
	if (last_ptr) {
		spin_lock(&last_ptr->lock);
		if (last_ptr->block_group)
			hint_byte = last_ptr->window_start;
		if (last_ptr->fragmented) {
			/*
			 * We still set window_start so we can keep track of the
			 * last place we found an allocation to try and save
			 * some time.
			 */
			hint_byte = last_ptr->window_start;
			use_cluster = false;
		}
		spin_unlock(&last_ptr->lock);
	}

	search_start = max(search_start, first_logical_byte(root, 0));
	search_start = max(search_start, hint_byte);
	if (search_start == hint_byte) {
		block_group = btrfs_lookup_block_group(root->fs_info,
						       search_start);
		/*
		 * we don't want to use the block group if it doesn't match our
		 * allocation bits, or if its not cached.
		 *
		 * However if we are re-searching with an ideal block group
		 * picked out then we don't care that the block group is cached.
		 */
		if (block_group && block_group_bits(block_group, flags) &&
		    block_group->cached != BTRFS_CACHE_NO) {
			down_read(&space_info->groups_sem);
			if (list_empty(&block_group->list) ||
			    block_group->ro) {
				/*
				 * someone is removing this block group,
				 * we can't jump into the have_block_group
				 * target because our list pointers are not
				 * valid
				 */
				btrfs_put_block_group(block_group);
				up_read(&space_info->groups_sem);
			} else {
				index = get_block_group_index(block_group);
				btrfs_lock_block_group(block_group, delalloc);
				goto have_block_group;
			}
		} else if (block_group) {
			btrfs_put_block_group(block_group);
		}
	}
search:
	have_caching_bg = false;
	if (index == 0 || index == __get_raid_index(flags))
		full_search = true;
	down_read(&space_info->groups_sem);
	list_for_each_entry(block_group, &space_info->block_groups[index],
			    list) {
		u64 offset;
		int cached;

		btrfs_grab_block_group(block_group, delalloc);
		search_start = block_group->key.objectid;

		/*
		 * this can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */
		if (!block_group_bits(block_group, flags)) {
			u64 extra = BTRFS_BLOCK_GROUP_DUP |
				BTRFS_BLOCK_GROUP_RAID1 |
				BTRFS_BLOCK_GROUP_RAID5 |
				BTRFS_BLOCK_GROUP_RAID6 |
				BTRFS_BLOCK_GROUP_RAID10;

			/*
			 * if they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */
			if ((flags & extra) && !(block_group->flags & extra))
				goto loop;
		}

have_block_group:
		cached = block_group_cache_done(block_group);
		if (unlikely(!cached)) {
			have_caching_bg = true;
			ret = cache_block_group(block_group, 0);
			BUG_ON(ret < 0);
			ret = 0;
		}

		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
			goto loop;
		if (unlikely(block_group->ro))
			goto loop;

		/*
		 * Ok we want to try and use the cluster allocator, so
		 * lets look there
		 */
		if (last_ptr && use_cluster) {
			struct btrfs_block_group_cache *used_block_group;
			unsigned long aligned_cluster;

			/*
			 * the refill lock keeps out other
			 * people trying to start a new cluster
			 */
			used_block_group = btrfs_lock_cluster(block_group,
							      last_ptr,
							      delalloc);
			if (!used_block_group)
				goto refill_cluster;

			if (used_block_group != block_group &&
			    (used_block_group->ro ||
			     !block_group_bits(used_block_group, flags)))
				goto release_cluster;

			offset = btrfs_alloc_from_cluster(used_block_group,
						last_ptr,
						num_bytes,
						used_block_group->key.objectid,
						&max_extent_size);
			if (offset) {
				/* we have a block, we're done */
				spin_unlock(&last_ptr->refill_lock);
				trace_btrfs_reserve_extent_cluster(root,
						used_block_group,
						search_start, num_bytes);
				if (used_block_group != block_group) {
					btrfs_release_block_group(block_group,
								  delalloc);
					block_group = used_block_group;
				}
				goto checks;
			}

			WARN_ON(last_ptr->block_group != used_block_group);
release_cluster:
			/* If we are on LOOP_NO_EMPTY_SIZE, we can't
			 * set up a new cluster, so lets just skip it
			 * and let the allocator find whatever block
			 * it can find. If we reach this point, we
			 * will have tried the cluster allocator
			 * plenty of times and not have found
			 * anything, so we are likely way too
			 * fragmented for the clustering stuff to find
			 * anything.
			 *
			 * However, if the cluster is taken from the
			 * current block group, release the cluster
			 * first, so that we can acquire the lock on the
			 * next block group.
			 */
			if (loop >= LOOP_NO_EMPTY_SIZE &&
			    used_block_group != block_group) {
				spin_unlock(&last_ptr->refill_lock);
				btrfs_release_block_group(used_block_group,
							  delalloc);
				goto unclustered_alloc;
			}

			/*
			 * this cluster didn't work out, free it and
			 * start over
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);

			if (used_block_group != block_group)
				btrfs_release_block_group(used_block_group,
							  delalloc);
refill_cluster:
			if (loop >= LOOP_NO_EMPTY_SIZE) {
				spin_unlock(&last_ptr->refill_lock);
				goto unclustered_alloc;
			}

			aligned_cluster = max_t(unsigned long,
						empty_cluster + empty_size,
						block_group->full_stripe_len);

			/* allocate a cluster in this block group */
			ret = btrfs_find_space_cluster(root, block_group,
						       last_ptr, search_start,
						       num_bytes,
						       aligned_cluster);
			if (ret == 0) {
				/*
				 * now pull our allocation out of this
				 * cluster
				 */
				offset = btrfs_alloc_from_cluster(block_group,
							last_ptr,
							num_bytes,
							search_start,
							&max_extent_size);
				if (offset) {
					/* we found one, proceed */
					spin_unlock(&last_ptr->refill_lock);
					trace_btrfs_reserve_extent_cluster(root,
						block_group, search_start,
						num_bytes);
					goto checks;
				}
			} else if (!cached && loop > LOOP_CACHING_NOWAIT
				   && !failed_cluster_refill) {
				spin_unlock(&last_ptr->refill_lock);

				failed_cluster_refill = true;
				wait_block_group_cache_progress(block_group,
				       num_bytes + empty_cluster + empty_size);
				goto have_block_group;
			}

			/*
			 * at this point we either didn't find a cluster
			 * or we weren't able to allocate a block from our
			 * cluster.  Free the cluster we've been trying
			 * to use, and go to the next block group
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);
			spin_unlock(&last_ptr->refill_lock);
			goto loop;
		}

unclustered_alloc:
		/*
		 * We are doing an unclustered alloc, set the fragmented flag so
		 * we don't bother trying to setup a cluster again until we get
		 * more space.
		 */
		if (unlikely(last_ptr)) {
			spin_lock(&last_ptr->lock);
			last_ptr->fragmented = 1;
			spin_unlock(&last_ptr->lock);
		}
		spin_lock(&block_group->free_space_ctl->tree_lock);
		if (cached &&
		    block_group->free_space_ctl->free_space <
		    num_bytes + empty_cluster + empty_size) {
			if (block_group->free_space_ctl->free_space >
			    max_extent_size)
				max_extent_size =
					block_group->free_space_ctl->free_space;
			spin_unlock(&block_group->free_space_ctl->tree_lock);
			goto loop;
		}
		spin_unlock(&block_group->free_space_ctl->tree_lock);

		offset = btrfs_find_space_for_alloc(block_group, search_start,
						    num_bytes, empty_size,
						    &max_extent_size);
		/*
		 * If we didn't find a chunk, and we haven't failed on this
		 * block group before, and this block group is in the middle of
		 * caching and we are ok with waiting, then go ahead and wait
		 * for progress to be made, and set failed_alloc to true.
		 *
		 * If failed_alloc is true then we've already waited on this
		 * block group once and should move on to the next block group.
		 */
		if (!offset && !failed_alloc && !cached &&
		    loop > LOOP_CACHING_NOWAIT) {
			wait_block_group_cache_progress(block_group,
						num_bytes + empty_size);
			failed_alloc = true;
			goto have_block_group;
		} else if (!offset) {
			goto loop;
		}
checks:
		search_start = ALIGN(offset, root->stripesize);

		/* move on to the next group */
		if (search_start + num_bytes >
		    block_group->key.objectid + block_group->key.offset) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}

		if (offset < search_start)
			btrfs_add_free_space(block_group, offset,
					     search_start - offset);
		BUG_ON(offset > search_start);

		ret = btrfs_update_reserved_bytes(block_group, num_bytes,
						  alloc_type, delalloc);
		if (ret == -EAGAIN) {
			btrfs_add_free_space(block_group, offset, num_bytes);
			goto loop;
		}
		btrfs_inc_block_group_reservations(block_group);

		/* we are all good, lets return */
		ins->objectid = search_start;
		ins->offset = num_bytes;

		trace_btrfs_reserve_extent(orig_root, block_group,
					   search_start, num_bytes);
		btrfs_release_block_group(block_group, delalloc);
		break;
loop:
		failed_cluster_refill = false;
		failed_alloc = false;
		BUG_ON(index != get_block_group_index(block_group));
		btrfs_release_block_group(block_group, delalloc);
	}
	up_read(&space_info->groups_sem);

	if ((loop == LOOP_CACHING_NOWAIT) && have_caching_bg &&
	    !orig_have_caching_bg)
		orig_have_caching_bg = true;

	if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
		goto search;

	if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
		goto search;

	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *			again
	 */
	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
		index = 0;
		if (loop == LOOP_CACHING_NOWAIT) {
			/*
			 * We want to skip the LOOP_CACHING_WAIT step if we
			 * don't have any uncached bgs and we've already done a
			 * full search through.
			 */
			if (orig_have_caching_bg || !full_search)
				loop = LOOP_CACHING_WAIT;
			else
				loop = LOOP_ALLOC_CHUNK;
		} else {
			loop++;
		}

		if (loop == LOOP_ALLOC_CHUNK) {
			struct btrfs_trans_handle *trans;
			int exist = 0;

			trans = current->journal_info;
			if (trans)
				exist = 1;
			else
				trans = btrfs_join_transaction(root);

			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}

			ret = do_chunk_alloc(trans, root, flags,
					     CHUNK_ALLOC_FORCE);

			/*
			 * If we can't allocate a new chunk we've already looped
			 * through at least once, move on to the NO_EMPTY_SIZE
			 * case.
			 */
			if (ret == -ENOSPC)
				loop = LOOP_NO_EMPTY_SIZE;

			/*
			 * Do not bail out on ENOSPC since we
			 * can do more things.
			 */
			if (ret < 0 && ret != -ENOSPC)
				btrfs_abort_transaction(trans,
							root, ret);
			else
				ret = 0;
			if (!exist)
				btrfs_end_transaction(trans, root);
			if (ret)
				goto out;
		}

		if (loop == LOOP_NO_EMPTY_SIZE) {
			/*
			 * Don't loop again if we already have no empty_size
			 * and no empty_cluster.
			 */
			if (empty_size == 0 &&
			    empty_cluster == 0) {
				ret = -ENOSPC;
				goto out;
			}
			empty_size = 0;
			empty_cluster = 0;
		}

		goto search;
	} else if (!ins->objectid) {
		ret = -ENOSPC;
	} else if (ins->objectid) {
		if (!use_cluster && last_ptr) {
			spin_lock(&last_ptr->lock);
			last_ptr->window_start = ins->objectid;
			spin_unlock(&last_ptr->lock);
		}
		ret = 0;
	}
out:
	if (ret == -ENOSPC) {
		spin_lock(&space_info->lock);
		space_info->max_extent_size = max_extent_size;
		spin_unlock(&space_info->lock);
		ins->offset = max_extent_size;
	}
	return ret;
}

static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups)
{
	struct btrfs_block_group_cache *cache;
	int index = 0;

	spin_lock(&info->lock);
	printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
	       info->flags,
	       info->total_bytes - info->bytes_used - info->bytes_pinned -
	       info->bytes_reserved - info->bytes_readonly,
	       (info->full) ? "" : "not ");
	printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
	       "reserved=%llu, may_use=%llu, readonly=%llu\n",
	       info->total_bytes, info->bytes_used, info->bytes_pinned,
	       info->bytes_reserved, info->bytes_may_use,
	       info->bytes_readonly);
	spin_unlock(&info->lock);

	if (!dump_block_groups)
		return;

	down_read(&info->groups_sem);
again:
	list_for_each_entry(cache, &info->block_groups[index], list) {
		spin_lock(&cache->lock);
		printk(KERN_INFO "BTRFS: "
		       "block group %llu has %llu bytes, "
		       "%llu used %llu pinned %llu reserved %s\n",
		       cache->key.objectid, cache->key.offset,
		       btrfs_block_group_used(&cache->item), cache->pinned,
		       cache->reserved, cache->ro ? "[readonly]" : "");
		btrfs_dump_free_space(cache, bytes);
		spin_unlock(&cache->lock);
	}
	if (++index < BTRFS_NR_RAID_TYPES)
		goto again;
	up_read(&info->groups_sem);
}

int btrfs_reserve_extent(struct btrfs_root *root,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc)
{
	bool final_tried = num_bytes == min_alloc_size;
	u64 flags;
	int ret;

	flags = btrfs_get_alloc_profile(root, is_data);
again:
	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
			       flags, delalloc);
	if (!ret && !is_data) {
		btrfs_dec_block_group_reservations(root->fs_info,
						   ins->objectid);
	} else if (ret == -ENOSPC) {
		if (!final_tried && ins->offset) {
			num_bytes = min(num_bytes >> 1, ins->offset);
			num_bytes = round_down(num_bytes, root->sectorsize);
			num_bytes = max(num_bytes, min_alloc_size);
			if (num_bytes == min_alloc_size)
				final_tried = true;
			goto again;
		} else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
			struct btrfs_space_info *sinfo;

			sinfo = __find_space_info(root->fs_info, flags);
			btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
				  flags, num_bytes);
			if (sinfo)
				dump_space_info(sinfo, num_bytes, 1);
		}
	}

	return ret;
}

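/*
 * Hand a reserved extent back to the allocator.  If @pin is set the range is
 * pinned instead (reusable only after the running transaction commits);
 * otherwise it goes straight back into the free space cache, after an
 * optional discard.
 */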
static int __btrfs_free_reserved_extent(struct btrfs_root *root,
					u64 start, u64 len,
					int pin, int delalloc)
{
	struct btrfs_block_group_cache *cache;
	int ret = 0;

	cache = btrfs_lookup_block_group(root->fs_info, start);
	if (!cache) {
		btrfs_err(root->fs_info, "Unable to find block group for %llu",
			  start);
		return -ENOSPC;
	}

	if (pin)
		pin_down_extent(root, cache, start, len, 1);
	else {
		if (btrfs_test_opt(root, DISCARD))
			ret = btrfs_discard_extent(root, start, len, NULL);
		btrfs_add_free_space(cache, start, len);
		btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
	}

	btrfs_put_block_group(cache);

	trace_btrfs_reserved_extent_free(root, start, len);

	return ret;
}

int btrfs_free_reserved_extent(struct btrfs_root *root,
			       u64 start, u64 len, int delalloc)
{
	return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
}

int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
				       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(root, start, len, 1, 0);
}

static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
					  ins->offset);
	if (ret)
		return ret;

	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}
	trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
	return ret;
}

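/*
 * Counterpart of alloc_reserved_file_extent() for tree blocks: inserts the
 * extent item (skinny or full, depending on the SKINNY_METADATA incompat
 * flag) along with one inline tree block backref.
 */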
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	u64 num_bytes = ins->offset;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	if (!skinny_metadata)
		size += sizeof(*block_info);

	path = btrfs_alloc_path();
	if (!path) {
		btrfs_free_and_pin_reserved_extent(root, ins->objectid,
						   root->nodesize);
		return -ENOMEM;
	}

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		btrfs_free_and_pin_reserved_extent(root, ins->objectid,
						   root->nodesize);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
		num_bytes = root->nodesize;
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, key);
		btrfs_set_tree_block_level(leaf, block_info, level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	if (parent > 0) {
		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
					  num_bytes);
	if (ret)
		return ret;

	ret = update_block_group(trans, root, ins->objectid, root->nodesize,
				 1);
	if (ret) { /* -ENOENT, logic error */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  ins->objectid, ins->offset);
		BUG();
	}

	trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize);
	return ret;
}

int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 root_objectid, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins)
{
	int ret;

	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);

	ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
					 ins->offset, 0,
					 root_objectid, owner, offset,
					 ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
					 NULL);
	return ret;
}

/*
 * this is used by the tree logging recovery code.  It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	int ret;
	struct btrfs_block_group_cache *block_group;

	/*
	 * Mixed block groups will exclude before processing the log so we only
	 * need to do the exclude dance if this fs isn't mixed.
	 */
	if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	ret = btrfs_update_reserved_bytes(block_group, ins->offset,
					  RESERVE_ALLOC_NO_ACCOUNT, 0);
	BUG_ON(ret); /* logic error */
	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
					 0, owner, offset, ins, 1);
	btrfs_put_block_group(block_group);
	return ret;
}

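/*
 * Set up a freshly allocated tree block: stamp the transid, lock the buffer,
 * scrub any stale state and mark it dirty in the io tree that matches its
 * owner (dirty_log_pages for log trees, the transaction's dirty_pages
 * otherwise).
 */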
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, int level)
{
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(root, bytenr);
	if (IS_ERR(buf))
		return buf;

	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root->fs_info, buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

	btrfs_set_lock_blocking(buf);
	set_extent_buffer_uptodate(buf);

	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		buf->log_index = root->log_transid % 2;
		/*
		 * we allow two log transactions at a time, use different
		 * EXTENT bit to differentiate dirty pages.
		 */
		if (buf->log_index == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					 buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
				       buf->start + buf->len - 1);
	} else {
		buf->log_index = -1;
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
				 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->dirty = true;
	/* this returns a buffer locked for blocking */
	return buf;
}

static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
	      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	int ret;
	bool global_updated = false;

	block_rsv = get_block_rsv(trans, root);

	if (unlikely(block_rsv->size == 0))
		goto try_reserve;
again:
	ret = block_rsv_use_bytes(block_rsv, blocksize);
	if (!ret)
		return block_rsv;

	if (block_rsv->failfast)
		return ERR_PTR(ret);

	if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
		global_updated = true;
		update_global_block_rsv(root->fs_info);
		goto again;
	}

	if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
		static DEFINE_RATELIMIT_STATE(_rs,
					      DEFAULT_RATELIMIT_INTERVAL * 10,
					      1);
		if (__ratelimit(&_rs))
			WARN(1, KERN_DEBUG
				"BTRFS: block rsv returned %d\n", ret);
	}
try_reserve:
	ret = reserve_metadata_bytes(root, block_rsv, blocksize,
				     BTRFS_RESERVE_NO_FLUSH);
	if (!ret)
		return block_rsv;
	/*
	 * If we couldn't reserve metadata bytes try and use some from
	 * the global reserve if its space type is the same as the global
	 * reservation.
	 */
	if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
	    block_rsv->space_info == global_rsv->space_info) {
		ret = block_rsv_use_bytes(global_rsv, blocksize);
		if (!ret)
			return global_rsv;
	}
	return ERR_PTR(ret);
}

static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
	block_rsv_add_bytes(block_rsv, blocksize, 0);
	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}

/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 */
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					u64 parent, u64 root_objectid,
					struct btrfs_disk_key *key, int level,
					u64 hint, u64 empty_size)
{
	struct btrfs_key ins;
	struct btrfs_block_rsv *block_rsv;
	struct extent_buffer *buf;
	struct btrfs_delayed_extent_op *extent_op;
	u64 flags = 0;
	int ret;
	u32 blocksize = root->nodesize;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	if (btrfs_test_is_dummy_root(root)) {
		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
					    level);
		if (!IS_ERR(buf))
			root->alloc_bytenr += blocksize;
		return buf;
	}

	block_rsv = use_block_rsv(trans, root, blocksize);
	if (IS_ERR(block_rsv))
		return ERR_CAST(block_rsv);

	ret = btrfs_reserve_extent(root, blocksize, blocksize,
				   empty_size, hint, &ins, 0, 0);
	if (ret)
		goto out_unuse;

	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		goto out_free_reserved;
	}

	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent == 0)
			parent = ins.objectid;
		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
	} else
		BUG_ON(parent > 0);

	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
		extent_op = btrfs_alloc_delayed_extent_op();
		if (!extent_op) {
			ret = -ENOMEM;
			goto out_free_buf;
		}
		if (key)
			memcpy(&extent_op->key, key, sizeof(extent_op->key));
		else
			memset(&extent_op->key, 0, sizeof(extent_op->key));
		extent_op->flags_to_set = flags;
		extent_op->update_key = skinny_metadata ? false : true;
		extent_op->update_flags = true;
		extent_op->is_data = false;
		extent_op->level = level;

		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
						 ins.objectid, ins.offset,
						 parent, root_objectid, level,
						 BTRFS_ADD_DELAYED_EXTENT,
						 extent_op);
		if (ret)
			goto out_free_delayed;
	}
	return buf;

out_free_delayed:
	btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
	free_extent_buffer(buf);
out_free_reserved:
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
out_unuse:
	unuse_block_rsv(root->fs_info, block_rsv, blocksize);
	return ERR_PTR(ret);
}

struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];
	u64 flags[BTRFS_MAX_LEVEL];
	struct btrfs_key update_progress;
	int stage;
	int level;
	int shared_level;
	int update_ref;
	int keep_locks;
	int reada_slot;
	int reada_count;
	int for_reloc;
};

#define DROP_REFERENCE	1
#define UPDATE_BACKREF	2

static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct walk_control *wc,
				     struct btrfs_path *path)
{
	u64 bytenr;
	u64 generation;
	u64 refs;
	u64 flags;
	u32 nritems;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *eb;
	int ret;
	int slot;
	int nread = 0;

	if (path->slots[wc->level] < wc->reada_slot) {
		wc->reada_count = wc->reada_count * 2 / 3;
		wc->reada_count = max(wc->reada_count, 2);
	} else {
		wc->reada_count = wc->reada_count * 3 / 2;
		wc->reada_count = min_t(int, wc->reada_count,
					BTRFS_NODEPTRS_PER_BLOCK(root));
	}

	eb = path->nodes[wc->level];
	nritems = btrfs_header_nritems(eb);
	blocksize = root->nodesize;

	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
		if (nread >= wc->reada_count)
			break;

		cond_resched();
		bytenr = btrfs_node_blockptr(eb, slot);
		generation = btrfs_node_ptr_generation(eb, slot);

		if (slot == path->slots[wc->level])
			goto reada;

		if (wc->stage == UPDATE_BACKREF &&
		    generation <= root->root_key.offset)
			continue;

		/* We don't lock the tree block, it's OK to be racy here */
		ret = btrfs_lookup_extent_info(trans, root, bytenr,
					       wc->level - 1, 1, &refs,
					       &flags);
		/* We don't care about errors in readahead. */
		if (ret < 0)
			continue;
		BUG_ON(refs == 0);

		if (wc->stage == DROP_REFERENCE) {
			if (refs == 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		readahead_tree_block(root, bytenr);
		nread++;
	}
	wc->reada_slot = slot;
}

/*
 * These may not be seen by the usual inc/dec ref code so we have to
 * add them here.
 */
static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 bytenr,
				     u64 num_bytes)
{
	struct btrfs_qgroup_extent_record *qrecord;
	struct btrfs_delayed_ref_root *delayed_refs;

	qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
	if (!qrecord)
		return -ENOMEM;

	qrecord->bytenr = bytenr;
	qrecord->num_bytes = num_bytes;
	qrecord->old_roots = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
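	/*
	 * a nonzero return from btrfs_qgroup_insert_dirty_extent() means a
	 * record for this bytenr is already queued; ours is a duplicate and
	 * can be freed.
	 */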
	if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
		kfree(qrecord);
	spin_unlock(&delayed_refs->lock);

	return 0;
}

static int account_leaf_items(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *eb)
{
	int nr = btrfs_header_nritems(eb);
	int i, extent_type, ret;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 bytenr, num_bytes;

	/* We can be called directly from walk_up_proc() */
	if (!root->fs_info->quota_enabled)
		return 0;

	for (i = 0; i < nr; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);

		extent_type = btrfs_file_extent_type(eb, fi);

		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;

		bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		if (!bytenr)
			continue;

		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);

		ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * Walk up the tree from the bottom, freeing leaves and any interior
 * nodes which have had all slots visited. If a node (leaf or
 * interior) is freed, the node above it will have it's slot
 * incremented. The root node will never be freed.
 *
 * At the end of this function, we should have a path which has all
 * slots incremented to the next position for a search. If we need to
 * read a new node it will be NULL and the node above it will have the
 * correct slot selected for a later read.
 *
 * If we increment the root nodes slot counter past the number of
 * elements, 1 is returned to signal completion of the search.
 */
static int adjust_slots_upwards(struct btrfs_root *root,
				struct btrfs_path *path, int root_level)
{
	int level = 0;
	int nr, slot;
	struct extent_buffer *eb;

	if (root_level == 0)
		return 1;

	while (level <= root_level) {
		eb = path->nodes[level];
		nr = btrfs_header_nritems(eb);
		path->slots[level]++;
		slot = path->slots[level];
		if (slot >= nr || level == 0) {
			/*
			 * Don't free the root -  we will detect this
			 * condition after our loop and return a
			 * positive value for caller to stop walking the tree.
			 */
			if (level != root_level) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;

				free_extent_buffer(eb);
				path->nodes[level] = NULL;
				path->slots[level] = 0;
			}
		} else {
			/*
			 * We have a valid slot to walk back down
			 * from. Stop here so caller can process these
			 * new nodes.
			 */
			break;
		}

		level++;
	}

	eb = path->nodes[root_level];
	if (path->slots[root_level] >= btrfs_header_nritems(eb))
		return 1;

	return 0;
}

/*
 * root_eb is the subtree root and is locked before this function is called.
 */
static int account_shared_subtree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *root_eb,
				  u64 root_gen,
				  int root_level)
{
	int ret = 0;
	int level;
	struct extent_buffer *eb = root_eb;
	struct btrfs_path *path = NULL;

	BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
	BUG_ON(root_eb == NULL);

	if (!root->fs_info->quota_enabled)
		return 0;

	if (!extent_buffer_uptodate(root_eb)) {
		ret = btrfs_read_buffer(root_eb, root_gen);
		if (ret)
			goto out;
	}

	if (root_level == 0) {
		ret = account_leaf_items(trans, root, root_eb);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Walk down the tree.  Missing extent blocks are filled in as
	 * we go. Metadata is accounted every time we read a new
	 * extent block.
	 *
	 * When we reach a leaf, we account for file extent items in it,
	 * walk back up the tree (adjusting slot pointers as we go)
	 * and restart the search process.
	 */
	extent_buffer_get(root_eb); /* For path */
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
	level = root_level;
	while (level >= 0) {
		if (path->nodes[level] == NULL) {
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			/*
			 * We need to get child blockptr/gen from parent before
			 * we can read the block.
			 */
			eb = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);

			eb = read_tree_block(root, child_bytenr, child_gen);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = eb;
			path->slots[level] = 0;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

			ret = record_one_subtree_extent(trans, root, child_bytenr,
							root->nodesize);
			if (ret)
				goto out;
		}

		if (level == 0) {
			ret = account_leaf_items(trans, root, path->nodes[level]);
			if (ret)
				goto out;

			/* Nonzero return here means we completed our search */
			ret = adjust_slots_upwards(root, path, root_level);
			if (ret)
				break;

			/* Restart search with new slots */
			goto walk_down;
		}

		level--;
	}

	ret = 0;
out:
	btrfs_free_path(path);

	return ret;
}

/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag,
						  btrfs_header_level(eb), 0);
		BUG_ON(ret); /* -ENOMEM */
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}

/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, this function drops reference
 * for the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = root->nodesize;

	next = btrfs_find_tree_block(root->fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr);
		if (IS_ERR(next))
			return PTR_ERR(next);

		btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
					       level - 1);
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0) {
		btrfs_tree_unlock(next);
		return ret;
	}

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(root->fs_info, "Missing references.");
		BUG();
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, generation);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		if (need_account) {
			ret = account_shared_subtree(trans, root, next,
						     generation, level - 1);
			if (ret) {
				btrfs_err_rl(root->fs_info,
					"Error %d accounting shared subtree. Quota is out of sync, rescan required.",
					ret);
			}
		}
		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
					root->root_key.objectid, level - 1, 0);
		BUG_ON(ret); /* -ENOMEM */
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}

/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, root,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0);
			BUG_ON(ret); /* -ENOMEM */
			ret = account_leaf_items(trans, root, eb);
			if (ret) {
				btrfs_err_rl(root->fs_info,
					"error %d accounting leaf items. Quota is out of sync, rescan required.",
					ret);
			}
		}
		/* make block locked assertion in clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		}
		clean_tree_block(trans, root->fs_info, eb);
	}

	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(eb));
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level + 1]));
	}

	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;
}

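/*
 * walk down from wc->level towards the leaves, letting walk_down_proc() and
 * do_walk_down() decide at each level whether to descend further or stop
 * (shared block, dropped subtree, or we hit a leaf).
 */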
static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc)
{
	int level = wc->level;
	int lookup_info = 1;
	int ret;

	while (level >= 0) {
		ret = walk_down_proc(trans, root, path, wc, lookup_info);
		if (ret > 0)
			break;

		if (level == 0)
			break;

		if (path->slots[level] >=
		    btrfs_header_nritems(path->nodes[level]))
			break;

		ret = do_walk_down(trans, root, path, wc, &lookup_info);
		if (ret > 0) {
			path->slots[level]++;
			continue;
		} else if (ret < 0)
			return ret;
		level = wc->level;
	}
	return 0;
}

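/*
 * pop completed nodes off the path, freeing them through walk_up_proc(),
 * until a node with unvisited slots is found or @max_level is passed.
 * Returns 1 once the whole tree has been processed.
 */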
static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int max_level)
{
	int level = wc->level;
	int ret;

	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
	while (level < max_level && path->nodes[level]) {
		wc->level = level;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			path->slots[level]++;
			return 0;
		} else {
			ret = walk_up_proc(trans, root, path, wc);
			if (ret > 0)
				return 0;

			if (path->locks[level]) {
				btrfs_tree_unlock_rw(path->nodes[level],
						     path->locks[level]);
				path->locks[level] = 0;
			}
			free_extent_buffer(path->nodes[level]);
			path->nodes[level] = NULL;
			level++;
		}
	}
	return 1;
}

/*
 * drop a subvolume tree.
 *
 * this function traverses the tree freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN
 */
int btrfs_drop_snapshot(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, int update_ref,
			int for_reloc)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;
	bool root_dropped = false;

	btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		err = -ENOMEM;
		goto out;
	}

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	if (block_rsv)
		trans->block_rsv = block_rsv;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

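		/*
		 * resume from where a previous, interrupted drop left off:
		 * walk down to the saved drop_level, re-taking locks and ref
		 * counts on the way
		 */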
9031 level = root_item->drop_level;
9032 BUG_ON(level == 0);
9033 path->lowest_level = level;
9034 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9035 path->lowest_level = 0;
9036 if (ret < 0) {
9037 err = ret;
9038 goto out_end_trans;
9039 }
9040 WARN_ON(ret > 0);
9041
9042
9043
9044
9045
9046 btrfs_unlock_up_safe(path, 0);
9047
9048 level = btrfs_header_level(root->node);
9049 while (1) {
9050 btrfs_tree_lock(path->nodes[level]);
9051 btrfs_set_lock_blocking(path->nodes[level]);
9052 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9053
9054 ret = btrfs_lookup_extent_info(trans, root,
9055 path->nodes[level]->start,
9056 level, 1, &wc->refs[level],
9057 &wc->flags[level]);
9058 if (ret < 0) {
9059 err = ret;
9060 goto out_end_trans;
9061 }
9062 BUG_ON(wc->refs[level] == 0);
9063
9064 if (level == root_item->drop_level)
9065 break;
9066
9067 btrfs_tree_unlock(path->nodes[level]);
9068 path->locks[level] = 0;
9069 WARN_ON(wc->refs[level] != 1);
9070 level--;
9071 }
9072 }
9073
9074 wc->level = level;
9075 wc->shared_level = -1;
9076 wc->stage = DROP_REFERENCE;
9077 wc->update_ref = update_ref;
9078 wc->keep_locks = 0;
9079 wc->for_reloc = for_reloc;
9080 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
9081
9082 while (1) {
9083
9084 ret = walk_down_tree(trans, root, path, wc);
9085 if (ret < 0) {
9086 err = ret;
9087 break;
9088 }
9089
9090 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9091 if (ret < 0) {
9092 err = ret;
9093 break;
9094 }
9095
9096 if (ret > 0) {
9097 BUG_ON(wc->stage != DROP_REFERENCE);
9098 break;
9099 }
9100
9101 if (wc->stage == DROP_REFERENCE) {
9102 level = wc->level;
9103 btrfs_node_key(path->nodes[level],
9104 &root_item->drop_progress,
9105 path->slots[level]);
9106 root_item->drop_level = level;
9107 }
9108
9109 BUG_ON(wc->level == 0);
9110 if (btrfs_should_end_transaction(trans, tree_root) ||
9111 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
9112 ret = btrfs_update_root(trans, tree_root,
9113 &root->root_key,
9114 root_item);
9115 if (ret) {
9116 btrfs_abort_transaction(trans, tree_root, ret);
9117 err = ret;
9118 goto out_end_trans;
9119 }
9120
9121 btrfs_end_transaction_throttle(trans, tree_root);
9122 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
9123 pr_debug("BTRFS: drop snapshot early exit\n");
9124 err = -EAGAIN;
9125 goto out_free;
9126 }
9127
9128 trans = btrfs_start_transaction(tree_root, 0);
9129 if (IS_ERR(trans)) {
9130 err = PTR_ERR(trans);
9131 goto out_free;
9132 }
9133 if (block_rsv)
9134 trans->block_rsv = block_rsv;
9135 }
9136 }
9137 btrfs_release_path(path);
9138 if (err)
9139 goto out_end_trans;
9140
9141 ret = btrfs_del_root(trans, tree_root, &root->root_key);
9142 if (ret) {
9143 btrfs_abort_transaction(trans, tree_root, ret);
9144 goto out_end_trans;
9145 }
9146
9147 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9148 ret = btrfs_find_root(tree_root, &root->root_key, path,
9149 NULL, NULL);
9150 if (ret < 0) {
9151 btrfs_abort_transaction(trans, tree_root, ret);
9152 err = ret;
9153 goto out_end_trans;
9154 } else if (ret > 0) {
9155 /* if we fail to delete the orphan item this time
9156 * around, it'll get picked up the next time.
9157 *
9158 * The most common failure here is just -ENOENT.
9159 */
9160 btrfs_del_orphan_item(trans, tree_root,
9161 root->root_key.objectid);
9162 }
9163 }
9164
9165 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9166 btrfs_add_dropped_root(trans, root);
9167 } else {
9168 free_extent_buffer(root->node);
9169 free_extent_buffer(root->commit_root);
9170 btrfs_put_fs_root(root);
9171 }
9172 root_dropped = true;
9173out_end_trans:
9174 btrfs_end_transaction_throttle(trans, tree_root);
9175out_free:
9176 kfree(wc);
9177 btrfs_free_path(path);
9178out:
9179 /*
9180 * So if we need to stop dropping the snapshot for whatever reason we
9181 * need to make sure to add it back to the dead root list so that we
9182 * keep trying to do the work later. This also cleans up roots if we
9183 * don't have it in the radix (like when we recover after a power fail
9184 * or unmount) so we don't leak memory.
9185 */
9186 if (!for_reloc && root_dropped == false)
9187 btrfs_add_dead_root(root);
9188 if (err && err != -EAGAIN)
9189 btrfs_handle_fs_error(root->fs_info, err, NULL);
9190 return err;
9191}
9192
9193 /*
9194 * drop subtree rooted at tree block 'node'.
9195 *
9196 * NOTE: this function will unlock and release tree block 'node';
9197 * only used by relocation code
9198 */
9199int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9200 struct btrfs_root *root,
9201 struct extent_buffer *node,
9202 struct extent_buffer *parent)
9203{
9204 struct btrfs_path *path;
9205 struct walk_control *wc;
9206 int level;
9207 int parent_level;
9208 int ret = 0;
9209 int wret;
9210
9211 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9212
9213 path = btrfs_alloc_path();
9214 if (!path)
9215 return -ENOMEM;
9216
9217 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9218 if (!wc) {
9219 btrfs_free_path(path);
9220 return -ENOMEM;
9221 }
9222
9223 btrfs_assert_tree_locked(parent);
9224 parent_level = btrfs_header_level(parent);
9225 extent_buffer_get(parent);
9226 path->nodes[parent_level] = parent;
9227 path->slots[parent_level] = btrfs_header_nritems(parent);
9228
9229 btrfs_assert_tree_locked(node);
9230 level = btrfs_header_level(node);
9231 path->nodes[level] = node;
9232 path->slots[level] = 0;
9233 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9234
9235 wc->refs[parent_level] = 1;
9236 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9237 wc->level = level;
9238 wc->shared_level = -1;
9239 wc->stage = DROP_REFERENCE;
9240 wc->update_ref = 0;
9241 wc->keep_locks = 1;
9242 wc->for_reloc = 1;
9243 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
9244
9245 while (1) {
9246 wret = walk_down_tree(trans, root, path, wc);
9247 if (wret < 0) {
9248 ret = wret;
9249 break;
9250 }
9251
9252 wret = walk_up_tree(trans, root, path, wc, parent_level);
9253 if (wret < 0)
9254 ret = wret;
9255 if (wret != 0)
9256 break;
9257 }
9258
9259 kfree(wc);
9260 btrfs_free_path(path);
9261 return ret;
9262}
9263
9264static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
9265{
9266 u64 num_devices;
9267 u64 stripped;
9268
9269 /*
9270 * if restripe for this chunk_type is on, pick target profile and
9271 * return, otherwise do the usual balance
9272 */
9273 stripped = get_restripe_target(root->fs_info, flags);
9274 if (stripped)
9275 return extended_to_chunk(stripped);
9276
9277 num_devices = root->fs_info->fs_devices->rw_devices;
9278
9279 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9280 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9281 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9282
9283 if (num_devices == 1) {
9284 stripped |= BTRFS_BLOCK_GROUP_DUP;
9285 stripped = flags & ~stripped;
9286
9287 /* turn raid0 into single device chunks */
9288 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9289 return stripped;
9290
9291 /* turn mirroring into duplication */
9292 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9293 BTRFS_BLOCK_GROUP_RAID10))
9294 return stripped | BTRFS_BLOCK_GROUP_DUP;
9295 } else {
9296 /* they already had raid on here, just return */
9297 if (flags & stripped)
9298 return flags;
9299
9300 stripped |= BTRFS_BLOCK_GROUP_DUP;
9301 stripped = flags & ~stripped;
9302
9303 /* switch duplicated blocks with raid1 */
9304 if (flags & BTRFS_BLOCK_GROUP_DUP)
9305 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9306
9307 /* this is drive concat, leave it alone */
9308 }
9309
9310 return flags;
9311}
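/*
 * Added commentary (not upstream text): with a single rw device the
 * mirrored profiles above are reduced, e.g. RAID1/RAID10 become DUP and
 * RAID0 degrades to the single profile, while with multiple devices an
 * already-striped/mirrored profile is returned unchanged and only DUP is
 * upgraded to RAID1.
 */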
9312
9313static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9314{
9315 struct btrfs_space_info *sinfo = cache->space_info;
9316 u64 num_bytes;
9317 u64 min_allocable_bytes;
9318 int ret = -ENOSPC;
9319
9320 /*
9321 * We need some metadata space and system metadata space for
9322 * allocating chunks in some corner cases until we force it to be
9323 * readonly.
9324 */
9325 if ((sinfo->flags &
9326 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9327 !force)
9328 min_allocable_bytes = SZ_1M;
9329 else
9330 min_allocable_bytes = 0;
9331
9332 spin_lock(&sinfo->lock);
9333 spin_lock(&cache->lock);
9334
9335 if (cache->ro) {
9336 cache->ro++;
9337 ret = 0;
9338 goto out;
9339 }
9340
9341 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9342 cache->bytes_super - btrfs_block_group_used(&cache->item);
9343
9344 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
9345 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
9346 min_allocable_bytes <= sinfo->total_bytes) {
9347 sinfo->bytes_readonly += num_bytes;
9348 cache->ro++;
9349 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9350 ret = 0;
9351 }
9352out:
9353 spin_unlock(&cache->lock);
9354 spin_unlock(&sinfo->lock);
9355 return ret;
9356}
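/*
 * Sketch of the check above (added commentary): the block group can be
 * flipped read-only only if the rest of the space_info can absorb its
 * still-free bytes, i.e. roughly
 *
 *	used + reserved + pinned + may_use + readonly + num_bytes +
 *		min_allocable_bytes <= total_bytes
 *
 * where num_bytes is the unused space remaining inside this block group.
 */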
9357
9358int btrfs_inc_block_group_ro(struct btrfs_root *root,
9359 struct btrfs_block_group_cache *cache)
9361{
9362 struct btrfs_trans_handle *trans;
9363 u64 alloc_flags;
9364 int ret;
9365
9366again:
9367 trans = btrfs_join_transaction(root);
9368 if (IS_ERR(trans))
9369 return PTR_ERR(trans);
9370
9371 /*
9372 * we're not allowed to set block groups readonly after the dirty
9373 * block groups cache has started writing. If it already started,
9374 * back off and let this transaction commit.
9375 */
9376 mutex_lock(&root->fs_info->ro_block_group_mutex);
9377 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9378 u64 transid = trans->transid;
9379
9380 mutex_unlock(&root->fs_info->ro_block_group_mutex);
9381 btrfs_end_transaction(trans, root);
9382
9383 ret = btrfs_wait_for_commit(root, transid);
9384 if (ret)
9385 return ret;
9386 goto again;
9387 }
9388
9389 /*
9390 * if we are changing raid levels, try to allocate a corresponding
9391 * block group with the new raid level.
9392 */
9393 alloc_flags = update_block_group_flags(root, cache->flags);
9394 if (alloc_flags != cache->flags) {
9395 ret = do_chunk_alloc(trans, root, alloc_flags,
9396 CHUNK_ALLOC_FORCE);
9397 /*
9398 * ENOSPC is allowed here, we may have enough space
9399 * already allocated at the new raid level to
9400 * carry on
9401 */
9402 if (ret == -ENOSPC)
9403 ret = 0;
9404 if (ret < 0)
9405 goto out;
9406 }
9407
9408 ret = inc_block_group_ro(cache, 0);
9409 if (!ret)
9410 goto out;
9411 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
9412 ret = do_chunk_alloc(trans, root, alloc_flags,
9413 CHUNK_ALLOC_FORCE);
9414 if (ret < 0)
9415 goto out;
9416 ret = inc_block_group_ro(cache, 0);
9417out:
9418 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9419 alloc_flags = update_block_group_flags(root, cache->flags);
9420 lock_chunks(root->fs_info->chunk_root);
9421 check_system_chunk(trans, root, alloc_flags);
9422 unlock_chunks(root->fs_info->chunk_root);
9423 }
9424 mutex_unlock(&root->fs_info->ro_block_group_mutex);
9425
9426 btrfs_end_transaction(trans, root);
9427 return ret;
9428}
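/*
 * Usage note (added commentary): a successful btrfs_inc_block_group_ro()
 * must eventually be paired with btrfs_dec_block_group_ro() below; since
 * ->ro is a counter, nested users (scrub and balance both rely on this)
 * each take and drop their own reference.
 */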
9429
9430int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
9431 struct btrfs_root *root, u64 type)
9432{
9433 u64 alloc_flags = get_alloc_profile(root, type);
9434 return do_chunk_alloc(trans, root, alloc_flags,
9435 CHUNK_ALLOC_FORCE);
9436}
9437
9438 /*
9439 * helper to account the unused space of all the readonly block groups
9440 * in the space_info. takes mirrors into account.
9441 */
9442u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9443{
9444 struct btrfs_block_group_cache *block_group;
9445 u64 free_bytes = 0;
9446 int factor;
9447
9448 /* It's df, we don't care if it's racy */
9449 if (list_empty(&sinfo->ro_bgs))
9450 return 0;
9451
9452 spin_lock(&sinfo->lock);
9453 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9454 spin_lock(&block_group->lock);
9455
9456 if (!block_group->ro) {
9457 spin_unlock(&block_group->lock);
9458 continue;
9459 }
9460
9461 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
9462 BTRFS_BLOCK_GROUP_RAID10 |
9463 BTRFS_BLOCK_GROUP_DUP))
9464 factor = 2;
9465 else
9466 factor = 1;
9467
9468 free_bytes += (block_group->key.offset -
9469 btrfs_block_group_used(&block_group->item)) *
9470 factor;
9471
9472 spin_unlock(&block_group->lock);
9473 }
9474 spin_unlock(&sinfo->lock);
9475
9476 return free_bytes;
9477}
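/*
 * Worked example (added commentary): a read-only RAID1 block group with
 * key.offset = 1GiB and 256MiB used contributes (1GiB - 256MiB) * 2 =
 * 1536MiB here, because RAID1, RAID10 and DUP keep two copies on disk.
 */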
9478
9479void btrfs_dec_block_group_ro(struct btrfs_root *root,
9480 struct btrfs_block_group_cache *cache)
9481{
9482 struct btrfs_space_info *sinfo = cache->space_info;
9483 u64 num_bytes;
9484
9485 BUG_ON(!cache->ro);
9486
9487 spin_lock(&sinfo->lock);
9488 spin_lock(&cache->lock);
9489 if (!--cache->ro) {
9490 num_bytes = cache->key.offset - cache->reserved -
9491 cache->pinned - cache->bytes_super -
9492 btrfs_block_group_used(&cache->item);
9493 sinfo->bytes_readonly -= num_bytes;
9494 list_del_init(&cache->ro_list);
9495 }
9496 spin_unlock(&cache->lock);
9497 spin_unlock(&sinfo->lock);
9498}
9499
9500 /*
9501 * checks to see if it's even possible to relocate this block group.
9502 *
9503 * @return - -1 if it's not a good idea to relocate this block group, 0 if
9504 * it's ok to go ahead and try.
9505 */
9506int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
9507{
9508 struct btrfs_block_group_cache *block_group;
9509 struct btrfs_space_info *space_info;
9510 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
9511 struct btrfs_device *device;
9512 struct btrfs_trans_handle *trans;
9513 u64 min_free;
9514 u64 dev_min = 1;
9515 u64 dev_nr = 0;
9516 u64 target;
9517 int debug;
9518 int index;
9519 int full = 0;
9520 int ret = 0;
9521
9522 debug = btrfs_test_opt(root, ENOSPC_DEBUG);
9523
9524 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
9525
9526 /* odd, couldn't find the block group, leave it alone */
9527 if (!block_group) {
9528 if (debug)
9529 btrfs_warn(root->fs_info,
9530 "can't find block group for bytenr %llu",
9531 bytenr);
9532 return -1;
9533 }
9534
9535 min_free = btrfs_block_group_used(&block_group->item);
9536
9537 /* no bytes used, we're good */
9538 if (!min_free)
9539 goto out;
9540
9541 space_info = block_group->space_info;
9542 spin_lock(&space_info->lock);
9543
9544 full = space_info->full;
9545
9546 /*
9547 * if this is the last block group we have in this space, we can't
9548 * relocate it unless we're able to allocate a new chunk below.
9549 *
9550 * Otherwise, we need to make sure we have room in the space to handle
9551 * all of the extents from this block group. If we can, we're good.
9552 */
9553 if ((space_info->total_bytes != block_group->key.offset) &&
9554 (space_info->bytes_used + space_info->bytes_reserved +
9555 space_info->bytes_pinned + space_info->bytes_readonly +
9556 min_free < space_info->total_bytes)) {
9557 spin_unlock(&space_info->lock);
9558 goto out;
9559 }
9560 spin_unlock(&space_info->lock);
9561
9562 /*
9563 * ok we don't have enough space, but maybe we have free space on our
9564 * devices to allocate new chunks for relocation, so loop through our
9565 * alloc devices and guess if we have enough space. if this block
9566 * group is going to be restriped, run checks against the target
9567 * profile instead of the current one.
9568 */
9569 ret = -1;
9570
9571 /*
9572 * index:
9573 * 0: raid10
9574 * 1: raid1
9575 * 2: dup
9576 * 3: raid0
9577 * 4: single
9578 */
9579 target = get_restripe_target(root->fs_info, block_group->flags);
9580 if (target) {
9581 index = __get_raid_index(extended_to_chunk(target));
9582 } else {
9583 /*
9584 * this is just a balance, so if we were marked as full
9585 * we know there is no space for a new chunk
9586 */
9587 if (full) {
9588 if (debug)
9589 btrfs_warn(root->fs_info,
9590 "no space to alloc new chunk for block group %llu",
9591 block_group->key.objectid);
9592 goto out;
9593 }
9594
9595 index = get_block_group_index(block_group);
9596 }
9597
9598 if (index == BTRFS_RAID_RAID10) {
9599 dev_min = 4;
9600 /* Divide by 2 */
9601 min_free >>= 1;
9602 } else if (index == BTRFS_RAID_RAID1) {
9603 dev_min = 2;
9604 } else if (index == BTRFS_RAID_DUP) {
9605 /* Multiply by 2 */
9606 min_free <<= 1;
9607 } else if (index == BTRFS_RAID_RAID0) {
9608 dev_min = fs_devices->rw_devices;
9609 min_free = div64_u64(min_free, dev_min);
9610 }
9611
9612 /* We need to do this so that we can look at pending chunks */
9613 trans = btrfs_join_transaction(root);
9614 if (IS_ERR(trans)) {
9615 ret = PTR_ERR(trans);
9616 goto out;
9617 }
9618
9619 mutex_lock(&root->fs_info->chunk_mutex);
9620 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9621 u64 dev_offset;
9622
9623 /*
9624 * check to make sure we can actually find a chunk with enough
9625 * space to fit our block group in.
9626 */
9627 if (device->total_bytes > device->bytes_used + min_free &&
9628 !device->is_tgtdev_for_dev_replace) {
9629 ret = find_free_dev_extent(trans, device, min_free,
9630 &dev_offset, NULL);
9631 if (!ret)
9632 dev_nr++;
9633
9634 if (dev_nr >= dev_min)
9635 break;
9636
9637 ret = -1;
9638 }
9639 }
9640 if (debug && ret == -1)
9641 btrfs_warn(root->fs_info,
9642 "no space to allocate a new chunk for block group %llu",
9643 block_group->key.objectid);
9644 mutex_unlock(&root->fs_info->chunk_mutex);
9645 btrfs_end_transaction(trans, root);
9646out:
9647 btrfs_put_block_group(block_group);
9648 return ret;
9649}
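/*
 * Added commentary on the min_free adjustments above: RAID10 needs half
 * of min_free on each of at least 4 devices, DUP stores both copies on
 * one device so min_free is doubled, and RAID0 spreads the data evenly
 * across all rw devices. For example, relocating a RAID10 block group
 * with 512MiB used requires 4 devices that can each fit a 256MiB device
 * extent.
 */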
9650
9651static int find_first_block_group(struct btrfs_root *root,
9652 struct btrfs_path *path, struct btrfs_key *key)
9653{
9654 int ret = 0;
9655 struct btrfs_key found_key;
9656 struct extent_buffer *leaf;
9657 int slot;
9658
9659 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9660 if (ret < 0)
9661 goto out;
9662
9663 while (1) {
9664 slot = path->slots[0];
9665 leaf = path->nodes[0];
9666 if (slot >= btrfs_header_nritems(leaf)) {
9667 ret = btrfs_next_leaf(root, path);
9668 if (ret == 0)
9669 continue;
9670 if (ret < 0)
9671 goto out;
9672 break;
9673 }
9674 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9675
9676 if (found_key.objectid >= key->objectid &&
9677 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9678 ret = 0;
9679 goto out;
9680 }
9681 path->slots[0]++;
9682 }
9683out:
9684 return ret;
9685}
9686
9687void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9688{
9689 struct btrfs_block_group_cache *block_group;
9690 u64 last = 0;
9691
9692 while (1) {
9693 struct inode *inode;
9694
9695 block_group = btrfs_lookup_first_block_group(info, last);
9696 while (block_group) {
9697 spin_lock(&block_group->lock);
9698 if (block_group->iref)
9699 break;
9700 spin_unlock(&block_group->lock);
9701 block_group = next_block_group(info->tree_root,
9702 block_group);
9703 }
9704 if (!block_group) {
9705 if (last == 0)
9706 break;
9707 last = 0;
9708 continue;
9709 }
9710
9711 inode = block_group->inode;
9712 block_group->iref = 0;
9713 block_group->inode = NULL;
9714 spin_unlock(&block_group->lock);
9715 iput(inode);
9716 last = block_group->key.objectid + block_group->key.offset;
9717 btrfs_put_block_group(block_group);
9718 }
9719}
9720
9721int btrfs_free_block_groups(struct btrfs_fs_info *info)
9722{
9723 struct btrfs_block_group_cache *block_group;
9724 struct btrfs_space_info *space_info;
9725 struct btrfs_caching_control *caching_ctl;
9726 struct rb_node *n;
9727
9728 down_write(&info->commit_root_sem);
9729 while (!list_empty(&info->caching_block_groups)) {
9730 caching_ctl = list_entry(info->caching_block_groups.next,
9731 struct btrfs_caching_control, list);
9732 list_del(&caching_ctl->list);
9733 put_caching_control(caching_ctl);
9734 }
9735 up_write(&info->commit_root_sem);
9736
9737 spin_lock(&info->unused_bgs_lock);
9738 while (!list_empty(&info->unused_bgs)) {
9739 block_group = list_first_entry(&info->unused_bgs,
9740 struct btrfs_block_group_cache,
9741 bg_list);
9742 list_del_init(&block_group->bg_list);
9743 btrfs_put_block_group(block_group);
9744 }
9745 spin_unlock(&info->unused_bgs_lock);
9746
9747 spin_lock(&info->block_group_cache_lock);
9748 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9749 block_group = rb_entry(n, struct btrfs_block_group_cache,
9750 cache_node);
9751 rb_erase(&block_group->cache_node,
9752 &info->block_group_cache_tree);
9753 RB_CLEAR_NODE(&block_group->cache_node);
9754 spin_unlock(&info->block_group_cache_lock);
9755
9756 down_write(&block_group->space_info->groups_sem);
9757 list_del(&block_group->list);
9758 up_write(&block_group->space_info->groups_sem);
9759
9760 if (block_group->cached == BTRFS_CACHE_STARTED)
9761 wait_block_group_cache_done(block_group);
9762
9763 /*
9764 * We haven't cached this block group, which means we could
9765 * possibly have excluded extents on this block group.
9766 */
9767 if (block_group->cached == BTRFS_CACHE_NO ||
9768 block_group->cached == BTRFS_CACHE_ERROR)
9769 free_excluded_extents(info->extent_root, block_group);
9770
9771 btrfs_remove_free_space_cache(block_group);
9772 btrfs_put_block_group(block_group);
9773
9774 spin_lock(&info->block_group_cache_lock);
9775 }
9776 spin_unlock(&info->block_group_cache_lock);
9777
9778 /*
9779 * Now that all the block groups are freed, go through and free all the
9780 * space_info structs. This is only called during the final stages of
9781 * unmount, so we know nobody else is using them; the synchronize_rcu()
9782 * below lets any lockless readers of the space_info list finish first.
9783 */
9784 synchronize_rcu();
9785
9786 release_global_block_rsv(info);
9787
9788 while (!list_empty(&info->space_info)) {
9789 int i;
9790
9791 space_info = list_entry(info->space_info.next,
9792 struct btrfs_space_info,
9793 list);
9794 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
9795 if (WARN_ON(space_info->bytes_pinned > 0 ||
9796 space_info->bytes_reserved > 0 ||
9797 space_info->bytes_may_use > 0)) {
9798 dump_space_info(space_info, 0, 0);
9799 }
9800 }
9801 list_del(&space_info->list);
9802 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9803 struct kobject *kobj;
9804 kobj = space_info->block_group_kobjs[i];
9805 space_info->block_group_kobjs[i] = NULL;
9806 if (kobj) {
9807 kobject_del(kobj);
9808 kobject_put(kobj);
9809 }
9810 }
9811 kobject_del(&space_info->kobj);
9812 kobject_put(&space_info->kobj);
9813 }
9814 return 0;
9815}
9816
9817static void __link_block_group(struct btrfs_space_info *space_info,
9818 struct btrfs_block_group_cache *cache)
9819{
9820 int index = get_block_group_index(cache);
9821 bool first = false;
9822
9823 down_write(&space_info->groups_sem);
9824 if (list_empty(&space_info->block_groups[index]))
9825 first = true;
9826 list_add_tail(&cache->list, &space_info->block_groups[index]);
9827 up_write(&space_info->groups_sem);
9828
9829 if (first) {
9830 struct raid_kobject *rkobj;
9831 int ret;
9832
9833 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9834 if (!rkobj)
9835 goto out_err;
9836 rkobj->raid_type = index;
9837 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9838 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9839 "%s", get_raid_name(index));
9840 if (ret) {
9841 kobject_put(&rkobj->kobj);
9842 goto out_err;
9843 }
9844 space_info->block_group_kobjs[index] = &rkobj->kobj;
9845 }
9846
9847 return;
9848out_err:
9849 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
9850}
9851
9852static struct btrfs_block_group_cache *
9853btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9854{
9855 struct btrfs_block_group_cache *cache;
9856
9857 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9858 if (!cache)
9859 return NULL;
9860
9861 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
9862 GFP_NOFS);
9863 if (!cache->free_space_ctl) {
9864 kfree(cache);
9865 return NULL;
9866 }
9867
9868 cache->key.objectid = start;
9869 cache->key.offset = size;
9870 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9871
9872 cache->sectorsize = root->sectorsize;
9873 cache->fs_info = root->fs_info;
9874 cache->full_stripe_len = btrfs_full_stripe_len(root,
9875 &root->fs_info->mapping_tree,
9876 start);
9877 set_free_space_tree_thresholds(cache);
9878
9879 atomic_set(&cache->count, 1);
9880 spin_lock_init(&cache->lock);
9881 init_rwsem(&cache->data_rwsem);
9882 INIT_LIST_HEAD(&cache->list);
9883 INIT_LIST_HEAD(&cache->cluster_list);
9884 INIT_LIST_HEAD(&cache->bg_list);
9885 INIT_LIST_HEAD(&cache->ro_list);
9886 INIT_LIST_HEAD(&cache->dirty_list);
9887 INIT_LIST_HEAD(&cache->io_list);
9888 btrfs_init_free_space_ctl(cache);
9889 atomic_set(&cache->trimming, 0);
9890 mutex_init(&cache->free_space_lock);
9891
9892 return cache;
9893}
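/*
 * Added note: the cache is returned holding a single reference (the
 * atomic_set(&cache->count, 1) above); error paths in the callers drop
 * it again with btrfs_put_block_group().
 */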
9894
9895int btrfs_read_block_groups(struct btrfs_root *root)
9896{
9897 struct btrfs_path *path;
9898 int ret;
9899 struct btrfs_block_group_cache *cache;
9900 struct btrfs_fs_info *info = root->fs_info;
9901 struct btrfs_space_info *space_info;
9902 struct btrfs_key key;
9903 struct btrfs_key found_key;
9904 struct extent_buffer *leaf;
9905 int need_clear = 0;
9906 u64 cache_gen;
9907
9908 root = info->extent_root;
9909 key.objectid = 0;
9910 key.offset = 0;
9911 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9912 path = btrfs_alloc_path();
9913 if (!path)
9914 return -ENOMEM;
9915 path->reada = READA_FORWARD;
9916
9917 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
9918 if (btrfs_test_opt(root, SPACE_CACHE) &&
9919 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
9920 need_clear = 1;
9921 if (btrfs_test_opt(root, CLEAR_CACHE))
9922 need_clear = 1;
9923
9924 while (1) {
9925 ret = find_first_block_group(root, path, &key);
9926 if (ret > 0)
9927 break;
9928 if (ret != 0)
9929 goto error;
9930
9931 leaf = path->nodes[0];
9932 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9933
9934 cache = btrfs_create_block_group_cache(root, found_key.objectid,
9935 found_key.offset);
9936 if (!cache) {
9937 ret = -ENOMEM;
9938 goto error;
9939 }
9940
9941 if (need_clear) {
9942 /*
9943 * When we mount with an old or explicitly cleared space
9944 * cache, we need to set BTRFS_DC_CLEAR and the dirty flag:
9945 *
9946 * 1) the on-disk v1 cache may be stale, since it was
9947 * written under a different generation;
9948 * 2) marking the group BTRFS_DC_CLEAR invalidates the
9949 * cache now and forces it to be rewritten on the next
9950 * transaction commit.
9951 */
9952 if (btrfs_test_opt(root, SPACE_CACHE))
9953 cache->disk_cache_state = BTRFS_DC_CLEAR;
9954 }
9955
9956 read_extent_buffer(leaf, &cache->item,
9957 btrfs_item_ptr_offset(leaf, path->slots[0]),
9958 sizeof(cache->item));
9959 cache->flags = btrfs_block_group_flags(&cache->item);
9960
9961 key.objectid = found_key.objectid + found_key.offset;
9962 btrfs_release_path(path);
9963
9964 /*
9965 * We need to exclude the super stripes now so that the space
9966 * info has super bytes accounted for, otherwise we'll think
9967 * we have more space than we actually do.
9968 */
9969 ret = exclude_super_stripes(root, cache);
9970 if (ret) {
9971 /*
9972 * We may have excluded something, so call this just in
9973 * case.
9974 */
9975 free_excluded_extents(root, cache);
9976 btrfs_put_block_group(cache);
9977 goto error;
9978 }
9979
9980 /*
9981 * check for two cases, either we are full, and therefore
9982 * don't need to bother with the caching work since we won't
9983 * find any space, or we are empty, and we can just add all
9984 * the space in and be done with it. This saves us a lot of
9985 * time, particularly in the full case.
9986 */
9987 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
9988 cache->last_byte_to_unpin = (u64)-1;
9989 cache->cached = BTRFS_CACHE_FINISHED;
9990 free_excluded_extents(root, cache);
9991 } else if (btrfs_block_group_used(&cache->item) == 0) {
9992 cache->last_byte_to_unpin = (u64)-1;
9993 cache->cached = BTRFS_CACHE_FINISHED;
9994 add_new_free_space(cache, root->fs_info,
9995 found_key.objectid,
9996 found_key.objectid +
9997 found_key.offset);
9998 free_excluded_extents(root, cache);
9999 }
10000
10001 ret = btrfs_add_block_group_cache(root->fs_info, cache);
10002 if (ret) {
10003 btrfs_remove_free_space_cache(cache);
10004 btrfs_put_block_group(cache);
10005 goto error;
10006 }
10007
10008 ret = update_space_info(info, cache->flags, found_key.offset,
10009 btrfs_block_group_used(&cache->item),
10010 &space_info);
10011 if (ret) {
10012 btrfs_remove_free_space_cache(cache);
10013 spin_lock(&info->block_group_cache_lock);
10014 rb_erase(&cache->cache_node,
10015 &info->block_group_cache_tree);
10016 RB_CLEAR_NODE(&cache->cache_node);
10017 spin_unlock(&info->block_group_cache_lock);
10018 btrfs_put_block_group(cache);
10019 goto error;
10020 }
10021
10022 cache->space_info = space_info;
10023 spin_lock(&cache->space_info->lock);
10024 cache->space_info->bytes_readonly += cache->bytes_super;
10025 spin_unlock(&cache->space_info->lock);
10026
10027 __link_block_group(space_info, cache);
10028
10029 set_avail_alloc_bits(root->fs_info, cache->flags);
10030 if (btrfs_chunk_readonly(root, cache->key.objectid)) {
10031 inc_block_group_ro(cache, 1);
10032 } else if (btrfs_block_group_used(&cache->item) == 0) {
10033 spin_lock(&info->unused_bgs_lock);
10034 /* Should always be true but just in case. */
10035 if (list_empty(&cache->bg_list)) {
10036 btrfs_get_block_group(cache);
10037 list_add_tail(&cache->bg_list,
10038 &info->unused_bgs);
10039 }
10040 spin_unlock(&info->unused_bgs_lock);
10041 }
10042 }
10043
10044 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
10045 if (!(get_alloc_profile(root, space_info->flags) &
10046 (BTRFS_BLOCK_GROUP_RAID10 |
10047 BTRFS_BLOCK_GROUP_RAID1 |
10048 BTRFS_BLOCK_GROUP_RAID5 |
10049 BTRFS_BLOCK_GROUP_RAID6 |
10050 BTRFS_BLOCK_GROUP_DUP)))
10051 continue;
10052 /*
10053 * avoid allocating from un-mirrored block group if there
10054 * are mirrored block groups.
10055 */
10056 list_for_each_entry(cache,
10057 &space_info->block_groups[BTRFS_RAID_RAID0],
10058 list)
10059 inc_block_group_ro(cache, 1);
10060 list_for_each_entry(cache,
10061 &space_info->block_groups[BTRFS_RAID_SINGLE],
10062 list)
10063 inc_block_group_ro(cache, 1);
10064 }
10065
10066 init_global_block_rsv(info);
10067 ret = 0;
10068error:
10069 btrfs_free_path(path);
10070 return ret;
10071}
10072
10073void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
10074 struct btrfs_root *root)
10075{
10076 struct btrfs_block_group_cache *block_group, *tmp;
10077 struct btrfs_root *extent_root = root->fs_info->extent_root;
10078 struct btrfs_block_group_item item;
10079 struct btrfs_key key;
10080 int ret = 0;
10081 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
10082
10083 trans->can_flush_pending_bgs = false;
10084 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
10085 if (ret)
10086 goto next;
10087
10088 spin_lock(&block_group->lock);
10089 memcpy(&item, &block_group->item, sizeof(item));
10090 memcpy(&key, &block_group->key, sizeof(key));
10091 spin_unlock(&block_group->lock);
10092
10093 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10094 sizeof(item));
10095 if (ret)
10096 btrfs_abort_transaction(trans, extent_root, ret);
10097 ret = btrfs_finish_chunk_alloc(trans, extent_root,
10098 key.objectid, key.offset);
10099 if (ret)
10100 btrfs_abort_transaction(trans, extent_root, ret);
10101 add_block_group_free_space(trans, root->fs_info, block_group);
10102 /* already aborted the transaction if it failed. */
10103next:
10104 list_del_init(&block_group->bg_list);
10105 }
10106 trans->can_flush_pending_bgs = can_flush_pending_bgs;
10107}
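/*
 * Added commentary: can_flush_pending_bgs is cleared while walking
 * trans->new_bgs because inserting the block group items can itself
 * allocate new chunks, and the transaction code must not try to flush
 * the very list being iterated. A plausible caller-side sketch (assumed,
 * not part of this file):
 *
 *	if (trans->can_flush_pending_bgs && !list_empty(&trans->new_bgs))
 *		btrfs_create_pending_block_groups(trans, root);
 */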
10108
10109int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10110 struct btrfs_root *root, u64 bytes_used,
10111 u64 type, u64 chunk_objectid, u64 chunk_offset,
10112 u64 size)
10113{
10114 int ret;
10115 struct btrfs_root *extent_root;
10116 struct btrfs_block_group_cache *cache;
10117
10118 extent_root = root->fs_info->extent_root;
10119
10120 btrfs_set_log_full_commit(root->fs_info, trans);
10121
10122 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
10123 if (!cache)
10124 return -ENOMEM;
10125
10126 btrfs_set_block_group_used(&cache->item, bytes_used);
10127 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
10128 btrfs_set_block_group_flags(&cache->item, type);
10129
10130 cache->flags = type;
10131 cache->last_byte_to_unpin = (u64)-1;
10132 cache->cached = BTRFS_CACHE_FINISHED;
10133 cache->needs_free_space = 1;
10134 ret = exclude_super_stripes(root, cache);
10135 if (ret) {
10136 /*
10137 * We may have excluded something, so call this just in
10138 * case.
10139 */
10140 free_excluded_extents(root, cache);
10141 btrfs_put_block_group(cache);
10142 return ret;
10143 }
10144
10145 add_new_free_space(cache, root->fs_info, chunk_offset,
10146 chunk_offset + size);
10147
10148 free_excluded_extents(root, cache);
10149
10150#ifdef CONFIG_BTRFS_DEBUG
10151 if (btrfs_should_fragment_free_space(root, cache)) {
10152 u64 new_bytes_used = size - bytes_used;
10153
10154 bytes_used += new_bytes_used >> 1;
10155 fragment_free_space(root, cache);
10156 }
10157#endif
10158
10159 /* Ensure the corresponding space_info object is created and assigned
10160 * to our block group before it is linked into the rbtree; the space
10161 * counters are updated separately, after insertion, below.
10162 */
10163 ret = update_space_info(root->fs_info, cache->flags, 0, 0,
10164 &cache->space_info);
10165 if (ret) {
10166 btrfs_remove_free_space_cache(cache);
10167 btrfs_put_block_group(cache);
10168 return ret;
10169 }
10170
10171 ret = btrfs_add_block_group_cache(root->fs_info, cache);
10172 if (ret) {
10173 btrfs_remove_free_space_cache(cache);
10174 btrfs_put_block_group(cache);
10175 return ret;
10176 }
10177
10178 /*
10179 * Now that our block group has its ->space_info set and is inserted in
10180 * the rbtree, update the space info's counters.
10181 */
10182 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
10183 &cache->space_info);
10184 if (ret) {
10185 btrfs_remove_free_space_cache(cache);
10186 spin_lock(&root->fs_info->block_group_cache_lock);
10187 rb_erase(&cache->cache_node,
10188 &root->fs_info->block_group_cache_tree);
10189 RB_CLEAR_NODE(&cache->cache_node);
10190 spin_unlock(&root->fs_info->block_group_cache_lock);
10191 btrfs_put_block_group(cache);
10192 return ret;
10193 }
10194 update_global_block_rsv(root->fs_info);
10195
10196 spin_lock(&cache->space_info->lock);
10197 cache->space_info->bytes_readonly += cache->bytes_super;
10198 spin_unlock(&cache->space_info->lock);
10199
10200 __link_block_group(cache->space_info, cache);
10201
10202 list_add_tail(&cache->bg_list, &trans->new_bgs);
10203
10204 set_avail_alloc_bits(extent_root->fs_info, type);
10205
10206 return 0;
10207}
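/*
 * Added note: the new block group is only queued on trans->new_bgs here;
 * its item reaches the extent tree later, when
 * btrfs_create_pending_block_groups() runs for this transaction.
 */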
10208
10209static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10210{
10211 u64 extra_flags = chunk_to_extended(flags) &
10212 BTRFS_EXTENDED_PROFILE_MASK;
10213
10214 write_seqlock(&fs_info->profiles_lock);
10215 if (flags & BTRFS_BLOCK_GROUP_DATA)
10216 fs_info->avail_data_alloc_bits &= ~extra_flags;
10217 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10218 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10219 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10220 fs_info->avail_system_alloc_bits &= ~extra_flags;
10221 write_sequnlock(&fs_info->profiles_lock);
10222}
10223
10224int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10225 struct btrfs_root *root, u64 group_start,
10226 struct extent_map *em)
10227{
10228 struct btrfs_path *path;
10229 struct btrfs_block_group_cache *block_group;
10230 struct btrfs_free_cluster *cluster;
10231 struct btrfs_root *tree_root = root->fs_info->tree_root;
10232 struct btrfs_key key;
10233 struct inode *inode;
10234 struct kobject *kobj = NULL;
10235 int ret;
10236 int index;
10237 int factor;
10238 struct btrfs_caching_control *caching_ctl = NULL;
10239 bool remove_em;
10240
10241 root = root->fs_info->extent_root;
10242
10243 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
10244 BUG_ON(!block_group);
10245 BUG_ON(!block_group->ro);
10246
10247 /*
10248 * Free the reserved super bytes from this block group before
10249 * removing it.
10250 */
10251 free_excluded_extents(root, block_group);
10252
10253 memcpy(&key, &block_group->key, sizeof(key));
10254 index = get_block_group_index(block_group);
10255 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
10256 BTRFS_BLOCK_GROUP_RAID1 |
10257 BTRFS_BLOCK_GROUP_RAID10))
10258 factor = 2;
10259 else
10260 factor = 1;
10261
10262 /* make sure this block group isn't part of an allocation cluster */
10263 cluster = &root->fs_info->data_alloc_cluster;
10264 spin_lock(&cluster->refill_lock);
10265 btrfs_return_cluster_to_free_space(block_group, cluster);
10266 spin_unlock(&cluster->refill_lock);
10267
10268 /*
10269 * make sure this block group isn't part of a metadata
10270 * allocation cluster
10271 */
10272 cluster = &root->fs_info->meta_alloc_cluster;
10273 spin_lock(&cluster->refill_lock);
10274 btrfs_return_cluster_to_free_space(block_group, cluster);
10275 spin_unlock(&cluster->refill_lock);
10276
10277 path = btrfs_alloc_path();
10278 if (!path) {
10279 ret = -ENOMEM;
10280 goto out;
10281 }
10282
10283 /*
10284 * get the inode first so any iput calls done for the io_list
10285 * aren't the final iput (no unlinks allowed now)
10286 */
10287 inode = lookup_free_space_inode(tree_root, block_group, path);
10288
10289 mutex_lock(&trans->transaction->cache_write_mutex);
10290 /*
10291 * make sure our free space cache IO is done before removing the
10292 * free space inode
10293 */
10294 spin_lock(&trans->transaction->dirty_bgs_lock);
10295 if (!list_empty(&block_group->io_list)) {
10296 list_del_init(&block_group->io_list);
10297
10298 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10299
10300 spin_unlock(&trans->transaction->dirty_bgs_lock);
10301 btrfs_wait_cache_io(root, trans, block_group,
10302 &block_group->io_ctl, path,
10303 block_group->key.objectid);
10304 btrfs_put_block_group(block_group);
10305 spin_lock(&trans->transaction->dirty_bgs_lock);
10306 }
10307
10308 if (!list_empty(&block_group->dirty_list)) {
10309 list_del_init(&block_group->dirty_list);
10310 btrfs_put_block_group(block_group);
10311 }
10312 spin_unlock(&trans->transaction->dirty_bgs_lock);
10313 mutex_unlock(&trans->transaction->cache_write_mutex);
10314
10315 if (!IS_ERR(inode)) {
10316 ret = btrfs_orphan_add(trans, inode);
10317 if (ret) {
10318 btrfs_add_delayed_iput(inode);
10319 goto out;
10320 }
10321 clear_nlink(inode);
10322
10323 spin_lock(&block_group->lock);
10324 if (block_group->iref) {
10325 block_group->iref = 0;
10326 block_group->inode = NULL;
10327 spin_unlock(&block_group->lock);
10328 iput(inode);
10329 } else {
10330 spin_unlock(&block_group->lock);
10331 }
10332
10333 btrfs_add_delayed_iput(inode);
10334 }
10335
10336 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10337 key.offset = block_group->key.objectid;
10338 key.type = 0;
10339
10340 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10341 if (ret < 0)
10342 goto out;
10343 if (ret > 0)
10344 btrfs_release_path(path);
10345 if (ret == 0) {
10346 ret = btrfs_del_item(trans, tree_root, path);
10347 if (ret)
10348 goto out;
10349 btrfs_release_path(path);
10350 }
10351
10352 spin_lock(&root->fs_info->block_group_cache_lock);
10353 rb_erase(&block_group->cache_node,
10354 &root->fs_info->block_group_cache_tree);
10355 RB_CLEAR_NODE(&block_group->cache_node);
10356
10357 if (root->fs_info->first_logical_byte == block_group->key.objectid)
10358 root->fs_info->first_logical_byte = (u64)-1;
10359 spin_unlock(&root->fs_info->block_group_cache_lock);
10360
10361 down_write(&block_group->space_info->groups_sem);
10362 /*
10363 * we must use list_del_init so people can check to see if they
10364 * are still on the list after taking the semaphore
10365 */
10366 list_del_init(&block_group->list);
10367 if (list_empty(&block_group->space_info->block_groups[index])) {
10368 kobj = block_group->space_info->block_group_kobjs[index];
10369 block_group->space_info->block_group_kobjs[index] = NULL;
10370 clear_avail_alloc_bits(root->fs_info, block_group->flags);
10371 }
10372 up_write(&block_group->space_info->groups_sem);
10373 if (kobj) {
10374 kobject_del(kobj);
10375 kobject_put(kobj);
10376 }
10377
10378 if (block_group->has_caching_ctl)
10379 caching_ctl = get_caching_control(block_group);
10380 if (block_group->cached == BTRFS_CACHE_STARTED)
10381 wait_block_group_cache_done(block_group);
10382 if (block_group->has_caching_ctl) {
10383 down_write(&root->fs_info->commit_root_sem);
10384 if (!caching_ctl) {
10385 struct btrfs_caching_control *ctl;
10386
10387 list_for_each_entry(ctl,
10388 &root->fs_info->caching_block_groups, list)
10389 if (ctl->block_group == block_group) {
10390 caching_ctl = ctl;
10391 atomic_inc(&caching_ctl->count);
10392 break;
10393 }
10394 }
10395 if (caching_ctl)
10396 list_del_init(&caching_ctl->list);
10397 up_write(&root->fs_info->commit_root_sem);
10398 if (caching_ctl) {
10399 /* Once for the caching bgs list and once for us. */
10400 put_caching_control(caching_ctl);
10401 put_caching_control(caching_ctl);
10402 }
10403 }
10404
10405 spin_lock(&trans->transaction->dirty_bgs_lock);
10406 WARN_ON(!list_empty(&block_group->dirty_list));
10409 WARN_ON(!list_empty(&block_group->io_list));
10412 spin_unlock(&trans->transaction->dirty_bgs_lock);
10413 btrfs_remove_free_space_cache(block_group);
10414
10415 spin_lock(&block_group->space_info->lock);
10416 list_del_init(&block_group->ro_list);
10417
10418 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
10419 WARN_ON(block_group->space_info->total_bytes
10420 < block_group->key.offset);
10421 WARN_ON(block_group->space_info->bytes_readonly
10422 < block_group->key.offset);
10423 WARN_ON(block_group->space_info->disk_total
10424 < block_group->key.offset * factor);
10425 }
10426 block_group->space_info->total_bytes -= block_group->key.offset;
10427 block_group->space_info->bytes_readonly -= block_group->key.offset;
10428 block_group->space_info->disk_total -= block_group->key.offset * factor;
10429
10430 spin_unlock(&block_group->space_info->lock);
10431
10432 memcpy(&key, &block_group->key, sizeof(key));
10433
10434 lock_chunks(root);
10435 if (!list_empty(&em->list)) {
10436 /* We're in the transaction->pending_chunks list. */
10437 free_extent_map(em);
10438 }
10439 spin_lock(&block_group->lock);
10440 block_group->removed = 1;
10441
10442 /*
10443 * At this point trimming can't start on this block group, because we
10444 * removed the block group from the fs_info->block_group_cache_tree
10445 * rbtree, so no one can find it anymore; and anyone who already got
10446 * this block group before we removed it has already incremented
10447 * block_group->trimming - if they didn't, they won't find any free
10448 * space entries, because we already removed them all when we called
10449 * btrfs_remove_free_space_cache().
10450 *
10451 * And we must not remove the extent map from the fs_info->mapping_tree
10452 * to prevent the same logical address range and physical device space
10453 * ranges from being reused for a new block group. This is because our
10454 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
10455 * completely transactionless, so while it is trimming a range the
10456 * currently running transaction might finish and a new one start,
10457 * allowing for new block groups to be created that can reuse the same
10458 * physical device locations unless we take this special care.
10459 *
10460 * There may also be an implicit trim operation if the file system is
10461 * mounted with -odiscard; the same protections must remain in place
10462 * until the extents have been discarded completely at commit time.
10463 */
10464 remove_em = (atomic_read(&block_group->trimming) == 0);
10465
10466 /*
10467 * Make sure a trimmer task always sees the em in the pinned_chunks list
10468 * if it sees block_group->removed == 1 (needs block_group->lock held).
10469 */
10470 if (!remove_em) {
10471 /*
10472 * A task is still trimming this block group, so the extent map must
10473 * stay alive and findable. Move it to fs_info->pinned_chunks (we hold
10474 * the chunk mutex here); it is dropped once the trimming task, or the
10475 * transaction commit for the -odiscard case, is done with it.
10476 */
10482 list_move_tail(&em->list, &root->fs_info->pinned_chunks);
10483 }
10484 spin_unlock(&block_group->lock);
10485
10486 if (remove_em) {
10487 struct extent_map_tree *em_tree;
10488
10489 em_tree = &root->fs_info->mapping_tree.map_tree;
10490 write_lock(&em_tree->lock);
10491 /*
10492 * The em might be in the pending_chunks list, so make sure the
10493 * chunk mutex is locked, since remove_extent_mapping() will
10494 * delete us from that list.
10495 */
10496 remove_extent_mapping(em_tree, em);
10497 write_unlock(&em_tree->lock);
10498
10499 free_extent_map(em);
10500 }
10501
10502 unlock_chunks(root);
10503
10504 ret = remove_block_group_free_space(trans, root->fs_info, block_group);
10505 if (ret)
10506 goto out;
10507
10508 btrfs_put_block_group(block_group);
10509 btrfs_put_block_group(block_group);
10510
10511 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10512 if (ret > 0)
10513 ret = -EIO;
10514 if (ret < 0)
10515 goto out;
10516
10517 ret = btrfs_del_item(trans, root, path);
10518out:
10519 btrfs_free_path(path);
10520 return ret;
10521}
10522
10523struct btrfs_trans_handle *
10524btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10525 const u64 chunk_offset)
10526{
10527 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10528 struct extent_map *em;
10529 struct map_lookup *map;
10530 unsigned int num_items;
10531
10532 read_lock(&em_tree->lock);
10533 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10534 read_unlock(&em_tree->lock);
10535 ASSERT(em && em->start == chunk_offset);
10536
10537 /*
10538 * We need to reserve 3 + N units from the metadata space info in order
10539 * to remove a block group (done at btrfs_remove_chunk() and at
10540 * btrfs_remove_block_group()), which are used for:
10541 *
10542 * 1 unit for adding the free space inode's orphan (located in the tree
10543 * of tree roots).
10544 * 1 unit for deleting the block group item (located in the extent
10545 * tree).
10546 * 1 unit for deleting the free space item (located in tree of tree
10547 * roots).
10548 * N units for deleting N device extent items corresponding to each
10549 * stripe (located in the device tree).
10550 *
10551 * In order to remove a block group we also need to reserve units in the
10552 * system space info in order to update the chunk tree (update one or
10553 * more device items and remove one chunk item), but this is done at
10554 * btrfs_remove_chunk() through a call to check_system_chunk().
10555 */
10556 map = em->map_lookup;
10557 num_items = 3 + map->num_stripes;
10558 free_extent_map(em);
10559
10560 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10561 num_items, 1);
10562}
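/*
 * Added example: for a chunk striped over two devices (map->num_stripes
 * == 2), num_items = 3 + 2 = 5 metadata units are reserved before the
 * block group removal starts.
 */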
10563
10564 /*
10565 * Process the unused_bgs list and remove any block groups that don't
10566 * have any allocated space inside of them.
10567 */
10568void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10569{
10570 struct btrfs_block_group_cache *block_group;
10571 struct btrfs_space_info *space_info;
10572 struct btrfs_root *root = fs_info->extent_root;
10573 struct btrfs_trans_handle *trans;
10574 int ret = 0;
10575
10576 if (!fs_info->open)
10577 return;
10578
10579 spin_lock(&fs_info->unused_bgs_lock);
10580 while (!list_empty(&fs_info->unused_bgs)) {
10581 u64 start, end;
10582 int trimming;
10583
10584 block_group = list_first_entry(&fs_info->unused_bgs,
10585 struct btrfs_block_group_cache,
10586 bg_list);
10587 list_del_init(&block_group->bg_list);
10588
10589 space_info = block_group->space_info;
10590
10591 if (ret || btrfs_mixed_space_info(space_info)) {
10592 btrfs_put_block_group(block_group);
10593 continue;
10594 }
10595 spin_unlock(&fs_info->unused_bgs_lock);
10596
10597 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10598
10599 /* Don't want to race with allocators so take the groups_sem */
10600 down_write(&space_info->groups_sem);
10601 spin_lock(&block_group->lock);
10602 if (block_group->reserved ||
10603 btrfs_block_group_used(&block_group->item) ||
10604 block_group->ro ||
10605 list_is_singular(&block_group->list)) {
10606 /*
10607 * We want to bail if we made new allocations or have
10608 * outstanding allocations in this block group. We do
10609 * the ro check in case balance is currently acting on
10610 * this block group.
10611 */
10612 spin_unlock(&block_group->lock);
10613 up_write(&space_info->groups_sem);
10614 goto next;
10615 }
10616 spin_unlock(&block_group->lock);
10617
10618 /* We don't want to force the issue, only flip if it's ok. */
10619 ret = inc_block_group_ro(block_group, 0);
10620 up_write(&space_info->groups_sem);
10621 if (ret < 0) {
10622 ret = 0;
10623 goto next;
10624 }
10625
10626 /*
10627 * Want to do this before we do anything else so we can recover
10628 * properly if we fail to join the transaction.
10629 */
10630 trans = btrfs_start_trans_remove_block_group(fs_info,
10631 block_group->key.objectid);
10632 if (IS_ERR(trans)) {
10633 btrfs_dec_block_group_ro(root, block_group);
10634 ret = PTR_ERR(trans);
10635 goto next;
10636 }
10637
10638 /*
10639 * We could have pending pinned extents for this block group,
10640 * just delete them, we don't care about them anymore.
10641 */
10642 start = block_group->key.objectid;
10643 end = start + block_group->key.offset - 1;
10644
10645 /*
10646 * Hold the unused_bg_unpin_mutex lock to avoid racing with
10647 * btrfs_finish_extent_commit(). If we are at transaction N, another
10648 * task might be running finish_extent_commit() for the previous
10649 * transaction N - 1, and have seen a range belonging to the block
10650 * group in freed_extents[] before we were able to clear the whole
10651 * block group range from freed_extents[]. This means that task can
10652 * look up the block group after we unpinned it from freed_extents[]
10653 * and removed it, leading to a BUG_ON() at unpin_extent_range().
10654 */
10655 mutex_lock(&fs_info->unused_bg_unpin_mutex);
10656 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
10657 EXTENT_DIRTY);
10658 if (ret) {
10659 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10660 btrfs_dec_block_group_ro(root, block_group);
10661 goto end_trans;
10662 }
10663 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
10664 EXTENT_DIRTY);
10665 if (ret) {
10666 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10667 btrfs_dec_block_group_ro(root, block_group);
10668 goto end_trans;
10669 }
10670 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10671
10672 /* Reset pinned so btrfs_put_block_group() doesn't complain */
10673 spin_lock(&space_info->lock);
10674 spin_lock(&block_group->lock);
10675
10676 space_info->bytes_pinned -= block_group->pinned;
10677 space_info->bytes_readonly += block_group->pinned;
10678 percpu_counter_add(&space_info->total_bytes_pinned,
10679 -block_group->pinned);
10680 block_group->pinned = 0;
10681
10682 spin_unlock(&block_group->lock);
10683 spin_unlock(&space_info->lock);
10684
10685 /* DISCARD can flip during remount */
10686 trimming = btrfs_test_opt(root, DISCARD);
10687
10688 /* Implicit trim during transaction commit. */
10689 if (trimming)
10690 btrfs_get_block_group_trimming(block_group);
10691
10692 /*
10693 * btrfs_remove_chunk() will abort the transaction if things go
10694 * horribly wrong.
10695 */
10696 ret = btrfs_remove_chunk(trans, root,
10697 block_group->key.objectid);
10698
10699 if (ret) {
10700 if (trimming)
10701 btrfs_put_block_group_trimming(block_group);
10702 goto end_trans;
10703 }
10704
10705 /*
10706 * If we're not mounted with -odiscard, we can just forget
10707 * about this block group. Otherwise we'll need to wait
10708 * until transaction commit to do the actual discard.
10709 */
10710 if (trimming) {
10711 spin_lock(&fs_info->unused_bgs_lock);
10712 /*
10713 * A concurrent scrub might have added us to the list
10714 * fs_info->unused_bgs, so use a list_move operation
10715 * to add the block group to the deleted_bgs list.
10716 */
10717 list_move(&block_group->bg_list,
10718 &trans->transaction->deleted_bgs);
10719 spin_unlock(&fs_info->unused_bgs_lock);
10720 btrfs_get_block_group(block_group);
10721 }
10722end_trans:
10723 btrfs_end_transaction(trans, root);
10724next:
10725 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
10726 btrfs_put_block_group(block_group);
10727 spin_lock(&fs_info->unused_bgs_lock);
10728 }
10729 spin_unlock(&fs_info->unused_bgs_lock);
10730}
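/*
 * Added note (assumption about callers): this is expected to run from the
 * cleaner thread, which is why unused_bgs_lock is re-taken on every
 * iteration and why delete_unused_bgs_mutex lets operations such as
 * device shrink or replace temporarily hold off the removal work.
 */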
10731
10732int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10733{
10734 struct btrfs_space_info *space_info;
10735 struct btrfs_super_block *disk_super;
10736 u64 features;
10737 u64 flags;
10738 int mixed = 0;
10739 int ret;
10740
10741 disk_super = fs_info->super_copy;
10742 if (!btrfs_super_root(disk_super))
10743 return -EINVAL;
10744
10745 features = btrfs_super_incompat_flags(disk_super);
10746 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10747 mixed = 1;
10748
10749 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10750 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10751 if (ret)
10752 goto out;
10753
10754 if (mixed) {
10755 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10756 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10757 } else {
10758 flags = BTRFS_BLOCK_GROUP_METADATA;
10759 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10760 if (ret)
10761 goto out;
10762
10763 flags = BTRFS_BLOCK_GROUP_DATA;
10764 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10765 }
10766out:
10767 return ret;
10768}
10769
10770int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
10771{
10772 return unpin_extent_range(root, start, end, false);
10773}
10774
10775 /*
10776 * It used to be that old block groups would be left around forever.
10777 * Iterating over them would be enough to trim unused space. Since we
10778 * now automatically remove them, we also need to iterate over
10779 * unallocated space.
10780 *
10781 * We don't want a transaction for this since the discard may take a
10782 * substantial amount of time. We don't require that a transaction be
10783 * running, but we do need to take a running transaction into account
10784 * to ensure that we're not discarding chunks that were released or
10785 * allocated in the current transaction.
10786 *
10787 * Holding the chunks lock will prevent other threads from allocating
10788 * or releasing chunks, but it won't prevent a running transaction
10789 * from committing and releasing the memory that the pending chunks
10790 * list head uses. For that, we take a reference to the transaction
10791 * and hold the commit root sem while performing the free space search.
10792 */
10793static int btrfs_trim_free_extents(struct btrfs_device *device,
10794 u64 minlen, u64 *trimmed)
10795{
10796 u64 start = 0, len = 0;
10797 int ret;
10798
10799 *trimmed = 0;
10800
10801 /* Not writeable = nothing to do. */
10802 if (!device->writeable)
10803 return 0;
10804
10805 /* No free space = nothing to do. */
10806 if (device->total_bytes <= device->bytes_used)
10807 return 0;
10808
10809 ret = 0;
10810
10811 while (1) {
10812 struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
10813 struct btrfs_transaction *trans;
10814 u64 bytes;
10815
10816 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
10817 if (ret)
10818 return ret;
10819
10820 down_read(&fs_info->commit_root_sem);
10821
10822 spin_lock(&fs_info->trans_lock);
10823 trans = fs_info->running_transaction;
10824 if (trans)
10825 atomic_inc(&trans->use_count);
10826 spin_unlock(&fs_info->trans_lock);
10827
10828 ret = find_free_dev_extent_start(trans, device, minlen, start,
10829 &start, &len);
10830 if (trans)
10831 btrfs_put_transaction(trans);
10832
10833 if (ret) {
10834 up_read(&fs_info->commit_root_sem);
10835 mutex_unlock(&fs_info->chunk_mutex);
10836 if (ret == -ENOSPC)
10837 ret = 0;
10838 break;
10839 }
10840
10841 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
10842 up_read(&fs_info->commit_root_sem);
10843 mutex_unlock(&fs_info->chunk_mutex);
10844
10845 if (ret)
10846 break;
10847
10848 start += len;
10849 *trimmed += bytes;
10850
10851 if (fatal_signal_pending(current)) {
10852 ret = -ERESTARTSYS;
10853 break;
10854 }
10855
10856 cond_resched();
10857 }
10858
10859 return ret;
10860}
10861
10862int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10863{
10864 struct btrfs_fs_info *fs_info = root->fs_info;
10865 struct btrfs_block_group_cache *cache = NULL;
10866 struct btrfs_device *device;
10867 struct list_head *devices;
10868 u64 group_trimmed;
10869 u64 start;
10870 u64 end;
10871 u64 trimmed = 0;
10872 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10873 int ret = 0;
10874
10875 /*
10876 * try to trim all FS space, our block group may start from non-zero.
10877 */
10878 if (range->len == total_bytes)
10879 cache = btrfs_lookup_first_block_group(fs_info, range->start);
10880 else
10881 cache = btrfs_lookup_block_group(fs_info, range->start);
10882
10883 while (cache) {
10884 if (cache->key.objectid >= (range->start + range->len)) {
10885 btrfs_put_block_group(cache);
10886 break;
10887 }
10888
10889 start = max(range->start, cache->key.objectid);
10890 end = min(range->start + range->len,
10891 cache->key.objectid + cache->key.offset);
10892
10893 if (end - start >= range->minlen) {
10894 if (!block_group_cache_done(cache)) {
10895 ret = cache_block_group(cache, 0);
10896 if (ret) {
10897 btrfs_put_block_group(cache);
10898 break;
10899 }
10900 ret = wait_block_group_cache_done(cache);
10901 if (ret) {
10902 btrfs_put_block_group(cache);
10903 break;
10904 }
10905 }
10906 ret = btrfs_trim_block_group(cache,
10907 &group_trimmed,
10908 start,
10909 end,
10910 range->minlen);
10911
10912 trimmed += group_trimmed;
10913 if (ret) {
10914 btrfs_put_block_group(cache);
10915 break;
10916 }
10917 }
10918
10919 cache = next_block_group(fs_info->tree_root, cache);
10920 }
10921
10922 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
10923 devices = &root->fs_info->fs_devices->alloc_list;
10924 list_for_each_entry(device, devices, dev_alloc_list) {
10925 ret = btrfs_trim_free_extents(device, range->minlen,
10926 &group_trimmed);
10927 if (ret)
10928 break;
10929
10930 trimmed += group_trimmed;
10931 }
10932 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
10933
10934 range->len = trimmed;
10935 return ret;
10936}
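/*
 * For reference (added commentary): this is the backend of the FITRIM
 * ioctl. A minimal userspace sketch, assuming fd is open on the mounted
 * filesystem and with error handling omitted:
 *
 *	struct fstrim_range range = {
 *		.start = 0, .len = ULLONG_MAX, .minlen = 0,
 *	};
 *	if (ioctl(fd, FITRIM, &range) == 0)
 *		printf("trimmed %llu bytes\n", range.len);
 *
 * On return, range->len is overwritten with the number of bytes trimmed.
 */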
10937
10938 /*
10939 * btrfs_{start,end}_write_no_snapshoting() are similar to
10940 * mnt_{want,drop}_write(), they are used to prevent some tasks from
10941 * writing data into the page cache through nocow before the subvolume
10942 * is snapshoted, but flush the data into disk after the snapshot
10943 * creation, or to prevent nocow writes while the subvolume is being
10944 * snapshoted.
10945 */
10946void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
10947{
10948 percpu_counter_dec(&root->subv_writers->counter);
10949 /*
10950 * Make sure counter is updated before we wake up waiters.
10951 */
10952 smp_mb();
10953 if (waitqueue_active(&root->subv_writers->wait))
10954 wake_up(&root->subv_writers->wait);
10955}
10956
10957int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
10958{
10959 if (atomic_read(&root->will_be_snapshoted))
10960 return 0;
10961
10962 percpu_counter_inc(&root->subv_writers->counter);
10963 /*
10964 * Make sure counter is updated before we check for snapshot creation.
10965 */
10966 smp_mb();
10967 if (atomic_read(&root->will_be_snapshoted)) {
10968 btrfs_end_write_no_snapshoting(root);
10969 return 0;
10970 }
10971 return 1;
10972}
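/*
 * Added usage note: callers treat a zero return as "a snapshot is being
 * created, fall back to the COW path"; on a nonzero return they must pair
 * this call with btrfs_end_write_no_snapshoting() once the nocow write
 * finishes.
 */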
10973
10974static int wait_snapshoting_atomic_t(atomic_t *a)
10975{
10976 schedule();
10977 return 0;
10978}
10979
10980void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
10981{
10982 while (true) {
10983 int ret;
10984
10985 ret = btrfs_start_write_no_snapshoting(root);
10986 if (ret)
10987 break;
10988 wait_on_atomic_t(&root->will_be_snapshoted,
10989 wait_snapshoting_atomic_t,
10990 TASK_UNINTERRUPTIBLE);
10991 }
10992}
10993