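/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */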
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"

#undef SCRAMBLE_DELAYED_REFS

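/*
 * Control flags for do_chunk_alloc's force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 */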
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

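/*
 * Control how reservations are dealt with.
 *
 * RESERVE_FREE - freeing a reservation.
 * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
 *   ENOSPC accounting
 * RESERVE_ALLOC_NO_ACCOUNT - we are allocating space, but we won't update
 *   bytes_may_use as the ENOSPC accounting is done elsewhere
 */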
enum {
	RESERVE_FREE = 0,
	RESERVE_ALLOC = 1,
	RESERVE_ALLOC_NO_ACCOUNT = 2,
};

static int update_block_group(struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op,
			       int no_quota);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins,
				     int no_quota);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve,
				       int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

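/*
 * This adds the block group to the fs_info rb tree for the block group cache
 */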
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

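/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */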
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

static int add_excluded_extent(struct btrfs_root *root,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&root->fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	set_extent_bits(&root->fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	return 0;
}

static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
}

static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_STARTED) {
		spin_unlock(&cache->lock);
		return NULL;
	}
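
	/* We're loading it the fast way, so we don't have a caching_ctl. */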
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	atomic_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (atomic_dec_and_test(&ctl->count))
		kfree(ctl);
}

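/*
 * This is only called by cache_block_group; since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet,
 * because their free space will be released as soon as the transaction
 * commits.
 */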
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret = -ENOMEM;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
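
	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it's read-only.
	 */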
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 1;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	mutex_lock(&caching_ctl->mutex);
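	/* need to make sure the commit_root doesn't disappear */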
	down_read(&fs_info->commit_root_sem);

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				goto again;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto err;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->tree_root->leafsize;
			else
				last = key.objectid + key.offset;

			if (total_found > (1024 * 1024 * 2)) {
				total_found = 0;
				wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->commit_root_sem);

	free_excluded_extents(extent_root, block_group);

	mutex_unlock(&caching_ctl->mutex);
out:
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->caching_ctl = NULL;
		block_group->cached = BTRFS_CACHE_ERROR;
		spin_unlock(&block_group->lock);
	}
	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);

	spin_lock(&cache->lock);
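	/*
	 * This should be a rare occasion, but this could happen I think in the
	 * case where one thread starts to load the space cache info, and then
	 * some other thread starts a transaction commit which tries to do an
	 * allocation while the other thread is still loading the space cache
	 * info.  The previous loop should have kept us from choosing this block
	 * group, but if we've moved to the state where we will wait on caching
	 * block groups we need to first check if we're doing a fast load here,
	 * so we can wait for it to finish, otherwise we could end up allocating
	 * from a block group whose cache gets evicted for one reason or
	 * another.
	 */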
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
			}
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	} else {
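		/*
		 * We are not going to do the fast caching, set cached to the
		 * appropriate value and wakeup any waiters.
		 */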
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}

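/*
 * Return the block group that starts at or after bytenr
 */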
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

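/*
 * Return the block group that contains the given bytenr
 */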
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

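/*
 * After adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */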
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

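/* simple helper to search for an existing extent at a given offset */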
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	if (ret > 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == start &&
		    key.type == BTRFS_METADATA_ITEM_KEY)
			ret = 0;
	}
	btrfs_free_path(path);
	return ret;
}

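/*
 * Helper function to look up the reference count and flags of a tree block.
 *
 * The head node for a delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. The head node may
 * also store the extent flags to set. This way you can check to see what the
 * reference count and extent flags would be if all of the delayed refs are
 * not processed.
 */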
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

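	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */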
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->leafsize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == root->leafsize)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = root->leafsize;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);
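
			/*
			 * Mutex was contended, block until it's released and
			 * try again
			 */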
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
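
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  Implicit back refs are optimized for
 * pointers in non-shared tree blocks: for a given pointer in a block, they
 * record the block's owner tree and the pointer's key, which is enough to
 * find the block again by searching the fs tree, and to COW a leaf or node
 * without breaking sharing with other trees.  Full back refs instead record
 * the location of the parent tree block; they are generic but more costly,
 * since every COW of a block must update the back refs for all pointers in
 * it.
 *
 * A newly allocated tree block uses implicit back refs for the pointers in
 * it.  When a block loses its owner tree's reference (which can only happen
 * through COW), the implicit back refs are converted to full back refs.
 *
 * File extents use implicit back refs keyed by (root objectid, inode
 * objectid, file offset); the back ref key offset is a hash of those three
 * fields.  Tree blocks use either the owner tree's objectid (implicit) or
 * the first byte of the parent block (full) as the back ref key offset.
 */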
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0); /* Corruption */
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret); /* Corruption */

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

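/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */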
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

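	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */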
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

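	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */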
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
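		/*
		 * To add a new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add a new inline back
		 * ref if there is any kind of item for this block.
		 */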
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

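/*
 * helper to add new inline back ref
 */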
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(root, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

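/*
 * helper to update/remove inline back ref
 */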
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(root, path, iref,
					     refs_to_add, extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(root, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(root, path, iref,
					     -refs_to_drop, NULL, last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

static int btrfs_issue_discard(struct block_device *bdev,
			       u64 start, u64 len)
{
	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
}

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

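	/* Tell the block device(s) that the sectors can be discarded */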
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);
	/* Error condition is -ENOMEM */
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length);
			if (!ret)
				discarded_bytes += stripe->length;
			else if (ret != -EOPNOTSUPP)
				break;
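
			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */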
			ret = 0;
		}
		kfree(bbio);
	}

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

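/* Can return -ENOMEM */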
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset,
			 int no_quota)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, owner, offset,
					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  int no_quota,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 refs;
	int ret;
	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
		no_quota = 1;

	path->reada = 1;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
		goto out;
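
	/*
	 * Ok, we were able to insert an inline extent and it appears to be a
	 * new reference, so deal with the qgroup accounting.
	 */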
	if (!ret && !no_quota) {
		ASSERT(root->fs_info->quota_enabled);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_item);
		if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
			type = BTRFS_QGROUP_OPER_ADD_SHARED;
		btrfs_release_path(path);

		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
					      bytenr, num_bytes, type, 0);
		goto out;
	}
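
	/*
	 * Ok, we had -EAGAIN, which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */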
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	if (refs)
		type = BTRFS_QGROUP_OPER_ADD_SHARED;
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	if (!no_quota) {
		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
					      bytenr, num_bytes, type, 0);
		if (ret)
			goto out;
	}

	path->reada = 1;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     node->no_quota, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op, node->no_quota);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == node->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == node->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = node->bytenr;
				key.offset = node->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins,
						node->no_quota);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent, ref_root,
					     ref->level, 0, 1, node->no_quota,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent, ref_root,
					  ref->level, 0, 1, extent_op,
					  node->no_quota);
	} else {
		BUG();
	}
	return ret;
}

/* helper function to actually process a single delayed ref entry */
2286static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2287 struct btrfs_root *root,
2288 struct btrfs_delayed_ref_node *node,
2289 struct btrfs_delayed_extent_op *extent_op,
2290 int insert_reserved)
2291{
2292 int ret = 0;
2293
2294 if (trans->aborted) {
2295 if (insert_reserved)
2296 btrfs_pin_extent(root, node->bytenr,
2297 node->num_bytes, 1);
2298 return 0;
2299 }
2300
2301 if (btrfs_delayed_ref_is_head(node)) {
2302 struct btrfs_delayed_ref_head *head;
		/*
		 * we've hit the end of the chain and we were supposed
		 * to insert this extent into the tree.  But, it got
		 * deleted before we ever needed to insert it, so all
		 * we have to do is clean up the accounting
		 */
2309 BUG_ON(extent_op);
2310 head = btrfs_delayed_node_to_head(node);
2311 trace_run_delayed_ref_head(node, head, node->action);
2312
2313 if (insert_reserved) {
2314 btrfs_pin_extent(root, node->bytenr,
2315 node->num_bytes, 1);
2316 if (head->is_data) {
2317 ret = btrfs_del_csums(trans, root,
2318 node->bytenr,
2319 node->num_bytes);
2320 }
2321 }
2322 return ret;
2323 }
2324
2325 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2326 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2327 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2328 insert_reserved);
2329 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2330 node->type == BTRFS_SHARED_DATA_REF_KEY)
2331 ret = run_delayed_data_ref(trans, root, node, extent_op,
2332 insert_reserved);
2333 else
2334 BUG();
2335 return ret;
2336}
2337
2338static noinline struct btrfs_delayed_ref_node *
2339select_delayed_ref(struct btrfs_delayed_ref_head *head)
2340{
2341 struct rb_node *node;
	struct btrfs_delayed_ref_node *ref, *last = NULL;
2343
	/*
	 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * this prevents ref count from going down to zero when
	 * there still are pending delayed refs.
	 */
2349 node = rb_first(&head->ref_root);
2350 while (node) {
2351 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2352 rb_node);
2353 if (ref->action == BTRFS_ADD_DELAYED_REF)
2354 return ref;
2355 else if (last == NULL)
2356 last = ref;
2357 node = rb_next(node);
2358 }
2359 return last;
2360}
2361
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2366static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2367 struct btrfs_root *root,
2368 unsigned long nr)
2369{
2370 struct btrfs_delayed_ref_root *delayed_refs;
2371 struct btrfs_delayed_ref_node *ref;
2372 struct btrfs_delayed_ref_head *locked_ref = NULL;
2373 struct btrfs_delayed_extent_op *extent_op;
2374 struct btrfs_fs_info *fs_info = root->fs_info;
2375 ktime_t start = ktime_get();
2376 int ret;
2377 unsigned long count = 0;
2378 unsigned long actual_count = 0;
2379 int must_insert_reserved = 0;
2380
2381 delayed_refs = &trans->transaction->delayed_refs;
2382 while (1) {
2383 if (!locked_ref) {
2384 if (count >= nr)
2385 break;
2386
2387 spin_lock(&delayed_refs->lock);
2388 locked_ref = btrfs_select_ref_head(trans);
2389 if (!locked_ref) {
2390 spin_unlock(&delayed_refs->lock);
2391 break;
2392 }
2393
			/* grab the lock that says we are going to process
			 * all the refs for this head */
2396 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2397 spin_unlock(&delayed_refs->lock);
2398
			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
2404 if (ret == -EAGAIN) {
2405 locked_ref = NULL;
2406 count++;
2407 continue;
2408 }
2409 }
2410
		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 */
2418 spin_lock(&locked_ref->lock);
2419 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2420 locked_ref);
2421
		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
2426 ref = select_delayed_ref(locked_ref);
2427
2428 if (ref && ref->seq &&
2429 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2430 spin_unlock(&locked_ref->lock);
2431 btrfs_delayed_ref_unlock(locked_ref);
2432 spin_lock(&delayed_refs->lock);
2433 locked_ref->processing = 0;
2434 delayed_refs->num_heads_ready++;
2435 spin_unlock(&delayed_refs->lock);
2436 locked_ref = NULL;
2437 cond_resched();
2438 count++;
2439 continue;
2440 }
2441
		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
		 */
2446 must_insert_reserved = locked_ref->must_insert_reserved;
2447 locked_ref->must_insert_reserved = 0;
2448
2449 extent_op = locked_ref->extent_op;
2450 locked_ref->extent_op = NULL;
2451
2452 if (!ref) {

			/*
			 * All delayed refs have been processed, go ahead
			 * and send the head node to run_one_delayed_ref,
			 * so that any accounting fixes can happen
			 */
2459 ref = &locked_ref->node;
2460
2461 if (extent_op && must_insert_reserved) {
2462 btrfs_free_delayed_extent_op(extent_op);
2463 extent_op = NULL;
2464 }
2465
2466 if (extent_op) {
2467 spin_unlock(&locked_ref->lock);
2468 ret = run_delayed_extent_op(trans, root,
2469 ref, extent_op);
2470 btrfs_free_delayed_extent_op(extent_op);
2471
2472 if (ret) {
					/*
					 * Need to reset must_insert_reserved
					 * if there was an error so the abort
					 * stuff can cleanup the reserved
					 * space properly.
					 */
2479 if (must_insert_reserved)
2480 locked_ref->must_insert_reserved = 1;
2481 locked_ref->processing = 0;
2482 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2483 btrfs_delayed_ref_unlock(locked_ref);
2484 return ret;
2485 }
2486 continue;
2487 }
2488
			/*
			 * Need to drop our head ref lock and re-acquire the
			 * delayed ref lock and then re-check to make sure
			 * nobody got added.
			 */
2494 spin_unlock(&locked_ref->lock);
2495 spin_lock(&delayed_refs->lock);
2496 spin_lock(&locked_ref->lock);
2497 if (rb_first(&locked_ref->ref_root) ||
2498 locked_ref->extent_op) {
2499 spin_unlock(&locked_ref->lock);
2500 spin_unlock(&delayed_refs->lock);
2501 continue;
2502 }
2503 ref->in_tree = 0;
2504 delayed_refs->num_heads--;
2505 rb_erase(&locked_ref->href_node,
2506 &delayed_refs->href_root);
2507 spin_unlock(&delayed_refs->lock);
2508 } else {
2509 actual_count++;
2510 ref->in_tree = 0;
2511 rb_erase(&ref->rb_node, &locked_ref->ref_root);
2512 }
2513 atomic_dec(&delayed_refs->num_entries);
2514
2515 if (!btrfs_delayed_ref_is_head(ref)) {
			/*
			 * when we play the delayed ref, also correct the
			 * ref_mod on head
			 */
2520 switch (ref->action) {
2521 case BTRFS_ADD_DELAYED_REF:
2522 case BTRFS_ADD_DELAYED_EXTENT:
2523 locked_ref->node.ref_mod -= ref->ref_mod;
2524 break;
2525 case BTRFS_DROP_DELAYED_REF:
2526 locked_ref->node.ref_mod += ref->ref_mod;
2527 break;
2528 default:
2529 WARN_ON(1);
2530 }
2531 }
2532 spin_unlock(&locked_ref->lock);
2533
2534 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2535 must_insert_reserved);
2536
2537 btrfs_free_delayed_extent_op(extent_op);
2538 if (ret) {
2539 locked_ref->processing = 0;
2540 btrfs_delayed_ref_unlock(locked_ref);
2541 btrfs_put_delayed_ref(ref);
2542 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2543 return ret;
2544 }
2545
		/*
		 * If this node is a head, that means all the refs in this
		 * head have been dealt with, and we will pick the next head
		 * to deal with, so we must unlock the head and drop it from
		 * the cluster list before we release it.
		 */
2552 if (btrfs_delayed_ref_is_head(ref)) {
2553 btrfs_delayed_ref_unlock(locked_ref);
2554 locked_ref = NULL;
2555 }
2556 btrfs_put_delayed_ref(ref);
2557 count++;
2558 cond_resched();
2559 }
2560
	/*
	 * We don't want to include ref heads since we can have empty ref
	 * heads and those will drastically skew our runtime down since we
	 * just do accounting, no actual extent tree updates.
	 */
2566 if (actual_count > 0) {
2567 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2568 u64 avg;
2569
		/*
		 * We weigh the current average higher than our current
		 * runtime to avoid large swings in the average.
		 */
2574 spin_lock(&delayed_refs->lock);
2575 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2576 avg = div64_u64(avg, 4);
2577 fs_info->avg_delayed_ref_runtime = avg;
2578 spin_unlock(&delayed_refs->lock);
2579 }
2580 return 0;
2581}
2582
2583#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order.  This
 * correlates in most cases to the order added.  To expose dependencies on
 * this order, we start to process the tree in the middle instead of the
 * beginning.
 */
2589static u64 find_middle(struct rb_root *root)
2590{
2591 struct rb_node *n = root->rb_node;
2592 struct btrfs_delayed_ref_node *entry;
2593 int alt = 1;
2594 u64 middle;
2595 u64 first = 0, last = 0;
2596
2597 n = rb_first(root);
2598 if (n) {
2599 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2600 first = entry->bytenr;
2601 }
2602 n = rb_last(root);
2603 if (n) {
2604 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2605 last = entry->bytenr;
2606 }
2607 n = root->rb_node;
2608
2609 while (n) {
2610 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2611 WARN_ON(!entry->in_tree);
2612
2613 middle = entry->bytenr;
2614
2615 if (alt)
2616 n = n->rb_left;
2617 else
2618 n = n->rb_right;
2619
2620 alt = 1 - alt;
2621 }
2622 return middle;
2623}
2624#endif
2625
2626static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2627{
2628 u64 num_bytes;
2629
2630 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2631 sizeof(struct btrfs_extent_inline_ref));
2632 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2633 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2634
	/*
	 * We don't ever fill up leaves all the way so multiply by 2 just to
	 * be closer to what we're really going to want to use.
	 */
2639 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2640}
2641
2642int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2643 struct btrfs_root *root)
2644{
2645 struct btrfs_block_rsv *global_rsv;
2646 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2647 u64 num_bytes;
2648 int ret = 0;
2649
2650 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
2651 num_heads = heads_to_leaves(root, num_heads);
2652 if (num_heads > 1)
2653 num_bytes += (num_heads - 1) * root->leafsize;
2654 num_bytes <<= 1;
2655 global_rsv = &root->fs_info->global_block_rsv;
2656
	/*
	 * If we can't allocate any more chunks lets make sure we have _lots_
	 * of wiggle room since running delayed refs can create more delayed
	 * refs.
	 */
2661 if (global_rsv->space_info->full)
2662 num_bytes <<= 1;
2663
2664 spin_lock(&global_rsv->lock);
2665 if (global_rsv->reserved <= num_bytes)
2666 ret = 1;
2667 spin_unlock(&global_rsv->lock);
2668 return ret;
2669}
2670
2671int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2672 struct btrfs_root *root)
2673{
2674 struct btrfs_fs_info *fs_info = root->fs_info;
2675 u64 num_entries =
2676 atomic_read(&trans->transaction->delayed_refs.num_entries);
2677 u64 avg_runtime;
2678 u64 val;
2679
2680 smp_mb();
2681 avg_runtime = fs_info->avg_delayed_ref_runtime;
2682 val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
2684 return 1;
2685 if (val >= NSEC_PER_SEC / 2)
2686 return 2;
2687
2688 return btrfs_check_space_for_delayed_refs(trans, root);
2689}
2690
2691struct async_delayed_refs {
2692 struct btrfs_root *root;
2693 int count;
2694 int error;
2695 int sync;
2696 struct completion wait;
2697 struct btrfs_work work;
2698};
2699
2700static void delayed_ref_async_start(struct btrfs_work *work)
2701{
2702 struct async_delayed_refs *async;
2703 struct btrfs_trans_handle *trans;
2704 int ret;
2705
2706 async = container_of(work, struct async_delayed_refs, work);
2707
2708 trans = btrfs_join_transaction(async->root);
2709 if (IS_ERR(trans)) {
2710 async->error = PTR_ERR(trans);
2711 goto done;
2712 }
2713
	/*
	 * trans->sync means that when we call end_transaction, we won't
	 * wait on delayed refs
	 */
2718 trans->sync = true;
2719 ret = btrfs_run_delayed_refs(trans, async->root, async->count);
2720 if (ret)
2721 async->error = ret;
2722
2723 ret = btrfs_end_transaction(trans, async->root);
2724 if (ret && !async->error)
2725 async->error = ret;
2726done:
2727 if (async->sync)
2728 complete(&async->wait);
2729 else
2730 kfree(async);
2731}
2732
2733int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2734 unsigned long count, int wait)
2735{
2736 struct async_delayed_refs *async;
2737 int ret;
2738
2739 async = kmalloc(sizeof(*async), GFP_NOFS);
2740 if (!async)
2741 return -ENOMEM;
2742
2743 async->root = root->fs_info->tree_root;
2744 async->count = count;
2745 async->error = 0;
2746 if (wait)
2747 async->sync = 1;
2748 else
2749 async->sync = 0;
2750 init_completion(&async->wait);
2751
2752 btrfs_init_work(&async->work, delayed_ref_async_start,
2753 NULL, NULL);
2754
2755 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2756
2757 if (wait) {
2758 wait_for_completion(&async->wait);
2759 ret = async->error;
2760 kfree(async);
2761 return ret;
2762 }
2763 return 0;
2764}
2765
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
2776int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2777 struct btrfs_root *root, unsigned long count)
2778{
2779 struct rb_node *node;
2780 struct btrfs_delayed_ref_root *delayed_refs;
2781 struct btrfs_delayed_ref_head *head;
2782 int ret;
2783 int run_all = count == (unsigned long)-1;
2784 int run_most = 0;
2785
	/* We'll clean this up in btrfs_cleanup_transaction */
2787 if (trans->aborted)
2788 return 0;
2789
2790 if (root == root->fs_info->extent_root)
2791 root = root->fs_info->tree_root;
2792
2793 delayed_refs = &trans->transaction->delayed_refs;
2794 if (count == 0) {
2795 count = atomic_read(&delayed_refs->num_entries) * 2;
2796 run_most = 1;
2797 }
2798
2799again:
2800#ifdef SCRAMBLE_DELAYED_REFS
2801 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2802#endif
2803 ret = __btrfs_run_delayed_refs(trans, root, count);
2804 if (ret < 0) {
2805 btrfs_abort_transaction(trans, root, ret);
2806 return ret;
2807 }
2808
2809 if (run_all) {
2810 if (!list_empty(&trans->new_bgs))
2811 btrfs_create_pending_block_groups(trans, root);
2812
2813 spin_lock(&delayed_refs->lock);
2814 node = rb_first(&delayed_refs->href_root);
2815 if (!node) {
2816 spin_unlock(&delayed_refs->lock);
2817 goto out;
2818 }
2819 count = (unsigned long)-1;
2820
2821 while (node) {
2822 head = rb_entry(node, struct btrfs_delayed_ref_head,
2823 href_node);
2824 if (btrfs_delayed_ref_is_head(&head->node)) {
2825 struct btrfs_delayed_ref_node *ref;
2826
2827 ref = &head->node;
2828 atomic_inc(&ref->refs);
2829
2830 spin_unlock(&delayed_refs->lock);
2831
				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
2835 mutex_lock(&head->mutex);
2836 mutex_unlock(&head->mutex);
2837
2838 btrfs_put_delayed_ref(ref);
2839 cond_resched();
2840 goto again;
2841 } else {
2842 WARN_ON(1);
2843 }
2844 node = rb_next(node);
2845 }
2846 spin_unlock(&delayed_refs->lock);
2847 cond_resched();
2848 goto again;
2849 }
2850out:
2851 ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
2852 if (ret)
2853 return ret;
2854 assert_qgroups_uptodate(trans);
2855 return 0;
2856}
2857
2858int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2859 struct btrfs_root *root,
2860 u64 bytenr, u64 num_bytes, u64 flags,
2861 int level, int is_data)
2862{
2863 struct btrfs_delayed_extent_op *extent_op;
2864 int ret;
2865
2866 extent_op = btrfs_alloc_delayed_extent_op();
2867 if (!extent_op)
2868 return -ENOMEM;
2869
2870 extent_op->flags_to_set = flags;
2871 extent_op->update_flags = 1;
2872 extent_op->update_key = 0;
2873 extent_op->is_data = is_data ? 1 : 0;
2874 extent_op->level = level;
2875
2876 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2877 num_bytes, extent_op);
2878 if (ret)
2879 btrfs_free_delayed_extent_op(extent_op);
2880 return ret;
2881}
2882
2883static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2884 struct btrfs_root *root,
2885 struct btrfs_path *path,
2886 u64 objectid, u64 offset, u64 bytenr)
2887{
2888 struct btrfs_delayed_ref_head *head;
2889 struct btrfs_delayed_ref_node *ref;
2890 struct btrfs_delayed_data_ref *data_ref;
2891 struct btrfs_delayed_ref_root *delayed_refs;
2892 struct rb_node *node;
2893 int ret = 0;
2894
2895 delayed_refs = &trans->transaction->delayed_refs;
2896 spin_lock(&delayed_refs->lock);
2897 head = btrfs_find_delayed_ref_head(trans, bytenr);
2898 if (!head) {
2899 spin_unlock(&delayed_refs->lock);
2900 return 0;
2901 }
2902
2903 if (!mutex_trylock(&head->mutex)) {
2904 atomic_inc(&head->node.refs);
2905 spin_unlock(&delayed_refs->lock);
2906
2907 btrfs_release_path(path);
2908
		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
2913 mutex_lock(&head->mutex);
2914 mutex_unlock(&head->mutex);
2915 btrfs_put_delayed_ref(&head->node);
2916 return -EAGAIN;
2917 }
2918 spin_unlock(&delayed_refs->lock);
2919
2920 spin_lock(&head->lock);
2921 node = rb_first(&head->ref_root);
2922 while (node) {
2923 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2924 node = rb_next(node);
2925
		/* If it's a shared ref we know a cross reference exists */
2927 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2928 ret = 1;
2929 break;
2930 }
2931
2932 data_ref = btrfs_delayed_node_to_data_ref(ref);
2933
		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
2938 if (data_ref->root != root->root_key.objectid ||
2939 data_ref->objectid != objectid ||
2940 data_ref->offset != offset) {
2941 ret = 1;
2942 break;
2943 }
2944 }
2945 spin_unlock(&head->lock);
2946 mutex_unlock(&head->mutex);
2947 return ret;
2948}
2949
2950static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2951 struct btrfs_root *root,
2952 struct btrfs_path *path,
2953 u64 objectid, u64 offset, u64 bytenr)
2954{
2955 struct btrfs_root *extent_root = root->fs_info->extent_root;
2956 struct extent_buffer *leaf;
2957 struct btrfs_extent_data_ref *ref;
2958 struct btrfs_extent_inline_ref *iref;
2959 struct btrfs_extent_item *ei;
2960 struct btrfs_key key;
2961 u32 item_size;
2962 int ret;
2963
2964 key.objectid = bytenr;
2965 key.offset = (u64)-1;
2966 key.type = BTRFS_EXTENT_ITEM_KEY;
2967
2968 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2969 if (ret < 0)
2970 goto out;
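	/* a search for offset (u64)-1 can never return an exact key match */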
2971 BUG_ON(ret == 0);
2972
2973 ret = -ENOENT;
2974 if (path->slots[0] == 0)
2975 goto out;
2976
2977 path->slots[0]--;
2978 leaf = path->nodes[0];
2979 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2980
2981 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2982 goto out;
2983
2984 ret = 1;
2985 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2986#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2987 if (item_size < sizeof(*ei)) {
2988 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2989 goto out;
2990 }
2991#endif
2992 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2993
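	/* there must be exactly one inline ref for us to own the extent outright */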
2994 if (item_size != sizeof(*ei) +
2995 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2996 goto out;
2997
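	/* an extent from before the last snapshot may be shared with that snapshot */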
2998 if (btrfs_extent_generation(leaf, ei) <=
2999 btrfs_root_last_snapshot(&root->root_item))
3000 goto out;
3001
3002 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3003 if (btrfs_extent_inline_ref_type(leaf, iref) !=
3004 BTRFS_EXTENT_DATA_REF_KEY)
3005 goto out;
3006
3007 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3008 if (btrfs_extent_refs(leaf, ei) !=
3009 btrfs_extent_data_ref_count(leaf, ref) ||
3010 btrfs_extent_data_ref_root(leaf, ref) !=
3011 root->root_key.objectid ||
3012 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3013 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3014 goto out;
3015
3016 ret = 0;
3017out:
3018 return ret;
3019}
3020
3021int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
3022 struct btrfs_root *root,
3023 u64 objectid, u64 offset, u64 bytenr)
3024{
3025 struct btrfs_path *path;
3026 int ret;
3027 int ret2;
3028
3029 path = btrfs_alloc_path();
3030 if (!path)
3031 return -ENOENT;
3032
3033 do {
3034 ret = check_committed_ref(trans, root, path, objectid,
3035 offset, bytenr);
3036 if (ret && ret != -ENOENT)
3037 goto out;
3038
3039 ret2 = check_delayed_ref(trans, root, path, objectid,
3040 offset, bytenr);
3041 } while (ret2 == -EAGAIN);
3042
3043 if (ret2 && ret2 != -ENOENT) {
3044 ret = ret2;
3045 goto out;
3046 }
3047
3048 if (ret != -ENOENT || ret2 != -ENOENT)
3049 ret = 0;
3050out:
3051 btrfs_free_path(path);
3052 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3053 WARN_ON(ret > 0);
3054 return ret;
3055}
3056
3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3058 struct btrfs_root *root,
3059 struct extent_buffer *buf,
3060 int full_backref, int inc, int no_quota)
3061{
3062 u64 bytenr;
3063 u64 num_bytes;
3064 u64 parent;
3065 u64 ref_root;
3066 u32 nritems;
3067 struct btrfs_key key;
3068 struct btrfs_file_extent_item *fi;
3069 int i;
3070 int level;
3071 int ret = 0;
3072 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
3073 u64, u64, u64, u64, u64, u64, int);
3074
3075#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3076 if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
3077 return 0;
3078#endif
3079 ref_root = btrfs_header_owner(buf);
3080 nritems = btrfs_header_nritems(buf);
3081 level = btrfs_header_level(buf);
3082
3083 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3084 return 0;
3085
3086 if (inc)
3087 process_func = btrfs_inc_extent_ref;
3088 else
3089 process_func = btrfs_free_extent;
3090
3091 if (full_backref)
3092 parent = buf->start;
3093 else
3094 parent = 0;
3095
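	/* walk each item (leaf) or block pointer (node) and update its reference */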
3096 for (i = 0; i < nritems; i++) {
3097 if (level == 0) {
3098 btrfs_item_key_to_cpu(buf, &key, i);
3099 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3100 continue;
3101 fi = btrfs_item_ptr(buf, i,
3102 struct btrfs_file_extent_item);
3103 if (btrfs_file_extent_type(buf, fi) ==
3104 BTRFS_FILE_EXTENT_INLINE)
3105 continue;
3106 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3107 if (bytenr == 0)
3108 continue;
3109
3110 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3111 key.offset -= btrfs_file_extent_offset(buf, fi);
3112 ret = process_func(trans, root, bytenr, num_bytes,
3113 parent, ref_root, key.objectid,
3114 key.offset, no_quota);
3115 if (ret)
3116 goto fail;
3117 } else {
3118 bytenr = btrfs_node_blockptr(buf, i);
3119 num_bytes = btrfs_level_size(root, level - 1);
3120 ret = process_func(trans, root, bytenr, num_bytes,
3121 parent, ref_root, level - 1, 0,
3122 no_quota);
3123 if (ret)
3124 goto fail;
3125 }
3126 }
3127 return 0;
3128fail:
3129 return ret;
3130}
3131
3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3133 struct extent_buffer *buf, int full_backref, int no_quota)
3134{
3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
3136}
3137
3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3139 struct extent_buffer *buf, int full_backref, int no_quota)
3140{
3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
3142}
3143
3144static int write_one_cache_group(struct btrfs_trans_handle *trans,
3145 struct btrfs_root *root,
3146 struct btrfs_path *path,
3147 struct btrfs_block_group_cache *cache)
3148{
3149 int ret;
3150 struct btrfs_root *extent_root = root->fs_info->extent_root;
3151 unsigned long bi;
3152 struct extent_buffer *leaf;
3153
3154 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3155 if (ret < 0)
3156 goto fail;
3157 BUG_ON(ret);
3158
3159 leaf = path->nodes[0];
3160 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3161 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3162 btrfs_mark_buffer_dirty(leaf);
3163 btrfs_release_path(path);
3164fail:
3165 if (ret) {
3166 btrfs_abort_transaction(trans, root, ret);
3167 return ret;
3168 }
3169 return 0;
3170
3171}
3172
3173static struct btrfs_block_group_cache *
3174next_block_group(struct btrfs_root *root,
3175 struct btrfs_block_group_cache *cache)
3176{
3177 struct rb_node *node;
3178 spin_lock(&root->fs_info->block_group_cache_lock);
3179 node = rb_next(&cache->cache_node);
3180 btrfs_put_block_group(cache);
3181 if (node) {
3182 cache = rb_entry(node, struct btrfs_block_group_cache,
3183 cache_node);
3184 btrfs_get_block_group(cache);
3185 } else
3186 cache = NULL;
3187 spin_unlock(&root->fs_info->block_group_cache_lock);
3188 return cache;
3189}
3190
3191static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3192 struct btrfs_trans_handle *trans,
3193 struct btrfs_path *path)
3194{
3195 struct btrfs_root *root = block_group->fs_info->tree_root;
3196 struct inode *inode = NULL;
3197 u64 alloc_hint = 0;
3198 int dcs = BTRFS_DC_ERROR;
3199 int num_pages = 0;
3200 int retries = 0;
3201 int ret = 0;
3202
	/*
	 * If this block group is smaller than 100 megs don't bother caching
	 * the block group.
	 */
3207 if (block_group->key.offset < (100 * 1024 * 1024)) {
3208 spin_lock(&block_group->lock);
3209 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3210 spin_unlock(&block_group->lock);
3211 return 0;
3212 }
3213
3214again:
3215 inode = lookup_free_space_inode(root, block_group, path);
3216 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3217 ret = PTR_ERR(inode);
3218 btrfs_release_path(path);
3219 goto out;
3220 }
3221
3222 if (IS_ERR(inode)) {
3223 BUG_ON(retries);
3224 retries++;
3225
3226 if (block_group->ro)
3227 goto out_free;
3228
3229 ret = create_free_space_inode(root, trans, block_group, path);
3230 if (ret)
3231 goto out_free;
3232 goto again;
3233 }
3234
	/* We've already setup this transaction, go ahead and exit */
3236 if (block_group->cache_generation == trans->transid &&
3237 i_size_read(inode)) {
3238 dcs = BTRFS_DC_SETUP;
3239 goto out_put;
3240 }
3241
	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up
	 * next time.
	 */
3247 BTRFS_I(inode)->generation = 0;
3248 ret = btrfs_update_inode(trans, root, inode);
3249 WARN_ON(ret);
3250
3251 if (i_size_read(inode) > 0) {
3252 ret = btrfs_check_trunc_cache_free_space(root,
3253 &root->fs_info->global_block_rsv);
3254 if (ret)
3255 goto out_put;
3256
3257 ret = btrfs_truncate_free_space_cache(root, trans, inode);
3258 if (ret)
3259 goto out_put;
3260 }
3261
3262 spin_lock(&block_group->lock);
3263 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3264 !btrfs_test_opt(root, SPACE_CACHE) ||
3265 block_group->delalloc_bytes) {
		/*
		 * don't bother trying to write stuff out _if_
		 * a) we're not cached,
		 * b) we're with nospace_cache mount option, or
		 * c) the block group has delalloc in flight.
		 */
3271 dcs = BTRFS_DC_WRITTEN;
3272 spin_unlock(&block_group->lock);
3273 goto out_put;
3274 }
3275 spin_unlock(&block_group->lock);
3276
	/*
	 * Try to preallocate enough space based on how big the block group
	 * is.  Keep in mind this has to include any pinned space which could
	 * end up taking up quite a bit since it's not folded into the other
	 * space info.
	 */
3283 num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
3284 if (!num_pages)
3285 num_pages = 1;
3286
3287 num_pages *= 16;
3288 num_pages *= PAGE_CACHE_SIZE;
3289
3290 ret = btrfs_check_data_free_space(inode, num_pages);
3291 if (ret)
3292 goto out_put;
3293
3294 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3295 num_pages, num_pages,
3296 &alloc_hint);
3297 if (!ret)
3298 dcs = BTRFS_DC_SETUP;
3299 btrfs_free_reserved_data_space(inode, num_pages);
3300
3301out_put:
3302 iput(inode);
3303out_free:
3304 btrfs_release_path(path);
3305out:
3306 spin_lock(&block_group->lock);
3307 if (!ret && dcs == BTRFS_DC_SETUP)
3308 block_group->cache_generation = trans->transid;
3309 block_group->disk_cache_state = dcs;
3310 spin_unlock(&block_group->lock);
3311
3312 return ret;
3313}
3314
3315int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3316 struct btrfs_root *root)
3317{
3318 struct btrfs_block_group_cache *cache;
3319 int err = 0;
3320 struct btrfs_path *path;
3321 u64 last = 0;
3322
3323 path = btrfs_alloc_path();
3324 if (!path)
3325 return -ENOMEM;
3326
3327again:
3328 while (1) {
3329 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3330 while (cache) {
3331 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3332 break;
3333 cache = next_block_group(root, cache);
3334 }
3335 if (!cache) {
3336 if (last == 0)
3337 break;
3338 last = 0;
3339 continue;
3340 }
3341 err = cache_save_setup(cache, trans, path);
3342 last = cache->key.objectid + cache->key.offset;
3343 btrfs_put_block_group(cache);
3344 }
3345
3346 while (1) {
3347 if (last == 0) {
3348 err = btrfs_run_delayed_refs(trans, root,
3349 (unsigned long)-1);
3350 if (err)
3351 goto out;
3352 }
3353
3354 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3355 while (cache) {
3356 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
3357 btrfs_put_block_group(cache);
3358 goto again;
3359 }
3360
3361 if (cache->dirty)
3362 break;
3363 cache = next_block_group(root, cache);
3364 }
3365 if (!cache) {
3366 if (last == 0)
3367 break;
3368 last = 0;
3369 continue;
3370 }
3371
3372 if (cache->disk_cache_state == BTRFS_DC_SETUP)
3373 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
3374 cache->dirty = 0;
3375 last = cache->key.objectid + cache->key.offset;
3376
3377 err = write_one_cache_group(trans, root, path, cache);
3378 btrfs_put_block_group(cache);
3379 if (err)
3380 goto out;
3381 }
3382
3383 while (1) {
		/*
		 * I don't think this is needed since we're just marking our
		 * preallocated extent as written, but just in case it can't
		 * hurt.
		 */
3389 if (last == 0) {
3390 err = btrfs_run_delayed_refs(trans, root,
3391 (unsigned long)-1);
3392 if (err)
3393 goto out;
3394 }
3395
3396 cache = btrfs_lookup_first_block_group(root->fs_info, last);
3397 while (cache) {
			/*
			 * Really this shouldn't happen, but it could if we
			 * couldn't write the entire preallocated extent and
			 * splitting the extent resulted in a new block.
			 */
3403 if (cache->dirty) {
3404 btrfs_put_block_group(cache);
3405 goto again;
3406 }
3407 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3408 break;
3409 cache = next_block_group(root, cache);
3410 }
3411 if (!cache) {
3412 if (last == 0)
3413 break;
3414 last = 0;
3415 continue;
3416 }
3417
3418 err = btrfs_write_out_cache(root, trans, cache, path);

		/*
		 * If we didn't have an error then the cache state is still
		 * NEED_WRITE, so we can set it to WRITTEN.
		 */
3424 if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
3425 cache->disk_cache_state = BTRFS_DC_WRITTEN;
3426 last = cache->key.objectid + cache->key.offset;
3427 btrfs_put_block_group(cache);
3428 }
3429out:
3430
3431 btrfs_free_path(path);
3432 return err;
3433}
3434
3435int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3436{
3437 struct btrfs_block_group_cache *block_group;
3438 int readonly = 0;
3439
3440 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3441 if (!block_group || block_group->ro)
3442 readonly = 1;
3443 if (block_group)
3444 btrfs_put_block_group(block_group);
3445 return readonly;
3446}
3447
3448static const char *alloc_name(u64 flags)
3449{
3450 switch (flags) {
3451 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3452 return "mixed";
3453 case BTRFS_BLOCK_GROUP_METADATA:
3454 return "metadata";
3455 case BTRFS_BLOCK_GROUP_DATA:
3456 return "data";
3457 case BTRFS_BLOCK_GROUP_SYSTEM:
3458 return "system";
3459 default:
3460 WARN_ON(1);
3461 return "invalid-combination";
	}
3463}
3464
3465static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3466 u64 total_bytes, u64 bytes_used,
3467 struct btrfs_space_info **space_info)
3468{
3469 struct btrfs_space_info *found;
3470 int i;
3471 int factor;
3472 int ret;
3473
3474 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3475 BTRFS_BLOCK_GROUP_RAID10))
3476 factor = 2;
3477 else
3478 factor = 1;
3479
3480 found = __find_space_info(info, flags);
3481 if (found) {
3482 spin_lock(&found->lock);
3483 found->total_bytes += total_bytes;
3484 found->disk_total += total_bytes * factor;
3485 found->bytes_used += bytes_used;
3486 found->disk_used += bytes_used * factor;
3487 found->full = 0;
3488 spin_unlock(&found->lock);
3489 *space_info = found;
3490 return 0;
3491 }
3492 found = kzalloc(sizeof(*found), GFP_NOFS);
3493 if (!found)
3494 return -ENOMEM;
3495
3496 ret = percpu_counter_init(&found->total_bytes_pinned, 0);
3497 if (ret) {
3498 kfree(found);
3499 return ret;
3500 }
3501
3502 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3503 INIT_LIST_HEAD(&found->block_groups[i]);
3504 init_rwsem(&found->groups_sem);
3505 spin_lock_init(&found->lock);
3506 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3507 found->total_bytes = total_bytes;
3508 found->disk_total = total_bytes * factor;
3509 found->bytes_used = bytes_used;
3510 found->disk_used = bytes_used * factor;
3511 found->bytes_pinned = 0;
3512 found->bytes_reserved = 0;
3513 found->bytes_readonly = 0;
3514 found->bytes_may_use = 0;
3515 found->full = 0;
3516 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3517 found->chunk_alloc = 0;
3518 found->flush = 0;
3519 init_waitqueue_head(&found->wait);
3520
3521 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3522 info->space_info_kobj, "%s",
3523 alloc_name(found->flags));
3524 if (ret) {
3525 kfree(found);
3526 return ret;
3527 }
3528
3529 *space_info = found;
3530 list_add_rcu(&found->list, &info->space_info);
3531 if (flags & BTRFS_BLOCK_GROUP_DATA)
3532 info->data_sinfo = found;
3533
3534 return ret;
3535}
3536
3537static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3538{
3539 u64 extra_flags = chunk_to_extended(flags) &
3540 BTRFS_EXTENDED_PROFILE_MASK;
3541
3542 write_seqlock(&fs_info->profiles_lock);
3543 if (flags & BTRFS_BLOCK_GROUP_DATA)
3544 fs_info->avail_data_alloc_bits |= extra_flags;
3545 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3546 fs_info->avail_metadata_alloc_bits |= extra_flags;
3547 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3548 fs_info->avail_system_alloc_bits |= extra_flags;
3549 write_sequnlock(&fs_info->profiles_lock);
3550}
3551
/*
 * returns target flags in extended format or 0 if restripe for this
 * chunk_type is not in progress
 *
 * should be called with either volume_mutex or balance_lock held
 */
3558static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3559{
3560 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3561 u64 target = 0;
3562
3563 if (!bctl)
3564 return 0;
3565
3566 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3567 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3568 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3569 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3570 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3571 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3572 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3573 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3574 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3575 }
3576
3577 return target;
3578}
3579
/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
3587static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3588{
	/*
	 * we add in the count of missing devices because we want
	 * to make sure that any RAID levels on a degraded FS
	 * continue to be honored.
	 */
3594 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3595 root->fs_info->fs_devices->missing_devices;
3596 u64 target;
3597 u64 tmp;
3598
	/*
	 * see if restripe for this chunk_type is in progress, if so
	 * try to reduce to the target profile
	 */
3603 spin_lock(&root->fs_info->balance_lock);
3604 target = get_restripe_target(root->fs_info, flags);
3605 if (target) {
		/* pick target profile only if it's already available */
3607 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3608 spin_unlock(&root->fs_info->balance_lock);
3609 return extended_to_chunk(target);
3610 }
3611 }
3612 spin_unlock(&root->fs_info->balance_lock);
3613
	/* First, mask out the RAID levels which aren't possible */
3615 if (num_devices == 1)
3616 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
3617 BTRFS_BLOCK_GROUP_RAID5);
3618 if (num_devices < 3)
3619 flags &= ~BTRFS_BLOCK_GROUP_RAID6;
3620 if (num_devices < 4)
3621 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
3622
3623 tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
3624 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
3625 BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
3626 flags &= ~tmp;
3627
3628 if (tmp & BTRFS_BLOCK_GROUP_RAID6)
3629 tmp = BTRFS_BLOCK_GROUP_RAID6;
3630 else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
3631 tmp = BTRFS_BLOCK_GROUP_RAID5;
3632 else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
3633 tmp = BTRFS_BLOCK_GROUP_RAID10;
3634 else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
3635 tmp = BTRFS_BLOCK_GROUP_RAID1;
3636 else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
3637 tmp = BTRFS_BLOCK_GROUP_RAID0;
3638
3639 return extended_to_chunk(flags | tmp);
3640}
3641
3642static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
3643{
3644 unsigned seq;
3645 u64 flags;
3646
3647 do {
3648 flags = orig_flags;
3649 seq = read_seqbegin(&root->fs_info->profiles_lock);
3650
3651 if (flags & BTRFS_BLOCK_GROUP_DATA)
3652 flags |= root->fs_info->avail_data_alloc_bits;
3653 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3654 flags |= root->fs_info->avail_system_alloc_bits;
3655 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3656 flags |= root->fs_info->avail_metadata_alloc_bits;
3657 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
3658
3659 return btrfs_reduce_alloc_profile(root, flags);
3660}
3661
3662u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3663{
3664 u64 flags;
3665 u64 ret;
3666
3667 if (data)
3668 flags = BTRFS_BLOCK_GROUP_DATA;
3669 else if (root == root->fs_info->chunk_root)
3670 flags = BTRFS_BLOCK_GROUP_SYSTEM;
3671 else
3672 flags = BTRFS_BLOCK_GROUP_METADATA;
3673
3674 ret = get_alloc_profile(root, flags);
3675 return ret;
3676}
3677
/*
 * This will check the space that the inode allocates from to make sure we
 * have enough space for bytes.
 */
3682int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3683{
3684 struct btrfs_space_info *data_sinfo;
3685 struct btrfs_root *root = BTRFS_I(inode)->root;
3686 struct btrfs_fs_info *fs_info = root->fs_info;
3687 u64 used;
3688 int ret = 0, committed = 0, alloc_chunk = 1;
3689
	/* make sure bytes are sectorsize aligned */
3691 bytes = ALIGN(bytes, root->sectorsize);
3692
3693 if (btrfs_is_free_space_inode(inode)) {
3694 committed = 1;
3695 ASSERT(current->journal_info);
3696 }
3697
3698 data_sinfo = fs_info->data_sinfo;
3699 if (!data_sinfo)
3700 goto alloc;
3701
3702again:
	/* make sure we have enough space to handle the data first */
3704 spin_lock(&data_sinfo->lock);
3705 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3706 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3707 data_sinfo->bytes_may_use;
3708
3709 if (used + bytes > data_sinfo->total_bytes) {
3710 struct btrfs_trans_handle *trans;
3711
		/*
		 * if we don't have enough free bytes in this space then we
		 * need to alloc a new chunk.
		 */
3716 if (!data_sinfo->full && alloc_chunk) {
3717 u64 alloc_target;
3718
3719 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3720 spin_unlock(&data_sinfo->lock);
3721alloc:
3722 alloc_target = btrfs_get_alloc_profile(root, 1);
3723
			/*
			 * It is ugly that we don't call nolock join
			 * transaction for the free space inode case here.
			 * But it is safe because we only do the data space
			 * reservation for the free space cache in the
			 * transaction context, the common join transaction
			 * just increases the counter of the current
			 * transaction handler, it doesn't try to acquire the
			 * trans_lock of the fs.
			 */
3733 trans = btrfs_join_transaction(root);
3734 if (IS_ERR(trans))
3735 return PTR_ERR(trans);
3736
3737 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3738 alloc_target,
3739 CHUNK_ALLOC_NO_FORCE);
3740 btrfs_end_transaction(trans, root);
3741 if (ret < 0) {
3742 if (ret != -ENOSPC)
3743 return ret;
3744 else
3745 goto commit_trans;
3746 }
3747
3748 if (!data_sinfo)
3749 data_sinfo = fs_info->data_sinfo;
3750
3751 goto again;
3752 }
3753
		/*
		 * If we don't have enough pinned space to deal with this
		 * allocation don't bother committing the transaction.
		 */
3758 if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
3759 bytes) < 0)
3760 committed = 1;
3761 spin_unlock(&data_sinfo->lock);
3762
		/* commit the current transaction and try again */
3764commit_trans:
3765 if (!committed &&
3766 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3767 committed = 1;
3768
3769 trans = btrfs_join_transaction(root);
3770 if (IS_ERR(trans))
3771 return PTR_ERR(trans);
3772 ret = btrfs_commit_transaction(trans, root);
3773 if (ret)
3774 return ret;
3775 goto again;
3776 }
3777
3778 trace_btrfs_space_reservation(root->fs_info,
3779 "space_info:enospc",
3780 data_sinfo->flags, bytes, 1);
3781 return -ENOSPC;
3782 }
3783 data_sinfo->bytes_may_use += bytes;
3784 trace_btrfs_space_reservation(root->fs_info, "space_info",
3785 data_sinfo->flags, bytes, 1);
3786 spin_unlock(&data_sinfo->lock);
3787
3788 return 0;
3789}
3790
/*
 * Called if we need to clear a data reservation for this inode.
 */
3794void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3795{
3796 struct btrfs_root *root = BTRFS_I(inode)->root;
3797 struct btrfs_space_info *data_sinfo;
3798
	/* make sure bytes are sectorsize aligned */
3800 bytes = ALIGN(bytes, root->sectorsize);
3801
3802 data_sinfo = root->fs_info->data_sinfo;
3803 spin_lock(&data_sinfo->lock);
3804 WARN_ON(data_sinfo->bytes_may_use < bytes);
3805 data_sinfo->bytes_may_use -= bytes;
3806 trace_btrfs_space_reservation(root->fs_info, "space_info",
3807 data_sinfo->flags, bytes, 0);
3808 spin_unlock(&data_sinfo->lock);
3809}
3810
3811static void force_metadata_allocation(struct btrfs_fs_info *info)
3812{
3813 struct list_head *head = &info->space_info;
3814 struct btrfs_space_info *found;
3815
3816 rcu_read_lock();
3817 list_for_each_entry_rcu(found, head, list) {
3818 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3819 found->force_alloc = CHUNK_ALLOC_FORCE;
3820 }
3821 rcu_read_unlock();
3822}
3823
3824static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
3825{
3826 return (global->size << 1);
3827}
3828
3829static int should_alloc_chunk(struct btrfs_root *root,
3830 struct btrfs_space_info *sinfo, int force)
3831{
3832 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3833 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3834 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3835 u64 thresh;
3836
3837 if (force == CHUNK_ALLOC_FORCE)
3838 return 1;
3839
	/*
	 * We need to take into account the global rsv because for all intents
	 * and purposes it's used space.  Don't worry about locking the
	 * global_rsv, it doesn't change except when the transaction commits.
	 */
3845 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
3846 num_allocated += calc_global_rsv_need_space(global_rsv);
3847
	/*
	 * in limited mode, we want to have some free space up to
	 * about 1% of the FS size.
	 */
3852 if (force == CHUNK_ALLOC_LIMITED) {
3853 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
3854 thresh = max_t(u64, 64 * 1024 * 1024,
3855 div_factor_fine(thresh, 1));
3856
3857 if (num_bytes - num_allocated < thresh)
3858 return 1;
3859 }
3860
3861 if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
3862 return 0;
3863 return 1;
3864}
3865
3866static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
3867{
3868 u64 num_dev;
3869
3870 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
3871 BTRFS_BLOCK_GROUP_RAID0 |
3872 BTRFS_BLOCK_GROUP_RAID5 |
3873 BTRFS_BLOCK_GROUP_RAID6))
3874 num_dev = root->fs_info->fs_devices->rw_devices;
3875 else if (type & BTRFS_BLOCK_GROUP_RAID1)
3876 num_dev = 2;
3877 else
3878 num_dev = 1;
3879
	/* metadata for updating devices and chunk tree */
3881 return btrfs_calc_trans_metadata_size(root, num_dev + 1);
3882}
3883
3884static void check_system_chunk(struct btrfs_trans_handle *trans,
3885 struct btrfs_root *root, u64 type)
3886{
3887 struct btrfs_space_info *info;
3888 u64 left;
3889 u64 thresh;
3890
3891 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3892 spin_lock(&info->lock);
3893 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
3894 info->bytes_reserved - info->bytes_readonly;
3895 spin_unlock(&info->lock);
3896
3897 thresh = get_system_chunk_thresh(root, type);
3898 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
3899 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
3900 left, thresh, type);
3901 dump_space_info(info, 0, 0);
3902 }
3903
3904 if (left < thresh) {
3905 u64 flags;
3906
3907 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
3908 btrfs_alloc_chunk(trans, root, flags);
3909 }
3910}
3911
3912static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3913 struct btrfs_root *extent_root, u64 flags, int force)
3914{
3915 struct btrfs_space_info *space_info;
3916 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3917 int wait_for_alloc = 0;
3918 int ret = 0;
3919
	/* Don't re-enter if we're already allocating a chunk */
3921 if (trans->allocating_chunk)
3922 return -ENOSPC;
3923
3924 space_info = __find_space_info(extent_root->fs_info, flags);
3925 if (!space_info) {
3926 ret = update_space_info(extent_root->fs_info, flags,
3927 0, 0, &space_info);
3928 BUG_ON(ret);
3929 }
3930 BUG_ON(!space_info);
3931
3932again:
3933 spin_lock(&space_info->lock);
3934 if (force < space_info->force_alloc)
3935 force = space_info->force_alloc;
3936 if (space_info->full) {
3937 if (should_alloc_chunk(extent_root, space_info, force))
3938 ret = -ENOSPC;
3939 else
3940 ret = 0;
3941 spin_unlock(&space_info->lock);
3942 return ret;
3943 }
3944
3945 if (!should_alloc_chunk(extent_root, space_info, force)) {
3946 spin_unlock(&space_info->lock);
3947 return 0;
3948 } else if (space_info->chunk_alloc) {
3949 wait_for_alloc = 1;
3950 } else {
3951 space_info->chunk_alloc = 1;
3952 }
3953
3954 spin_unlock(&space_info->lock);
3955
3956 mutex_lock(&fs_info->chunk_mutex);
3957
	/*
	 * The chunk_mutex is held throughout the entirety of a chunk
	 * allocation, so once we've acquired the chunk_mutex we know that the
	 * other guy is done and we need to recheck and see if we should
	 * allocate.
	 */
3964 if (wait_for_alloc) {
3965 mutex_unlock(&fs_info->chunk_mutex);
3966 wait_for_alloc = 0;
3967 goto again;
3968 }
3969
3970 trans->allocating_chunk = true;
3971
	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
3976 if (btrfs_mixed_space_info(space_info))
3977 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3978
3979
	/*
	 * if we're doing a data chunk, go ahead and make sure that
	 * we keep a reasonable number of metadata chunks allocated in the
	 * FS as well.
	 */
3984 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3985 fs_info->data_chunk_allocations++;
3986 if (!(fs_info->data_chunk_allocations %
3987 fs_info->metadata_ratio))
3988 force_metadata_allocation(fs_info);
3989 }
3990
	/*
	 * Check if we have enough space in SYSTEM chunk because we may need
	 * to update devices.
	 */
3995 check_system_chunk(trans, extent_root, flags);
3996
3997 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3998 trans->allocating_chunk = false;
3999
4000 spin_lock(&space_info->lock);
4001 if (ret < 0 && ret != -ENOSPC)
4002 goto out;
4003 if (ret)
4004 space_info->full = 1;
4005 else
4006 ret = 1;
4007
4008 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4009out:
4010 space_info->chunk_alloc = 0;
4011 spin_unlock(&space_info->lock);
4012 mutex_unlock(&fs_info->chunk_mutex);
4013 return ret;
4014}
4015
4016static int can_overcommit(struct btrfs_root *root,
4017 struct btrfs_space_info *space_info, u64 bytes,
4018 enum btrfs_reserve_flush_enum flush)
4019{
4020 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4021 u64 profile = btrfs_get_alloc_profile(root, 0);
4022 u64 space_size;
4023 u64 avail;
4024 u64 used;
4025
4026 used = space_info->bytes_used + space_info->bytes_reserved +
4027 space_info->bytes_pinned + space_info->bytes_readonly;
4028
	/*
	 * We only want to allow over committing if we have lots of actual
	 * space free, but if we don't have enough space to handle the global
	 * reserve space then we could end up having a real enospc problem
	 * when trying to allocate a chunk or some other such important
	 * allocation.
	 */
4035 spin_lock(&global_rsv->lock);
4036 space_size = calc_global_rsv_need_space(global_rsv);
4037 spin_unlock(&global_rsv->lock);
4038 if (used + space_size >= space_info->total_bytes)
4039 return 0;
4040
4041 used += space_info->bytes_may_use;
4042
4043 spin_lock(&root->fs_info->free_chunk_lock);
4044 avail = root->fs_info->free_chunk_space;
4045 spin_unlock(&root->fs_info->free_chunk_lock);
4046
	/*
	 * If we have dup, raid1 or raid10 then only half of the free
	 * space is actually useable.  For raid56, the space info used
	 * doesn't include the parity drive, so we don't have to
	 * change the math
	 */
4053 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4054 BTRFS_BLOCK_GROUP_RAID1 |
4055 BTRFS_BLOCK_GROUP_RAID10))
4056 avail >>= 1;
4057
	/*
	 * If we aren't flushing all things, let us overcommit up to
	 * 1/2 of the space.  If we can flush, don't let us overcommit
	 * too much, let it overcommit up to 1/8 of the space.
	 */
4063 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4064 avail >>= 3;
4065 else
4066 avail >>= 1;
4067
4068 if (used + bytes < space_info->total_bytes + avail)
4069 return 1;
4070 return 0;
4071}
4072
4073static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
4074 unsigned long nr_pages, int nr_items)
4075{
4076 struct super_block *sb = root->fs_info->sb;
4077
4078 if (down_read_trylock(&sb->s_umount)) {
4079 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4080 up_read(&sb->s_umount);
4081 } else {
		/*
		 * We needn't worry about the filesystem going from r/w to r/o
		 * even though we don't acquire ->s_umount mutex, because the
		 * filesystem should guarantee the delalloc inodes list be
		 * empty after the filesystem is read-only (all dirty pages
		 * are written to the disk).
		 */
4089 btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
4090 if (!current->journal_info)
4091 btrfs_wait_ordered_roots(root->fs_info, nr_items);
4092 }
4093}
4094
4095static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
4096{
4097 u64 bytes;
4098 int nr;
4099
4100 bytes = btrfs_calc_trans_metadata_size(root, 1);
4101 nr = (int)div64_u64(to_reclaim, bytes);
4102 if (!nr)
4103 nr = 1;
4104 return nr;
4105}
4106
4107#define EXTENT_SIZE_PER_ITEM (256 * 1024)
4108
/*
 * shrink metadata reservation for delalloc
 */
4112static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4113 bool wait_ordered)
4114{
4115 struct btrfs_block_rsv *block_rsv;
4116 struct btrfs_space_info *space_info;
4117 struct btrfs_trans_handle *trans;
4118 u64 delalloc_bytes;
4119 u64 max_reclaim;
4120 long time_left;
4121 unsigned long nr_pages;
4122 int loops;
4123 int items;
4124 enum btrfs_reserve_flush_enum flush;
4125
	/* calculate the number of items we need to flush for this reservation */
4127 items = calc_reclaim_items_nr(root, to_reclaim);
4128 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4129
4130 trans = (struct btrfs_trans_handle *)current->journal_info;
4131 block_rsv = &root->fs_info->delalloc_block_rsv;
4132 space_info = block_rsv->space_info;
4133
4134 delalloc_bytes = percpu_counter_sum_positive(
4135 &root->fs_info->delalloc_bytes);
4136 if (delalloc_bytes == 0) {
4137 if (trans)
4138 return;
4139 if (wait_ordered)
4140 btrfs_wait_ordered_roots(root->fs_info, items);
4141 return;
4142 }
4143
4144 loops = 0;
4145 while (delalloc_bytes && loops < 3) {
4146 max_reclaim = min(delalloc_bytes, to_reclaim);
4147 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
4148 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
4149
		/*
		 * We need to wait for the async pages to actually start
		 * before we do anything.
		 */
4153 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4154 if (!max_reclaim)
4155 goto skip_async;
4156
4157 if (max_reclaim <= nr_pages)
4158 max_reclaim = 0;
4159 else
4160 max_reclaim -= nr_pages;
4161
4162 wait_event(root->fs_info->async_submit_wait,
4163 atomic_read(&root->fs_info->async_delalloc_pages) <=
4164 (int)max_reclaim);
4165skip_async:
4166 if (!trans)
4167 flush = BTRFS_RESERVE_FLUSH_ALL;
4168 else
4169 flush = BTRFS_RESERVE_NO_FLUSH;
4170 spin_lock(&space_info->lock);
4171 if (can_overcommit(root, space_info, orig, flush)) {
4172 spin_unlock(&space_info->lock);
4173 break;
4174 }
4175 spin_unlock(&space_info->lock);
4176
4177 loops++;
4178 if (wait_ordered && !trans) {
4179 btrfs_wait_ordered_roots(root->fs_info, items);
4180 } else {
4181 time_left = schedule_timeout_killable(1);
4182 if (time_left)
4183 break;
4184 }
4185 delalloc_bytes = percpu_counter_sum_positive(
4186 &root->fs_info->delalloc_bytes);
4187 }
4188}
4189
/**
 * may_commit_transaction - possibly commit the transaction if its ok to
 * @root - the root we're allocating for
 * @bytes - the number of bytes we want to reserve
 * @force - force the commit
 *
 * This will check to make sure that committing the transaction will actually
 * get us somewhere and then commit the transaction if it does.  Otherwise it
 * will return -ENOSPC.
 */
4200static int may_commit_transaction(struct btrfs_root *root,
4201 struct btrfs_space_info *space_info,
4202 u64 bytes, int force)
4203{
4204 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4205 struct btrfs_trans_handle *trans;
4206
4207 trans = (struct btrfs_trans_handle *)current->journal_info;
4208 if (trans)
4209 return -EAGAIN;
4210
4211 if (force)
4212 goto commit;
4213
	/* See if there is enough pinned space to make this reservation */
4215 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4216 bytes) >= 0)
4217 goto commit;
4218
	/*
	 * See if there is some space in the delayed insertion reservation for
	 * this reservation.
	 */
4223 if (space_info != delayed_rsv->space_info)
4224 return -ENOSPC;
4225
4226 spin_lock(&delayed_rsv->lock);
4227 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4228 bytes - delayed_rsv->size) >= 0) {
4229 spin_unlock(&delayed_rsv->lock);
4230 return -ENOSPC;
4231 }
4232 spin_unlock(&delayed_rsv->lock);
4233
4234commit:
4235 trans = btrfs_join_transaction(root);
4236 if (IS_ERR(trans))
4237 return -ENOSPC;
4238
4239 return btrfs_commit_transaction(trans, root);
4240}
4241
4242enum flush_state {
4243 FLUSH_DELAYED_ITEMS_NR = 1,
4244 FLUSH_DELAYED_ITEMS = 2,
4245 FLUSH_DELALLOC = 3,
4246 FLUSH_DELALLOC_WAIT = 4,
4247 ALLOC_CHUNK = 5,
4248 COMMIT_TRANS = 6,
4249};
4250
4251static int flush_space(struct btrfs_root *root,
4252 struct btrfs_space_info *space_info, u64 num_bytes,
4253 u64 orig_bytes, int state)
4254{
4255 struct btrfs_trans_handle *trans;
4256 int nr;
4257 int ret = 0;
4258
4259 switch (state) {
4260 case FLUSH_DELAYED_ITEMS_NR:
4261 case FLUSH_DELAYED_ITEMS:
4262 if (state == FLUSH_DELAYED_ITEMS_NR)
4263 nr = calc_reclaim_items_nr(root, num_bytes) * 2;
4264 else
4265 nr = -1;
4266
4267 trans = btrfs_join_transaction(root);
4268 if (IS_ERR(trans)) {
4269 ret = PTR_ERR(trans);
4270 break;
4271 }
4272 ret = btrfs_run_delayed_items_nr(trans, root, nr);
4273 btrfs_end_transaction(trans, root);
4274 break;
4275 case FLUSH_DELALLOC:
4276 case FLUSH_DELALLOC_WAIT:
4277 shrink_delalloc(root, num_bytes * 2, orig_bytes,
4278 state == FLUSH_DELALLOC_WAIT);
4279 break;
4280 case ALLOC_CHUNK:
4281 trans = btrfs_join_transaction(root);
4282 if (IS_ERR(trans)) {
4283 ret = PTR_ERR(trans);
4284 break;
4285 }
4286 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4287 btrfs_get_alloc_profile(root, 0),
4288 CHUNK_ALLOC_NO_FORCE);
4289 btrfs_end_transaction(trans, root);
4290 if (ret == -ENOSPC)
4291 ret = 0;
4292 break;
4293 case COMMIT_TRANS:
4294 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
4295 break;
4296 default:
4297 ret = -ENOSPC;
4298 break;
4299 }
4300
4301 return ret;
4302}
4303
4304static inline u64
4305btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4306 struct btrfs_space_info *space_info)
4307{
4308 u64 used;
4309 u64 expected;
4310 u64 to_reclaim;
4311
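	/* start with a modest target: 1M per online cpu, capped at 16M */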
4312 to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
4313 16 * 1024 * 1024);
4314 spin_lock(&space_info->lock);
4315 if (can_overcommit(root, space_info, to_reclaim,
4316 BTRFS_RESERVE_FLUSH_ALL)) {
4317 to_reclaim = 0;
4318 goto out;
4319 }
4320
4321 used = space_info->bytes_used + space_info->bytes_reserved +
4322 space_info->bytes_pinned + space_info->bytes_readonly +
4323 space_info->bytes_may_use;
4324 if (can_overcommit(root, space_info, 1024 * 1024,
4325 BTRFS_RESERVE_FLUSH_ALL))
4326 expected = div_factor_fine(space_info->total_bytes, 95);
4327 else
4328 expected = div_factor_fine(space_info->total_bytes, 90);
4329
4330 if (used > expected)
4331 to_reclaim = used - expected;
4332 else
4333 to_reclaim = 0;
4334 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4335 space_info->bytes_reserved);
4336out:
4337 spin_unlock(&space_info->lock);
4338
4339 return to_reclaim;
4340}
4341
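/* async reclaim kicks in once usage crosses ~98%, unless closing or remounting */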
4342static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4343 struct btrfs_fs_info *fs_info, u64 used)
4344{
4345 return (used >= div_factor_fine(space_info->total_bytes, 98) &&
4346 !btrfs_fs_closing(fs_info) &&
4347 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4348}
4349
4350static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
4351 struct btrfs_fs_info *fs_info)
4352{
4353 u64 used;
4354
4355 spin_lock(&space_info->lock);
4356 used = space_info->bytes_used + space_info->bytes_reserved +
4357 space_info->bytes_pinned + space_info->bytes_readonly +
4358 space_info->bytes_may_use;
4359 if (need_do_async_reclaim(space_info, fs_info, used)) {
4360 spin_unlock(&space_info->lock);
4361 return 1;
4362 }
4363 spin_unlock(&space_info->lock);
4364
4365 return 0;
4366}
4367
4368static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4369{
4370 struct btrfs_fs_info *fs_info;
4371 struct btrfs_space_info *space_info;
4372 u64 to_reclaim;
4373 int flush_state;
4374
4375 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4376 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4377
4378 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
4379 space_info);
4380 if (!to_reclaim)
4381 return;
4382
4383 flush_state = FLUSH_DELAYED_ITEMS_NR;
4384 do {
4385 flush_space(fs_info->fs_root, space_info, to_reclaim,
4386 to_reclaim, flush_state);
4387 flush_state++;
4388 if (!btrfs_need_do_async_reclaim(space_info, fs_info))
4389 return;
4390 } while (flush_state <= COMMIT_TRANS);
4391
4392 if (btrfs_need_do_async_reclaim(space_info, fs_info))
4393 queue_work(system_unbound_wq, work);
4394}
4395
4396void btrfs_init_async_reclaim_work(struct work_struct *work)
4397{
4398 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
4399}
4400
/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root - the root we're allocating for
 * @block_rsv - the block_rsv we're allocating for
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt
 * to flush out space to make room.  It will do this by flushing delalloc if
 * possible or committing the transaction.  If flush is BTRFS_RESERVE_NO_FLUSH
 * then no attempts to regain reservations will be made and this will fail if
 * there is not enough space already.
 */
4415static int reserve_metadata_bytes(struct btrfs_root *root,
4416 struct btrfs_block_rsv *block_rsv,
4417 u64 orig_bytes,
4418 enum btrfs_reserve_flush_enum flush)
4419{
4420 struct btrfs_space_info *space_info = block_rsv->space_info;
4421 u64 used;
4422 u64 num_bytes = orig_bytes;
4423 int flush_state = FLUSH_DELAYED_ITEMS_NR;
4424 int ret = 0;
4425 bool flushing = false;
4426
4427again:
4428 ret = 0;
4429 spin_lock(&space_info->lock);
	/*
	 * We only want to wait if somebody other than us is flushing and we
	 * are actually allowed to flush all things.
	 */
4434 while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
4435 space_info->flush) {
4436 spin_unlock(&space_info->lock);
4437
		/*
		 * If we have a trans handle we can't wait because the flusher
		 * may have to commit the transaction, which would mean we
		 * would deadlock since we are waiting for the flusher to
		 * finish the flushing.
		 */
4443 if (current->journal_info)
4444 return -EAGAIN;
4445 ret = wait_event_killable(space_info->wait, !space_info->flush);
4446
4447 if (ret)
4448 return -EINTR;
4449
4450 spin_lock(&space_info->lock);
4451 }
4452
4453 ret = -ENOSPC;
4454 used = space_info->bytes_used + space_info->bytes_reserved +
4455 space_info->bytes_pinned + space_info->bytes_readonly +
4456 space_info->bytes_may_use;
4457
	/*
	 * The idea here is that if we haven't already over-reserved then we
	 * can go ahead and save our reservation first and then start flushing
	 * if we need to.  Otherwise if we've already overcommitted lets start
	 * flushing stuff first and then come back and try to make our
	 * reservation.
	 */
4465 if (used <= space_info->total_bytes) {
4466 if (used + orig_bytes <= space_info->total_bytes) {
4467 space_info->bytes_may_use += orig_bytes;
4468 trace_btrfs_space_reservation(root->fs_info,
4469 "space_info", space_info->flags, orig_bytes, 1);
4470 ret = 0;
4471 } else {
			/*
			 * Ok set num_bytes to orig_bytes since we aren't
			 * overcommitted, this way we only try and reclaim
			 * what we need.
			 */
4477 num_bytes = orig_bytes;
4478 }
4479 } else {
		/*
		 * Ok we're over committed, set num_bytes to the overcommitted
		 * amount plus the amount of bytes that we need for this
		 * reservation.
		 */
4485 num_bytes = used - space_info->total_bytes +
4486 (orig_bytes * 2);
4487 }
4488
4489 if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
4490 space_info->bytes_may_use += orig_bytes;
4491 trace_btrfs_space_reservation(root->fs_info, "space_info",
4492 space_info->flags, orig_bytes,
4493 1);
4494 ret = 0;
4495 }
4496
	/*
	 * Couldn't make our reservation, save our place so while we're trying
	 * to reclaim space we can actually use it instead of somebody else
	 * stealing it from us.
	 *
	 * We make the other tasks wait for the flush only when we can flush
	 * all things.
	 */
4505 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4506 flushing = true;
4507 space_info->flush = 1;
4508 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
4509 used += orig_bytes;
4510 if (need_do_async_reclaim(space_info, root->fs_info, used) &&
4511 !work_busy(&root->fs_info->async_reclaim_work))
4512 queue_work(system_unbound_wq,
4513 &root->fs_info->async_reclaim_work);
4514 }
4515 spin_unlock(&space_info->lock);
4516
4517 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
4518 goto out;
4519
4520 ret = flush_space(root, space_info, num_bytes, orig_bytes,
4521 flush_state);
4522 flush_state++;
4523
	/*
	 * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
	 * would happen.  So skip delalloc flush.
	 */
4528 if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
4529 (flush_state == FLUSH_DELALLOC ||
4530 flush_state == FLUSH_DELALLOC_WAIT))
4531 flush_state = ALLOC_CHUNK;
4532
4533 if (!ret)
4534 goto again;
4535 else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
4536 flush_state < COMMIT_TRANS)
4537 goto again;
4538 else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
4539 flush_state <= COMMIT_TRANS)
4540 goto again;
4541
4542out:
4543 if (ret == -ENOSPC &&
4544 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
4545 struct btrfs_block_rsv *global_rsv =
4546 &root->fs_info->global_block_rsv;
4547
4548 if (block_rsv != global_rsv &&
4549 !block_rsv_use_bytes(global_rsv, orig_bytes))
4550 ret = 0;
4551 }
4552 if (ret == -ENOSPC)
4553 trace_btrfs_space_reservation(root->fs_info,
4554 "space_info:enospc",
4555 space_info->flags, orig_bytes, 1);
4556 if (flushing) {
4557 spin_lock(&space_info->lock);
4558 space_info->flush = 0;
4559 wake_up_all(&space_info->wait);
4560 spin_unlock(&space_info->lock);
4561 }
4562 return ret;
4563}
4564
4565static struct btrfs_block_rsv *get_block_rsv(
4566 const struct btrfs_trans_handle *trans,
4567 const struct btrfs_root *root)
4568{
4569 struct btrfs_block_rsv *block_rsv = NULL;
4570
4571 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4572 block_rsv = trans->block_rsv;
4573
4574 if (root == root->fs_info->csum_root && trans->adding_csums)
4575 block_rsv = trans->block_rsv;
4576
4577 if (root == root->fs_info->uuid_root)
4578 block_rsv = trans->block_rsv;
4579
4580 if (!block_rsv)
4581 block_rsv = root->block_rsv;
4582
4583 if (!block_rsv)
4584 block_rsv = &root->fs_info->empty_block_rsv;
4585
4586 return block_rsv;
4587}
4588
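/*
 * Consume num_bytes of the reservation, or fail with -ENOSPC if fewer bytes
 * than that are currently reserved.
 */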
4589static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
4590 u64 num_bytes)
4591{
4592 int ret = -ENOSPC;
4593 spin_lock(&block_rsv->lock);
4594 if (block_rsv->reserved >= num_bytes) {
4595 block_rsv->reserved -= num_bytes;
4596 if (block_rsv->reserved < block_rsv->size)
4597 block_rsv->full = 0;
4598 ret = 0;
4599 }
4600 spin_unlock(&block_rsv->lock);
4601 return ret;
4602}
4603
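/*
 * Return num_bytes to the reservation, either growing its target size or
 * marking it full once the reserved bytes reach the target.
 */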
4604static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
4605 u64 num_bytes, int update_size)
4606{
4607 spin_lock(&block_rsv->lock);
4608 block_rsv->reserved += num_bytes;
4609 if (update_size)
4610 block_rsv->size += num_bytes;
4611 else if (block_rsv->reserved >= block_rsv->size)
4612 block_rsv->full = 1;
4613 spin_unlock(&block_rsv->lock);
4614}
4615
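/*
 * Migrate num_bytes from the global reserve into dest, but only if both sit
 * on the same space_info and the global reserve would keep at least
 * min_factor tenths of its size afterwards (div_factor scales by tenths).
 */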
4616int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
4617 struct btrfs_block_rsv *dest, u64 num_bytes,
4618 int min_factor)
4619{
4620 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4621 u64 min_bytes;
4622
4623 if (global_rsv->space_info != dest->space_info)
4624 return -ENOSPC;
4625
4626 spin_lock(&global_rsv->lock);
4627 min_bytes = div_factor(global_rsv->size, min_factor);
4628 if (global_rsv->reserved < min_bytes + num_bytes) {
4629 spin_unlock(&global_rsv->lock);
4630 return -ENOSPC;
4631 }
4632 global_rsv->reserved -= num_bytes;
4633 if (global_rsv->reserved < global_rsv->size)
4634 global_rsv->full = 0;
4635 spin_unlock(&global_rsv->lock);
4636
4637 block_rsv_add_bytes(dest, num_bytes, 1);
4638 return 0;
4639}
4640
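/*
 * Shrink block_rsv's size by num_bytes ((u64)-1 means the whole reserve).
 * Any bytes reserved beyond the new size are handed to dest if it has room,
 * and whatever is left over is released back to the space_info.
 */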
4641static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
4642 struct btrfs_block_rsv *block_rsv,
4643 struct btrfs_block_rsv *dest, u64 num_bytes)
4644{
4645 struct btrfs_space_info *space_info = block_rsv->space_info;
4646
4647 spin_lock(&block_rsv->lock);
4648 if (num_bytes == (u64)-1)
4649 num_bytes = block_rsv->size;
4650 block_rsv->size -= num_bytes;
4651 if (block_rsv->reserved >= block_rsv->size) {
4652 num_bytes = block_rsv->reserved - block_rsv->size;
4653 block_rsv->reserved = block_rsv->size;
4654 block_rsv->full = 1;
4655 } else {
4656 num_bytes = 0;
4657 }
4658 spin_unlock(&block_rsv->lock);
4659
4660 if (num_bytes > 0) {
4661 if (dest) {
4662 spin_lock(&dest->lock);
4663 if (!dest->full) {
4664 u64 bytes_to_add;
4665
4666 bytes_to_add = dest->size - dest->reserved;
4667 bytes_to_add = min(num_bytes, bytes_to_add);
4668 dest->reserved += bytes_to_add;
4669 if (dest->reserved >= dest->size)
4670 dest->full = 1;
4671 num_bytes -= bytes_to_add;
4672 }
4673 spin_unlock(&dest->lock);
4674 }
4675 if (num_bytes) {
4676 spin_lock(&space_info->lock);
4677 space_info->bytes_may_use -= num_bytes;
4678 trace_btrfs_space_reservation(fs_info, "space_info",
4679 space_info->flags, num_bytes, 0);
4680 spin_unlock(&space_info->lock);
4681 }
4682 }
4683}
4684
4685static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
4686 struct btrfs_block_rsv *dst, u64 num_bytes)
4687{
4688 int ret;
4689
4690 ret = block_rsv_use_bytes(src, num_bytes);
4691 if (ret)
4692 return ret;
4693
4694 block_rsv_add_bytes(dst, num_bytes, 1);
4695 return 0;
4696}
4697
4698void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
4699{
4700 memset(rsv, 0, sizeof(*rsv));
4701 spin_lock_init(&rsv->lock);
4702 rsv->type = type;
4703}
4704
4705struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
4706 unsigned short type)
4707{
4708 struct btrfs_block_rsv *block_rsv;
4709 struct btrfs_fs_info *fs_info = root->fs_info;
4710
4711 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
4712 if (!block_rsv)
4713 return NULL;
4714
4715 btrfs_init_block_rsv(block_rsv, type);
4716 block_rsv->space_info = __find_space_info(fs_info,
4717 BTRFS_BLOCK_GROUP_METADATA);
4718 return block_rsv;
4719}
4720
4721void btrfs_free_block_rsv(struct btrfs_root *root,
4722 struct btrfs_block_rsv *rsv)
4723{
4724 if (!rsv)
4725 return;
4726 btrfs_block_rsv_release(root, rsv, (u64)-1);
4727 kfree(rsv);
4728}
4729
4730int btrfs_block_rsv_add(struct btrfs_root *root,
4731 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
4732 enum btrfs_reserve_flush_enum flush)
4733{
4734 int ret;
4735
4736 if (num_bytes == 0)
4737 return 0;
4738
4739 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
4740 if (!ret) {
4741 block_rsv_add_bytes(block_rsv, num_bytes, 1);
4742 return 0;
4743 }
4744
4745 return ret;
4746}
4747
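/*
 * Check that the reserve still holds at least min_factor tenths of its
 * target size; returns 0 if so, -ENOSPC otherwise.
 */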
4748int btrfs_block_rsv_check(struct btrfs_root *root,
4749 struct btrfs_block_rsv *block_rsv, int min_factor)
4750{
4751 u64 num_bytes = 0;
4752 int ret = -ENOSPC;
4753
4754 if (!block_rsv)
4755 return 0;
4756
4757 spin_lock(&block_rsv->lock);
4758 num_bytes = div_factor(block_rsv->size, min_factor);
4759 if (block_rsv->reserved >= num_bytes)
4760 ret = 0;
4761 spin_unlock(&block_rsv->lock);
4762
4763 return ret;
4764}
4765
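/*
 * Top the reserve back up to min_reserved bytes, reserving the shortfall
 * from the space info if necessary.
 */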
4766int btrfs_block_rsv_refill(struct btrfs_root *root,
4767 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
4768 enum btrfs_reserve_flush_enum flush)
4769{
4770 u64 num_bytes = 0;
4771 int ret = -ENOSPC;
4772
4773 if (!block_rsv)
4774 return 0;
4775
4776 spin_lock(&block_rsv->lock);
4777 num_bytes = min_reserved;
4778 if (block_rsv->reserved >= num_bytes)
4779 ret = 0;
4780 else
4781 num_bytes -= block_rsv->reserved;
4782 spin_unlock(&block_rsv->lock);
4783
4784 if (!ret)
4785 return 0;
4786
4787 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
4788 if (!ret) {
4789 block_rsv_add_bytes(block_rsv, num_bytes, 0);
4790 return 0;
4791 }
4792
4793 return ret;
4794}
4795
4796int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
4797 struct btrfs_block_rsv *dst_rsv,
4798 u64 num_bytes)
4799{
4800 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4801}
4802
4803void btrfs_block_rsv_release(struct btrfs_root *root,
4804 struct btrfs_block_rsv *block_rsv,
4805 u64 num_bytes)
4806{
4807 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4808 if (global_rsv == block_rsv ||
4809 block_rsv->space_info != global_rsv->space_info)
4810 global_rsv = NULL;
4811 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
4812 num_bytes);
4813}
4814
/*
 * Helper to calculate the size of the global block reservation.  The desired
 * value is the sum of space used by the extent tree, checksum tree and root
 * tree.
 */
4820static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
4821{
4822 struct btrfs_space_info *sinfo;
4823 u64 num_bytes;
4824 u64 meta_used;
4825 u64 data_used;
4826 int csum_size = btrfs_super_csum_size(fs_info->super_copy);
4827
4828 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
4829 spin_lock(&sinfo->lock);
4830 data_used = sinfo->bytes_used;
4831 spin_unlock(&sinfo->lock);
4832
4833 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4834 spin_lock(&sinfo->lock);
4835 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
4836 data_used = 0;
4837 meta_used = sinfo->bytes_used;
4838 spin_unlock(&sinfo->lock);
4839
4840 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
4841 csum_size * 2;
4842 num_bytes += div64_u64(data_used + meta_used, 50);
4843
4844 if (num_bytes * 3 > meta_used)
4845 num_bytes = div64_u64(meta_used, 3);
4846
4847 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
4848}
4849
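/*
 * Recompute the global reserve's target size (capped at 512M) and rebalance
 * its reserved bytes against the metadata space info's bytes_may_use.
 */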
4850static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4851{
4852 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
4853 struct btrfs_space_info *sinfo = block_rsv->space_info;
4854 u64 num_bytes;
4855
4856 num_bytes = calc_global_metadata_size(fs_info);
4857
4858 spin_lock(&sinfo->lock);
4859 spin_lock(&block_rsv->lock);
4860
4861 block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
4862
4863 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
4864 sinfo->bytes_reserved + sinfo->bytes_readonly +
4865 sinfo->bytes_may_use;
4866
4867 if (sinfo->total_bytes > num_bytes) {
4868 num_bytes = sinfo->total_bytes - num_bytes;
4869 block_rsv->reserved += num_bytes;
4870 sinfo->bytes_may_use += num_bytes;
4871 trace_btrfs_space_reservation(fs_info, "space_info",
4872 sinfo->flags, num_bytes, 1);
4873 }
4874
4875 if (block_rsv->reserved >= block_rsv->size) {
4876 num_bytes = block_rsv->reserved - block_rsv->size;
4877 sinfo->bytes_may_use -= num_bytes;
4878 trace_btrfs_space_reservation(fs_info, "space_info",
4879 sinfo->flags, num_bytes, 0);
4880 block_rsv->reserved = block_rsv->size;
4881 block_rsv->full = 1;
4882 }
4883
4884 spin_unlock(&block_rsv->lock);
4885 spin_unlock(&sinfo->lock);
4886}
4887
4888static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
4889{
4890 struct btrfs_space_info *space_info;
4891
4892 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4893 fs_info->chunk_block_rsv.space_info = space_info;
4894
4895 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4896 fs_info->global_block_rsv.space_info = space_info;
4897 fs_info->delalloc_block_rsv.space_info = space_info;
4898 fs_info->trans_block_rsv.space_info = space_info;
4899 fs_info->empty_block_rsv.space_info = space_info;
4900 fs_info->delayed_block_rsv.space_info = space_info;
4901
4902 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
4903 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
4904 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
4905 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
4906 if (fs_info->quota_root)
4907 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
4908 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
4909
4910 update_global_block_rsv(fs_info);
4911}
4912
4913static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
4914{
4915 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
4916 (u64)-1);
4917 WARN_ON(fs_info->delalloc_block_rsv.size > 0);
4918 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
4919 WARN_ON(fs_info->trans_block_rsv.size > 0);
4920 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
4921 WARN_ON(fs_info->chunk_block_rsv.size > 0);
4922 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
4923 WARN_ON(fs_info->delayed_block_rsv.size > 0);
4924 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
4925}
4926
4927void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
4928 struct btrfs_root *root)
4929{
4930 if (!trans->block_rsv)
4931 return;
4932
4933 if (!trans->bytes_reserved)
4934 return;
4935
4936 trace_btrfs_space_reservation(root->fs_info, "transaction",
4937 trans->transid, trans->bytes_reserved, 0);
4938 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
4939 trans->bytes_reserved = 0;
4940}
4941
/* Can only return 0 or -ENOSPC */
4943int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4944 struct inode *inode)
4945{
4946 struct btrfs_root *root = BTRFS_I(inode)->root;
4947 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
4948 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
4949
	/*
	 * We need to hold space in order to delete our orphan item once we've
	 * added it, so this takes the reservation so we can release it later
	 * when we are truly done with the orphan item.
	 */
4955 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4956 trace_btrfs_space_reservation(root->fs_info, "orphan",
4957 btrfs_ino(inode), num_bytes, 1);
4958 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4959}
4960
4961void btrfs_orphan_release_metadata(struct inode *inode)
4962{
4963 struct btrfs_root *root = BTRFS_I(inode)->root;
4964 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
4965 trace_btrfs_space_reservation(root->fs_info, "orphan",
4966 btrfs_ino(inode), num_bytes, 0);
4967 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4968}
4969
/*
 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
 * @root: the root of the parent directory
 * @rsv: block reservation
 * @items: the number of items that we need to do reservation for
 * @qgroup_reserved: used to return the reserved size in qgroup
 * @use_global_rsv: allow fallback to the global block reservation
 *
 * This function is used to reserve the space for snapshot/subvolume creation
 * and deletion.  Those operations are different from common file/directory
 * operations in that they can free some space before they are done, so we
 * need a worst-case reservation.
 */
4984int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
4985 struct btrfs_block_rsv *rsv,
4986 int items,
4987 u64 *qgroup_reserved,
4988 bool use_global_rsv)
4989{
4990 u64 num_bytes;
4991 int ret;
4992 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4993
4994 if (root->fs_info->quota_enabled) {
		/* One for parent inode, two for dir entries */
4996 num_bytes = 3 * root->leafsize;
4997 ret = btrfs_qgroup_reserve(root, num_bytes);
4998 if (ret)
4999 return ret;
5000 } else {
5001 num_bytes = 0;
5002 }
5003
5004 *qgroup_reserved = num_bytes;
5005
5006 num_bytes = btrfs_calc_trans_metadata_size(root, items);
5007 rsv->space_info = __find_space_info(root->fs_info,
5008 BTRFS_BLOCK_GROUP_METADATA);
5009 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5010 BTRFS_RESERVE_FLUSH_ALL);
5011
5012 if (ret == -ENOSPC && use_global_rsv)
5013 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
5014
5015 if (ret) {
5016 if (*qgroup_reserved)
5017 btrfs_qgroup_free(root, *qgroup_reserved);
5018 }
5019
5020 return ret;
5021}
5022
5023void btrfs_subvolume_release_metadata(struct btrfs_root *root,
5024 struct btrfs_block_rsv *rsv,
5025 u64 qgroup_reserved)
5026{
5027 btrfs_block_rsv_release(root, rsv, (u64)-1);
5028 if (qgroup_reserved)
5029 btrfs_qgroup_free(root, qgroup_reserved);
5030}
5031
/**
 * drop_outstanding_extent - drop an outstanding extent
 * @inode: the inode we're dropping the extent for
 *
 * This is called when we are freeing up an outstanding extent, either after
 * an error or after an extent is written.  It returns the number of reserved
 * extents that need to be freed.  This must be called with
 * BTRFS_I(inode)->lock held.
 */
5041static unsigned drop_outstanding_extent(struct inode *inode)
5042{
5043 unsigned drop_inode_space = 0;
5044 unsigned dropped_extents = 0;
5045
5046 BUG_ON(!BTRFS_I(inode)->outstanding_extents);
5047 BTRFS_I(inode)->outstanding_extents--;
5048
5049 if (BTRFS_I(inode)->outstanding_extents == 0 &&
5050 test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5051 &BTRFS_I(inode)->runtime_flags))
5052 drop_inode_space = 1;
5053
	/*
	 * If we have more or the same amount of outstanding extents as we
	 * have reserved then we need to leave the reserved extents count
	 * alone.
	 */
5058 if (BTRFS_I(inode)->outstanding_extents >=
5059 BTRFS_I(inode)->reserved_extents)
5060 return drop_inode_space;
5061
5062 dropped_extents = BTRFS_I(inode)->reserved_extents -
5063 BTRFS_I(inode)->outstanding_extents;
5064 BTRFS_I(inode)->reserved_extents -= dropped_extents;
5065 return dropped_extents + drop_inode_space;
5066}
5067
/**
 * calc_csum_metadata_size - return the amount of metadata space that must be
 *	reserved/freed for the given bytes.
 * @inode: the inode we're manipulating
 * @num_bytes: the number of bytes in question
 * @reserve: 1 if we are reserving space, 0 if we are freeing space
 *
 * This adjusts the number of csum_bytes in the inode and then returns the
 * correct amount of metadata that must either be reserved or freed.  We
 * calculate how many checksums we can fit into one leaf and then divide the
 * number of bytes that will need to be checksummed by this value to figure
 * out how many checksums will be required.  If we are adding bytes then the
 * number may go up and we will return the number of additional bytes that
 * must be reserved.  If it is going down we will return the number of bytes
 * that must be freed.
 *
 * This must be called with BTRFS_I(inode)->lock held.
 */
5086static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
5087 int reserve)
5088{
5089 struct btrfs_root *root = BTRFS_I(inode)->root;
5090 u64 csum_size;
5091 int num_csums_per_leaf;
5092 int num_csums;
5093 int old_csums;
5094
5095 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
5096 BTRFS_I(inode)->csum_bytes == 0)
5097 return 0;
5098
5099 old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
5100 if (reserve)
5101 BTRFS_I(inode)->csum_bytes += num_bytes;
5102 else
5103 BTRFS_I(inode)->csum_bytes -= num_bytes;
5104 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
5105 num_csums_per_leaf = (int)div64_u64(csum_size,
5106 sizeof(struct btrfs_csum_item) +
5107 sizeof(struct btrfs_disk_key));
5108 num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
5109 num_csums = num_csums + num_csums_per_leaf - 1;
5110 num_csums = num_csums / num_csums_per_leaf;
5111
5112 old_csums = old_csums + num_csums_per_leaf - 1;
5113 old_csums = old_csums / num_csums_per_leaf;
5114
	/* No change, no need to reserve more */
5116 if (old_csums == num_csums)
5117 return 0;
5118
5119 if (reserve)
5120 return btrfs_calc_trans_metadata_size(root,
5121 num_csums - old_csums);
5122
5123 return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
5124}
5125
5126int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5127{
5128 struct btrfs_root *root = BTRFS_I(inode)->root;
5129 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
5130 u64 to_reserve = 0;
5131 u64 csum_bytes;
5132 unsigned nr_extents = 0;
5133 int extra_reserve = 0;
5134 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
5135 int ret = 0;
5136 bool delalloc_lock = true;
5137 u64 to_free = 0;
5138 unsigned dropped;
5139
	/*
	 * If we are a free space inode we need to not flush since we will be
	 * in the middle of a transaction commit.  We also don't need the
	 * delalloc mutex since we won't race with anybody.  We need this
	 * mostly to keep lockdep happy.
	 */
5145 if (btrfs_is_free_space_inode(inode)) {
5146 flush = BTRFS_RESERVE_NO_FLUSH;
5147 delalloc_lock = false;
5148 }
5149
5150 if (flush != BTRFS_RESERVE_NO_FLUSH &&
5151 btrfs_transaction_in_commit(root->fs_info))
5152 schedule_timeout(1);
5153
5154 if (delalloc_lock)
5155 mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
5156
5157 num_bytes = ALIGN(num_bytes, root->sectorsize);
5158
5159 spin_lock(&BTRFS_I(inode)->lock);
5160 BTRFS_I(inode)->outstanding_extents++;
5161
5162 if (BTRFS_I(inode)->outstanding_extents >
5163 BTRFS_I(inode)->reserved_extents)
5164 nr_extents = BTRFS_I(inode)->outstanding_extents -
5165 BTRFS_I(inode)->reserved_extents;
5166
	/*
	 * Add an item to reserve for updating the inode when we complete the
	 * delalloc io.
	 */
5171 if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5172 &BTRFS_I(inode)->runtime_flags)) {
5173 nr_extents++;
5174 extra_reserve = 1;
5175 }
5176
5177 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
5178 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
5179 csum_bytes = BTRFS_I(inode)->csum_bytes;
5180 spin_unlock(&BTRFS_I(inode)->lock);
5181
5182 if (root->fs_info->quota_enabled) {
5183 ret = btrfs_qgroup_reserve(root, num_bytes +
5184 nr_extents * root->leafsize);
5185 if (ret)
5186 goto out_fail;
5187 }
5188
5189 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
5190 if (unlikely(ret)) {
5191 if (root->fs_info->quota_enabled)
5192 btrfs_qgroup_free(root, num_bytes +
5193 nr_extents * root->leafsize);
5194 goto out_fail;
5195 }
5196
5197 spin_lock(&BTRFS_I(inode)->lock);
5198 if (extra_reserve) {
5199 set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
5200 &BTRFS_I(inode)->runtime_flags);
5201 nr_extents--;
5202 }
5203 BTRFS_I(inode)->reserved_extents += nr_extents;
5204 spin_unlock(&BTRFS_I(inode)->lock);
5205
5206 if (delalloc_lock)
5207 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
5208
5209 if (to_reserve)
5210 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5211 btrfs_ino(inode), to_reserve, 1);
5212 block_rsv_add_bytes(block_rsv, to_reserve, 1);
5213
5214 return 0;
5215
5216out_fail:
5217 spin_lock(&BTRFS_I(inode)->lock);
5218 dropped = drop_outstanding_extent(inode);
	/*
	 * If the inode's csum_bytes is the same as the original csum_bytes
	 * then we know we haven't raced with any free()ers, so we can just
	 * reduce our inode's csum bytes and free up space on the block rsv.
	 */
5224 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
5225 calc_csum_metadata_size(inode, num_bytes, 0);
5226 } else {
5227 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
5228 u64 bytes;

		/*
		 * This is tricky, but first we need to figure out how much we
		 * freed from any free()ers that occurred during this
		 * reservation, so we reset ->csum_bytes to the csum_bytes
		 * before we dropped our lock, and then call the free for the
		 * number of bytes that were freed while we were trying our
		 * reservation.
		 */
5238 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
5239 BTRFS_I(inode)->csum_bytes = csum_bytes;
5240 to_free = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now we need to see how much we would have freed had we not
		 * been making this reservation and our ->csum_bytes were not
		 * artificially inflated.
		 */
5248 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
5249 bytes = csum_bytes - orig_csum_bytes;
5250 bytes = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now reset ->csum_bytes to what it should be.  If bytes is
		 * more than to_free then we would have freed more space had
		 * we not had an artificially high ->csum_bytes, so we need to
		 * free the remainder.  If bytes is the same or less then we
		 * don't need to do anything: the other free()ers did the
		 * correct thing.
		 */
5260 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
5261 if (bytes > to_free)
5262 to_free = bytes - to_free;
5263 else
5264 to_free = 0;
5265 }
5266 spin_unlock(&BTRFS_I(inode)->lock);
5267 if (dropped)
5268 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5269
5270 if (to_free) {
5271 btrfs_block_rsv_release(root, block_rsv, to_free);
5272 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5273 btrfs_ino(inode), to_free, 0);
5274 }
5275 if (delalloc_lock)
5276 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
5277 return ret;
5278}
5279
/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for
 * @num_bytes: the number of bytes we're releasing
 *
 * This will release the metadata reservation for an inode.  This can be
 * called once we complete IO for a given set of bytes to release their
 * metadata reservations.
 */
5289void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5290{
5291 struct btrfs_root *root = BTRFS_I(inode)->root;
5292 u64 to_free = 0;
5293 unsigned dropped;
5294
5295 num_bytes = ALIGN(num_bytes, root->sectorsize);
5296 spin_lock(&BTRFS_I(inode)->lock);
5297 dropped = drop_outstanding_extent(inode);
5298
5299 if (num_bytes)
5300 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
5301 spin_unlock(&BTRFS_I(inode)->lock);
5302 if (dropped > 0)
5303 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5304
5305 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5306 btrfs_ino(inode), to_free, 0);
5307 if (root->fs_info->quota_enabled) {
5308 btrfs_qgroup_free(root, num_bytes +
5309 dropped * root->leafsize);
5310 }
5311
5312 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
5313 to_free);
5314}
5315
/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
 * @inode: inode we're writing to
 * @num_bytes: the number of bytes we want to allocate
 *
 * This will do the following things:
 *
 * o reserve space in the data space info for num_bytes
 * o reserve space in the metadata space info based on the number of
 *   outstanding extents and how many csums will be needed
 * o add to the inode's ->delalloc_bytes
 * o add it to the fs_info's delalloc inodes list
 *
 * This will return 0 for success and -ENOSPC if there is no space left.
 */
5331int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
5332{
5333 int ret;
5334
5335 ret = btrfs_check_data_free_space(inode, num_bytes);
5336 if (ret)
5337 return ret;
5338
5339 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
5340 if (ret) {
5341 btrfs_free_reserved_data_space(inode, num_bytes);
5342 return ret;
5343 }
5344
5345 return 0;
5346}
5347
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @num_bytes: the number of bytes we want to free up
 *
 * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
 * called in the case that we don't need the metadata AND data reservations
 * anymore, e.g. on error or when we insert an inline extent.
 *
 * This function will release the metadata space that was not used and will
 * decrement ->delalloc_bytes and remove the inode from the fs_info
 * delalloc_inodes list if there are no delalloc bytes left.
 */
5361void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
5362{
5363 btrfs_delalloc_release_metadata(inode, num_bytes);
5364 btrfs_free_reserved_data_space(inode, num_bytes);
5365}
5366
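/*
 * Update the used-bytes accounting for every block group that intersects
 * [bytenr, bytenr + num_bytes): on allocation move the bytes from reserved
 * to used, on free move them to pinned and mark the range dirty in the
 * pinned extents tree so the space is returned at transaction commit.
 */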
5367static int update_block_group(struct btrfs_root *root,
5368 u64 bytenr, u64 num_bytes, int alloc)
5369{
5370 struct btrfs_block_group_cache *cache = NULL;
5371 struct btrfs_fs_info *info = root->fs_info;
5372 u64 total = num_bytes;
5373 u64 old_val;
5374 u64 byte_in_group;
5375 int factor;
5376
	/* Block accounting for the super block */
5378 spin_lock(&info->delalloc_root_lock);
5379 old_val = btrfs_super_bytes_used(info->super_copy);
5380 if (alloc)
5381 old_val += num_bytes;
5382 else
5383 old_val -= num_bytes;
5384 btrfs_set_super_bytes_used(info->super_copy, old_val);
5385 spin_unlock(&info->delalloc_root_lock);
5386
5387 while (total) {
5388 cache = btrfs_lookup_block_group(info, bytenr);
5389 if (!cache)
5390 return -ENOENT;
5391 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
5392 BTRFS_BLOCK_GROUP_RAID1 |
5393 BTRFS_BLOCK_GROUP_RAID10))
5394 factor = 2;
5395 else
5396 factor = 1;
5397
		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.
		 * This is because we need the unpinning stage to actually add
		 * the space back to the block group, otherwise we will leak
		 * space.
		 */
5403 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5404 cache_block_group(cache, 1);
5405
5406 byte_in_group = bytenr - cache->key.objectid;
5407 WARN_ON(byte_in_group > cache->key.offset);
5408
5409 spin_lock(&cache->space_info->lock);
5410 spin_lock(&cache->lock);
5411
5412 if (btrfs_test_opt(root, SPACE_CACHE) &&
5413 cache->disk_cache_state < BTRFS_DC_CLEAR)
5414 cache->disk_cache_state = BTRFS_DC_CLEAR;
5415
5416 cache->dirty = 1;
5417 old_val = btrfs_block_group_used(&cache->item);
5418 num_bytes = min(total, cache->key.offset - byte_in_group);
5419 if (alloc) {
5420 old_val += num_bytes;
5421 btrfs_set_block_group_used(&cache->item, old_val);
5422 cache->reserved -= num_bytes;
5423 cache->space_info->bytes_reserved -= num_bytes;
5424 cache->space_info->bytes_used += num_bytes;
5425 cache->space_info->disk_used += num_bytes * factor;
5426 spin_unlock(&cache->lock);
5427 spin_unlock(&cache->space_info->lock);
5428 } else {
5429 old_val -= num_bytes;
5430 btrfs_set_block_group_used(&cache->item, old_val);
5431 cache->pinned += num_bytes;
5432 cache->space_info->bytes_pinned += num_bytes;
5433 cache->space_info->bytes_used -= num_bytes;
5434 cache->space_info->disk_used -= num_bytes * factor;
5435 spin_unlock(&cache->lock);
5436 spin_unlock(&cache->space_info->lock);
5437
5438 set_extent_dirty(info->pinned_extents,
5439 bytenr, bytenr + num_bytes - 1,
5440 GFP_NOFS | __GFP_NOFAIL);
5441 }
5442 btrfs_put_block_group(cache);
5443 total -= num_bytes;
5444 bytenr += num_bytes;
5445 }
5446 return 0;
5447}
5448
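/*
 * Return the cached fs_info->first_logical_byte when valid, otherwise the
 * start of the first block group at or after search_start.
 */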
5449static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
5450{
5451 struct btrfs_block_group_cache *cache;
5452 u64 bytenr;
5453
5454 spin_lock(&root->fs_info->block_group_cache_lock);
5455 bytenr = root->fs_info->first_logical_byte;
5456 spin_unlock(&root->fs_info->block_group_cache_lock);
5457
5458 if (bytenr < (u64)-1)
5459 return bytenr;
5460
5461 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
5462 if (!cache)
5463 return 0;
5464
5465 bytenr = cache->key.objectid;
5466 btrfs_put_block_group(cache);
5467
5468 return bytenr;
5469}
5470
5471static int pin_down_extent(struct btrfs_root *root,
5472 struct btrfs_block_group_cache *cache,
5473 u64 bytenr, u64 num_bytes, int reserved)
5474{
5475 spin_lock(&cache->space_info->lock);
5476 spin_lock(&cache->lock);
5477 cache->pinned += num_bytes;
5478 cache->space_info->bytes_pinned += num_bytes;
5479 if (reserved) {
5480 cache->reserved -= num_bytes;
5481 cache->space_info->bytes_reserved -= num_bytes;
5482 }
5483 spin_unlock(&cache->lock);
5484 spin_unlock(&cache->space_info->lock);
5485
5486 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
5487 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
5488 if (reserved)
5489 trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
5490 return 0;
5491}
5492
/*
 * This function must be called within a transaction.
 */
5496int btrfs_pin_extent(struct btrfs_root *root,
5497 u64 bytenr, u64 num_bytes, int reserved)
5498{
5499 struct btrfs_block_group_cache *cache;
5500
5501 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
5502 BUG_ON(!cache);
5503
5504 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
5505
5506 btrfs_put_block_group(cache);
5507 return 0;
5508}
5509
/*
 * This function must be called within a transaction.
 */
5513int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
5514 u64 bytenr, u64 num_bytes)
5515{
5516 struct btrfs_block_group_cache *cache;
5517 int ret;
5518
5519 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
5520 if (!cache)
5521 return -EINVAL;
5522
	/*
	 * Pull in the free space cache (if any) so that our pin removes the
	 * free space from the cache.  We have load_only set to one because
	 * the slow code to read in the free extents does check the pinned
	 * extents.
	 */
5529 cache_block_group(cache, 1);
5530
5531 pin_down_extent(root, cache, bytenr, num_bytes, 0);
5532
	/* Remove us from the free space cache (if we're there at all) */
5534 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
5535 btrfs_put_block_group(cache);
5536 return ret;
5537}
5538
5539static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
5540{
5541 int ret;
5542 struct btrfs_block_group_cache *block_group;
5543 struct btrfs_caching_control *caching_ctl;
5544
5545 block_group = btrfs_lookup_block_group(root->fs_info, start);
5546 if (!block_group)
5547 return -EINVAL;
5548
5549 cache_block_group(block_group, 0);
5550 caching_ctl = get_caching_control(block_group);
5551
5552 if (!caching_ctl) {
		/* Logic error */
5554 BUG_ON(!block_group_cache_done(block_group));
5555 ret = btrfs_remove_free_space(block_group, start, num_bytes);
5556 } else {
5557 mutex_lock(&caching_ctl->mutex);
5558
5559 if (start >= caching_ctl->progress) {
5560 ret = add_excluded_extent(root, start, num_bytes);
5561 } else if (start + num_bytes <= caching_ctl->progress) {
5562 ret = btrfs_remove_free_space(block_group,
5563 start, num_bytes);
5564 } else {
5565 num_bytes = caching_ctl->progress - start;
5566 ret = btrfs_remove_free_space(block_group,
5567 start, num_bytes);
5568 if (ret)
5569 goto out_lock;
5570
5571 num_bytes = (start + num_bytes) -
5572 caching_ctl->progress;
5573 start = caching_ctl->progress;
5574 ret = add_excluded_extent(root, start, num_bytes);
5575 }
5576out_lock:
5577 mutex_unlock(&caching_ctl->mutex);
5578 put_caching_control(caching_ctl);
5579 }
5580 btrfs_put_block_group(block_group);
5581 return ret;
5582}
5583
5584int btrfs_exclude_logged_extents(struct btrfs_root *log,
5585 struct extent_buffer *eb)
5586{
5587 struct btrfs_file_extent_item *item;
5588 struct btrfs_key key;
5589 int found_type;
5590 int i;
5591
5592 if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
5593 return 0;
5594
5595 for (i = 0; i < btrfs_header_nritems(eb); i++) {
5596 btrfs_item_key_to_cpu(eb, &key, i);
5597 if (key.type != BTRFS_EXTENT_DATA_KEY)
5598 continue;
5599 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
5600 found_type = btrfs_file_extent_type(eb, item);
5601 if (found_type == BTRFS_FILE_EXTENT_INLINE)
5602 continue;
5603 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
5604 continue;
5605 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
5606 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
5607 __exclude_logged_extent(log, key.objectid, key.offset);
5608 }
5609
5610 return 0;
5611}
5612
/**
 * btrfs_update_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @num_bytes:	The number of bytes in question
 * @reserve:	One of the reservation enums
 * @delalloc:	The blocks are allocated for a delalloc write
 *
 * This is called by the allocator when it reserves space, or by somebody who
 * is freeing space that was never actually used on disk.  For example, if
 * you reserve some space for a new leaf in transaction A and before
 * transaction A commits you free that leaf, you call this with reserve set
 * to 0 in order to clear the reservation.
 *
 * Metadata reservations should be called with RESERVE_ALLOC so we do the
 * proper ENOSPC accounting.  For data we handle the reservation through
 * clearing the delalloc bits in the io_tree.  We have to do this since we
 * could end up allocating less disk space for the amount of data we have
 * reserved in the case of compression.
 *
 * If this is a reservation and the block group has become read only we
 * cannot make the reservation and return -EAGAIN, otherwise this function
 * always succeeds.
 */
5636static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5637 u64 num_bytes, int reserve, int delalloc)
5638{
5639 struct btrfs_space_info *space_info = cache->space_info;
5640 int ret = 0;
5641
5642 spin_lock(&space_info->lock);
5643 spin_lock(&cache->lock);
5644 if (reserve != RESERVE_FREE) {
5645 if (cache->ro) {
5646 ret = -EAGAIN;
5647 } else {
5648 cache->reserved += num_bytes;
5649 space_info->bytes_reserved += num_bytes;
5650 if (reserve == RESERVE_ALLOC) {
5651 trace_btrfs_space_reservation(cache->fs_info,
5652 "space_info", space_info->flags,
5653 num_bytes, 0);
5654 space_info->bytes_may_use -= num_bytes;
5655 }
5656
5657 if (delalloc)
5658 cache->delalloc_bytes += num_bytes;
5659 }
5660 } else {
5661 if (cache->ro)
5662 space_info->bytes_readonly += num_bytes;
5663 cache->reserved -= num_bytes;
5664 space_info->bytes_reserved -= num_bytes;
5665
5666 if (delalloc)
5667 cache->delalloc_bytes -= num_bytes;
5668 }
5669 spin_unlock(&cache->lock);
5670 spin_unlock(&space_info->lock);
5671 return ret;
5672}
5673
5674void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
5675 struct btrfs_root *root)
5676{
5677 struct btrfs_fs_info *fs_info = root->fs_info;
5678 struct btrfs_caching_control *next;
5679 struct btrfs_caching_control *caching_ctl;
5680 struct btrfs_block_group_cache *cache;
5681
5682 down_write(&fs_info->commit_root_sem);
5683
5684 list_for_each_entry_safe(caching_ctl, next,
5685 &fs_info->caching_block_groups, list) {
5686 cache = caching_ctl->block_group;
5687 if (block_group_cache_done(cache)) {
5688 cache->last_byte_to_unpin = (u64)-1;
5689 list_del_init(&caching_ctl->list);
5690 put_caching_control(caching_ctl);
5691 } else {
5692 cache->last_byte_to_unpin = caching_ctl->progress;
5693 }
5694 }
5695
5696 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
5697 fs_info->pinned_extents = &fs_info->freed_extents[1];
5698 else
5699 fs_info->pinned_extents = &fs_info->freed_extents[0];
5700
5701 up_write(&fs_info->commit_root_sem);
5702
5703 update_global_block_rsv(fs_info);
5704}
5705
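/*
 * Unpin the byte range [start, end]: return the space to the free space
 * cache up to each group's last_byte_to_unpin and, where possible, use the
 * freed space to refill the global block reserve.
 */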
5706static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
5707{
5708 struct btrfs_fs_info *fs_info = root->fs_info;
5709 struct btrfs_block_group_cache *cache = NULL;
5710 struct btrfs_space_info *space_info;
5711 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5712 u64 len;
5713 bool readonly;
5714
5715 while (start <= end) {
5716 readonly = false;
5717 if (!cache ||
5718 start >= cache->key.objectid + cache->key.offset) {
5719 if (cache)
5720 btrfs_put_block_group(cache);
5721 cache = btrfs_lookup_block_group(fs_info, start);
5722 BUG_ON(!cache);
5723 }
5724
5725 len = cache->key.objectid + cache->key.offset - start;
5726 len = min(len, end + 1 - start);
5727
5728 if (start < cache->last_byte_to_unpin) {
5729 len = min(len, cache->last_byte_to_unpin - start);
5730 btrfs_add_free_space(cache, start, len);
5731 }
5732
5733 start += len;
5734 space_info = cache->space_info;
5735
5736 spin_lock(&space_info->lock);
5737 spin_lock(&cache->lock);
5738 cache->pinned -= len;
5739 space_info->bytes_pinned -= len;
5740 percpu_counter_add(&space_info->total_bytes_pinned, -len);
5741 if (cache->ro) {
5742 space_info->bytes_readonly += len;
5743 readonly = true;
5744 }
5745 spin_unlock(&cache->lock);
5746 if (!readonly && global_rsv->space_info == space_info) {
5747 spin_lock(&global_rsv->lock);
5748 if (!global_rsv->full) {
5749 len = min(len, global_rsv->size -
5750 global_rsv->reserved);
5751 global_rsv->reserved += len;
5752 space_info->bytes_may_use += len;
5753 if (global_rsv->reserved >= global_rsv->size)
5754 global_rsv->full = 1;
5755 }
5756 spin_unlock(&global_rsv->lock);
5757 }
5758 spin_unlock(&space_info->lock);
5759 }
5760
5761 if (cache)
5762 btrfs_put_block_group(cache);
5763 return 0;
5764}
5765
5766int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
5767 struct btrfs_root *root)
5768{
5769 struct btrfs_fs_info *fs_info = root->fs_info;
5770 struct extent_io_tree *unpin;
5771 u64 start;
5772 u64 end;
5773 int ret;
5774
5775 if (trans->aborted)
5776 return 0;
5777
5778 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
5779 unpin = &fs_info->freed_extents[1];
5780 else
5781 unpin = &fs_info->freed_extents[0];
5782
5783 while (1) {
5784 ret = find_first_extent_bit(unpin, 0, &start, &end,
5785 EXTENT_DIRTY, NULL);
5786 if (ret)
5787 break;
5788
5789 if (btrfs_test_opt(root, DISCARD))
5790 ret = btrfs_discard_extent(root, start,
5791 end + 1 - start, NULL);
5792
5793 clear_extent_dirty(unpin, start, end, GFP_NOFS);
5794 unpin_extent_range(root, start, end);
5795 cond_resched();
5796 }
5797
5798 return 0;
5799}
5800
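/*
 * Adjust total_bytes_pinned for the space info that covers this extent:
 * system or metadata for tree blocks (depending on the owning tree), data
 * otherwise.
 */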
5801static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
5802 u64 owner, u64 root_objectid)
5803{
5804 struct btrfs_space_info *space_info;
5805 u64 flags;
5806
5807 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
5808 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
5809 flags = BTRFS_BLOCK_GROUP_SYSTEM;
5810 else
5811 flags = BTRFS_BLOCK_GROUP_METADATA;
5812 } else {
5813 flags = BTRFS_BLOCK_GROUP_DATA;
5814 }
5815
5816 space_info = __find_space_info(fs_info, flags);
5817 BUG_ON(!space_info);
5818 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
5819}
5820
5821
5822static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5823 struct btrfs_root *root,
5824 u64 bytenr, u64 num_bytes, u64 parent,
5825 u64 root_objectid, u64 owner_objectid,
5826 u64 owner_offset, int refs_to_drop,
5827 struct btrfs_delayed_extent_op *extent_op,
5828 int no_quota)
5829{
5830 struct btrfs_key key;
5831 struct btrfs_path *path;
5832 struct btrfs_fs_info *info = root->fs_info;
5833 struct btrfs_root *extent_root = info->extent_root;
5834 struct extent_buffer *leaf;
5835 struct btrfs_extent_item *ei;
5836 struct btrfs_extent_inline_ref *iref;
5837 int ret;
5838 int is_data;
5839 int extent_slot = 0;
5840 int found_extent = 0;
5841 int num_to_del = 1;
5842 u32 item_size;
5843 u64 refs;
5844 int last_ref = 0;
5845 enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
5846 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
5847 SKINNY_METADATA);
5848
5849 if (!info->quota_enabled || !is_fstree(root_objectid))
5850 no_quota = 1;
5851
5852 path = btrfs_alloc_path();
5853 if (!path)
5854 return -ENOMEM;
5855
5856 path->reada = 1;
5857 path->leave_spinning = 1;
5858
5859 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
5860 BUG_ON(!is_data && refs_to_drop != 1);
5861
5862 if (is_data)
5863 skinny_metadata = 0;
5864
5865 ret = lookup_extent_backref(trans, extent_root, path, &iref,
5866 bytenr, num_bytes, parent,
5867 root_objectid, owner_objectid,
5868 owner_offset);
5869 if (ret == 0) {
5870 extent_slot = path->slots[0];
5871 while (extent_slot >= 0) {
5872 btrfs_item_key_to_cpu(path->nodes[0], &key,
5873 extent_slot);
5874 if (key.objectid != bytenr)
5875 break;
5876 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
5877 key.offset == num_bytes) {
5878 found_extent = 1;
5879 break;
5880 }
5881 if (key.type == BTRFS_METADATA_ITEM_KEY &&
5882 key.offset == owner_objectid) {
5883 found_extent = 1;
5884 break;
5885 }
5886 if (path->slots[0] - extent_slot > 5)
5887 break;
5888 extent_slot--;
5889 }
5890#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5891 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
5892 if (found_extent && item_size < sizeof(*ei))
5893 found_extent = 0;
5894#endif
5895 if (!found_extent) {
5896 BUG_ON(iref);
5897 ret = remove_extent_backref(trans, extent_root, path,
5898 NULL, refs_to_drop,
5899 is_data, &last_ref);
5900 if (ret) {
5901 btrfs_abort_transaction(trans, extent_root, ret);
5902 goto out;
5903 }
5904 btrfs_release_path(path);
5905 path->leave_spinning = 1;
5906
5907 key.objectid = bytenr;
5908 key.type = BTRFS_EXTENT_ITEM_KEY;
5909 key.offset = num_bytes;
5910
5911 if (!is_data && skinny_metadata) {
5912 key.type = BTRFS_METADATA_ITEM_KEY;
5913 key.offset = owner_objectid;
5914 }
5915
5916 ret = btrfs_search_slot(trans, extent_root,
5917 &key, path, -1, 1);
5918 if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
5923 path->slots[0]--;
5924 btrfs_item_key_to_cpu(path->nodes[0], &key,
5925 path->slots[0]);
5926 if (key.objectid == bytenr &&
5927 key.type == BTRFS_EXTENT_ITEM_KEY &&
5928 key.offset == num_bytes)
5929 ret = 0;
5930 }
5931
5932 if (ret > 0 && skinny_metadata) {
5933 skinny_metadata = false;
5934 key.objectid = bytenr;
5935 key.type = BTRFS_EXTENT_ITEM_KEY;
5936 key.offset = num_bytes;
5937 btrfs_release_path(path);
5938 ret = btrfs_search_slot(trans, extent_root,
5939 &key, path, -1, 1);
5940 }
5941
5942 if (ret) {
5943 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5944 ret, bytenr);
5945 if (ret > 0)
5946 btrfs_print_leaf(extent_root,
5947 path->nodes[0]);
5948 }
5949 if (ret < 0) {
5950 btrfs_abort_transaction(trans, extent_root, ret);
5951 goto out;
5952 }
5953 extent_slot = path->slots[0];
5954 }
5955 } else if (WARN_ON(ret == -ENOENT)) {
5956 btrfs_print_leaf(extent_root, path->nodes[0]);
5957 btrfs_err(info,
5958 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5959 bytenr, parent, root_objectid, owner_objectid,
5960 owner_offset);
5961 btrfs_abort_transaction(trans, extent_root, ret);
5962 goto out;
5963 } else {
5964 btrfs_abort_transaction(trans, extent_root, ret);
5965 goto out;
5966 }
5967
5968 leaf = path->nodes[0];
5969 item_size = btrfs_item_size_nr(leaf, extent_slot);
5970#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5971 if (item_size < sizeof(*ei)) {
5972 BUG_ON(found_extent || extent_slot != path->slots[0]);
5973 ret = convert_extent_item_v0(trans, extent_root, path,
5974 owner_objectid, 0);
5975 if (ret < 0) {
5976 btrfs_abort_transaction(trans, extent_root, ret);
5977 goto out;
5978 }
5979
5980 btrfs_release_path(path);
5981 path->leave_spinning = 1;
5982
5983 key.objectid = bytenr;
5984 key.type = BTRFS_EXTENT_ITEM_KEY;
5985 key.offset = num_bytes;
5986
5987 ret = btrfs_search_slot(trans, extent_root, &key, path,
5988 -1, 1);
5989 if (ret) {
5990 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5991 ret, bytenr);
5992 btrfs_print_leaf(extent_root, path->nodes[0]);
5993 }
5994 if (ret < 0) {
5995 btrfs_abort_transaction(trans, extent_root, ret);
5996 goto out;
5997 }
5998
5999 extent_slot = path->slots[0];
6000 leaf = path->nodes[0];
6001 item_size = btrfs_item_size_nr(leaf, extent_slot);
6002 }
6003#endif
6004 BUG_ON(item_size < sizeof(*ei));
6005 ei = btrfs_item_ptr(leaf, extent_slot,
6006 struct btrfs_extent_item);
6007 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
6008 key.type == BTRFS_EXTENT_ITEM_KEY) {
6009 struct btrfs_tree_block_info *bi;
6010 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
6011 bi = (struct btrfs_tree_block_info *)(ei + 1);
6012 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
6013 }
6014
6015 refs = btrfs_extent_refs(leaf, ei);
6016 if (refs < refs_to_drop) {
6017 btrfs_err(info, "trying to drop %d refs but we only have %Lu "
6018 "for bytenr %Lu", refs_to_drop, refs, bytenr);
6019 ret = -EINVAL;
6020 btrfs_abort_transaction(trans, extent_root, ret);
6021 goto out;
6022 }
6023 refs -= refs_to_drop;
6024
6025 if (refs > 0) {
6026 type = BTRFS_QGROUP_OPER_SUB_SHARED;
6027 if (extent_op)
6028 __run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of an inline back ref, the reference count
		 * will be updated by remove_extent_backref.
		 */
6033 if (iref) {
6034 BUG_ON(!found_extent);
6035 } else {
6036 btrfs_set_extent_refs(leaf, ei, refs);
6037 btrfs_mark_buffer_dirty(leaf);
6038 }
6039 if (found_extent) {
6040 ret = remove_extent_backref(trans, extent_root, path,
6041 iref, refs_to_drop,
6042 is_data, &last_ref);
6043 if (ret) {
6044 btrfs_abort_transaction(trans, extent_root, ret);
6045 goto out;
6046 }
6047 }
6048 add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
6049 root_objectid);
6050 } else {
6051 if (found_extent) {
6052 BUG_ON(is_data && refs_to_drop !=
6053 extent_data_ref_count(root, path, iref));
6054 if (iref) {
6055 BUG_ON(path->slots[0] != extent_slot);
6056 } else {
6057 BUG_ON(path->slots[0] != extent_slot + 1);
6058 path->slots[0] = extent_slot;
6059 num_to_del = 2;
6060 }
6061 }
6062
6063 last_ref = 1;
6064 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
6065 num_to_del);
6066 if (ret) {
6067 btrfs_abort_transaction(trans, extent_root, ret);
6068 goto out;
6069 }
6070 btrfs_release_path(path);
6071
6072 if (is_data) {
6073 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
6074 if (ret) {
6075 btrfs_abort_transaction(trans, extent_root, ret);
6076 goto out;
6077 }
6078 }
6079
6080 ret = update_block_group(root, bytenr, num_bytes, 0);
6081 if (ret) {
6082 btrfs_abort_transaction(trans, extent_root, ret);
6083 goto out;
6084 }
6085 }
6086 btrfs_release_path(path);
6087
	/* Deal with the quota accounting */
6089 if (!ret && last_ref && !no_quota) {
6090 int mod_seq = 0;
6091
6092 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
6093 type == BTRFS_QGROUP_OPER_SUB_SHARED)
6094 mod_seq = 1;
6095
6096 ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
6097 bytenr, num_bytes, type,
6098 mod_seq);
6099 }
6100out:
6101 btrfs_free_path(path);
6102 return ret;
6103}
6104
/*
 * When we free a block, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree
 * for a given extent, and if there are no other delayed refs to be
 * processed, it removes the extent from the tree.
 */
6111static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
6112 struct btrfs_root *root, u64 bytenr)
6113{
6114 struct btrfs_delayed_ref_head *head;
6115 struct btrfs_delayed_ref_root *delayed_refs;
6116 int ret = 0;
6117
6118 delayed_refs = &trans->transaction->delayed_refs;
6119 spin_lock(&delayed_refs->lock);
6120 head = btrfs_find_delayed_ref_head(trans, bytenr);
6121 if (!head)
6122 goto out_delayed_unlock;
6123
6124 spin_lock(&head->lock);
6125 if (rb_first(&head->ref_root))
6126 goto out;
6127
6128 if (head->extent_op) {
6129 if (!head->must_insert_reserved)
6130 goto out;
6131 btrfs_free_delayed_extent_op(head->extent_op);
6132 head->extent_op = NULL;
6133 }
6134
	/*
	 * Waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway.
	 */
6139 if (!mutex_trylock(&head->mutex))
6140 goto out;
6141
	/*
	 * At this point we have a head with no other entries.  Go ahead and
	 * process it.
	 */
6146 head->node.in_tree = 0;
6147 rb_erase(&head->href_node, &delayed_refs->href_root);
6148
6149 atomic_dec(&delayed_refs->num_entries);
6150

	/*
	 * We don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
6155 delayed_refs->num_heads--;
6156 if (head->processing == 0)
6157 delayed_refs->num_heads_ready--;
6158 head->processing = 0;
6159 spin_unlock(&head->lock);
6160 spin_unlock(&delayed_refs->lock);
6161
6162 BUG_ON(head->extent_op);
6163 if (head->must_insert_reserved)
6164 ret = 1;
6165
6166 mutex_unlock(&head->mutex);
6167 btrfs_put_delayed_ref(&head->node);
6168 return ret;
6169out:
6170 spin_unlock(&head->lock);
6171
6172out_delayed_unlock:
6173 spin_unlock(&delayed_refs->lock);
6174 return 0;
6175}
6176
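/*
 * Drop a tree block's reference: queue a delayed ref for non-log trees and,
 * on the last reference, either free the block back to the free space cache
 * (if it was never written in this transaction) or pin it until commit.
 */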
6177void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6178 struct btrfs_root *root,
6179 struct extent_buffer *buf,
6180 u64 parent, int last_ref)
6181{
6182 struct btrfs_block_group_cache *cache = NULL;
6183 int pin = 1;
6184 int ret;
6185
6186 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
6187 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
6188 buf->start, buf->len,
6189 parent, root->root_key.objectid,
6190 btrfs_header_level(buf),
6191 BTRFS_DROP_DELAYED_REF, NULL, 0);
6192 BUG_ON(ret);
6193 }
6194
6195 if (!last_ref)
6196 return;
6197
6198 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
6199
6200 if (btrfs_header_generation(buf) == trans->transid) {
6201 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
6202 ret = check_ref_cleanup(trans, root, buf->start);
6203 if (!ret)
6204 goto out;
6205 }
6206
6207 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
6208 pin_down_extent(root, cache, buf->start, buf->len, 1);
6209 goto out;
6210 }
6211
6212 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
6213
6214 btrfs_add_free_space(cache, buf->start, buf->len);
6215 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
6216 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
6217 pin = 0;
6218 }
6219out:
6220 if (pin)
6221 add_pinned_bytes(root->fs_info, buf->len,
6222 btrfs_header_level(buf),
6223 root->root_key.objectid);
6224
	/*
	 * Deleting the buffer, clear the corrupt flag since it doesn't
	 * matter anymore.
	 */
6229 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
6230 btrfs_put_block_group(cache);
6231}
6232
/* Can return -ENOMEM */
6234int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6235 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
6236 u64 owner, u64 offset, int no_quota)
6237{
6238 int ret;
6239 struct btrfs_fs_info *fs_info = root->fs_info;
6240
6241#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
6242 if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
6243 return 0;
6244#endif
6245 add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
6246
	/*
	 * Tree log blocks never actually go into the extent allocation tree,
	 * just update pinning info and exit early.
	 */
6251 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
6252 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
6253
6254 btrfs_pin_extent(root, bytenr, num_bytes, 1);
6255 ret = 0;
6256 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6257 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
6258 num_bytes,
6259 parent, root_objectid, (int)owner,
6260 BTRFS_DROP_DELAYED_REF, NULL, no_quota);
6261 } else {
6262 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
6263 num_bytes,
6264 parent, root_objectid, owner,
6265 offset, BTRFS_DROP_DELAYED_REF,
6266 NULL, no_quota);
6267 }
6268 return ret;
6269}
6270
6271static u64 stripe_align(struct btrfs_root *root,
6272 struct btrfs_block_group_cache *cache,
6273 u64 val, u64 num_bytes)
6274{
6275 u64 ret = ALIGN(val, root->stripesize);
6276 return ret;
6277}
6278
/*
 * When we wait for progress in the block group caching, it's because our
 * allocation attempt failed at least once.  So, we must sleep and let some
 * progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to show
 * up, and then it will check the block group free space numbers for our min
 * num_bytes.  Another option is to have it go ahead and look in the rbtree
 * for a free extent of a given size, but this is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before
 * using any of the information in this block group.
 */
6293static noinline void
6294wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6295 u64 num_bytes)
6296{
6297 struct btrfs_caching_control *caching_ctl;
6298
6299 caching_ctl = get_caching_control(cache);
6300 if (!caching_ctl)
6301 return;
6302
6303 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
6304 (cache->free_space_ctl->free_space >= num_bytes));
6305
6306 put_caching_control(caching_ctl);
6307}
6308
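/*
 * Wait for a block group's caching to finish completely, returning -EIO if
 * the caching kthread ended in error.
 */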
6309static noinline int
6310wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6311{
6312 struct btrfs_caching_control *caching_ctl;
6313 int ret = 0;
6314
6315 caching_ctl = get_caching_control(cache);
6316 if (!caching_ctl)
6317 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
6318
6319 wait_event(caching_ctl->wait, block_group_cache_done(cache));
6320 if (cache->cached == BTRFS_CACHE_ERROR)
6321 ret = -EIO;
6322 put_caching_control(caching_ctl);
6323 return ret;
6324}
6325
6326int __get_raid_index(u64 flags)
6327{
6328 if (flags & BTRFS_BLOCK_GROUP_RAID10)
6329 return BTRFS_RAID_RAID10;
6330 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
6331 return BTRFS_RAID_RAID1;
6332 else if (flags & BTRFS_BLOCK_GROUP_DUP)
6333 return BTRFS_RAID_DUP;
6334 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
6335 return BTRFS_RAID_RAID0;
6336 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
6337 return BTRFS_RAID_RAID5;
6338 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
6339 return BTRFS_RAID_RAID6;
6340
6341 return BTRFS_RAID_SINGLE;
6342}
6343
6344int get_block_group_index(struct btrfs_block_group_cache *cache)
6345{
6346 return __get_raid_index(cache->flags);
6347}
6348
6349static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
6350 [BTRFS_RAID_RAID10] = "raid10",
6351 [BTRFS_RAID_RAID1] = "raid1",
6352 [BTRFS_RAID_DUP] = "dup",
6353 [BTRFS_RAID_RAID0] = "raid0",
6354 [BTRFS_RAID_SINGLE] = "single",
6355 [BTRFS_RAID_RAID5] = "raid5",
6356 [BTRFS_RAID_RAID6] = "raid6",
6357};
6358
6359static const char *get_raid_name(enum btrfs_raid_types type)
6360{
6361 if (type >= BTRFS_NR_RAID_TYPES)
6362 return NULL;
6363
6364 return btrfs_raid_type_names[type];
6365}
6366
6367enum btrfs_loop_type {
6368 LOOP_CACHING_NOWAIT = 0,
6369 LOOP_CACHING_WAIT = 1,
6370 LOOP_ALLOC_CHUNK = 2,
6371 LOOP_NO_EMPTY_SIZE = 3,
6372};
6373
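/*
 * Delalloc allocations take the block group's data_rwsem in read mode so
 * that a writer can exclude the whole group from delalloc allocation; the
 * helpers below pair that lock with taking and dropping the group's
 * refcount.
 */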
6374static inline void
6375btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
6376 int delalloc)
6377{
6378 if (delalloc)
6379 down_read(&cache->data_rwsem);
6380}
6381
6382static inline void
6383btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
6384 int delalloc)
6385{
6386 btrfs_get_block_group(cache);
6387 if (delalloc)
6388 down_read(&cache->data_rwsem);
6389}
6390
6391static struct btrfs_block_group_cache *
6392btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
6393 struct btrfs_free_cluster *cluster,
6394 int delalloc)
6395{
6396 struct btrfs_block_group_cache *used_bg;
6397 bool locked = false;
6398again:
6399 spin_lock(&cluster->refill_lock);
6400 if (locked) {
6401 if (used_bg == cluster->block_group)
6402 return used_bg;
6403
6404 up_read(&used_bg->data_rwsem);
6405 btrfs_put_block_group(used_bg);
6406 }
6407
6408 used_bg = cluster->block_group;
6409 if (!used_bg)
6410 return NULL;
6411
6412 if (used_bg == block_group)
6413 return used_bg;
6414
6415 btrfs_get_block_group(used_bg);
6416
6417 if (!delalloc)
6418 return used_bg;
6419
6420 if (down_read_trylock(&used_bg->data_rwsem))
6421 return used_bg;
6422
6423 spin_unlock(&cluster->refill_lock);
6424 down_read(&used_bg->data_rwsem);
6425 locked = true;
6426 goto again;
6427}
6428
6429static inline void
6430btrfs_release_block_group(struct btrfs_block_group_cache *cache,
6431 int delalloc)
6432{
6433 if (delalloc)
6434 up_read(&cache->data_rwsem);
6435 btrfs_put_block_group(cache);
6436}
6437
/*
 * Walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of the
 * free space extent currently available.
 */
6449static noinline int find_free_extent(struct btrfs_root *orig_root,
6450 u64 num_bytes, u64 empty_size,
6451 u64 hint_byte, struct btrfs_key *ins,
6452 u64 flags, int delalloc)
6453{
6454 int ret = 0;
6455 struct btrfs_root *root = orig_root->fs_info->extent_root;
6456 struct btrfs_free_cluster *last_ptr = NULL;
6457 struct btrfs_block_group_cache *block_group = NULL;
6458 u64 search_start = 0;
6459 u64 max_extent_size = 0;
6460 int empty_cluster = 2 * 1024 * 1024;
6461 struct btrfs_space_info *space_info;
6462 int loop = 0;
6463 int index = __get_raid_index(flags);
6464 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
6465 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
6466 bool failed_cluster_refill = false;
6467 bool failed_alloc = false;
6468 bool use_cluster = true;
6469 bool have_caching_bg = false;
6470
6471 WARN_ON(num_bytes < root->sectorsize);
6472 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
6473 ins->objectid = 0;
6474 ins->offset = 0;
6475
6476 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
6477
6478 space_info = __find_space_info(root->fs_info, flags);
6479 if (!space_info) {
6480 btrfs_err(root->fs_info, "No space info for %llu", flags);
6481 return -ENOSPC;
6482 }
6483
	/*
	 * If the space info is for both data and metadata it means we have a
	 * small filesystem and we can't use the clustering stuff.
	 */
6488 if (btrfs_mixed_space_info(space_info))
6489 use_cluster = false;
6490
6491 if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
6492 last_ptr = &root->fs_info->meta_alloc_cluster;
6493 if (!btrfs_test_opt(root, SSD))
6494 empty_cluster = 64 * 1024;
6495 }
6496
6497 if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
6498 btrfs_test_opt(root, SSD)) {
6499 last_ptr = &root->fs_info->data_alloc_cluster;
6500 }
6501
6502 if (last_ptr) {
6503 spin_lock(&last_ptr->lock);
6504 if (last_ptr->block_group)
6505 hint_byte = last_ptr->window_start;
6506 spin_unlock(&last_ptr->lock);
6507 }
6508
6509 search_start = max(search_start, first_logical_byte(root, 0));
6510 search_start = max(search_start, hint_byte);
6511
6512 if (!last_ptr)
6513 empty_cluster = 0;
6514
6515 if (search_start == hint_byte) {
6516 block_group = btrfs_lookup_block_group(root->fs_info,
6517 search_start);
		/*
		 * We don't want to use the block group if it doesn't match
		 * our allocation bits, or if it's not cached.
		 *
		 * However, if we are re-searching with an ideal block group
		 * picked out then we don't care that the block group is
		 * cached.
		 */
6525 if (block_group && block_group_bits(block_group, flags) &&
6526 block_group->cached != BTRFS_CACHE_NO) {
6527 down_read(&space_info->groups_sem);
6528 if (list_empty(&block_group->list) ||
6529 block_group->ro) {
				/*
				 * Someone is removing this block group; we
				 * can't jump to the have_block_group target
				 * because our list pointers are not valid.
				 */
6536 btrfs_put_block_group(block_group);
6537 up_read(&space_info->groups_sem);
6538 } else {
6539 index = get_block_group_index(block_group);
6540 btrfs_lock_block_group(block_group, delalloc);
6541 goto have_block_group;
6542 }
6543 } else if (block_group) {
6544 btrfs_put_block_group(block_group);
6545 }
6546 }
6547search:
6548 have_caching_bg = false;
6549 down_read(&space_info->groups_sem);
6550 list_for_each_entry(block_group, &space_info->block_groups[index],
6551 list) {
6552 u64 offset;
6553 int cached;
6554
6555 btrfs_grab_block_group(block_group, delalloc);
6556 search_start = block_group->key.objectid;
6557
		/*
		 * This can happen if we end up cycling through all the raid
		 * types, but we want to make sure we only allocate for the
		 * proper type.
		 */
6563 if (!block_group_bits(block_group, flags)) {
6564 u64 extra = BTRFS_BLOCK_GROUP_DUP |
6565 BTRFS_BLOCK_GROUP_RAID1 |
6566 BTRFS_BLOCK_GROUP_RAID5 |
6567 BTRFS_BLOCK_GROUP_RAID6 |
6568 BTRFS_BLOCK_GROUP_RAID10;
6569
			/*
			 * If they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */
6575 if ((flags & extra) && !(block_group->flags & extra))
6576 goto loop;
6577 }
6578
6579have_block_group:
6580 cached = block_group_cache_done(block_group);
6581 if (unlikely(!cached)) {
6582 ret = cache_block_group(block_group, 0);
6583 BUG_ON(ret < 0);
6584 ret = 0;
6585 }
6586
6587 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
6588 goto loop;
6589 if (unlikely(block_group->ro))
6590 goto loop;
6591
		/*
		 * Ok, we want to try and use the cluster allocator, so let's
		 * look there.
		 */
6596 if (last_ptr) {
6597 struct btrfs_block_group_cache *used_block_group;
6598 unsigned long aligned_cluster;
6599
			/*
			 * The refill lock keeps out other people trying to
			 * start a new cluster.
			 */
6603 used_block_group = btrfs_lock_cluster(block_group,
6604 last_ptr,
6605 delalloc);
6606 if (!used_block_group)
6607 goto refill_cluster;
6608
6609 if (used_block_group != block_group &&
6610 (used_block_group->ro ||
6611 !block_group_bits(used_block_group, flags)))
6612 goto release_cluster;
6613
6614 offset = btrfs_alloc_from_cluster(used_block_group,
6615 last_ptr,
6616 num_bytes,
6617 used_block_group->key.objectid,
6618 &max_extent_size);
6619 if (offset) {
				/* We have a block, we're done */
6621 spin_unlock(&last_ptr->refill_lock);
6622 trace_btrfs_reserve_extent_cluster(root,
6623 used_block_group,
6624 search_start, num_bytes);
6625 if (used_block_group != block_group) {
6626 btrfs_release_block_group(block_group,
6627 delalloc);
6628 block_group = used_block_group;
6629 }
6630 goto checks;
6631 }
6632
6633 WARN_ON(last_ptr->block_group != used_block_group);
6634release_cluster:
			/*
			 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a
			 * new cluster, so let's just skip it and let the
			 * allocator find whatever block it can find.  If we
			 * reach this point, we will have tried the cluster
			 * allocator plenty of times and not have found
			 * anything, so we are likely way too fragmented for
			 * the clustering stuff to find anything.
			 *
			 * However, if the cluster is taken from the current
			 * block group, release the cluster first, so that we
			 * stand a better chance of succeeding in the
			 * unclustered allocation.
			 */
6650 if (loop >= LOOP_NO_EMPTY_SIZE &&
6651 used_block_group != block_group) {
6652 spin_unlock(&last_ptr->refill_lock);
6653 btrfs_release_block_group(used_block_group,
6654 delalloc);
6655 goto unclustered_alloc;
6656 }
6657
			/*
			 * This cluster didn't work out, free it and start
			 * over.
			 */
6662 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6663
6664 if (used_block_group != block_group)
6665 btrfs_release_block_group(used_block_group,
6666 delalloc);
6667refill_cluster:
6668 if (loop >= LOOP_NO_EMPTY_SIZE) {
6669 spin_unlock(&last_ptr->refill_lock);
6670 goto unclustered_alloc;
6671 }
6672
6673 aligned_cluster = max_t(unsigned long,
6674 empty_cluster + empty_size,
6675 block_group->full_stripe_len);

			/* Allocate a cluster in this block group. */
6678 ret = btrfs_find_space_cluster(root, block_group,
6679 last_ptr, search_start,
6680 num_bytes,
6681 aligned_cluster);
6682 if (ret == 0) {
				/*
				 * Now pull our allocation out of this
				 * cluster.
				 */
6687 offset = btrfs_alloc_from_cluster(block_group,
6688 last_ptr,
6689 num_bytes,
6690 search_start,
6691 &max_extent_size);
6692 if (offset) {
					/* We found one, proceed */
6694 spin_unlock(&last_ptr->refill_lock);
6695 trace_btrfs_reserve_extent_cluster(root,
6696 block_group, search_start,
6697 num_bytes);
6698 goto checks;
6699 }
6700 } else if (!cached && loop > LOOP_CACHING_NOWAIT
6701 && !failed_cluster_refill) {
6702 spin_unlock(&last_ptr->refill_lock);
6703
6704 failed_cluster_refill = true;
6705 wait_block_group_cache_progress(block_group,
6706 num_bytes + empty_cluster + empty_size);
6707 goto have_block_group;
6708 }
6709
			/*
			 * At this point we either didn't find a cluster or we
			 * weren't able to allocate a block from our cluster.
			 * Free the cluster we've been trying to use, and go
			 * to the next block group.
			 */
6716 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6717 spin_unlock(&last_ptr->refill_lock);
6718 goto loop;
6719 }
6720
6721unclustered_alloc:
6722 spin_lock(&block_group->free_space_ctl->tree_lock);
6723 if (cached &&
6724 block_group->free_space_ctl->free_space <
6725 num_bytes + empty_cluster + empty_size) {
6726 if (block_group->free_space_ctl->free_space >
6727 max_extent_size)
6728 max_extent_size =
6729 block_group->free_space_ctl->free_space;
6730 spin_unlock(&block_group->free_space_ctl->tree_lock);
6731 goto loop;
6732 }
6733 spin_unlock(&block_group->free_space_ctl->tree_lock);
6734
6735 offset = btrfs_find_space_for_alloc(block_group, search_start,
6736 num_bytes, empty_size,
6737 &max_extent_size);
6738
		/*
		 * If we didn't find a chunk, and we haven't failed on this
		 * block group before, and this block group is in the middle
		 * of caching and we are ok with waiting, then go ahead and
		 * wait for progress to be made, and set failed_alloc to true.
		 *
		 * If failed_alloc is true then we've already waited on this
		 * block group once and should move on to the next block
		 * group.
		 */
6747 if (!offset && !failed_alloc && !cached &&
6748 loop > LOOP_CACHING_NOWAIT) {
6749 wait_block_group_cache_progress(block_group,
6750 num_bytes + empty_size);
6751 failed_alloc = true;
6752 goto have_block_group;
6753 } else if (!offset) {
6754 if (!cached)
6755 have_caching_bg = true;
6756 goto loop;
6757 }
6758checks:
6759 search_start = stripe_align(root, block_group,
6760 offset, num_bytes);

		/* Move on to the next group */
6763 if (search_start + num_bytes >
6764 block_group->key.objectid + block_group->key.offset) {
6765 btrfs_add_free_space(block_group, offset, num_bytes);
6766 goto loop;
6767 }
6768
6769 if (offset < search_start)
6770 btrfs_add_free_space(block_group, offset,
6771 search_start - offset);
6772 BUG_ON(offset > search_start);
6773
6774 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6775 alloc_type, delalloc);
6776 if (ret == -EAGAIN) {
6777 btrfs_add_free_space(block_group, offset, num_bytes);
6778 goto loop;
6779 }
6780
6781 /* we are all good, let's return */
6782 ins->objectid = search_start;
6783 ins->offset = num_bytes;
6784
6785 trace_btrfs_reserve_extent(orig_root, block_group,
6786 search_start, num_bytes);
6787 btrfs_release_block_group(block_group, delalloc);
6788 break;
6789loop:
6790 failed_cluster_refill = false;
6791 failed_alloc = false;
6792 BUG_ON(index != get_block_group_index(block_group));
6793 btrfs_release_block_group(block_group, delalloc);
6794 }
6795 up_read(&space_info->groups_sem);
6796
6797 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
6798 goto search;
6799
6800 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
6801 goto search;
6802
6803 /*
6804  * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
6805  * caching kthreads as we move along
6806  * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
6807  * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
6808  * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and
6809  * try again
6810  */
6811 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
6812 index = 0;
6813 loop++;
6814 if (loop == LOOP_ALLOC_CHUNK) {
6815 struct btrfs_trans_handle *trans;
6816 int exist = 0;
6817
6818 trans = current->journal_info;
6819 if (trans)
6820 exist = 1;
6821 else
6822 trans = btrfs_join_transaction(root);
6823
6824 if (IS_ERR(trans)) {
6825 ret = PTR_ERR(trans);
6826 goto out;
6827 }
6828
6829 ret = do_chunk_alloc(trans, root, flags,
6830 CHUNK_ALLOC_FORCE);
6831 /*
6832  * Do not bail out on ENOSPC since we
6833  * can do more things.
6834  */
6835 if (ret < 0 && ret != -ENOSPC)
6836 btrfs_abort_transaction(trans,
6837 root, ret);
6838 else
6839 ret = 0;
6840 if (!exist)
6841 btrfs_end_transaction(trans, root);
6842 if (ret)
6843 goto out;
6844 }
6845
6846 if (loop == LOOP_NO_EMPTY_SIZE) {
6847 empty_size = 0;
6848 empty_cluster = 0;
6849 }
6850
6851 goto search;
6852 } else if (!ins->objectid) {
6853 ret = -ENOSPC;
6854 } else {
6855 ret = 0;
6856 }
6857out:
6858 if (ret == -ENOSPC)
6859 ins->offset = max_extent_size;
6860 return ret;
6861}
6862
6863static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6864 int dump_block_groups)
6865{
6866 struct btrfs_block_group_cache *cache;
6867 int index = 0;
6868
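/* dump the aggregate counters first, then each per-RAID-type list */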
6869 spin_lock(&info->lock);
6870 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
6871 info->flags,
6872 info->total_bytes - info->bytes_used - info->bytes_pinned -
6873 info->bytes_reserved - info->bytes_readonly,
6874 (info->full) ? "" : "not ");
6875 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
6876 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6877 info->total_bytes, info->bytes_used, info->bytes_pinned,
6878 info->bytes_reserved, info->bytes_may_use,
6879 info->bytes_readonly);
6880 spin_unlock(&info->lock);
6881
6882 if (!dump_block_groups)
6883 return;
6884
6885 down_read(&info->groups_sem);
6886again:
6887 list_for_each_entry(cache, &info->block_groups[index], list) {
6888 spin_lock(&cache->lock);
6889 printk(KERN_INFO "BTRFS: "
6890 "block group %llu has %llu bytes, "
6891 "%llu used %llu pinned %llu reserved %s\n",
6892 cache->key.objectid, cache->key.offset,
6893 btrfs_block_group_used(&cache->item), cache->pinned,
6894 cache->reserved, cache->ro ? "[readonly]" : "");
6895 btrfs_dump_free_space(cache, bytes);
6896 spin_unlock(&cache->lock);
6897 }
6898 if (++index < BTRFS_NR_RAID_TYPES)
6899 goto again;
6900 up_read(&info->groups_sem);
6901}
6902
6903int btrfs_reserve_extent(struct btrfs_root *root,
6904 u64 num_bytes, u64 min_alloc_size,
6905 u64 empty_size, u64 hint_byte,
6906 struct btrfs_key *ins, int is_data, int delalloc)
6907{
6908 bool final_tried = false;
6909 u64 flags;
6910 int ret;
6911
6912 flags = btrfs_get_alloc_profile(root, is_data);
6913again:
6914 WARN_ON(num_bytes < root->sectorsize);
6915 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
6916 flags, delalloc);
6917
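/*
 * On ENOSPC, retry with a smaller request: half of what we asked for,
 * capped by the largest free extent the allocator saw (ins->offset)
 * and floored at min_alloc_size.
 */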
6918 if (ret == -ENOSPC) {
6919 if (!final_tried && ins->offset) {
6920 num_bytes = min(num_bytes >> 1, ins->offset);
6921 num_bytes = round_down(num_bytes, root->sectorsize);
6922 num_bytes = max(num_bytes, min_alloc_size);
6923 if (num_bytes == min_alloc_size)
6924 final_tried = true;
6925 goto again;
6926 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6927 struct btrfs_space_info *sinfo;
6928
6929 sinfo = __find_space_info(root->fs_info, flags);
6930 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
6931 flags, num_bytes);
6932 if (sinfo)
6933 dump_space_info(sinfo, num_bytes, 1);
6934 }
6935 }
6936
6937 return ret;
6938}
6939
6940static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6941 u64 start, u64 len,
6942 int pin, int delalloc)
6943{
6944 struct btrfs_block_group_cache *cache;
6945 int ret = 0;
6946
6947 cache = btrfs_lookup_block_group(root->fs_info, start);
6948 if (!cache) {
6949 btrfs_err(root->fs_info, "Unable to find block group for %llu",
6950 start);
6951 return -ENOSPC;
6952 }
6953
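/* if the discard mount option is set, trim the range on the device */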
6954 if (btrfs_test_opt(root, DISCARD))
6955 ret = btrfs_discard_extent(root, start, len, NULL);
6956
6957 if (pin)
6958 pin_down_extent(root, cache, start, len, 1);
6959 else {
6960 btrfs_add_free_space(cache, start, len);
6961 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
6962 }
6963 btrfs_put_block_group(cache);
6964
6965 trace_btrfs_reserved_extent_free(root, start, len);
6966
6967 return ret;
6968}
6969
6970int btrfs_free_reserved_extent(struct btrfs_root *root,
6971 u64 start, u64 len, int delalloc)
6972{
6973 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
6974}
6975
6976int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
6977 u64 start, u64 len)
6978{
6979 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
6980}
6981
6982static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6983 struct btrfs_root *root,
6984 u64 parent, u64 root_objectid,
6985 u64 flags, u64 owner, u64 offset,
6986 struct btrfs_key *ins, int ref_mod)
6987{
6988 int ret;
6989 struct btrfs_fs_info *fs_info = root->fs_info;
6990 struct btrfs_extent_item *extent_item;
6991 struct btrfs_extent_inline_ref *iref;
6992 struct btrfs_path *path;
6993 struct extent_buffer *leaf;
6994 int type;
6995 u32 size;
6996
6997 if (parent > 0)
6998 type = BTRFS_SHARED_DATA_REF_KEY;
6999 else
7000 type = BTRFS_EXTENT_DATA_REF_KEY;
7001
7002 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
7003
7004 path = btrfs_alloc_path();
7005 if (!path)
7006 return -ENOMEM;
7007
7008 path->leave_spinning = 1;
7009 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7010 ins, size);
7011 if (ret) {
7012 btrfs_free_path(path);
7013 return ret;
7014 }
7015
7016 leaf = path->nodes[0];
7017 extent_item = btrfs_item_ptr(leaf, path->slots[0],
7018 struct btrfs_extent_item);
7019 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
7020 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
7021 btrfs_set_extent_flags(leaf, extent_item,
7022 flags | BTRFS_EXTENT_FLAG_DATA);
7023
7024 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
7025 btrfs_set_extent_inline_ref_type(leaf, iref, type);
7026 if (parent > 0) {
7027 struct btrfs_shared_data_ref *ref;
7028 ref = (struct btrfs_shared_data_ref *)(iref + 1);
7029 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
7030 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
7031 } else {
7032 struct btrfs_extent_data_ref *ref;
7033 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
7034 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
7035 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
7036 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
7037 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
7038 }
7039
7040 btrfs_mark_buffer_dirty(leaf);
7041 btrfs_free_path(path);
7042
7043 /* Always set parent to 0 here since it's exclusive anyway. */
7044 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
7045 ins->objectid, ins->offset,
7046 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
7047 if (ret)
7048 return ret;
7049
7050 ret = update_block_group(root, ins->objectid, ins->offset, 1);
7051 if (ret) {
7052 btrfs_err(fs_info, "update block group failed for %llu %llu",
7053 ins->objectid, ins->offset);
7054 BUG();
7055 }
7056 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
7057 return ret;
7058}
7059
7060static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7061 struct btrfs_root *root,
7062 u64 parent, u64 root_objectid,
7063 u64 flags, struct btrfs_disk_key *key,
7064 int level, struct btrfs_key *ins,
7065 int no_quota)
7066{
7067 int ret;
7068 struct btrfs_fs_info *fs_info = root->fs_info;
7069 struct btrfs_extent_item *extent_item;
7070 struct btrfs_tree_block_info *block_info;
7071 struct btrfs_extent_inline_ref *iref;
7072 struct btrfs_path *path;
7073 struct extent_buffer *leaf;
7074 u32 size = sizeof(*extent_item) + sizeof(*iref);
7075 u64 num_bytes = ins->offset;
7076 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
7077 SKINNY_METADATA);
7078
7079 if (!skinny_metadata)
7080 size += sizeof(*block_info);
7081
7082 path = btrfs_alloc_path();
7083 if (!path) {
7084 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7085 root->leafsize);
7086 return -ENOMEM;
7087 }
7088
7089 path->leave_spinning = 1;
7090 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7091 ins, size);
7092 if (ret) {
7093 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7094 root->leafsize);
7095 btrfs_free_path(path);
7096 return ret;
7097 }
7098
7099 leaf = path->nodes[0];
7100 extent_item = btrfs_item_ptr(leaf, path->slots[0],
7101 struct btrfs_extent_item);
7102 btrfs_set_extent_refs(leaf, extent_item, 1);
7103 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
7104 btrfs_set_extent_flags(leaf, extent_item,
7105 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
7106
7107 if (skinny_metadata) {
7108 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
7109 num_bytes = root->leafsize;
7110 } else {
7111 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
7112 btrfs_set_tree_block_key(leaf, block_info, key);
7113 btrfs_set_tree_block_level(leaf, block_info, level);
7114 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
7115 }
7116
7117 if (parent > 0) {
7118 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
7119 btrfs_set_extent_inline_ref_type(leaf, iref,
7120 BTRFS_SHARED_BLOCK_REF_KEY);
7121 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
7122 } else {
7123 btrfs_set_extent_inline_ref_type(leaf, iref,
7124 BTRFS_TREE_BLOCK_REF_KEY);
7125 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
7126 }
7127
7128 btrfs_mark_buffer_dirty(leaf);
7129 btrfs_free_path(path);
7130
7131 if (!no_quota) {
7132 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
7133 ins->objectid, num_bytes,
7134 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
7135 if (ret)
7136 return ret;
7137 }
7138
7139 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
7140 if (ret) {
7141 btrfs_err(fs_info, "update block group failed for %llu %llu",
7142 ins->objectid, root->leafsize);
7143 BUG();
7144 }
7145
7146 trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
7147 return ret;
7148}
7149
7150int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7151 struct btrfs_root *root,
7152 u64 root_objectid, u64 owner,
7153 u64 offset, struct btrfs_key *ins)
7154{
7155 int ret;
7156
7157 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
7158
7159 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
7160 ins->offset, 0,
7161 root_objectid, owner, offset,
7162 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
7163 return ret;
7164}
7165
7166 /*
7167  * this is used by the tree logging recovery code.  It records that
7168  * an extent has been allocated and makes sure to clear the free
7169  * space cache bits as well
7170  */
7171int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7172 struct btrfs_root *root,
7173 u64 root_objectid, u64 owner, u64 offset,
7174 struct btrfs_key *ins)
7175{
7176 int ret;
7177 struct btrfs_block_group_cache *block_group;
7178
7179 /*
7180  * Mixed block groups will exclude before processing the log so we only
7181  * need to do the exclude dance if this fs isn't mixed.
7182  */
7183 if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
7184 ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
7185 if (ret)
7186 return ret;
7187 }
7188
7189 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
7190 if (!block_group)
7191 return -EINVAL;
7192
7193 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
7194 RESERVE_ALLOC_NO_ACCOUNT, 0);
7195 BUG_ON(ret);
7196 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
7197 0, owner, offset, ins, 1);
7198 btrfs_put_block_group(block_group);
7199 return ret;
7200}
7201
7202static struct extent_buffer *
7203btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7204 u64 bytenr, u32 blocksize, int level)
7205{
7206 struct extent_buffer *buf;
7207
7208 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
7209 if (!buf)
7210 return ERR_PTR(-ENOMEM);
7211 btrfs_set_header_generation(buf, trans->transid);
7212 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
7213 btrfs_tree_lock(buf);
7214 clean_tree_block(trans, root, buf);
7215 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
7216
7217 btrfs_set_lock_blocking(buf);
7218 btrfs_set_buffer_uptodate(buf);
7219
7220 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
7221 /*
7222  * we allow two log transactions at a time, use different
7223  * EXTENT bit to differentiate dirty pages.
7224  */
7225 if (root->log_transid % 2 == 0)
7226 set_extent_dirty(&root->dirty_log_pages, buf->start,
7227 buf->start + buf->len - 1, GFP_NOFS);
7228 else
7229 set_extent_new(&root->dirty_log_pages, buf->start,
7230 buf->start + buf->len - 1, GFP_NOFS);
7231 } else {
7232 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
7233 buf->start + buf->len - 1, GFP_NOFS);
7234 }
7235 trans->blocks_used++;
7236
7237 return buf;
7238}
7239
7240static struct btrfs_block_rsv *
7241use_block_rsv(struct btrfs_trans_handle *trans,
7242 struct btrfs_root *root, u32 blocksize)
7243{
7244 struct btrfs_block_rsv *block_rsv;
7245 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
7246 int ret;
7247 bool global_updated = false;
7248
7249 block_rsv = get_block_rsv(trans, root);
7250
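/*
 * Try the transaction's block rsv first; fall back to refreshing the
 * global rsv size, then to a direct NO_FLUSH reservation, and finally
 * to stealing from the global reserve below.
 */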
7251 if (unlikely(block_rsv->size == 0))
7252 goto try_reserve;
7253again:
7254 ret = block_rsv_use_bytes(block_rsv, blocksize);
7255 if (!ret)
7256 return block_rsv;
7257
7258 if (block_rsv->failfast)
7259 return ERR_PTR(ret);
7260
7261 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
7262 global_updated = true;
7263 update_global_block_rsv(root->fs_info);
7264 goto again;
7265 }
7266
7267 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
7268 static DEFINE_RATELIMIT_STATE(_rs,
7269 DEFAULT_RATELIMIT_INTERVAL * 10,
7270 1);
7271 if (__ratelimit(&_rs))
7272 WARN(1, KERN_DEBUG
7273 "BTRFS: block rsv returned %d\n", ret);
7274 }
7275try_reserve:
7276 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
7277 BTRFS_RESERVE_NO_FLUSH);
7278 if (!ret)
7279 return block_rsv;
7280
7281 /*
7282  * If we couldn't reserve metadata bytes try and use some from
7283  * the global reserve if its space info is the same as ours.
7284  */
7285 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
7286 block_rsv->space_info == global_rsv->space_info) {
7287 ret = block_rsv_use_bytes(global_rsv, blocksize);
7288 if (!ret)
7289 return global_rsv;
7290 }
7291 return ERR_PTR(ret);
7292}
7293
7294static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
7295 struct btrfs_block_rsv *block_rsv, u32 blocksize)
7296{
7297 block_rsv_add_bytes(block_rsv, blocksize, 0);
7298 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
7299}
7300
7301 /*
7302  * finds a free extent and does all the dirty work required for
7303  * allocation.  returns the key for the extent through ins, and a
7304  * tree buffer for the first block of the extent through buf.
7305  *
7306  * returns the tree buffer or an ERR_PTR on error.
7307  */
7308struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
7309 struct btrfs_root *root, u32 blocksize,
7310 u64 parent, u64 root_objectid,
7311 struct btrfs_disk_key *key, int level,
7312 u64 hint, u64 empty_size)
7313{
7314 struct btrfs_key ins;
7315 struct btrfs_block_rsv *block_rsv;
7316 struct extent_buffer *buf;
7317 u64 flags = 0;
7318 int ret;
7319 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
7320 SKINNY_METADATA);
7321
7322#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
7323 if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) {
7324 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
7325 blocksize, level);
7326 if (!IS_ERR(buf))
7327 root->alloc_bytenr += blocksize;
7328 return buf;
7329 }
7330#endif
7331 block_rsv = use_block_rsv(trans, root, blocksize);
7332 if (IS_ERR(block_rsv))
7333 return ERR_CAST(block_rsv);
7334
7335 ret = btrfs_reserve_extent(root, blocksize, blocksize,
7336 empty_size, hint, &ins, 0, 0);
7337 if (ret) {
7338 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
7339 return ERR_PTR(ret);
7340 }
7341
7342 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
7343 blocksize, level);
7344 BUG_ON(IS_ERR(buf));
7345
7346 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
7347 if (parent == 0)
7348 parent = ins.objectid;
7349 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7350 } else
7351 BUG_ON(parent > 0);
7352
7353 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
7354 struct btrfs_delayed_extent_op *extent_op;
7355 extent_op = btrfs_alloc_delayed_extent_op();
7356 BUG_ON(!extent_op);
7357 if (key)
7358 memcpy(&extent_op->key, key, sizeof(extent_op->key));
7359 else
7360 memset(&extent_op->key, 0, sizeof(extent_op->key));
7361 extent_op->flags_to_set = flags;
7362 if (skinny_metadata)
7363 extent_op->update_key = 0;
7364 else
7365 extent_op->update_key = 1;
7366 extent_op->update_flags = 1;
7367 extent_op->is_data = 0;
7368 extent_op->level = level;
7369
7370 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
7371 ins.objectid,
7372 ins.offset, parent, root_objectid,
7373 level, BTRFS_ADD_DELAYED_EXTENT,
7374 extent_op, 0);
7375 BUG_ON(ret);
7376 }
7377 return buf;
7378}
7379
7380struct walk_control {
7381 u64 refs[BTRFS_MAX_LEVEL];
7382 u64 flags[BTRFS_MAX_LEVEL];
7383 struct btrfs_key update_progress;
7384 int stage;
7385 int level;
7386 int shared_level;
7387 int update_ref;
7388 int keep_locks;
7389 int reada_slot;
7390 int reada_count;
7391 int for_reloc;
7392};
7393
7394#define DROP_REFERENCE 1
7395#define UPDATE_BACKREF 2
7396
7397static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
7398 struct btrfs_root *root,
7399 struct walk_control *wc,
7400 struct btrfs_path *path)
7401{
7402 u64 bytenr;
7403 u64 generation;
7404 u64 refs;
7405 u64 flags;
7406 u32 nritems;
7407 u32 blocksize;
7408 struct btrfs_key key;
7409 struct extent_buffer *eb;
7410 int ret;
7411 int slot;
7412 int nread = 0;
7413
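/*
 * shrink the readahead window while we are still behind the last
 * readahead slot, grow it while the walk keeps advancing
 */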
7414 if (path->slots[wc->level] < wc->reada_slot) {
7415 wc->reada_count = wc->reada_count * 2 / 3;
7416 wc->reada_count = max(wc->reada_count, 2);
7417 } else {
7418 wc->reada_count = wc->reada_count * 3 / 2;
7419 wc->reada_count = min_t(int, wc->reada_count,
7420 BTRFS_NODEPTRS_PER_BLOCK(root));
7421 }
7422
7423 eb = path->nodes[wc->level];
7424 nritems = btrfs_header_nritems(eb);
7425 blocksize = btrfs_level_size(root, wc->level - 1);
7426
7427 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
7428 if (nread >= wc->reada_count)
7429 break;
7430
7431 cond_resched();
7432 bytenr = btrfs_node_blockptr(eb, slot);
7433 generation = btrfs_node_ptr_generation(eb, slot);
7434
7435 if (slot == path->slots[wc->level])
7436 goto reada;
7437
7438 if (wc->stage == UPDATE_BACKREF &&
7439 generation <= root->root_key.offset)
7440 continue;
7441
7442 /* We don't lock the tree block, it's OK to be racy here */
7443 ret = btrfs_lookup_extent_info(trans, root, bytenr,
7444 wc->level - 1, 1, &refs,
7445 &flags);
7446
7447 if (ret < 0)
7448 continue;
7449 BUG_ON(refs == 0);
7450
7451 if (wc->stage == DROP_REFERENCE) {
7452 if (refs == 1)
7453 goto reada;
7454
7455 if (wc->level == 1 &&
7456 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7457 continue;
7458 if (!wc->update_ref ||
7459 generation <= root->root_key.offset)
7460 continue;
7461 btrfs_node_key_to_cpu(eb, &key, slot);
7462 ret = btrfs_comp_cpu_keys(&key,
7463 &wc->update_progress);
7464 if (ret < 0)
7465 continue;
7466 } else {
7467 if (wc->level == 1 &&
7468 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7469 continue;
7470 }
7471reada:
7472 ret = readahead_tree_block(root, bytenr, blocksize,
7473 generation);
7474 if (ret)
7475 break;
7476 nread++;
7477 }
7478 wc->reada_slot = slot;
7479}
7480
7481 /*
7482  * helper to process tree block while walking down the tree.
7483  *
7484  * when wc->stage == UPDATE_BACKREF, this function updates
7485  * back refs for pointers in the block.
7486  *
7487  * NOTE: return value 1 means we should stop walking down.
7488  */
7489static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
7490 struct btrfs_root *root,
7491 struct btrfs_path *path,
7492 struct walk_control *wc, int lookup_info)
7493{
7494 int level = wc->level;
7495 struct extent_buffer *eb = path->nodes[level];
7496 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
7497 int ret;
7498
7499 if (wc->stage == UPDATE_BACKREF &&
7500 btrfs_header_owner(eb) != root->root_key.objectid)
7501 return 1;
7502
7503 /*
7504  * when reference count of tree block is 1, it won't increase
7505  * again.  once full backref flag is set, we never clear it.
7506  */
7507 if (lookup_info &&
7508 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
7509 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
7510 BUG_ON(!path->locks[level]);
7511 ret = btrfs_lookup_extent_info(trans, root,
7512 eb->start, level, 1,
7513 &wc->refs[level],
7514 &wc->flags[level]);
7515 BUG_ON(ret == -ENOMEM);
7516 if (ret)
7517 return ret;
7518 BUG_ON(wc->refs[level] == 0);
7519 }
7520
7521 if (wc->stage == DROP_REFERENCE) {
7522 if (wc->refs[level] > 1)
7523 return 1;
7524
7525 if (path->locks[level] && !wc->keep_locks) {
7526 btrfs_tree_unlock_rw(eb, path->locks[level]);
7527 path->locks[level] = 0;
7528 }
7529 return 0;
7530 }
7531
7532 /* wc->stage == UPDATE_BACKREF */
7533 if (!(wc->flags[level] & flag)) {
7534 BUG_ON(!path->locks[level]);
7535 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
7536 BUG_ON(ret);
7537 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
7538 BUG_ON(ret);
7539 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
7540 eb->len, flag,
7541 btrfs_header_level(eb), 0);
7542 BUG_ON(ret);
7543 wc->flags[level] |= flag;
7544 }
7545
7546 /*
7547  * the block is shared by multiple trees, so it's not good to
7548  * keep the tree lock
7549  */
7550 if (path->locks[level] && level > 0) {
7551 btrfs_tree_unlock_rw(eb, path->locks[level]);
7552 path->locks[level] = 0;
7553 }
7554 return 0;
7555}
7556
7557 /*
7558  * helper to process tree block pointer.
7559  *
7560  * when wc->stage == DROP_REFERENCE, this function checks
7561  * reference count of the block pointed to.  if the block
7562  * is shared and we need to update back refs for the subtree
7563  * rooted at the block, this function changes wc->stage to
7564  * UPDATE_BACKREF.  if the block is shared and there is no
7565  * need to update back refs for the subtree rooted at the
7566  * block, this function decreases reference count of the block.
7567  *
7568  * NOTE: return value 1 means we should stop walking down.
7569  */
7570static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7571 struct btrfs_root *root,
7572 struct btrfs_path *path,
7573 struct walk_control *wc, int *lookup_info)
7574{
7575 u64 bytenr;
7576 u64 generation;
7577 u64 parent;
7578 u32 blocksize;
7579 struct btrfs_key key;
7580 struct extent_buffer *next;
7581 int level = wc->level;
7582 int reada = 0;
7583 int ret = 0;
7584
7585 generation = btrfs_node_ptr_generation(path->nodes[level],
7586 path->slots[level]);
7587
7588 /*
7589  * if the lower level block was created before the snapshot was
7590  * created, we know there is no need to update back refs for the subtree
7591  */
7592 if (wc->stage == UPDATE_BACKREF &&
7593 generation <= root->root_key.offset) {
7594 *lookup_info = 1;
7595 return 1;
7596 }
7597
7598 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
7599 blocksize = btrfs_level_size(root, level - 1);
7600
7601 next = btrfs_find_tree_block(root, bytenr, blocksize);
7602 if (!next) {
7603 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
7604 if (!next)
7605 return -ENOMEM;
7606 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
7607 level - 1);
7608 reada = 1;
7609 }
7610 btrfs_tree_lock(next);
7611 btrfs_set_lock_blocking(next);
7612
7613 ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
7614 &wc->refs[level - 1],
7615 &wc->flags[level - 1]);
7616 if (ret < 0) {
7617 btrfs_tree_unlock(next);
7618 return ret;
7619 }
7620
7621 if (unlikely(wc->refs[level - 1] == 0)) {
7622 btrfs_err(root->fs_info, "Missing references.");
7623 BUG();
7624 }
7625 *lookup_info = 0;
7626
7627 if (wc->stage == DROP_REFERENCE) {
7628 if (wc->refs[level - 1] > 1) {
7629 if (level == 1 &&
7630 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7631 goto skip;
7632
7633 if (!wc->update_ref ||
7634 generation <= root->root_key.offset)
7635 goto skip;
7636
7637 btrfs_node_key_to_cpu(path->nodes[level], &key,
7638 path->slots[level]);
7639 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
7640 if (ret < 0)
7641 goto skip;
7642
7643 wc->stage = UPDATE_BACKREF;
7644 wc->shared_level = level - 1;
7645 }
7646 } else {
7647 if (level == 1 &&
7648 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7649 goto skip;
7650 }
7651
7652 if (!btrfs_buffer_uptodate(next, generation, 0)) {
7653 btrfs_tree_unlock(next);
7654 free_extent_buffer(next);
7655 next = NULL;
7656 *lookup_info = 1;
7657 }
7658
7659 if (!next) {
7660 if (reada && level == 1)
7661 reada_walk_down(trans, root, wc, path);
7662 next = read_tree_block(root, bytenr, blocksize, generation);
7663 if (!next || !extent_buffer_uptodate(next)) {
7664 free_extent_buffer(next);
7665 return -EIO;
7666 }
7667 btrfs_tree_lock(next);
7668 btrfs_set_lock_blocking(next);
7669 }
7670
7671 level--;
7672 BUG_ON(level != btrfs_header_level(next));
7673 path->nodes[level] = next;
7674 path->slots[level] = 0;
7675 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7676 wc->level = level;
7677 if (wc->level == 1)
7678 wc->reada_slot = 0;
7679 return 0;
7680skip:
7681 wc->refs[level - 1] = 0;
7682 wc->flags[level - 1] = 0;
7683 if (wc->stage == DROP_REFERENCE) {
7684 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7685 parent = path->nodes[level]->start;
7686 } else {
7687 BUG_ON(root->root_key.objectid !=
7688 btrfs_header_owner(path->nodes[level]));
7689 parent = 0;
7690 }
7691
7692 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
7693 root->root_key.objectid, level - 1, 0, 0);
7694 BUG_ON(ret);
7695 }
7696 btrfs_tree_unlock(next);
7697 free_extent_buffer(next);
7698 *lookup_info = 1;
7699 return 1;
7700}
7701
7702 /*
7703  * helper to process tree block while walking up the tree.
7704  *
7705  * when wc->stage == DROP_REFERENCE, this function drops
7706  * reference count on the block.
7707  *
7708  * when wc->stage == UPDATE_BACKREF, this function changes
7709  * wc->stage back to DROP_REFERENCE if we changed wc->stage
7710  * to UPDATE_BACKREF previously while processing the block.
7711  *
7712  * NOTE: return value 1 means we should stop walking up.
7713  */
7714static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7715 struct btrfs_root *root,
7716 struct btrfs_path *path,
7717 struct walk_control *wc)
7718{
7719 int ret;
7720 int level = wc->level;
7721 struct extent_buffer *eb = path->nodes[level];
7722 u64 parent = 0;
7723
7724 if (wc->stage == UPDATE_BACKREF) {
7725 BUG_ON(wc->shared_level < level);
7726 if (level < wc->shared_level)
7727 goto out;
7728
7729 ret = find_next_key(path, level + 1, &wc->update_progress);
7730 if (ret > 0)
7731 wc->update_ref = 0;
7732
7733 wc->stage = DROP_REFERENCE;
7734 wc->shared_level = -1;
7735 path->slots[level] = 0;
7736
7737 /*
7738  * check reference count again if the block isn't locked.
7739  * we should start walking down the tree again if reference
7740  * count is one.
7741  */
7742 if (!path->locks[level]) {
7743 BUG_ON(level == 0);
7744 btrfs_tree_lock(eb);
7745 btrfs_set_lock_blocking(eb);
7746 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7747
7748 ret = btrfs_lookup_extent_info(trans, root,
7749 eb->start, level, 1,
7750 &wc->refs[level],
7751 &wc->flags[level]);
7752 if (ret < 0) {
7753 btrfs_tree_unlock_rw(eb, path->locks[level]);
7754 path->locks[level] = 0;
7755 return ret;
7756 }
7757 BUG_ON(wc->refs[level] == 0);
7758 if (wc->refs[level] == 1) {
7759 btrfs_tree_unlock_rw(eb, path->locks[level]);
7760 path->locks[level] = 0;
7761 return 1;
7762 }
7763 }
7764 }
7765
7766 /* wc->stage == DROP_REFERENCE */
7767 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
7768
7769 if (wc->refs[level] == 1) {
7770 if (level == 0) {
7771 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7772 ret = btrfs_dec_ref(trans, root, eb, 1,
7773 wc->for_reloc);
7774 else
7775 ret = btrfs_dec_ref(trans, root, eb, 0,
7776 wc->for_reloc);
7777 BUG_ON(ret);
7778 }
7779
7780 if (!path->locks[level] &&
7781 btrfs_header_generation(eb) == trans->transid) {
7782 btrfs_tree_lock(eb);
7783 btrfs_set_lock_blocking(eb);
7784 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7785 }
7786 clean_tree_block(trans, root, eb);
7787 }
7788
7789 if (eb == root->node) {
7790 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7791 parent = eb->start;
7792 else
7793 BUG_ON(root->root_key.objectid !=
7794 btrfs_header_owner(eb));
7795 } else {
7796 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7797 parent = path->nodes[level + 1]->start;
7798 else
7799 BUG_ON(root->root_key.objectid !=
7800 btrfs_header_owner(path->nodes[level + 1]));
7801 }
7802
7803 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
7804out:
7805 wc->refs[level] = 0;
7806 wc->flags[level] = 0;
7807 return 0;
7808}
7809
7810static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
7811 struct btrfs_root *root,
7812 struct btrfs_path *path,
7813 struct walk_control *wc)
7814{
7815 int level = wc->level;
7816 int lookup_info = 1;
7817 int ret;
7818
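/* descend one level per iteration until a helper tells us to stop */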
7819 while (level >= 0) {
7820 ret = walk_down_proc(trans, root, path, wc, lookup_info);
7821 if (ret > 0)
7822 break;
7823
7824 if (level == 0)
7825 break;
7826
7827 if (path->slots[level] >=
7828 btrfs_header_nritems(path->nodes[level]))
7829 break;
7830
7831 ret = do_walk_down(trans, root, path, wc, &lookup_info);
7832 if (ret > 0) {
7833 path->slots[level]++;
7834 continue;
7835 } else if (ret < 0)
7836 return ret;
7837 level = wc->level;
7838 }
7839 return 0;
7840}
7841
7842static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
7843 struct btrfs_root *root,
7844 struct btrfs_path *path,
7845 struct walk_control *wc, int max_level)
7846{
7847 int level = wc->level;
7848 int ret;
7849
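/* step to the next slot at this level, or finish the node and go up */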
7850 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
7851 while (level < max_level && path->nodes[level]) {
7852 wc->level = level;
7853 if (path->slots[level] + 1 <
7854 btrfs_header_nritems(path->nodes[level])) {
7855 path->slots[level]++;
7856 return 0;
7857 } else {
7858 ret = walk_up_proc(trans, root, path, wc);
7859 if (ret > 0)
7860 return 0;
7861
7862 if (path->locks[level]) {
7863 btrfs_tree_unlock_rw(path->nodes[level],
7864 path->locks[level]);
7865 path->locks[level] = 0;
7866 }
7867 free_extent_buffer(path->nodes[level]);
7868 path->nodes[level] = NULL;
7869 level++;
7870 }
7871 }
7872 return 1;
7873}
7874
7875 /*
7876  * drop a subvolume tree.
7877  *
7878  * this function traverses the tree freeing any blocks that are
7879  * only referenced by the tree.
7880  *
7881  * when a shared tree block is found, this function decreases its
7882  * reference count by one.  if update_ref is true, this function
7883  * also makes sure backrefs for the shared block and all lower
7884  * level blocks are properly updated.
7885  *
7886  * If called with for_reloc == 0, may exit early with -EAGAIN
7887  */
7888int btrfs_drop_snapshot(struct btrfs_root *root,
7889 struct btrfs_block_rsv *block_rsv, int update_ref,
7890 int for_reloc)
7891{
7892 struct btrfs_path *path;
7893 struct btrfs_trans_handle *trans;
7894 struct btrfs_root *tree_root = root->fs_info->tree_root;
7895 struct btrfs_root_item *root_item = &root->root_item;
7896 struct walk_control *wc;
7897 struct btrfs_key key;
7898 int err = 0;
7899 int ret;
7900 int level;
7901 bool root_dropped = false;
7902
7903 path = btrfs_alloc_path();
7904 if (!path) {
7905 err = -ENOMEM;
7906 goto out;
7907 }
7908
7909 wc = kzalloc(sizeof(*wc), GFP_NOFS);
7910 if (!wc) {
7911 btrfs_free_path(path);
7912 err = -ENOMEM;
7913 goto out;
7914 }
7915
7916 trans = btrfs_start_transaction(tree_root, 0);
7917 if (IS_ERR(trans)) {
7918 err = PTR_ERR(trans);
7919 goto out_free;
7920 }
7921
7922 if (block_rsv)
7923 trans->block_rsv = block_rsv;
7924
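/*
 * a zeroed drop_progress key means we start from the root; otherwise
 * resume an interrupted drop from the saved key
 */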
7925 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
7926 level = btrfs_header_level(root->node);
7927 path->nodes[level] = btrfs_lock_root_node(root);
7928 btrfs_set_lock_blocking(path->nodes[level]);
7929 path->slots[level] = 0;
7930 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7931 memset(&wc->update_progress, 0,
7932 sizeof(wc->update_progress));
7933 } else {
7934 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
7935 memcpy(&wc->update_progress, &key,
7936 sizeof(wc->update_progress));
7937
7938 level = root_item->drop_level;
7939 BUG_ON(level == 0);
7940 path->lowest_level = level;
7941 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7942 path->lowest_level = 0;
7943 if (ret < 0) {
7944 err = ret;
7945 goto out_end_trans;
7946 }
7947 WARN_ON(ret > 0);
7948
7949 /*
7950  * unlock our path, this is safe because only this
7951  * function is allowed to delete this snapshot
7952  */
7953 btrfs_unlock_up_safe(path, 0);
7954
7955 level = btrfs_header_level(root->node);
7956 while (1) {
7957 btrfs_tree_lock(path->nodes[level]);
7958 btrfs_set_lock_blocking(path->nodes[level]);
7959 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7960
7961 ret = btrfs_lookup_extent_info(trans, root,
7962 path->nodes[level]->start,
7963 level, 1, &wc->refs[level],
7964 &wc->flags[level]);
7965 if (ret < 0) {
7966 err = ret;
7967 goto out_end_trans;
7968 }
7969 BUG_ON(wc->refs[level] == 0);
7970
7971 if (level == root_item->drop_level)
7972 break;
7973
7974 btrfs_tree_unlock(path->nodes[level]);
7975 path->locks[level] = 0;
7976 WARN_ON(wc->refs[level] != 1);
7977 level--;
7978 }
7979 }
7980
7981 wc->level = level;
7982 wc->shared_level = -1;
7983 wc->stage = DROP_REFERENCE;
7984 wc->update_ref = update_ref;
7985 wc->keep_locks = 0;
7986 wc->for_reloc = for_reloc;
7987 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
7988
7989 while (1) {
7990
7991 ret = walk_down_tree(trans, root, path, wc);
7992 if (ret < 0) {
7993 err = ret;
7994 break;
7995 }
7996
7997 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
7998 if (ret < 0) {
7999 err = ret;
8000 break;
8001 }
8002
8003 if (ret > 0) {
8004 BUG_ON(wc->stage != DROP_REFERENCE);
8005 break;
8006 }
8007
8008 if (wc->stage == DROP_REFERENCE) {
8009 level = wc->level;
8010 btrfs_node_key(path->nodes[level],
8011 &root_item->drop_progress,
8012 path->slots[level]);
8013 root_item->drop_level = level;
8014 }
8015
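/*
 * checkpoint drop_progress and end the transaction periodically so a
 * long-running drop doesn't hold one huge transaction open or starve
 * the cleaner thread
 */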
8016 BUG_ON(wc->level == 0);
8017 if (btrfs_should_end_transaction(trans, tree_root) ||
8018 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
8019 ret = btrfs_update_root(trans, tree_root,
8020 &root->root_key,
8021 root_item);
8022 if (ret) {
8023 btrfs_abort_transaction(trans, tree_root, ret);
8024 err = ret;
8025 goto out_end_trans;
8026 }
8027
8028 btrfs_end_transaction_throttle(trans, tree_root);
8029 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8030 pr_debug("BTRFS: drop snapshot early exit\n");
8031 err = -EAGAIN;
8032 goto out_free;
8033 }
8034
8035 trans = btrfs_start_transaction(tree_root, 0);
8036 if (IS_ERR(trans)) {
8037 err = PTR_ERR(trans);
8038 goto out_free;
8039 }
8040 if (block_rsv)
8041 trans->block_rsv = block_rsv;
8042 }
8043 }
8044 btrfs_release_path(path);
8045 if (err)
8046 goto out_end_trans;
8047
8048 ret = btrfs_del_root(trans, tree_root, &root->root_key);
8049 if (ret) {
8050 btrfs_abort_transaction(trans, tree_root, ret);
8051 goto out_end_trans;
8052 }
8053
8054 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8055 ret = btrfs_find_root(tree_root, &root->root_key, path,
8056 NULL, NULL);
8057 if (ret < 0) {
8058 btrfs_abort_transaction(trans, tree_root, ret);
8059 err = ret;
8060 goto out_end_trans;
8061 } else if (ret > 0) {
8062 /*
8063  * if we fail to delete the orphan item this time around,
8064  * it'll get picked up the next time.  The most common
8065  * failure here is just -ENOENT.
8066  */
8067 btrfs_del_orphan_item(trans, tree_root,
8068 root->root_key.objectid);
8069 }
8070 }
8071
8072 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
8073 btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
8074 } else {
8075 free_extent_buffer(root->node);
8076 free_extent_buffer(root->commit_root);
8077 btrfs_put_fs_root(root);
8078 }
8079 root_dropped = true;
8080out_end_trans:
8081 btrfs_end_transaction_throttle(trans, tree_root);
8082out_free:
8083 kfree(wc);
8084 btrfs_free_path(path);
8085out:
8086 /*
8087  * So if we need to stop dropping the snapshot for whatever reason we
8088  * need to make sure to add it back to the dead root list so that we
8089  * keep trying to do the work later.  This also cleans up roots if we
8090  * don't have it in the radix (like when we recover after a power fail
8091  * or unmount) so we don't leak memory.
8092  */
8093 if (!for_reloc && !root_dropped)
8094 btrfs_add_dead_root(root);
8095 if (err && err != -EAGAIN)
8096 btrfs_std_error(root->fs_info, err);
8097 return err;
8098}
8099
8100 /*
8101  * drop subtree rooted at tree block 'node'.
8102  *
8103  * NOTE: this function will unlock and release tree block 'node'.
8104  * only used by the relocation code.
8105  */
8106int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
8107 struct btrfs_root *root,
8108 struct extent_buffer *node,
8109 struct extent_buffer *parent)
8110{
8111 struct btrfs_path *path;
8112 struct walk_control *wc;
8113 int level;
8114 int parent_level;
8115 int ret = 0;
8116 int wret;
8117
8118 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
8119
8120 path = btrfs_alloc_path();
8121 if (!path)
8122 return -ENOMEM;
8123
8124 wc = kzalloc(sizeof(*wc), GFP_NOFS);
8125 if (!wc) {
8126 btrfs_free_path(path);
8127 return -ENOMEM;
8128 }
8129
8130 btrfs_assert_tree_locked(parent);
8131 parent_level = btrfs_header_level(parent);
8132 extent_buffer_get(parent);
8133 path->nodes[parent_level] = parent;
8134 path->slots[parent_level] = btrfs_header_nritems(parent);
8135
8136 btrfs_assert_tree_locked(node);
8137 level = btrfs_header_level(node);
8138 path->nodes[level] = node;
8139 path->slots[level] = 0;
8140 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8141
8142 wc->refs[parent_level] = 1;
8143 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8144 wc->level = level;
8145 wc->shared_level = -1;
8146 wc->stage = DROP_REFERENCE;
8147 wc->update_ref = 0;
8148 wc->keep_locks = 1;
8149 wc->for_reloc = 1;
8150 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
8151
8152 while (1) {
8153 wret = walk_down_tree(trans, root, path, wc);
8154 if (wret < 0) {
8155 ret = wret;
8156 break;
8157 }
8158
8159 wret = walk_up_tree(trans, root, path, wc, parent_level);
8160 if (wret < 0)
8161 ret = wret;
8162 if (wret != 0)
8163 break;
8164 }
8165
8166 kfree(wc);
8167 btrfs_free_path(path);
8168 return ret;
8169}
8170
8171static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8172{
8173 u64 num_devices;
8174 u64 stripped;
8175
8176 /*
8177  * if restripe for this chunk_type is on, pick target profile and
8178  * return, otherwise do the usual balance
8179  */
8180 stripped = get_restripe_target(root->fs_info, flags);
8181 if (stripped)
8182 return extended_to_chunk(stripped);
8183
8184 /*
8185  * we add in the count of missing devices because we want
8186  * to make sure that any RAID levels on a degraded FS
8187  * continue to be honored.
8188  */
8189 num_devices = root->fs_info->fs_devices->rw_devices +
8190 root->fs_info->fs_devices->missing_devices;
8191
8192 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8193 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
8194 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
8195
8196 if (num_devices == 1) {
8197 stripped |= BTRFS_BLOCK_GROUP_DUP;
8198 stripped = flags & ~stripped;
8199
8200 /* turn raid0 into single device chunks */
8201 if (flags & BTRFS_BLOCK_GROUP_RAID0)
8202 return stripped;
8203
8204 /* turn mirroring into duplication */
8205 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
8206 BTRFS_BLOCK_GROUP_RAID10))
8207 return stripped | BTRFS_BLOCK_GROUP_DUP;
8208 } else {
8209 /* they already had raid on here, just return */
8210 if (flags & stripped)
8211 return flags;
8212
8213 stripped |= BTRFS_BLOCK_GROUP_DUP;
8214 stripped = flags & ~stripped;
8215
8216 /* switch duplicated blocks with raid1 */
8217 if (flags & BTRFS_BLOCK_GROUP_DUP)
8218 return stripped | BTRFS_BLOCK_GROUP_RAID1;
8219
8220 /* this is drive concat, leave it alone */
8221 }
8222
8223 return flags;
8224}
8225
8226static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
8227{
8228 struct btrfs_space_info *sinfo = cache->space_info;
8229 u64 num_bytes;
8230 u64 min_allocable_bytes;
8231 int ret = -ENOSPC;
8232
8233 /*
8234  * We need some metadata space and system metadata space for
8235  * allocating chunks in some corner cases until we force to set
8236  * it to be readonly.  For those groups keep 1MB of headroom
8237  * unless the caller forces the switch.
8238  */
8239 if ((sinfo->flags &
8240 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
8241 !force)
8242 min_allocable_bytes = 1 * 1024 * 1024;
8243 else
8244 min_allocable_bytes = 0;
8245
8246 spin_lock(&sinfo->lock);
8247 spin_lock(&cache->lock);
8248
8249 if (cache->ro) {
8250 ret = 0;
8251 goto out;
8252 }
8253
8254 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8255 cache->bytes_super - btrfs_block_group_used(&cache->item);
8256
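/*
 * only flip the group read-only if the rest of the space info can
 * still absorb its unused bytes plus the headroom computed above
 */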
8257 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
8258 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
8259 min_allocable_bytes <= sinfo->total_bytes) {
8260 sinfo->bytes_readonly += num_bytes;
8261 cache->ro = 1;
8262 ret = 0;
8263 }
8264out:
8265 spin_unlock(&cache->lock);
8266 spin_unlock(&sinfo->lock);
8267 return ret;
8268}
8269
8270int btrfs_set_block_group_ro(struct btrfs_root *root,
8271 struct btrfs_block_group_cache *cache)
8272
8273{
8274 struct btrfs_trans_handle *trans;
8275 u64 alloc_flags;
8276 int ret;
8277
8278 BUG_ON(cache->ro);
8279
8280 trans = btrfs_join_transaction(root);
8281 if (IS_ERR(trans))
8282 return PTR_ERR(trans);
8283
8284 alloc_flags = update_block_group_flags(root, cache->flags);
8285 if (alloc_flags != cache->flags) {
8286 ret = do_chunk_alloc(trans, root, alloc_flags,
8287 CHUNK_ALLOC_FORCE);
8288 if (ret < 0)
8289 goto out;
8290 }
8291
8292 ret = set_block_group_ro(cache, 0);
8293 if (!ret)
8294 goto out;
8295 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8296 ret = do_chunk_alloc(trans, root, alloc_flags,
8297 CHUNK_ALLOC_FORCE);
8298 if (ret < 0)
8299 goto out;
8300 ret = set_block_group_ro(cache, 0);
8301out:
8302 btrfs_end_transaction(trans, root);
8303 return ret;
8304}
8305
8306int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8307 struct btrfs_root *root, u64 type)
8308{
8309 u64 alloc_flags = get_alloc_profile(root, type);
8310 return do_chunk_alloc(trans, root, alloc_flags,
8311 CHUNK_ALLOC_FORCE);
8312}
8313
8314 /*
8315  * helper to account the unused space of all the readonly block
8316  * groups in the list.  takes mirrors into account.
8317  */
8318static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
8319{
8320 struct btrfs_block_group_cache *block_group;
8321 u64 free_bytes = 0;
8322 int factor;
8323
8324 list_for_each_entry(block_group, groups_list, list) {
8325 spin_lock(&block_group->lock);
8326
8327 if (!block_group->ro) {
8328 spin_unlock(&block_group->lock);
8329 continue;
8330 }
8331
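/* mirrored profiles store each byte twice, so count their space double */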
8332 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
8333 BTRFS_BLOCK_GROUP_RAID10 |
8334 BTRFS_BLOCK_GROUP_DUP))
8335 factor = 2;
8336 else
8337 factor = 1;
8338
8339 free_bytes += (block_group->key.offset -
8340 btrfs_block_group_used(&block_group->item)) *
8341 factor;
8342
8343 spin_unlock(&block_group->lock);
8344 }
8345
8346 return free_bytes;
8347}
8348
8349 /*
8350  * helper to account the unused space of all the readonly block
8351  * groups in the space_info.  takes mirrors into account.
8352  */
8353u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
8354{
8355 int i;
8356 u64 free_bytes = 0;
8357
8358 spin_lock(&sinfo->lock);
8359
8360 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
8361 if (!list_empty(&sinfo->block_groups[i]))
8362 free_bytes += __btrfs_get_ro_block_group_free_space(
8363 &sinfo->block_groups[i]);
8364
8365 spin_unlock(&sinfo->lock);
8366
8367 return free_bytes;
8368}
8369
8370void btrfs_set_block_group_rw(struct btrfs_root *root,
8371 struct btrfs_block_group_cache *cache)
8372{
8373 struct btrfs_space_info *sinfo = cache->space_info;
8374 u64 num_bytes;
8375
8376 BUG_ON(!cache->ro);
8377
8378 spin_lock(&sinfo->lock);
8379 spin_lock(&cache->lock);
8380 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8381 cache->bytes_super - btrfs_block_group_used(&cache->item);
8382 sinfo->bytes_readonly -= num_bytes;
8383 cache->ro = 0;
8384 spin_unlock(&cache->lock);
8385 spin_unlock(&sinfo->lock);
8386}
8387
8388 /*
8389  * checks to see if it's even possible to relocate this block group.
8390  *
8391  * @return - -1 if it's not a good idea to relocate this block group,
8392  * 0 if it's ok to go ahead and try.
8393  */
8394int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8395{
8396 struct btrfs_block_group_cache *block_group;
8397 struct btrfs_space_info *space_info;
8398 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
8399 struct btrfs_device *device;
8400 struct btrfs_trans_handle *trans;
8401 u64 min_free;
8402 u64 dev_min = 1;
8403 u64 dev_nr = 0;
8404 u64 target;
8405 int index;
8406 int full = 0;
8407 int ret = 0;
8408
8409 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
8410
8411 /* odd, couldn't find the block group, leave it alone */
8412 if (!block_group)
8413 return -1;
8414
8415 min_free = btrfs_block_group_used(&block_group->item);
8416
8417 /* no bytes used, we're good */
8418 if (!min_free)
8419 goto out;
8420
8421 space_info = block_group->space_info;
8422 spin_lock(&space_info->lock);
8423
8424 full = space_info->full;
8425
8426 /*
8427  * if this is the last block group we have in this space, we can't
8428  * relocate it unless we're able to allocate a new chunk below.
8429  *
8430  * Otherwise, we need to make sure we have room in the space to handle
8431  * all of the extents from this block group.  If we can, we're good
8432  */
8433 if ((space_info->total_bytes != block_group->key.offset) &&
8434 (space_info->bytes_used + space_info->bytes_reserved +
8435 space_info->bytes_pinned + space_info->bytes_readonly +
8436 min_free < space_info->total_bytes)) {
8437 spin_unlock(&space_info->lock);
8438 goto out;
8439 }
8440 spin_unlock(&space_info->lock);
8441
8442 /*
8443  * ok we don't have enough space, but maybe we have free space on our
8444  * devices to allocate new chunks for relocation, so loop through our
8445  * alloc devices and guess if we have enough space.  if this block
8446  * group is going to be restriped, run checks against the target
8447  * profile instead of the current one.
8448  */
8449 ret = -1;
8450
8451 /*
8452  * index:
8453  *      0: raid10
8454  *      1: raid1
8455  *      2: dup
8456  *      3: raid0
8457  *      4: single
8458  */
8459 target = get_restripe_target(root->fs_info, block_group->flags);
8460 if (target) {
8461 index = __get_raid_index(extended_to_chunk(target));
8462 } else {
8463 /*
8464  * this is just a balance, so if we were marked as full
8465  * we know there is no space for a new chunk
8466  */
8467 if (full)
8468 goto out;
8469
8470 index = get_block_group_index(block_group);
8471 }
8472
8473 if (index == BTRFS_RAID_RAID10) {
8474 dev_min = 4;
8475 /* Divide by 2 */
8476 min_free >>= 1;
8477 } else if (index == BTRFS_RAID_RAID1) {
8478 dev_min = 2;
8479 } else if (index == BTRFS_RAID_DUP) {
8480 /* Multiply by 2 */
8481 min_free <<= 1;
8482 } else if (index == BTRFS_RAID_RAID0) {
8483 dev_min = fs_devices->rw_devices;
8484 do_div(min_free, dev_min);
8485 }
8486
8487 /* We need to do this so that we can look at pending chunks */
8488 trans = btrfs_join_transaction(root);
8489 if (IS_ERR(trans)) {
8490 ret = PTR_ERR(trans);
8491 goto out;
8492 }
8493
8494 mutex_lock(&root->fs_info->chunk_mutex);
8495 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
8496 u64 dev_offset;
8497
8498 /*
8499  * check to make sure we can actually find a chunk with enough
8500  * space to fit our block group in.
8501  */
8502 if (device->total_bytes > device->bytes_used + min_free &&
8503 !device->is_tgtdev_for_dev_replace) {
8504 ret = find_free_dev_extent(trans, device, min_free,
8505 &dev_offset, NULL);
8506 if (!ret)
8507 dev_nr++;
8508
8509 if (dev_nr >= dev_min)
8510 break;
8511
8512 ret = -1;
8513 }
8514 }
8515 mutex_unlock(&root->fs_info->chunk_mutex);
8516 btrfs_end_transaction(trans, root);
8517out:
8518 btrfs_put_block_group(block_group);
8519 return ret;
8520}
8521
8522static int find_first_block_group(struct btrfs_root *root,
8523 struct btrfs_path *path, struct btrfs_key *key)
8524{
8525 int ret = 0;
8526 struct btrfs_key found_key;
8527 struct extent_buffer *leaf;
8528 int slot;
8529
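/* scan forward from *key for the first block group item */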
8530 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
8531 if (ret < 0)
8532 goto out;
8533
8534 while (1) {
8535 slot = path->slots[0];
8536 leaf = path->nodes[0];
8537 if (slot >= btrfs_header_nritems(leaf)) {
8538 ret = btrfs_next_leaf(root, path);
8539 if (ret == 0)
8540 continue;
8541 if (ret < 0)
8542 goto out;
8543 break;
8544 }
8545 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8546
8547 if (found_key.objectid >= key->objectid &&
8548 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8549 ret = 0;
8550 goto out;
8551 }
8552 path->slots[0]++;
8553 }
8554out:
8555 return ret;
8556}
8557
8558void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
8559{
8560 struct btrfs_block_group_cache *block_group;
8561 u64 last = 0;
8562
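/* drop the free space cache inode reference held by each block group */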
8563 while (1) {
8564 struct inode *inode;
8565
8566 block_group = btrfs_lookup_first_block_group(info, last);
8567 while (block_group) {
8568 spin_lock(&block_group->lock);
8569 if (block_group->iref)
8570 break;
8571 spin_unlock(&block_group->lock);
8572 block_group = next_block_group(info->tree_root,
8573 block_group);
8574 }
8575 if (!block_group) {
8576 if (last == 0)
8577 break;
8578 last = 0;
8579 continue;
8580 }
8581
8582 inode = block_group->inode;
8583 block_group->iref = 0;
8584 block_group->inode = NULL;
8585 spin_unlock(&block_group->lock);
8586 iput(inode);
8587 last = block_group->key.objectid + block_group->key.offset;
8588 btrfs_put_block_group(block_group);
8589 }
8590}
8591
8592int btrfs_free_block_groups(struct btrfs_fs_info *info)
8593{
8594 struct btrfs_block_group_cache *block_group;
8595 struct btrfs_space_info *space_info;
8596 struct btrfs_caching_control *caching_ctl;
8597 struct rb_node *n;
8598
8599 down_write(&info->commit_root_sem);
8600 while (!list_empty(&info->caching_block_groups)) {
8601 caching_ctl = list_entry(info->caching_block_groups.next,
8602 struct btrfs_caching_control, list);
8603 list_del(&caching_ctl->list);
8604 put_caching_control(caching_ctl);
8605 }
8606 up_write(&info->commit_root_sem);
8607
8608 spin_lock(&info->block_group_cache_lock);
8609 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
8610 block_group = rb_entry(n, struct btrfs_block_group_cache,
8611 cache_node);
8612 rb_erase(&block_group->cache_node,
8613 &info->block_group_cache_tree);
8614 spin_unlock(&info->block_group_cache_lock);
8615
8616 down_write(&block_group->space_info->groups_sem);
8617 list_del(&block_group->list);
8618 up_write(&block_group->space_info->groups_sem);
8619
8620 if (block_group->cached == BTRFS_CACHE_STARTED)
8621 wait_block_group_cache_done(block_group);
8622
8623 /*
8624  * We haven't cached this block group, which means we could
8625  * possibly have excluded extents on this block group.
8626  */
8627 if (block_group->cached == BTRFS_CACHE_NO ||
8628 block_group->cached == BTRFS_CACHE_ERROR)
8629 free_excluded_extents(info->extent_root, block_group);
8630
8631 btrfs_remove_free_space_cache(block_group);
8632 btrfs_put_block_group(block_group);
8633
8634 spin_lock(&info->block_group_cache_lock);
8635 }
8636 spin_unlock(&info->block_group_cache_lock);
8637
8638 /* now that all the block groups are freed, go through and
8639  * free all the space_info structs.  This is only called during
8640  * the final stages of unmount, and so we know nobody is
8641  * using them.  We call synchronize_rcu() once before we start,
8642  * just to be on the safe side.
8643  */
8644 synchronize_rcu();
8645
8646 release_global_block_rsv(info);
8647
8648 while (!list_empty(&info->space_info)) {
8649 int i;
8650
8651 space_info = list_entry(info->space_info.next,
8652 struct btrfs_space_info,
8653 list);
8654 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
8655 if (WARN_ON(space_info->bytes_pinned > 0 ||
8656 space_info->bytes_reserved > 0 ||
8657 space_info->bytes_may_use > 0)) {
8658 dump_space_info(space_info, 0, 0);
8659 }
8660 }
8661 list_del(&space_info->list);
8662 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
8663 struct kobject *kobj;
8664 kobj = space_info->block_group_kobjs[i];
8665 space_info->block_group_kobjs[i] = NULL;
8666 if (kobj) {
8667 kobject_del(kobj);
8668 kobject_put(kobj);
8669 }
8670 }
8671 kobject_del(&space_info->kobj);
8672 kobject_put(&space_info->kobj);
8673 }
8674 return 0;
8675}
8676
8677static void __link_block_group(struct btrfs_space_info *space_info,
8678 struct btrfs_block_group_cache *cache)
8679{
8680 int index = get_block_group_index(cache);
8681 bool first = false;
8682
8683 down_write(&space_info->groups_sem);
8684 if (list_empty(&space_info->block_groups[index]))
8685 first = true;
8686 list_add_tail(&cache->list, &space_info->block_groups[index]);
8687 up_write(&space_info->groups_sem);
8688
8689 if (first) {
8690 struct raid_kobject *rkobj;
8691 int ret;
8692
8693 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
8694 if (!rkobj)
8695 goto out_err;
8696 rkobj->raid_type = index;
8697 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
8698 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
8699 "%s", get_raid_name(index));
8700 if (ret) {
8701 kobject_put(&rkobj->kobj);
8702 goto out_err;
8703 }
8704 space_info->block_group_kobjs[index] = &rkobj->kobj;
8705 }
8706
8707 return;
8708out_err:
8709 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
8710}
8711
8712static struct btrfs_block_group_cache *
8713btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8714{
8715 struct btrfs_block_group_cache *cache;
8716
8717 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8718 if (!cache)
8719 return NULL;
8720
8721 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8722 GFP_NOFS);
8723 if (!cache->free_space_ctl) {
8724 kfree(cache);
8725 return NULL;
8726 }
8727
8728 cache->key.objectid = start;
8729 cache->key.offset = size;
8730 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8731
8732 cache->sectorsize = root->sectorsize;
8733 cache->fs_info = root->fs_info;
8734 cache->full_stripe_len = btrfs_full_stripe_len(root,
8735 &root->fs_info->mapping_tree,
8736 start);
8737 atomic_set(&cache->count, 1);
8738 spin_lock_init(&cache->lock);
8739 init_rwsem(&cache->data_rwsem);
8740 INIT_LIST_HEAD(&cache->list);
8741 INIT_LIST_HEAD(&cache->cluster_list);
8742 INIT_LIST_HEAD(&cache->new_bg_list);
8743 btrfs_init_free_space_ctl(cache);
8744
8745 return cache;
8746}
8747
8748int btrfs_read_block_groups(struct btrfs_root *root)
8749{
8750 struct btrfs_path *path;
8751 int ret;
8752 struct btrfs_block_group_cache *cache;
8753 struct btrfs_fs_info *info = root->fs_info;
8754 struct btrfs_space_info *space_info;
8755 struct btrfs_key key;
8756 struct btrfs_key found_key;
8757 struct extent_buffer *leaf;
8758 int need_clear = 0;
8759 u64 cache_gen;
8760
8761 root = info->extent_root;
8762 key.objectid = 0;
8763 key.offset = 0;
8764 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
8765 path = btrfs_alloc_path();
8766 if (!path)
8767 return -ENOMEM;
8768 path->reada = 1;
8769
8770 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
8771 if (btrfs_test_opt(root, SPACE_CACHE) &&
8772 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
8773 need_clear = 1;
8774 if (btrfs_test_opt(root, CLEAR_CACHE))
8775 need_clear = 1;
8776
8777 while (1) {
8778 ret = find_first_block_group(root, path, &key);
8779 if (ret > 0)
8780 break;
8781 if (ret != 0)
8782 goto error;
8783
8784 leaf = path->nodes[0];
8785 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8786
8787 cache = btrfs_create_block_group_cache(root, found_key.objectid,
8788 found_key.offset);
8789 if (!cache) {
8790 ret = -ENOMEM;
8791 goto error;
8792 }
8793
8794 if (need_clear) {
8795 /*
8796  * When we mount with old space cache, we need to
8797  * set BTRFS_DC_CLEAR and set dirty flag.
8798  *
8799  * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
8800  *    truncate the old free space cache inode and
8801  *    setup a new one.
8802  * b) Setting 'dirty flag' makes sure that we flush
8803  *    the new space cache info onto disk.
8804  */
8805 cache->disk_cache_state = BTRFS_DC_CLEAR;
8806 if (btrfs_test_opt(root, SPACE_CACHE))
8807 cache->dirty = 1;
8808 }
8809
8810 read_extent_buffer(leaf, &cache->item,
8811 btrfs_item_ptr_offset(leaf, path->slots[0]),
8812 sizeof(cache->item));
8813 cache->flags = btrfs_block_group_flags(&cache->item);
8814
8815 key.objectid = found_key.objectid + found_key.offset;
8816 btrfs_release_path(path);
8817
8818 /*
8819  * We need to exclude the super stripes now so that the space
8820  * info has super bytes accounted for, otherwise we'll think
8821  * we have more space than we actually do.
8822  */
8823 ret = exclude_super_stripes(root, cache);
8824 if (ret) {
8825 /*
8826  * We may have excluded something, so call this just in
8827  * case.
8828  */
8829 free_excluded_extents(root, cache);
8830 btrfs_put_block_group(cache);
8831 goto error;
8832 }
8833
8834 /*
8835  * check for two cases, either we are full, and therefore
8836  * don't need to bother with the caching work since we won't
8837  * find any space, or we are empty, and we can just add all
8838  * the space in and be done with it.  This saves us a lot of
8839  * time, particularly in the full case.
8840  */
8841 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
8842 cache->last_byte_to_unpin = (u64)-1;
8843 cache->cached = BTRFS_CACHE_FINISHED;
8844 free_excluded_extents(root, cache);
8845 } else if (btrfs_block_group_used(&cache->item) == 0) {
8846 cache->last_byte_to_unpin = (u64)-1;
8847 cache->cached = BTRFS_CACHE_FINISHED;
8848 add_new_free_space(cache, root->fs_info,
8849 found_key.objectid,
8850 found_key.objectid +
8851 found_key.offset);
8852 free_excluded_extents(root, cache);
8853 }
8854
8855 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8856 if (ret) {
8857 btrfs_remove_free_space_cache(cache);
8858 btrfs_put_block_group(cache);
8859 goto error;
8860 }
8861
8862 ret = update_space_info(info, cache->flags, found_key.offset,
8863 btrfs_block_group_used(&cache->item),
8864 &space_info);
8865 if (ret) {
8866 btrfs_remove_free_space_cache(cache);
8867 spin_lock(&info->block_group_cache_lock);
8868 rb_erase(&cache->cache_node,
8869 &info->block_group_cache_tree);
8870 spin_unlock(&info->block_group_cache_lock);
8871 btrfs_put_block_group(cache);
8872 goto error;
8873 }
8874
8875 cache->space_info = space_info;
8876 spin_lock(&cache->space_info->lock);
8877 cache->space_info->bytes_readonly += cache->bytes_super;
8878 spin_unlock(&cache->space_info->lock);
8879
8880 __link_block_group(space_info, cache);
8881
8882 set_avail_alloc_bits(root->fs_info, cache->flags);
8883 if (btrfs_chunk_readonly(root, cache->key.objectid))
8884 set_block_group_ro(cache, 1);
8885 }
8886
8887 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
8888 if (!(get_alloc_profile(root, space_info->flags) &
8889 (BTRFS_BLOCK_GROUP_RAID10 |
8890 BTRFS_BLOCK_GROUP_RAID1 |
8891 BTRFS_BLOCK_GROUP_RAID5 |
8892 BTRFS_BLOCK_GROUP_RAID6 |
8893 BTRFS_BLOCK_GROUP_DUP)))
8894 continue;
8895
8896 /*
8897  * avoid allocating from un-mirrored block group if there are mirrored ones
8898  */
8899 list_for_each_entry(cache,
8900 &space_info->block_groups[BTRFS_RAID_RAID0],
8901 list)
8902 set_block_group_ro(cache, 1);
8903 list_for_each_entry(cache,
8904 &space_info->block_groups[BTRFS_RAID_SINGLE],
8905 list)
8906 set_block_group_ro(cache, 1);
8907 }
8908
8909 init_global_block_rsv(info);
8910 ret = 0;
8911error:
8912 btrfs_free_path(path);
8913 return ret;
8914}
8915
8916void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
8917 struct btrfs_root *root)
8918{
8919 struct btrfs_block_group_cache *block_group, *tmp;
8920 struct btrfs_root *extent_root = root->fs_info->extent_root;
8921 struct btrfs_block_group_item item;
8922 struct btrfs_key key;
8923 int ret = 0;
8924
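/*
 * insert block group items for the chunks created earlier in this
 * transaction; on error, abort the transaction but keep draining the
 * list
 */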
8925 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs,
8926 new_bg_list) {
8927 list_del_init(&block_group->new_bg_list);
8928
8929 if (ret)
8930 continue;
8931
8932 spin_lock(&block_group->lock);
8933 memcpy(&item, &block_group->item, sizeof(item));
8934 memcpy(&key, &block_group->key, sizeof(key));
8935 spin_unlock(&block_group->lock);
8936
8937 ret = btrfs_insert_item(trans, extent_root, &key, &item,
8938 sizeof(item));
8939 if (ret)
8940 btrfs_abort_transaction(trans, extent_root, ret);
8941 ret = btrfs_finish_chunk_alloc(trans, extent_root,
8942 key.objectid, key.offset);
8943 if (ret)
8944 btrfs_abort_transaction(trans, extent_root, ret);
8945 }
8946}
8947
8948int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8949 struct btrfs_root *root, u64 bytes_used,
8950 u64 type, u64 chunk_objectid, u64 chunk_offset,
8951 u64 size)
8952{
8953 int ret;
8954 struct btrfs_root *extent_root;
8955 struct btrfs_block_group_cache *cache;
8956
8957 extent_root = root->fs_info->extent_root;
8958
8959 btrfs_set_log_full_commit(root->fs_info, trans);
8960
8961 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
8962 if (!cache)
8963 return -ENOMEM;
8964
8965 btrfs_set_block_group_used(&cache->item, bytes_used);
8966 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8967 btrfs_set_block_group_flags(&cache->item, type);
8968
8969 cache->flags = type;
8970 cache->last_byte_to_unpin = (u64)-1;
8971 cache->cached = BTRFS_CACHE_FINISHED;
8972 ret = exclude_super_stripes(root, cache);
8973 if (ret) {
8974 /*
8975  * We may have excluded something, so call this just in
8976  * case.
8977  */
		free_excluded_extents(root, cache);
		btrfs_put_block_group(cache);
		return ret;
	}

	add_new_free_space(cache, root->fs_info, chunk_offset,
			   chunk_offset + size);

	free_excluded_extents(root, cache);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		btrfs_put_block_group(cache);
		return ret;
	}

	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		spin_lock(&root->fs_info->block_group_cache_lock);
		rb_erase(&cache->cache_node,
			 &root->fs_info->block_group_cache_tree);
		spin_unlock(&root->fs_info->block_group_cache_lock);
		btrfs_put_block_group(cache);
		return ret;
	}
	update_global_block_rsv(root->fs_info);

	spin_lock(&cache->space_info->lock);
	cache->space_info->bytes_readonly += cache->bytes_super;
	spin_unlock(&cache->space_info->lock);

	__link_block_group(cache->space_info, cache);

	list_add_tail(&cache->new_bg_list, &trans->new_bgs);

	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}

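/*
 * Clear the RAID profile bits for this chunk type from the per-type
 * available allocation masks; the inverse of set_avail_alloc_bits().
 */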
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 extra_flags = chunk_to_extended(flags) &
				BTRFS_EXTENDED_PROFILE_MASK;

	write_seqlock(&fs_info->profiles_lock);
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		fs_info->avail_data_alloc_bits &= ~extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_METADATA)
		fs_info->avail_metadata_alloc_bits &= ~extra_flags;
	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
		fs_info->avail_system_alloc_bits &= ~extra_flags;
	write_sequnlock(&fs_info->profiles_lock);
}

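/*
 * Remove a block group that has been emptied and marked read-only: drop
 * its free space cache inode, unlink it from its space_info and the block
 * group cache tree, and delete its item from the extent tree.
 */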
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_free_cluster *cluster;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_key key;
	struct inode *inode;
	struct kobject *kobj = NULL;
	int ret;
	int index;
	int factor;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	BUG_ON(!block_group->ro);

	/*
	 * Free the reserved super bytes from this block group before
	 * removing it.
	 */
	free_excluded_extents(root, block_group);

	memcpy(&key, &block_group->key, sizeof(key));
	index = get_block_group_index(block_group);
	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
				  BTRFS_BLOCK_GROUP_RAID1 |
				  BTRFS_BLOCK_GROUP_RAID10))
		factor = 2;
	else
		factor = 1;

	/* make sure this block group isn't part of an allocation cluster */
	cluster = &root->fs_info->data_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	/*
	 * make sure this block group isn't part of a metadata
	 * allocation cluster
	 */
	cluster = &root->fs_info->meta_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	inode = lookup_free_space_inode(tree_root, block_group, path);
	if (!IS_ERR(inode)) {
		ret = btrfs_orphan_add(trans, inode);
		if (ret) {
			btrfs_add_delayed_iput(inode);
			goto out;
		}
		clear_nlink(inode);
		/* One for the block groups ref */
		spin_lock(&block_group->lock);
		if (block_group->iref) {
			block_group->iref = 0;
			block_group->inode = NULL;
			spin_unlock(&block_group->lock);
			iput(inode);
		} else {
			spin_unlock(&block_group->lock);
		}
		/* One for our lookup ref */
		btrfs_add_delayed_iput(inode);
	}

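	/*
	 * Delete this block group's free space cache item, if any, from
	 * the tree root (the key's offset is the block group start).
	 */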
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0)
		btrfs_release_path(path);
	if (ret == 0) {
		ret = btrfs_del_item(trans, tree_root, path);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}

	spin_lock(&root->fs_info->block_group_cache_lock);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);

	if (root->fs_info->first_logical_byte == block_group->key.objectid)
		root->fs_info->first_logical_byte = (u64)-1;
	spin_unlock(&root->fs_info->block_group_cache_lock);

	down_write(&block_group->space_info->groups_sem);

	/*
	 * we must use list_del_init so people can check to see if they
	 * are still on the list after taking the semaphore
	 */
	list_del_init(&block_group->list);
	if (list_empty(&block_group->space_info->block_groups[index])) {
		kobj = block_group->space_info->block_group_kobjs[index];
		block_group->space_info->block_group_kobjs[index] = NULL;
		clear_avail_alloc_bits(root->fs_info, block_group->flags);
	}
	up_write(&block_group->space_info->groups_sem);
	if (kobj) {
		kobject_del(kobj);
		kobject_put(kobj);
	}

	if (block_group->cached == BTRFS_CACHE_STARTED)
		wait_block_group_cache_done(block_group);

	btrfs_remove_free_space_cache(block_group);

	spin_lock(&block_group->space_info->lock);
	block_group->space_info->total_bytes -= block_group->key.offset;
	block_group->space_info->bytes_readonly -= block_group->key.offset;
	block_group->space_info->disk_total -= block_group->key.offset * factor;
	spin_unlock(&block_group->space_info->lock);

	memcpy(&key, &block_group->key, sizeof(key));

	btrfs_clear_space_info_full(root->fs_info);

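	/*
	 * Drop two references: one for the rbtree entry erased above, and
	 * one for the btrfs_lookup_block_group() at the top of this
	 * function.
	 */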
	btrfs_put_block_group(block_group);
	btrfs_put_block_group(block_group);

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -EIO;
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}

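/*
 * Create the initial empty space_info structures: system, plus either a
 * mixed data+metadata space_info or separate data and metadata ones,
 * depending on the MIXED_GROUPS incompat feature.
 */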
int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;
	struct btrfs_super_block *disk_super;
	u64 features;
	u64 flags;
	int mixed = 0;
	int ret;

	disk_super = fs_info->super_copy;
	if (!btrfs_super_root(disk_super))
		return 1;

	features = btrfs_super_incompat_flags(disk_super);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	flags = BTRFS_BLOCK_GROUP_SYSTEM;
	ret = update_space_info(fs_info, flags, 0, 0, &space_info);
	if (ret)
		goto out;

	if (mixed) {
		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
	} else {
		flags = BTRFS_BLOCK_GROUP_METADATA;
		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
		if (ret)
			goto out;

		flags = BTRFS_BLOCK_GROUP_DATA;
		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
	}
out:
	return ret;
}

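/* Thin wrappers exported for the error handling and cleanup paths. */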
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
	return unpin_extent_range(root, start, end);
}

int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
			       u64 num_bytes, u64 *actual_bytes)
{
	return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
}

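/*
 * Trim (discard) the free space in every block group that overlaps the
 * requested range; on return, range->len holds the number of bytes
 * actually trimmed.
 */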
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	u64 group_trimmed;
	u64 start;
	u64 end;
	u64 trimmed = 0;
	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	int ret = 0;

	/*
	 * try to trim all FS space, our block group may start from non-zero.
	 */
	if (range->len == total_bytes)
		cache = btrfs_lookup_first_block_group(fs_info, range->start);
	else
		cache = btrfs_lookup_block_group(fs_info, range->start);

	while (cache) {
		if (cache->key.objectid >= (range->start + range->len)) {
			btrfs_put_block_group(cache);
			break;
		}

		start = max(range->start, cache->key.objectid);
		end = min(range->start + range->len,
			  cache->key.objectid + cache->key.offset);

		if (end - start >= range->minlen) {
			if (!block_group_cache_done(cache)) {
				ret = cache_block_group(cache, 0);
				if (ret) {
					btrfs_put_block_group(cache);
					break;
				}
				ret = wait_block_group_cache_done(cache);
				if (ret) {
					btrfs_put_block_group(cache);
					break;
				}
			}
			ret = btrfs_trim_block_group(cache,
						     &group_trimmed,
						     start,
						     end,
						     range->minlen);

			trimmed += group_trimmed;
			if (ret) {
				btrfs_put_block_group(cache);
				break;
			}
		}

		cache = next_block_group(fs_info->tree_root, cache);
	}

	range->len = trimmed;
	return ret;
}

/*
 * btrfs_{start,end}_nocow_write() are similar to mnt_{want,drop}_write():
 * they are used to prevent tasks from writing data into the page cache via
 * nocow before the subvolume is snapshotted; whether the data is flushed
 * is left to the caller.
 */
void btrfs_end_nocow_write(struct btrfs_root *root)
{
	percpu_counter_dec(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we wake up
	 * waiters.
	 */
	smp_mb();
	if (waitqueue_active(&root->subv_writers->wait))
		wake_up(&root->subv_writers->wait);
}

int btrfs_start_nocow_write(struct btrfs_root *root)
{
	if (unlikely(atomic_read(&root->will_be_snapshoted)))
		return 0;

	percpu_counter_inc(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we check for snapshot creation.
	 */
	smp_mb();
	if (unlikely(atomic_read(&root->will_be_snapshoted))) {
		btrfs_end_nocow_write(root);
		return 0;
	}
	return 1;
}