/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"

#undef SCRAMBLE_DELAYED_REFS

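/*
 * control flags for do_chunk_alloc's force field
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one
 */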
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

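/*
 * Control how reservations are dealt with.
 *
 * RESERVE_FREE - freeing a reservation.
 * RESERVE_ALLOC - allocating space and we need to update
 *   bytes_may_use.
 * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
 *   bytes_may_use.
 */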
enum {
	RESERVE_FREE = 0,
	RESERVE_ALLOC = 1,
	RESERVE_ALLOC_NO_ACCOUNT = 2,
};

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op,
			       int no_quota);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins,
				     int no_quota);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve,
				       int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

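/*
 * this adds the block group to the fs_info rb tree for the
 * block group cache
 */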
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

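/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */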
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

static int add_excluded_extent(struct btrfs_root *root,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&root->fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	set_extent_bits(&root->fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	return 0;
}

static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
}

static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	atomic_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (atomic_dec_and_test(&ctl->count))
		kfree(ctl);
}

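/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */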
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret = -ENOMEM;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

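	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since its read-only
	 */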
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 1;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	mutex_lock(&caching_ctl->mutex);

	down_read(&fs_info->commit_root_sem);

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				goto again;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto err;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->tree_root->nodesize;
			else
				last = key.objectid + key.offset;

			if (total_found > (1024 * 1024 * 2)) {
				total_found = 0;
				wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->commit_root_sem);

	free_excluded_extents(extent_root, block_group);

	mutex_unlock(&caching_ctl->mutex);
out:
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->caching_ctl = NULL;
		block_group->cached = BTRFS_CACHE_ERROR;
		spin_unlock(&block_group->lock);
	}
	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);

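	/*
	 * This should be a rare occasion, but this could happen I think in the
	 * case where one thread starts to load the space cache info, and then
	 * some other thread starts a transaction commit which tries to do an
	 * allocation while the other thread is still loading the space cache
	 * info.  The previous loop should have kept us from choosing this block
	 * group, but if we've moved to the state where we will wait on caching
	 * block groups we need to first check if we're doing a fast load here,
	 * so we can wait for it to finish, otherwise we could end up allocating
	 * from a block group who's cache gets evicted for one reason or
	 * another.
	 */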
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
		mutex_lock(&caching_ctl->mutex);
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	} else {
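		/*
		 * We are not going to do the fast caching, set cached to the
		 * appropriate value and wakeup any waiters.
		 */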
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}

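/*
 * return the block group that starts at or after bytenr
 */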
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

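/*
 * return the block group that contains the given bytenr
 */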
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

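/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */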
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

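/* simple helper to search for an existing data extent at a given offset */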
int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

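/*
 * helper function to lookup the reference count and flags of an extent.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */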
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

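	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different
	 */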
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == root->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);

			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

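			/*
			 * Mutex was contended, block until it's released and
			 * try again
			 */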
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
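
/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.
 *
 * There are two kinds of back refs.  The implicit back refs
 * (BTRFS_TREE_BLOCK_REF_KEY / BTRFS_EXTENT_DATA_REF_KEY) describe the
 * reference by the owner tree, and for data extents by the inode objectid
 * and file offset as well; the referenced block can then be found by
 * searching the owner tree.  The full back refs
 * (BTRFS_SHARED_BLOCK_REF_KEY / BTRFS_SHARED_DATA_REF_KEY) describe the
 * reference by the bytenr of the parent tree block, and are used when the
 * owner tree can no longer be derived, for example after relocation
 * (BTRFS_BLOCK_FLAG_FULL_BACKREF is set on the extent in that case).
 *
 * The extent item records the reference count and flags, followed by the
 * inline back refs (and, for non-skinny metadata, a btrfs_tree_block_info).
 * When the inline back refs no longer fit in the item, additional back
 * refs are stored as separate keyed items that immediately follow the
 * extent item.  Inline back refs are ordered by type, in the same order
 * that the corresponding keyed back ref items sort in the tree.
 */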
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);

	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);

		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

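/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, but slot to insert back ref is found,
 * it returns -ENOENT and *ref_ret is set to the address where the
 * new back ref should be inserted.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */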
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

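	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */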
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

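	/*
	 * The skinny metadata key may be missing for blocks created before
	 * the skinny-metadata feature was enabled, so fall back to the
	 * regular extent item key.
	 */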
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
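		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */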
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

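/*
 * helper to add new inline back ref
 */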
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(root, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

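/*
 * helper to update/remove inline back ref
 */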
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(root, path, iref,
					     refs_to_add, extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(root, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(root, path, iref,
					     -refs_to_drop, NULL, last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

static int btrfs_issue_discard(struct block_device *bdev,
			       u64 start, u64 len)
{
	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
}

int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

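	/* Tell the block device(s) that the sectors can be discarded */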
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);
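	/* Error condition is -ENOMEM */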
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length);
			if (!ret)
				discarded_bytes += stripe->length;
			else if (ret != -EOPNOTSUPP)
				break;
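
			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */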
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}
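
/* Can return -ENOMEM */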
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset,
			 int no_quota)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, owner, offset,
					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  int no_quota,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 refs;
	int ret;
	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
		no_quota = 1;

	path->reada = 1;
	path->leave_spinning = 1;

	ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
		goto out;
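	/*
	 * Ok we were able to insert an inline extent and it appears to be a
	 * new reference, deal with the qgroup accounting.
	 */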
	if (!ret && !no_quota) {
		ASSERT(root->fs_info->quota_enabled);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_item);
		if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
			type = BTRFS_QGROUP_OPER_ADD_SHARED;
		btrfs_release_path(path);

		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
					      bytenr, num_bytes, type, 0);
		goto out;
	}

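	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert and
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */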
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	if (refs)
		type = BTRFS_QGROUP_OPER_ADD_SHARED;
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	if (!no_quota) {
		ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
					      bytenr, num_bytes, type, 0);
		if (ret)
			goto out;
	}

	path->reada = 1;
	path->leave_spinning = 1;
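	/* now insert the actual backref */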
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     node->no_quota, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op, node->no_quota);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == node->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == node->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = node->bytenr;
				key.offset = node->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins,
						node->no_quota);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent, ref_root,
					     ref->level, 0, 1, node->no_quota,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent, ref_root,
					  ref->level, 0, 1, extent_op,
					  node->no_quota);
	} else {
		BUG();
	}
	return ret;
}
2272
2273
2274static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2275 struct btrfs_root *root,
2276 struct btrfs_delayed_ref_node *node,
2277 struct btrfs_delayed_extent_op *extent_op,
2278 int insert_reserved)
2279{
2280 int ret = 0;
2281
2282 if (trans->aborted) {
2283 if (insert_reserved)
2284 btrfs_pin_extent(root, node->bytenr,
2285 node->num_bytes, 1);
2286 return 0;
2287 }
2288
2289 if (btrfs_delayed_ref_is_head(node)) {
2290 struct btrfs_delayed_ref_head *head;
2291
 /*
 * we've hit the end of the chain and we were supposed
 * to insert this extent into the tree. But, it got
 * deleted before we ever needed to insert it, so all
 * we have to do is clean up the accounting
 */
2297 BUG_ON(extent_op);
2298 head = btrfs_delayed_node_to_head(node);
2299 trace_run_delayed_ref_head(node, head, node->action);
2300
2301 if (insert_reserved) {
2302 btrfs_pin_extent(root, node->bytenr,
2303 node->num_bytes, 1);
2304 if (head->is_data) {
2305 ret = btrfs_del_csums(trans, root,
2306 node->bytenr,
2307 node->num_bytes);
2308 }
2309 }
2310 return ret;
2311 }
2312
2313 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2314 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2315 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2316 insert_reserved);
2317 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2318 node->type == BTRFS_SHARED_DATA_REF_KEY)
2319 ret = run_delayed_data_ref(trans, root, node, extent_op,
2320 insert_reserved);
2321 else
2322 BUG();
2323 return ret;
2324}
2325
2326static noinline struct btrfs_delayed_ref_node *
2327select_delayed_ref(struct btrfs_delayed_ref_head *head)
2328{
2329 struct rb_node *node;
 struct btrfs_delayed_ref_node *ref, *last = NULL;
2331
 /*
 * select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
 * this prevents the ref count from going down to zero while
 * there are still pending delayed refs.
 */
2337 node = rb_first(&head->ref_root);
2338 while (node) {
2339 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2340 rb_node);
2341 if (ref->action == BTRFS_ADD_DELAYED_REF)
2342 return ref;
2343 else if (last == NULL)
2344 last = ref;
2345 node = rb_next(node);
2346 }
2347 return last;
2348}
2349
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2354static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2355 struct btrfs_root *root,
2356 unsigned long nr)
2357{
2358 struct btrfs_delayed_ref_root *delayed_refs;
2359 struct btrfs_delayed_ref_node *ref;
2360 struct btrfs_delayed_ref_head *locked_ref = NULL;
2361 struct btrfs_delayed_extent_op *extent_op;
2362 struct btrfs_fs_info *fs_info = root->fs_info;
2363 ktime_t start = ktime_get();
2364 int ret;
2365 unsigned long count = 0;
2366 unsigned long actual_count = 0;
2367 int must_insert_reserved = 0;
2368
2369 delayed_refs = &trans->transaction->delayed_refs;
2370 while (1) {
2371 if (!locked_ref) {
2372 if (count >= nr)
2373 break;
2374
2375 spin_lock(&delayed_refs->lock);
2376 locked_ref = btrfs_select_ref_head(trans);
2377 if (!locked_ref) {
2378 spin_unlock(&delayed_refs->lock);
2379 break;
2380 }
2381
 /*
 * grab the lock that says we are going to process
 * all the refs for this head
 */
2384 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2385 spin_unlock(&delayed_refs->lock);
2386
 /*
 * we may have dropped the spin lock to get the head
 * mutex lock, and that might have given someone else
 * time to free the head. If that's true, it has been
 * removed from our list and we can move on.
 */
2392 if (ret == -EAGAIN) {
2393 locked_ref = NULL;
2394 count++;
2395 continue;
2396 }
2397 }
2398
 /*
 * We need to try and merge add/drops of the same ref since we
 * can run into issues with relocate dropping the implicit ref
 * and then it being added back again before the drop can
 * finish. If we merged anything we need to re-loop so we can
 * get a good ref.
 */
2406 spin_lock(&locked_ref->lock);
2407 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2408 locked_ref);
2409
 /*
 * locked_ref is the head node, so we have to go one
 * node back for any delayed ref updates
 */
2414 ref = select_delayed_ref(locked_ref);
2415
2416 if (ref && ref->seq &&
2417 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2418 spin_unlock(&locked_ref->lock);
2419 btrfs_delayed_ref_unlock(locked_ref);
2420 spin_lock(&delayed_refs->lock);
2421 locked_ref->processing = 0;
2422 delayed_refs->num_heads_ready++;
2423 spin_unlock(&delayed_refs->lock);
2424 locked_ref = NULL;
2425 cond_resched();
2426 count++;
2427 continue;
2428 }
2429

 /*
 * record the must_insert_reserved flag before we
 * drop the spin lock.
 */
2434 must_insert_reserved = locked_ref->must_insert_reserved;
2435 locked_ref->must_insert_reserved = 0;
2436
2437 extent_op = locked_ref->extent_op;
2438 locked_ref->extent_op = NULL;
2439
2440 if (!ref) {
 /*
 * All delayed refs have been processed, go ahead
 * and send the head node to run_one_delayed_ref,
 * so that any accounting fixes can happen
 */
2447 ref = &locked_ref->node;
2448
2449 if (extent_op && must_insert_reserved) {
2450 btrfs_free_delayed_extent_op(extent_op);
2451 extent_op = NULL;
2452 }
2453
2454 if (extent_op) {
2455 spin_unlock(&locked_ref->lock);
2456 ret = run_delayed_extent_op(trans, root,
2457 ref, extent_op);
2458 btrfs_free_delayed_extent_op(extent_op);
2459
2460 if (ret) {
 /*
 * Need to reset must_insert_reserved if
 * there was an error so the abort stuff
 * can cleanup the reserved space
 * properly.
 */
2467 if (must_insert_reserved)
2468 locked_ref->must_insert_reserved = 1;
2469 locked_ref->processing = 0;
2470 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2471 btrfs_delayed_ref_unlock(locked_ref);
2472 return ret;
2473 }
2474 continue;
2475 }
2476
 /*
 * Need to drop our head ref lock and re-acquire the
 * delayed ref lock and then re-check to make sure
 * nobody got added.
 */
2482 spin_unlock(&locked_ref->lock);
2483 spin_lock(&delayed_refs->lock);
2484 spin_lock(&locked_ref->lock);
2485 if (rb_first(&locked_ref->ref_root) ||
2486 locked_ref->extent_op) {
2487 spin_unlock(&locked_ref->lock);
2488 spin_unlock(&delayed_refs->lock);
2489 continue;
2490 }
2491 ref->in_tree = 0;
2492 delayed_refs->num_heads--;
2493 rb_erase(&locked_ref->href_node,
2494 &delayed_refs->href_root);
2495 spin_unlock(&delayed_refs->lock);
2496 } else {
2497 actual_count++;
2498 ref->in_tree = 0;
2499 rb_erase(&ref->rb_node, &locked_ref->ref_root);
2500 }
2501 atomic_dec(&delayed_refs->num_entries);
2502
2503 if (!btrfs_delayed_ref_is_head(ref)) {
 /*
 * when we play the delayed ref, also correct the
 * ref_mod on the head
 */
2508 switch (ref->action) {
2509 case BTRFS_ADD_DELAYED_REF:
2510 case BTRFS_ADD_DELAYED_EXTENT:
2511 locked_ref->node.ref_mod -= ref->ref_mod;
2512 break;
2513 case BTRFS_DROP_DELAYED_REF:
2514 locked_ref->node.ref_mod += ref->ref_mod;
2515 break;
2516 default:
2517 WARN_ON(1);
2518 }
2519 }
2520 spin_unlock(&locked_ref->lock);
2521
2522 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2523 must_insert_reserved);
2524
2525 btrfs_free_delayed_extent_op(extent_op);
2526 if (ret) {
2527 locked_ref->processing = 0;
2528 btrfs_delayed_ref_unlock(locked_ref);
2529 btrfs_put_delayed_ref(ref);
2530 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2531 return ret;
2532 }
2533
 /*
 * If this node is a head, that means all the refs in this head
 * have been dealt with, and we will pick the next head to deal
 * with, so we must unlock the head and drop it from the cluster
 * list before we release it.
 */
2540 if (btrfs_delayed_ref_is_head(ref)) {
2541 if (locked_ref->is_data &&
2542 locked_ref->total_ref_mod < 0) {
2543 spin_lock(&delayed_refs->lock);
2544 delayed_refs->pending_csums -= ref->num_bytes;
2545 spin_unlock(&delayed_refs->lock);
2546 }
2547 btrfs_delayed_ref_unlock(locked_ref);
2548 locked_ref = NULL;
2549 }
2550 btrfs_put_delayed_ref(ref);
2551 count++;
2552 cond_resched();
2553 }
2554
 /*
 * We don't want to include ref heads since we can have empty ref heads
 * and those will drastically skew our runtime down since we just do one
 * extent op in the end for the entire extent buffer we were able to
 * free anyway.
 */
2560 if (actual_count > 0) {
2561 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2562 u64 avg;
2563
 /*
 * We weigh the current average higher than our current runtime
 * to avoid large swings in the average.
 */
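 /*
 * Illustrative example (not from the original source): with a previous
 * average of 1,000,000ns and a run that took 2,000,000ns, the new value
 * is (3 * 1,000,000 + 2,000,000) >> 2 = 1,250,000ns, i.e. a 3:1
 * weighted mean that damps large swings.
 */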
2568 spin_lock(&delayed_refs->lock);
2569 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2570 fs_info->avg_delayed_ref_runtime = avg >> 2;
2571 spin_unlock(&delayed_refs->lock);
2572 }
2573 return 0;
2574}
2575
2576#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning.
 */
2582static u64 find_middle(struct rb_root *root)
2583{
2584 struct rb_node *n = root->rb_node;
2585 struct btrfs_delayed_ref_node *entry;
2586 int alt = 1;
2587 u64 middle;
2588 u64 first = 0, last = 0;
2589
2590 n = rb_first(root);
2591 if (n) {
2592 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2593 first = entry->bytenr;
2594 }
2595 n = rb_last(root);
2596 if (n) {
2597 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2598 last = entry->bytenr;
2599 }
2600 n = root->rb_node;
2601
2602 while (n) {
2603 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2604 WARN_ON(!entry->in_tree);
2605
2606 middle = entry->bytenr;
2607
2608 if (alt)
2609 n = n->rb_left;
2610 else
2611 n = n->rb_right;
2612
2613 alt = 1 - alt;
2614 }
2615 return middle;
2616}
2617#endif
2618
2619static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2620{
2621 u64 num_bytes;
2622
2623 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2624 sizeof(struct btrfs_extent_inline_ref));
2625 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2626 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2627
 /*
 * We don't ever fill leaves all the way, so the caller doubles this
 * estimate; here we only convert the worst-case item bytes into whole
 * leaves.
 */
2632 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2633}
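
/*
 * Worked example for heads_to_leaves() (illustrative, assuming the current
 * on-disk struct sizes): with skinny metadata each head contributes
 * sizeof(btrfs_extent_item) + sizeof(btrfs_extent_inline_ref) = 24 + 9 = 33
 * bytes, so roughly 490 heads fit in one 16KiB leaf's data area before the
 * caller's safety doubling kicks in.
 */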
2634
/*
 * Takes the number of bytes to be checksummed and figures out how many
 * leaves it would require to store the csums for that many bytes.
 */
2639u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
2640{
2641 u64 csum_size;
2642 u64 num_csums_per_leaf;
2643 u64 num_csums;
2644
2645 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2646 num_csums_per_leaf = div64_u64(csum_size,
2647 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2648 num_csums = div64_u64(csum_bytes, root->sectorsize);
2649 num_csums += num_csums_per_leaf - 1;
2650 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2651 return num_csums;
2652}
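
/*
 * Illustrative example of the math above: with 4KiB sectors and the default
 * 4-byte crc32c checksums, 1MiB of data needs 1MiB / 4KiB = 256 csums, and a
 * single 16KiB leaf holds roughly 4000 of them, so the rounded-up result is
 * one leaf.
 */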
2653
2654int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2655 struct btrfs_root *root)
2656{
2657 struct btrfs_block_rsv *global_rsv;
2658 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2659 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2660 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2661 u64 num_bytes, num_dirty_bgs_bytes;
2662 int ret = 0;
2663
2664 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
2665 num_heads = heads_to_leaves(root, num_heads);
2666 if (num_heads > 1)
2667 num_bytes += (num_heads - 1) * root->nodesize;
2668 num_bytes <<= 1;
2669 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
2670 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2671 num_dirty_bgs);
2672 global_rsv = &root->fs_info->global_block_rsv;
2673
 /*
 * If we can't allocate any more chunks lets make sure we have _lots_ of
 * wiggle room since running delayed refs can create more delayed refs.
 */
2678 if (global_rsv->space_info->full) {
2679 num_dirty_bgs_bytes <<= 1;
2680 num_bytes <<= 1;
2681 }
2682
2683 spin_lock(&global_rsv->lock);
2684 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2685 ret = 1;
2686 spin_unlock(&global_rsv->lock);
2687 return ret;
2688}
2689
2690int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2691 struct btrfs_root *root)
2692{
2693 struct btrfs_fs_info *fs_info = root->fs_info;
2694 u64 num_entries =
2695 atomic_read(&trans->transaction->delayed_refs.num_entries);
2696 u64 avg_runtime;
2697 u64 val;
2698
2699 smp_mb();
2700 avg_runtime = fs_info->avg_delayed_ref_runtime;
2701 val = num_entries * avg_runtime;
 if (val >= NSEC_PER_SEC)
2703 return 1;
2704 if (val >= NSEC_PER_SEC / 2)
2705 return 2;
2706
2707 return btrfs_check_space_for_delayed_refs(trans, root);
2708}
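
/*
 * Illustrative reading of the thresholds above: with an average cost of
 * 10,000ns per ref, a backlog of 100,000 entries projects to a full second
 * of work and returns 1 (throttle hard); half that backlog returns 2;
 * anything smaller falls through to the reservation check.
 */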
2709
2710struct async_delayed_refs {
2711 struct btrfs_root *root;
2712 int count;
2713 int error;
2714 int sync;
2715 struct completion wait;
2716 struct btrfs_work work;
2717};
2718
2719static void delayed_ref_async_start(struct btrfs_work *work)
2720{
2721 struct async_delayed_refs *async;
2722 struct btrfs_trans_handle *trans;
2723 int ret;
2724
2725 async = container_of(work, struct async_delayed_refs, work);
2726
2727 trans = btrfs_join_transaction(async->root);
2728 if (IS_ERR(trans)) {
2729 async->error = PTR_ERR(trans);
2730 goto done;
2731 }
2732
 /*
 * trans->sync means that when we call end_transaction, we won't
 * wait on delayed refs
 */
2737 trans->sync = true;
2738 ret = btrfs_run_delayed_refs(trans, async->root, async->count);
2739 if (ret)
2740 async->error = ret;
2741
2742 ret = btrfs_end_transaction(trans, async->root);
2743 if (ret && !async->error)
2744 async->error = ret;
2745done:
2746 if (async->sync)
2747 complete(&async->wait);
2748 else
2749 kfree(async);
2750}
2751
2752int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2753 unsigned long count, int wait)
2754{
2755 struct async_delayed_refs *async;
2756 int ret;
2757
2758 async = kmalloc(sizeof(*async), GFP_NOFS);
2759 if (!async)
2760 return -ENOMEM;
2761
2762 async->root = root->fs_info->tree_root;
2763 async->count = count;
2764 async->error = 0;
2765 if (wait)
2766 async->sync = 1;
2767 else
2768 async->sync = 0;
2769 init_completion(&async->wait);
2770
2771 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2772 delayed_ref_async_start, NULL, NULL);
2773
2774 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2775
2776 if (wait) {
2777 wait_for_completion(&async->wait);
2778 ret = async->error;
2779 kfree(async);
2780 return ret;
2781 }
2782 return 0;
2783}
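
/*
 * Usage sketch for btrfs_async_run_delayed_refs(): callers that need the
 * outcome pass wait == 1 and get the worker's error code back, e.g.
 *
 *	ret = btrfs_async_run_delayed_refs(root, 64, 1);
 *
 * Fire-and-forget callers pass wait == 0; the worker then frees the async
 * context itself and this function returns 0 unless the allocation above
 * fails.
 */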
2784
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction.
 * Returns <0 on error and aborts the transaction.
 */
2795int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2796 struct btrfs_root *root, unsigned long count)
2797{
2798 struct rb_node *node;
2799 struct btrfs_delayed_ref_root *delayed_refs;
2800 struct btrfs_delayed_ref_head *head;
2801 int ret;
2802 int run_all = count == (unsigned long)-1;
2803
 /* We'll clean this up in btrfs_cleanup_transaction */
2805 if (trans->aborted)
2806 return 0;
2807
2808 if (root == root->fs_info->extent_root)
2809 root = root->fs_info->tree_root;
2810
2811 delayed_refs = &trans->transaction->delayed_refs;
2812 if (count == 0)
2813 count = atomic_read(&delayed_refs->num_entries) * 2;
2814
2815again:
2816#ifdef SCRAMBLE_DELAYED_REFS
2817 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2818#endif
2819 ret = __btrfs_run_delayed_refs(trans, root, count);
2820 if (ret < 0) {
2821 btrfs_abort_transaction(trans, root, ret);
2822 return ret;
2823 }
2824
2825 if (run_all) {
2826 if (!list_empty(&trans->new_bgs))
2827 btrfs_create_pending_block_groups(trans, root);
2828
2829 spin_lock(&delayed_refs->lock);
2830 node = rb_first(&delayed_refs->href_root);
2831 if (!node) {
2832 spin_unlock(&delayed_refs->lock);
2833 goto out;
2834 }
2835 count = (unsigned long)-1;
2836
2837 while (node) {
2838 head = rb_entry(node, struct btrfs_delayed_ref_head,
2839 href_node);
2840 if (btrfs_delayed_ref_is_head(&head->node)) {
2841 struct btrfs_delayed_ref_node *ref;
2842
2843 ref = &head->node;
2844 atomic_inc(&ref->refs);
2845
2846 spin_unlock(&delayed_refs->lock);
2847
 /*
 * Mutex was contended, block until it's
 * released and try again
 */
2851 mutex_lock(&head->mutex);
2852 mutex_unlock(&head->mutex);
2853
2854 btrfs_put_delayed_ref(ref);
2855 cond_resched();
2856 goto again;
2857 } else {
2858 WARN_ON(1);
2859 }
2860 node = rb_next(node);
2861 }
2862 spin_unlock(&delayed_refs->lock);
2863 cond_resched();
2864 goto again;
2865 }
2866out:
2867 ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
2868 if (ret)
2869 return ret;
2870 assert_qgroups_uptodate(trans);
2871 return 0;
2872}
2873
2874int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 u64 bytenr, u64 num_bytes, u64 flags,
2877 int level, int is_data)
2878{
2879 struct btrfs_delayed_extent_op *extent_op;
2880 int ret;
2881
2882 extent_op = btrfs_alloc_delayed_extent_op();
2883 if (!extent_op)
2884 return -ENOMEM;
2885
2886 extent_op->flags_to_set = flags;
2887 extent_op->update_flags = 1;
2888 extent_op->update_key = 0;
2889 extent_op->is_data = is_data ? 1 : 0;
2890 extent_op->level = level;
2891
2892 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2893 num_bytes, extent_op);
2894 if (ret)
2895 btrfs_free_delayed_extent_op(extent_op);
2896 return ret;
2897}
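
/*
 * Usage sketch (mirrors how callers elsewhere in the tree use this): to mark
 * a tree block as carrying full backrefs without touching the extent tree
 * synchronously:
 *
 *	btrfs_set_disk_extent_flags(trans, root, buf->start, buf->len,
 *				    BTRFS_BLOCK_FLAG_FULL_BACKREF,
 *				    btrfs_header_level(buf), 0);
 *
 * The update is queued as a delayed extent op and applied later by
 * run_delayed_extent_op().
 */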
2898
2899static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2900 struct btrfs_root *root,
2901 struct btrfs_path *path,
2902 u64 objectid, u64 offset, u64 bytenr)
2903{
2904 struct btrfs_delayed_ref_head *head;
2905 struct btrfs_delayed_ref_node *ref;
2906 struct btrfs_delayed_data_ref *data_ref;
2907 struct btrfs_delayed_ref_root *delayed_refs;
2908 struct rb_node *node;
2909 int ret = 0;
2910
2911 delayed_refs = &trans->transaction->delayed_refs;
2912 spin_lock(&delayed_refs->lock);
2913 head = btrfs_find_delayed_ref_head(trans, bytenr);
2914 if (!head) {
2915 spin_unlock(&delayed_refs->lock);
2916 return 0;
2917 }
2918
2919 if (!mutex_trylock(&head->mutex)) {
2920 atomic_inc(&head->node.refs);
2921 spin_unlock(&delayed_refs->lock);
2922
2923 btrfs_release_path(path);
2924
 /*
 * Mutex was contended, block until it's released and let
 * the caller try again
 */
2929 mutex_lock(&head->mutex);
2930 mutex_unlock(&head->mutex);
2931 btrfs_put_delayed_ref(&head->node);
2932 return -EAGAIN;
2933 }
2934 spin_unlock(&delayed_refs->lock);
2935
2936 spin_lock(&head->lock);
2937 node = rb_first(&head->ref_root);
2938 while (node) {
2939 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2940 node = rb_next(node);
2941
 /* If it's a shared ref we know a cross reference exists */
2943 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2944 ret = 1;
2945 break;
2946 }
2947
2948 data_ref = btrfs_delayed_node_to_data_ref(ref);
2949
 /*
 * If our ref doesn't match the one we're currently looking at
 * then we have a cross reference.
 */
2954 if (data_ref->root != root->root_key.objectid ||
2955 data_ref->objectid != objectid ||
2956 data_ref->offset != offset) {
2957 ret = 1;
2958 break;
2959 }
2960 }
2961 spin_unlock(&head->lock);
2962 mutex_unlock(&head->mutex);
2963 return ret;
2964}
2965
2966static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct btrfs_path *path,
2969 u64 objectid, u64 offset, u64 bytenr)
2970{
2971 struct btrfs_root *extent_root = root->fs_info->extent_root;
2972 struct extent_buffer *leaf;
2973 struct btrfs_extent_data_ref *ref;
2974 struct btrfs_extent_inline_ref *iref;
2975 struct btrfs_extent_item *ei;
2976 struct btrfs_key key;
2977 u32 item_size;
2978 int ret;
2979
2980 key.objectid = bytenr;
2981 key.offset = (u64)-1;
2982 key.type = BTRFS_EXTENT_ITEM_KEY;
2983
2984 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2985 if (ret < 0)
2986 goto out;
2987 BUG_ON(ret == 0);
2988
2989 ret = -ENOENT;
2990 if (path->slots[0] == 0)
2991 goto out;
2992
2993 path->slots[0]--;
2994 leaf = path->nodes[0];
2995 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2996
2997 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2998 goto out;
2999
3000 ret = 1;
3001 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3002#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3003 if (item_size < sizeof(*ei)) {
3004 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3005 goto out;
3006 }
3007#endif
3008 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3009
3010 if (item_size != sizeof(*ei) +
3011 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3012 goto out;
3013
3014 if (btrfs_extent_generation(leaf, ei) <=
3015 btrfs_root_last_snapshot(&root->root_item))
3016 goto out;
3017
3018 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3019 if (btrfs_extent_inline_ref_type(leaf, iref) !=
3020 BTRFS_EXTENT_DATA_REF_KEY)
3021 goto out;
3022
3023 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3024 if (btrfs_extent_refs(leaf, ei) !=
3025 btrfs_extent_data_ref_count(leaf, ref) ||
3026 btrfs_extent_data_ref_root(leaf, ref) !=
3027 root->root_key.objectid ||
3028 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3029 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3030 goto out;
3031
3032 ret = 0;
3033out:
3034 return ret;
3035}
3036
3037int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
3038 struct btrfs_root *root,
3039 u64 objectid, u64 offset, u64 bytenr)
3040{
3041 struct btrfs_path *path;
3042 int ret;
3043 int ret2;
3044
3045 path = btrfs_alloc_path();
3046 if (!path)
 return -ENOMEM;
3048
3049 do {
3050 ret = check_committed_ref(trans, root, path, objectid,
3051 offset, bytenr);
3052 if (ret && ret != -ENOENT)
3053 goto out;
3054
3055 ret2 = check_delayed_ref(trans, root, path, objectid,
3056 offset, bytenr);
3057 } while (ret2 == -EAGAIN);
3058
3059 if (ret2 && ret2 != -ENOENT) {
3060 ret = ret2;
3061 goto out;
3062 }
3063
3064 if (ret != -ENOENT || ret2 != -ENOENT)
3065 ret = 0;
3066out:
3067 btrfs_free_path(path);
3068 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3069 WARN_ON(ret > 0);
3070 return ret;
3071}
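
/*
 * Summary of the return convention above: 0 means both the committed and the
 * delayed views agree the extent is referenced only by this
 * root/objectid/offset (safe to nocow), a positive value means another
 * reference exists or could not be ruled out, and a negative errno reports a
 * failure while looking.
 */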
3072
3073static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3074 struct btrfs_root *root,
3075 struct extent_buffer *buf,
3076 int full_backref, int inc)
3077{
3078 u64 bytenr;
3079 u64 num_bytes;
3080 u64 parent;
3081 u64 ref_root;
3082 u32 nritems;
3083 struct btrfs_key key;
3084 struct btrfs_file_extent_item *fi;
3085 int i;
3086 int level;
3087 int ret = 0;
3088 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
3089 u64, u64, u64, u64, u64, u64, int);
3090
3091
3092 if (btrfs_test_is_dummy_root(root))
3093 return 0;
3094
3095 ref_root = btrfs_header_owner(buf);
3096 nritems = btrfs_header_nritems(buf);
3097 level = btrfs_header_level(buf);
3098
3099 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3100 return 0;
3101
3102 if (inc)
3103 process_func = btrfs_inc_extent_ref;
3104 else
3105 process_func = btrfs_free_extent;
3106
3107 if (full_backref)
3108 parent = buf->start;
3109 else
3110 parent = 0;
3111
3112 for (i = 0; i < nritems; i++) {
3113 if (level == 0) {
3114 btrfs_item_key_to_cpu(buf, &key, i);
3115 if (key.type != BTRFS_EXTENT_DATA_KEY)
3116 continue;
3117 fi = btrfs_item_ptr(buf, i,
3118 struct btrfs_file_extent_item);
3119 if (btrfs_file_extent_type(buf, fi) ==
3120 BTRFS_FILE_EXTENT_INLINE)
3121 continue;
3122 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3123 if (bytenr == 0)
3124 continue;
3125
3126 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3127 key.offset -= btrfs_file_extent_offset(buf, fi);
3128 ret = process_func(trans, root, bytenr, num_bytes,
3129 parent, ref_root, key.objectid,
3130 key.offset, 1);
3131 if (ret)
3132 goto fail;
3133 } else {
3134 bytenr = btrfs_node_blockptr(buf, i);
3135 num_bytes = root->nodesize;
3136 ret = process_func(trans, root, bytenr, num_bytes,
3137 parent, ref_root, level - 1, 0,
3138 1);
3139 if (ret)
3140 goto fail;
3141 }
3142 }
3143 return 0;
3144fail:
3145 return ret;
3146}
3147
3148int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3149 struct extent_buffer *buf, int full_backref)
3150{
3151 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3152}
3153
3154int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3155 struct extent_buffer *buf, int full_backref)
3156{
3157 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3158}
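
/*
 * These two wrappers are used when a buffer changes ownership, e.g. during
 * COW: the new copy takes a reference on everything the buffer points to
 * with btrfs_inc_ref() and the old copy drops its references with
 * btrfs_dec_ref(). full_backref chooses whether the buffer's own bytenr
 * (shared backref) or 0 (resolve via the owning root) is passed as the
 * parent, matching the parent handling in __btrfs_mod_ref() above.
 */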
3159
3160static int write_one_cache_group(struct btrfs_trans_handle *trans,
3161 struct btrfs_root *root,
3162 struct btrfs_path *path,
3163 struct btrfs_block_group_cache *cache)
3164{
3165 int ret;
3166 struct btrfs_root *extent_root = root->fs_info->extent_root;
3167 unsigned long bi;
3168 struct extent_buffer *leaf;
3169
3170 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3171 if (ret) {
3172 if (ret > 0)
3173 ret = -ENOENT;
3174 goto fail;
3175 }
3176
3177 leaf = path->nodes[0];
3178 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3179 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3180 btrfs_mark_buffer_dirty(leaf);
3181fail:
3182 btrfs_release_path(path);
3183 return ret;
3185}
3186
3187static struct btrfs_block_group_cache *
3188next_block_group(struct btrfs_root *root,
3189 struct btrfs_block_group_cache *cache)
3190{
3191 struct rb_node *node;
3192
3193 spin_lock(&root->fs_info->block_group_cache_lock);
3194
 /* If our block group was removed, we need a full search. */
3196 if (RB_EMPTY_NODE(&cache->cache_node)) {
3197 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3198
3199 spin_unlock(&root->fs_info->block_group_cache_lock);
3200 btrfs_put_block_group(cache);
3201 cache = btrfs_lookup_first_block_group(root->fs_info,
3202 next_bytenr);
3203 return cache;
3204 }
3205 node = rb_next(&cache->cache_node);
3206 btrfs_put_block_group(cache);
3207 if (node) {
3208 cache = rb_entry(node, struct btrfs_block_group_cache,
3209 cache_node);
3210 btrfs_get_block_group(cache);
3211 } else
3212 cache = NULL;
3213 spin_unlock(&root->fs_info->block_group_cache_lock);
3214 return cache;
3215}
3216
3217static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3218 struct btrfs_trans_handle *trans,
3219 struct btrfs_path *path)
3220{
3221 struct btrfs_root *root = block_group->fs_info->tree_root;
3222 struct inode *inode = NULL;
3223 u64 alloc_hint = 0;
3224 int dcs = BTRFS_DC_ERROR;
3225 u64 num_pages = 0;
3226 int retries = 0;
3227 int ret = 0;
3228
 /*
 * If this block group is smaller than 100 megs don't bother caching the
 * block group.
 */
3233 if (block_group->key.offset < (100 * 1024 * 1024)) {
3234 spin_lock(&block_group->lock);
3235 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3236 spin_unlock(&block_group->lock);
3237 return 0;
3238 }
3239
3240 if (trans->aborted)
3241 return 0;
3242again:
3243 inode = lookup_free_space_inode(root, block_group, path);
3244 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3245 ret = PTR_ERR(inode);
3246 btrfs_release_path(path);
3247 goto out;
3248 }
3249
3250 if (IS_ERR(inode)) {
3251 BUG_ON(retries);
3252 retries++;
3253
3254 if (block_group->ro)
3255 goto out_free;
3256
3257 ret = create_free_space_inode(root, trans, block_group, path);
3258 if (ret)
3259 goto out_free;
3260 goto again;
3261 }
3262
 /* We've already set up this transaction, go ahead and exit */
3264 if (block_group->cache_generation == trans->transid &&
3265 i_size_read(inode)) {
3266 dcs = BTRFS_DC_SETUP;
3267 goto out_put;
3268 }
3269
 /*
 * We want to set the generation to 0, that way if anything goes wrong
 * from here on out we know not to trust this cache when we load up next
 * time.
 */
3275 BTRFS_I(inode)->generation = 0;
3276 ret = btrfs_update_inode(trans, root, inode);
3277 if (ret) {
 /*
 * So theoretically we could recover from this, simply set the
 * super cache generation to 0 so we know to invalidate the
 * cache, but then we'd have to keep track of the block groups
 * that fail this way so we know we _have_ to reset this cache
 * before the next commit or risk reading stale cache. So to
 * limit our exposure to horrible edge cases lets just abort the
 * transaction; this only happens in really bad situations
 * anyway.
 */
3288 btrfs_abort_transaction(trans, root, ret);
3289 goto out_put;
3290 }
3291 WARN_ON(ret);
3292
3293 if (i_size_read(inode) > 0) {
3294 ret = btrfs_check_trunc_cache_free_space(root,
3295 &root->fs_info->global_block_rsv);
3296 if (ret)
3297 goto out_put;
3298
3299 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
3300 if (ret)
3301 goto out_put;
3302 }
3303
3304 spin_lock(&block_group->lock);
3305 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3306 !btrfs_test_opt(root, SPACE_CACHE)) {
 /*
 * don't bother trying to write stuff out _if_
 * a) we're not cached, or
 * b) we're with the nospace_cache mount option.
 */
3312 dcs = BTRFS_DC_WRITTEN;
3313 spin_unlock(&block_group->lock);
3314 goto out_put;
3315 }
3316 spin_unlock(&block_group->lock);
3317
 /*
 * Try to preallocate enough space based on how big the block group is.
 * Keep in mind this has to include any pinned space which could end up
 * taking up quite a bit since it's not folded into the other space
 * cache.
 */
3324 num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
3325 if (!num_pages)
3326 num_pages = 1;
3327
3328 num_pages *= 16;
3329 num_pages *= PAGE_CACHE_SIZE;
3330
3331 ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
3332 if (ret)
3333 goto out_put;
3334
3335 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3336 num_pages, num_pages,
3337 &alloc_hint);
3338 if (!ret)
3339 dcs = BTRFS_DC_SETUP;
3340 btrfs_free_reserved_data_space(inode, num_pages);
3341
3342out_put:
3343 iput(inode);
3344out_free:
3345 btrfs_release_path(path);
3346out:
3347 spin_lock(&block_group->lock);
3348 if (!ret && dcs == BTRFS_DC_SETUP)
3349 block_group->cache_generation = trans->transid;
3350 block_group->disk_cache_state = dcs;
3351 spin_unlock(&block_group->lock);
3352
3353 return ret;
3354}
3355
3356int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3357 struct btrfs_root *root)
3358{
3359 struct btrfs_block_group_cache *cache, *tmp;
3360 struct btrfs_transaction *cur_trans = trans->transaction;
3361 struct btrfs_path *path;
3362
3363 if (list_empty(&cur_trans->dirty_bgs) ||
3364 !btrfs_test_opt(root, SPACE_CACHE))
3365 return 0;
3366
3367 path = btrfs_alloc_path();
3368 if (!path)
3369 return -ENOMEM;
3370
 /* Could add new block groups, use _safe just in case */
3372 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3373 dirty_list) {
3374 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3375 cache_save_setup(cache, trans, path);
3376 }
3377
3378 btrfs_free_path(path);
3379 return 0;
3380}
3381
/*
 * transaction commit does final block group cache writeback during a
 * critical section where nothing is allowed to change the FS.  This is
 * required in order for the cache to actually match the block group,
 * but can introduce a lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group
 * cache IO.  There's a chance we'll have to redo some of it if the
 * block group changes again during the commit, but it greatly reduces
 * the commit latency by getting rid of the easy block groups while
 * we're still allowing others to join the commit.
 */
3394int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3395 struct btrfs_root *root)
3396{
3397 struct btrfs_block_group_cache *cache;
3398 struct btrfs_transaction *cur_trans = trans->transaction;
3399 int ret = 0;
3400 int should_put;
3401 struct btrfs_path *path = NULL;
3402 LIST_HEAD(dirty);
3403 struct list_head *io = &cur_trans->io_bgs;
3404 int num_started = 0;
3405 int loops = 0;
3406
3407 spin_lock(&cur_trans->dirty_bgs_lock);
3408 if (list_empty(&cur_trans->dirty_bgs)) {
3409 spin_unlock(&cur_trans->dirty_bgs_lock);
3410 return 0;
3411 }
3412 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3413 spin_unlock(&cur_trans->dirty_bgs_lock);
3414
3415again:
 /*
 * make sure all the block groups on our dirty list actually
 * exist
 */
3420 btrfs_create_pending_block_groups(trans, root);
3421
3422 if (!path) {
3423 path = btrfs_alloc_path();
3424 if (!path)
3425 return -ENOMEM;
3426 }
3427
 /*
 * cache_write_mutex is here only to save us from balance or automatic
 * removal of empty block groups deleting this block group while we are
 * writing out the cache
 */
3433 mutex_lock(&trans->transaction->cache_write_mutex);
3434 while (!list_empty(&dirty)) {
3435 cache = list_first_entry(&dirty,
3436 struct btrfs_block_group_cache,
3437 dirty_list);
3438
 /*
 * this can happen if something re-dirties a block
 * group that is already under IO. Just wait for it to
 * finish and then do it all again
 */
3443 if (!list_empty(&cache->io_list)) {
3444 list_del_init(&cache->io_list);
3445 btrfs_wait_cache_io(root, trans, cache,
3446 &cache->io_ctl, path,
3447 cache->key.objectid);
3448 btrfs_put_block_group(cache);
3449 }
3450
 /*
 * btrfs_wait_cache_io uses the cache->dirty_list to decide
 * if it should update the cache_state. Don't delete
 * until after we wait.
 *
 * Since we're not running in the commit critical section
 * we need the dirty_bgs_lock to protect from update_block_group
 */
3460 spin_lock(&cur_trans->dirty_bgs_lock);
3461 list_del_init(&cache->dirty_list);
3462 spin_unlock(&cur_trans->dirty_bgs_lock);
3463
3464 should_put = 1;
3465
3466 cache_save_setup(cache, trans, path);
3467
3468 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3469 cache->io_ctl.inode = NULL;
3470 ret = btrfs_write_out_cache(root, trans, cache, path);
3471 if (ret == 0 && cache->io_ctl.inode) {
3472 num_started++;
3473 should_put = 0;
3474
 /*
 * the cache_write_mutex is protecting
 * the io_list
 */
3479 list_add_tail(&cache->io_list, io);
3480 } else {
 /*
 * if we failed to write the cache, the
 * generation will be bad and life goes on
 */
3485 ret = 0;
3486 }
3487 }
3488 if (!ret) {
3489 ret = write_one_cache_group(trans, root, path, cache);
 /*
 * Our block group might still be attached to the list
 * of new block groups in the transaction handle of some
 * other task (struct btrfs_trans_handle->new_bgs). This
 * means its block group item isn't yet in the extent
 * tree. If this happens ignore the error, as we will
 * try again later in the critical section of the
 * transaction commit.
 */
3499 if (ret == -ENOENT) {
3500 ret = 0;
3501 spin_lock(&cur_trans->dirty_bgs_lock);
3502 if (list_empty(&cache->dirty_list)) {
3503 list_add_tail(&cache->dirty_list,
3504 &cur_trans->dirty_bgs);
3505 btrfs_get_block_group(cache);
3506 }
3507 spin_unlock(&cur_trans->dirty_bgs_lock);
3508 } else if (ret) {
3509 btrfs_abort_transaction(trans, root, ret);
3510 }
3511 }
3512
 /* if it's not on the io list, we need to put the block group */
3514 if (should_put)
3515 btrfs_put_block_group(cache);
3516
3517 if (ret)
3518 break;
3519
3520
 /*
 * Avoid blocking other tasks for too long. It might even save
 * us from writing caches for block groups that are going to be
 * removed.
 */
3525 mutex_unlock(&trans->transaction->cache_write_mutex);
3526 mutex_lock(&trans->transaction->cache_write_mutex);
3527 }
3528 mutex_unlock(&trans->transaction->cache_write_mutex);
3529
 /*
 * go through delayed refs for all the stuff we've just kicked off
 * and then loop back (just once)
 */
3534 ret = btrfs_run_delayed_refs(trans, root, 0);
3535 if (!ret && loops == 0) {
3536 loops++;
3537 spin_lock(&cur_trans->dirty_bgs_lock);
3538 list_splice_init(&cur_trans->dirty_bgs, &dirty);
 /*
 * dirty_bgs_lock protects us from concurrent block group
 * deletes too (not just cache_write_mutex).
 */
3543 if (!list_empty(&dirty)) {
3544 spin_unlock(&cur_trans->dirty_bgs_lock);
3545 goto again;
3546 }
3547 spin_unlock(&cur_trans->dirty_bgs_lock);
3548 }
3549
3550 btrfs_free_path(path);
3551 return ret;
3552}
3553
3554int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3555 struct btrfs_root *root)
3556{
3557 struct btrfs_block_group_cache *cache;
3558 struct btrfs_transaction *cur_trans = trans->transaction;
3559 int ret = 0;
3560 int should_put;
3561 struct btrfs_path *path;
3562 struct list_head *io = &cur_trans->io_bgs;
3563 int num_started = 0;
3564
3565 path = btrfs_alloc_path();
3566 if (!path)
3567 return -ENOMEM;
3568
 /*
 * We don't need the lock here since we are protected by the transaction
 * commit. We want to do the cache_save_setup first and then run the
 * delayed refs to make sure we have the best chance at doing this all
 * in one shot.
 */
3575 while (!list_empty(&cur_trans->dirty_bgs)) {
3576 cache = list_first_entry(&cur_trans->dirty_bgs,
3577 struct btrfs_block_group_cache,
3578 dirty_list);
3579
 /*
 * this can happen if cache_save_setup re-dirties a block
 * group that is already under IO. Just wait for it to
 * finish and then do it all again
 */
3585 if (!list_empty(&cache->io_list)) {
3586 list_del_init(&cache->io_list);
3587 btrfs_wait_cache_io(root, trans, cache,
3588 &cache->io_ctl, path,
3589 cache->key.objectid);
3590 btrfs_put_block_group(cache);
3591 }
3592
 /*
 * don't remove from the dirty list until after we've waited
 * on any pending IO
 */
3597 list_del_init(&cache->dirty_list);
3598 should_put = 1;
3599
3600 cache_save_setup(cache, trans, path);
3601
3602 if (!ret)
3603 ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
3604
3605 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3606 cache->io_ctl.inode = NULL;
3607 ret = btrfs_write_out_cache(root, trans, cache, path);
3608 if (ret == 0 && cache->io_ctl.inode) {
3609 num_started++;
3610 should_put = 0;
3611 list_add_tail(&cache->io_list, io);
3612 } else {
3613
3614
3615
3616
3617 ret = 0;
3618 }
3619 }
3620 if (!ret) {
3621 ret = write_one_cache_group(trans, root, path, cache);
3622 if (ret)
3623 btrfs_abort_transaction(trans, root, ret);
3624 }
3625
 /* if it's not on the io list, we need to put the block group */
3627 if (should_put)
3628 btrfs_put_block_group(cache);
3629 }
3630
3631 while (!list_empty(io)) {
3632 cache = list_first_entry(io, struct btrfs_block_group_cache,
3633 io_list);
3634 list_del_init(&cache->io_list);
3635 btrfs_wait_cache_io(root, trans, cache,
3636 &cache->io_ctl, path, cache->key.objectid);
3637 btrfs_put_block_group(cache);
3638 }
3639
3640 btrfs_free_path(path);
3641 return ret;
3642}
3643
3644int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3645{
3646 struct btrfs_block_group_cache *block_group;
3647 int readonly = 0;
3648
3649 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3650 if (!block_group || block_group->ro)
3651 readonly = 1;
3652 if (block_group)
3653 btrfs_put_block_group(block_group);
3654 return readonly;
3655}
3656
3657static const char *alloc_name(u64 flags)
3658{
3659 switch (flags) {
3660 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3661 return "mixed";
3662 case BTRFS_BLOCK_GROUP_METADATA:
3663 return "metadata";
3664 case BTRFS_BLOCK_GROUP_DATA:
3665 return "data";
3666 case BTRFS_BLOCK_GROUP_SYSTEM:
3667 return "system";
3668 default:
3669 WARN_ON(1);
3670 return "invalid-combination";
 }
3672}
3673
3674static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3675 u64 total_bytes, u64 bytes_used,
3676 struct btrfs_space_info **space_info)
3677{
3678 struct btrfs_space_info *found;
3679 int i;
3680 int factor;
3681 int ret;
3682
3683 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3684 BTRFS_BLOCK_GROUP_RAID10))
3685 factor = 2;
3686 else
3687 factor = 1;
3688
3689 found = __find_space_info(info, flags);
3690 if (found) {
3691 spin_lock(&found->lock);
3692 found->total_bytes += total_bytes;
3693 found->disk_total += total_bytes * factor;
3694 found->bytes_used += bytes_used;
3695 found->disk_used += bytes_used * factor;
3696 found->full = 0;
3697 spin_unlock(&found->lock);
3698 *space_info = found;
3699 return 0;
3700 }
3701 found = kzalloc(sizeof(*found), GFP_NOFS);
3702 if (!found)
3703 return -ENOMEM;
3704
3705 ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
3706 if (ret) {
3707 kfree(found);
3708 return ret;
3709 }
3710
3711 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3712 INIT_LIST_HEAD(&found->block_groups[i]);
3713 init_rwsem(&found->groups_sem);
3714 spin_lock_init(&found->lock);
3715 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3716 found->total_bytes = total_bytes;
3717 found->disk_total = total_bytes * factor;
3718 found->bytes_used = bytes_used;
3719 found->disk_used = bytes_used * factor;
3720 found->bytes_pinned = 0;
3721 found->bytes_reserved = 0;
3722 found->bytes_readonly = 0;
3723 found->bytes_may_use = 0;
3724 found->full = 0;
3725 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3726 found->chunk_alloc = 0;
3727 found->flush = 0;
3728 init_waitqueue_head(&found->wait);
3729 INIT_LIST_HEAD(&found->ro_bgs);
3730
3731 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3732 info->space_info_kobj, "%s",
3733 alloc_name(found->flags));
3734 if (ret) {
3735 kfree(found);
3736 return ret;
3737 }
3738
3739 *space_info = found;
3740 list_add_rcu(&found->list, &info->space_info);
3741 if (flags & BTRFS_BLOCK_GROUP_DATA)
3742 info->data_sinfo = found;
3743
3744 return ret;
3745}
3746
3747static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3748{
3749 u64 extra_flags = chunk_to_extended(flags) &
3750 BTRFS_EXTENDED_PROFILE_MASK;
3751
3752 write_seqlock(&fs_info->profiles_lock);
3753 if (flags & BTRFS_BLOCK_GROUP_DATA)
3754 fs_info->avail_data_alloc_bits |= extra_flags;
3755 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3756 fs_info->avail_metadata_alloc_bits |= extra_flags;
3757 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3758 fs_info->avail_system_alloc_bits |= extra_flags;
3759 write_sequnlock(&fs_info->profiles_lock);
3760}
3761
/*
 * returns target flags in extended format or 0 if restripe for this
 * chunk_type is not in progress
 *
 * should be called with either volume_mutex or balance_lock held
 */
3768static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3769{
3770 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3771 u64 target = 0;
3772
3773 if (!bctl)
3774 return 0;
3775
3776 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3777 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3778 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3779 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3780 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3781 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3782 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3783 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3784 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3785 }
3786
3787 return target;
3788}
3789
/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
3797static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3798{
3799 u64 num_devices = root->fs_info->fs_devices->rw_devices;
3800 u64 target;
3801 u64 tmp;
3802
 /*
 * see if restripe for this chunk_type is in progress, if so
 * try to reduce to the target profile
 */
3807 spin_lock(&root->fs_info->balance_lock);
3808 target = get_restripe_target(root->fs_info, flags);
3809 if (target) {
 /* pick the target profile only if it's already available */
3811 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3812 spin_unlock(&root->fs_info->balance_lock);
3813 return extended_to_chunk(target);
3814 }
3815 }
3816 spin_unlock(&root->fs_info->balance_lock);
3817
 /* First, mask out the RAID levels which aren't possible */
3819 if (num_devices == 1)
3820 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
3821 BTRFS_BLOCK_GROUP_RAID5);
3822 if (num_devices < 3)
3823 flags &= ~BTRFS_BLOCK_GROUP_RAID6;
3824 if (num_devices < 4)
3825 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
3826
3827 tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
3828 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
3829 BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
3830 flags &= ~tmp;
3831
3832 if (tmp & BTRFS_BLOCK_GROUP_RAID6)
3833 tmp = BTRFS_BLOCK_GROUP_RAID6;
3834 else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
3835 tmp = BTRFS_BLOCK_GROUP_RAID5;
3836 else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
3837 tmp = BTRFS_BLOCK_GROUP_RAID10;
3838 else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
3839 tmp = BTRFS_BLOCK_GROUP_RAID1;
3840 else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
3841 tmp = BTRFS_BLOCK_GROUP_RAID0;
3842
3843 return extended_to_chunk(flags | tmp);
3844}
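
/*
 * Worked example (illustrative): flags = DATA | RAID0 | RAID1 on a
 * two-device filesystem. RAID6 and RAID10 are masked out for lack of
 * devices, and RAID1 outranks RAID0 in the selection ladder above, so the
 * result is the chunk-format encoding of DATA | RAID1.
 */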
3845
3846static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
3847{
3848 unsigned seq;
3849 u64 flags;
3850
3851 do {
3852 flags = orig_flags;
3853 seq = read_seqbegin(&root->fs_info->profiles_lock);
3854
3855 if (flags & BTRFS_BLOCK_GROUP_DATA)
3856 flags |= root->fs_info->avail_data_alloc_bits;
3857 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3858 flags |= root->fs_info->avail_system_alloc_bits;
3859 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3860 flags |= root->fs_info->avail_metadata_alloc_bits;
3861 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
3862
3863 return btrfs_reduce_alloc_profile(root, flags);
3864}
3865
3866u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3867{
3868 u64 flags;
3869 u64 ret;
3870
3871 if (data)
3872 flags = BTRFS_BLOCK_GROUP_DATA;
3873 else if (root == root->fs_info->chunk_root)
3874 flags = BTRFS_BLOCK_GROUP_SYSTEM;
3875 else
3876 flags = BTRFS_BLOCK_GROUP_METADATA;
3877
3878 ret = get_alloc_profile(root, flags);
3879 return ret;
3880}
3881
/*
 * This will check the space that the inode allocates from to make sure we
 * have enough space for bytes.
 */
3886int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
3887{
3888 struct btrfs_space_info *data_sinfo;
3889 struct btrfs_root *root = BTRFS_I(inode)->root;
3890 struct btrfs_fs_info *fs_info = root->fs_info;
3891 u64 used;
3892 int ret = 0;
3893 int need_commit = 2;
3894 int have_pinned_space;
3895
 /* make sure bytes are sectorsize aligned */
3897 bytes = ALIGN(bytes, root->sectorsize);
3898
3899 if (btrfs_is_free_space_inode(inode)) {
3900 need_commit = 0;
3901 ASSERT(current->journal_info);
3902 }
3903
3904 data_sinfo = fs_info->data_sinfo;
3905 if (!data_sinfo)
3906 goto alloc;
3907
3908again:
 /* make sure we have enough space to handle the data first */
3910 spin_lock(&data_sinfo->lock);
3911 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3912 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3913 data_sinfo->bytes_may_use;
3914
3915 if (used + bytes > data_sinfo->total_bytes) {
3916 struct btrfs_trans_handle *trans;
3917
 /*
 * if we don't have enough free bytes in this space then we need
 * to alloc a new chunk.
 */
3922 if (!data_sinfo->full) {
3923 u64 alloc_target;
3924
3925 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3926 spin_unlock(&data_sinfo->lock);
3927alloc:
3928 alloc_target = btrfs_get_alloc_profile(root, 1);
3929
 /*
 * It is ugly that we don't call a nolock join
 * transaction for the free space inode case here.
 * But it is safe because we only do the data space
 * reservation for the free space cache in the
 * transaction context; the common join transaction
 * just increases the counter of the current
 * transaction handle and doesn't try to acquire the
 * trans_lock of the fs.
 */
3939 trans = btrfs_join_transaction(root);
3940 if (IS_ERR(trans))
3941 return PTR_ERR(trans);
3942
3943 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3944 alloc_target,
3945 CHUNK_ALLOC_NO_FORCE);
3946 btrfs_end_transaction(trans, root);
3947 if (ret < 0) {
3948 if (ret != -ENOSPC)
3949 return ret;
3950 else {
3951 have_pinned_space = 1;
3952 goto commit_trans;
3953 }
3954 }
3955
3956 if (!data_sinfo)
3957 data_sinfo = fs_info->data_sinfo;
3958
3959 goto again;
3960 }
3961
 /*
 * If we don't have enough pinned space to deal with this
 * allocation, and no removed chunk in the current transaction,
 * don't bother committing the transaction.
 */
3967 have_pinned_space = percpu_counter_compare(
3968 &data_sinfo->total_bytes_pinned,
3969 used + bytes - data_sinfo->total_bytes);
3970 spin_unlock(&data_sinfo->lock);
3971
 /* commit the current transaction and try again */
3973commit_trans:
3974 if (need_commit &&
3975 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3976 need_commit--;
3977
3978 trans = btrfs_join_transaction(root);
3979 if (IS_ERR(trans))
3980 return PTR_ERR(trans);
3981 if (have_pinned_space >= 0 ||
3982 trans->transaction->have_free_bgs ||
3983 need_commit > 0) {
3984 ret = btrfs_commit_transaction(trans, root);
3985 if (ret)
3986 return ret;
3987
 /*
 * The cleaner kthread might still be doing iput
 * operations. Wait for it to finish so that
 * more space is released.
 */
3991 down_write(&root->fs_info->delayed_iput_sem);
3992 up_write(&root->fs_info->delayed_iput_sem);
3993 goto again;
3994 } else {
3995 btrfs_end_transaction(trans, root);
3996 }
3997 }
3998
3999 trace_btrfs_space_reservation(root->fs_info,
4000 "space_info:enospc",
4001 data_sinfo->flags, bytes, 1);
4002 return -ENOSPC;
4003 }
4004 ret = btrfs_qgroup_reserve(root, write_bytes);
4005 if (ret)
4006 goto out;
4007 data_sinfo->bytes_may_use += bytes;
4008 trace_btrfs_space_reservation(root->fs_info, "space_info",
4009 data_sinfo->flags, bytes, 1);
4010out:
4011 spin_unlock(&data_sinfo->lock);
4012
4013 return ret;
4014}
4015
/*
 * Called if we need to clear a data reservation for this inode.
 */
4019void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
4020{
4021 struct btrfs_root *root = BTRFS_I(inode)->root;
4022 struct btrfs_space_info *data_sinfo;
4023
 /* make sure bytes are sectorsize aligned */
4025 bytes = ALIGN(bytes, root->sectorsize);
4026
4027 data_sinfo = root->fs_info->data_sinfo;
4028 spin_lock(&data_sinfo->lock);
4029 WARN_ON(data_sinfo->bytes_may_use < bytes);
4030 data_sinfo->bytes_may_use -= bytes;
4031 trace_btrfs_space_reservation(root->fs_info, "space_info",
4032 data_sinfo->flags, bytes, 0);
4033 spin_unlock(&data_sinfo->lock);
4034}
4035
4036static void force_metadata_allocation(struct btrfs_fs_info *info)
4037{
4038 struct list_head *head = &info->space_info;
4039 struct btrfs_space_info *found;
4040
4041 rcu_read_lock();
4042 list_for_each_entry_rcu(found, head, list) {
4043 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4044 found->force_alloc = CHUNK_ALLOC_FORCE;
4045 }
4046 rcu_read_unlock();
4047}
4048
4049static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4050{
4051 return (global->size << 1);
4052}
4053
4054static int should_alloc_chunk(struct btrfs_root *root,
4055 struct btrfs_space_info *sinfo, int force)
4056{
4057 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4058 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
4059 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
4060 u64 thresh;
4061
4062 if (force == CHUNK_ALLOC_FORCE)
4063 return 1;
4064
 /*
 * We need to take into account the global rsv because for all intents
 * and purposes it's used space. Don't worry about locking the
 * global_rsv, it doesn't change except when the transaction commits.
 */
4070 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
4071 num_allocated += calc_global_rsv_need_space(global_rsv);
4072
 /*
 * in limited mode, we want to have some free space up to
 * about 1% of the FS size.
 */
4077 if (force == CHUNK_ALLOC_LIMITED) {
4078 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
4079 thresh = max_t(u64, 64 * 1024 * 1024,
4080 div_factor_fine(thresh, 1));
4081
4082 if (num_bytes - num_allocated < thresh)
4083 return 1;
4084 }
4085
4086 if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
4087 return 0;
4088 return 1;
4089}
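
/*
 * Illustrative numbers for the CHUNK_ALLOC_LIMITED branch above: on a 1TiB
 * filesystem the threshold is max(64MiB, 1% of 1TiB), about 10GiB, so a new
 * chunk is allocated once this space_info has less than that much unused
 * room left in its existing chunks.
 */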
4090
4091static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
4092{
4093 u64 num_dev;
4094
4095 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4096 BTRFS_BLOCK_GROUP_RAID0 |
4097 BTRFS_BLOCK_GROUP_RAID5 |
4098 BTRFS_BLOCK_GROUP_RAID6))
4099 num_dev = root->fs_info->fs_devices->rw_devices;
4100 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4101 num_dev = 2;
4102 else
4103 num_dev = 1;
4104
 /* metadata for updating devices and the chunk tree */
4106 return btrfs_calc_trans_metadata_size(root, num_dev + 1);
4107}
4108
4109static void check_system_chunk(struct btrfs_trans_handle *trans,
4110 struct btrfs_root *root, u64 type)
4111{
4112 struct btrfs_space_info *info;
4113 u64 left;
4114 u64 thresh;
4115
4116 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4117 spin_lock(&info->lock);
4118 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
4119 info->bytes_reserved - info->bytes_readonly;
4120 spin_unlock(&info->lock);
4121
4122 thresh = get_system_chunk_thresh(root, type);
4123 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
4124 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
4125 left, thresh, type);
4126 dump_space_info(info, 0, 0);
4127 }
4128
4129 if (left < thresh) {
4130 u64 flags;
4131
4132 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
4133 btrfs_alloc_chunk(trans, root, flags);
4134 }
4135}
4136
4137static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4138 struct btrfs_root *extent_root, u64 flags, int force)
4139{
4140 struct btrfs_space_info *space_info;
4141 struct btrfs_fs_info *fs_info = extent_root->fs_info;
4142 int wait_for_alloc = 0;
4143 int ret = 0;
4144
 /* Don't re-enter if we're already allocating a chunk */
4146 if (trans->allocating_chunk)
4147 return -ENOSPC;
4148
4149 space_info = __find_space_info(extent_root->fs_info, flags);
4150 if (!space_info) {
4151 ret = update_space_info(extent_root->fs_info, flags,
4152 0, 0, &space_info);
4153 BUG_ON(ret);
4154 }
4155 BUG_ON(!space_info);
4156
4157again:
4158 spin_lock(&space_info->lock);
4159 if (force < space_info->force_alloc)
4160 force = space_info->force_alloc;
4161 if (space_info->full) {
4162 if (should_alloc_chunk(extent_root, space_info, force))
4163 ret = -ENOSPC;
4164 else
4165 ret = 0;
4166 spin_unlock(&space_info->lock);
4167 return ret;
4168 }
4169
4170 if (!should_alloc_chunk(extent_root, space_info, force)) {
4171 spin_unlock(&space_info->lock);
4172 return 0;
4173 } else if (space_info->chunk_alloc) {
4174 wait_for_alloc = 1;
4175 } else {
4176 space_info->chunk_alloc = 1;
4177 }
4178
4179 spin_unlock(&space_info->lock);
4180
4181 mutex_lock(&fs_info->chunk_mutex);
4182
 /*
 * The chunk_mutex is held throughout the entirety of a chunk
 * allocation, so once we've acquired the chunk_mutex we know that the
 * other guy is done and we need to recheck and see if we should
 * allocate.
 */
4189 if (wait_for_alloc) {
4190 mutex_unlock(&fs_info->chunk_mutex);
4191 wait_for_alloc = 0;
4192 goto again;
4193 }
4194
4195 trans->allocating_chunk = true;
4196
 /*
 * If we have mixed data/metadata chunks we want to make sure we keep
 * allocating mixed chunks instead of individual chunks.
 */
4201 if (btrfs_mixed_space_info(space_info))
4202 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4203
 /*
 * if we're doing a data chunk, go ahead and make sure that
 * we keep a reasonable number of metadata chunks allocated in the
 * FS as well.
 */
4209 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
4210 fs_info->data_chunk_allocations++;
4211 if (!(fs_info->data_chunk_allocations %
4212 fs_info->metadata_ratio))
4213 force_metadata_allocation(fs_info);
4214 }
4215
 /*
 * Check if we have enough space in the SYSTEM chunk because we may
 * need to update devices.
 */
4220 check_system_chunk(trans, extent_root, flags);
4221
4222 ret = btrfs_alloc_chunk(trans, extent_root, flags);
4223 trans->allocating_chunk = false;
4224
4225 spin_lock(&space_info->lock);
4226 if (ret < 0 && ret != -ENOSPC)
4227 goto out;
4228 if (ret)
4229 space_info->full = 1;
4230 else
4231 ret = 1;
4232
4233 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4234out:
4235 space_info->chunk_alloc = 0;
4236 spin_unlock(&space_info->lock);
4237 mutex_unlock(&fs_info->chunk_mutex);
4238 return ret;
4239}
4240
4241static int can_overcommit(struct btrfs_root *root,
4242 struct btrfs_space_info *space_info, u64 bytes,
4243 enum btrfs_reserve_flush_enum flush)
4244{
4245 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4246 u64 profile = btrfs_get_alloc_profile(root, 0);
4247 u64 space_size;
4248 u64 avail;
4249 u64 used;
4250
4251 used = space_info->bytes_used + space_info->bytes_reserved +
4252 space_info->bytes_pinned + space_info->bytes_readonly;
4253
 /*
 * We only want to allow overcommitting if we have lots of actual space
 * free, but if we don't have enough space to handle the global reserve
 * space then we could end up having a real enospc problem when trying
 * to allocate a chunk or some other such important allocation.
 */
4260 spin_lock(&global_rsv->lock);
4261 space_size = calc_global_rsv_need_space(global_rsv);
4262 spin_unlock(&global_rsv->lock);
4263 if (used + space_size >= space_info->total_bytes)
4264 return 0;
4265
4266 used += space_info->bytes_may_use;
4267
4268 spin_lock(&root->fs_info->free_chunk_lock);
4269 avail = root->fs_info->free_chunk_space;
4270 spin_unlock(&root->fs_info->free_chunk_lock);
4271
 /*
 * If we have dup, raid1 or raid10 then only half of the free
 * space is actually usable. For raid56, the space info used
 * doesn't include the parity drive, so we don't have to
 * change the math.
 */
4278 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4279 BTRFS_BLOCK_GROUP_RAID1 |
4280 BTRFS_BLOCK_GROUP_RAID10))
4281 avail >>= 1;
4282
 /*
 * If we aren't flushing all things, let us overcommit up to
 * 1/2 of the space. If we can flush, don't let us overcommit
 * too much, let it overcommit up to 1/8 of the space.
 */
4288 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4289 avail >>= 3;
4290 else
4291 avail >>= 1;
4292
4293 if (used + bytes < space_info->total_bytes + avail)
4294 return 1;
4295 return 0;
4296}
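
/*
 * Illustrative example: a RAID1 metadata profile with 80GiB of unallocated
 * device space. avail is halved to 40GiB for the mirroring, then shifted
 * right by 3 under BTRFS_RESERVE_FLUSH_ALL, allowing 5GiB of overcommit
 * beyond space_info->total_bytes (the other flush modes shift by 1,
 * allowing 20GiB).
 */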
4297
4298static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
4299 unsigned long nr_pages, int nr_items)
4300{
4301 struct super_block *sb = root->fs_info->sb;
4302
4303 if (down_read_trylock(&sb->s_umount)) {
4304 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4305 up_read(&sb->s_umount);
4306 } else {
 /*
 * We needn't worry about the filesystem going from r/w to r/o
 * though we don't acquire the ->s_umount mutex, because the
 * filesystem should guarantee the delalloc inodes list is empty
 * after the filesystem becomes readonly (all dirty pages are
 * written to the disk).
 */
4314 btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
4315 if (!current->journal_info)
4316 btrfs_wait_ordered_roots(root->fs_info, nr_items);
4317 }
4318}
4319
4320static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
4321{
4322 u64 bytes;
4323 int nr;
4324
4325 bytes = btrfs_calc_trans_metadata_size(root, 1);
4326 nr = (int)div64_u64(to_reclaim, bytes);
4327 if (!nr)
4328 nr = 1;
4329 return nr;
4330}
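
/*
 * Illustrative example (assuming the usual nodesize * 2 * BTRFS_MAX_LEVEL
 * per-item estimate in btrfs_calc_trans_metadata_size()): with 16KiB nodes
 * one item reserves 2 * 8 * 16KiB = 256KiB, so asking to reclaim 1MiB maps
 * to 4 items, and we never return fewer than 1.
 */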
4331
4332#define EXTENT_SIZE_PER_ITEM (256 * 1024)
4333
/*
 * shrink metadata reservation for delalloc
 */
4337static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4338 bool wait_ordered)
4339{
4340 struct btrfs_block_rsv *block_rsv;
4341 struct btrfs_space_info *space_info;
4342 struct btrfs_trans_handle *trans;
4343 u64 delalloc_bytes;
4344 u64 max_reclaim;
4345 long time_left;
4346 unsigned long nr_pages;
4347 int loops;
4348 int items;
4349 enum btrfs_reserve_flush_enum flush;
4350
 /* Calc the number of items we need to flush for this space reservation */
4352 items = calc_reclaim_items_nr(root, to_reclaim);
4353 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4354
4355 trans = (struct btrfs_trans_handle *)current->journal_info;
4356 block_rsv = &root->fs_info->delalloc_block_rsv;
4357 space_info = block_rsv->space_info;
4358
4359 delalloc_bytes = percpu_counter_sum_positive(
4360 &root->fs_info->delalloc_bytes);
4361 if (delalloc_bytes == 0) {
4362 if (trans)
4363 return;
4364 if (wait_ordered)
4365 btrfs_wait_ordered_roots(root->fs_info, items);
4366 return;
4367 }
4368
4369 loops = 0;
4370 while (delalloc_bytes && loops < 3) {
4371 max_reclaim = min(delalloc_bytes, to_reclaim);
4372 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
4373 btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
4374
 /*
 * We need to wait for the async pages to actually start before
 * we do anything.
 */
4378 max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
4379 if (!max_reclaim)
4380 goto skip_async;
4381
4382 if (max_reclaim <= nr_pages)
4383 max_reclaim = 0;
4384 else
4385 max_reclaim -= nr_pages;
4386
4387 wait_event(root->fs_info->async_submit_wait,
4388 atomic_read(&root->fs_info->async_delalloc_pages) <=
4389 (int)max_reclaim);
4390skip_async:
4391 if (!trans)
4392 flush = BTRFS_RESERVE_FLUSH_ALL;
4393 else
4394 flush = BTRFS_RESERVE_NO_FLUSH;
4395 spin_lock(&space_info->lock);
4396 if (can_overcommit(root, space_info, orig, flush)) {
4397 spin_unlock(&space_info->lock);
4398 break;
4399 }
4400 spin_unlock(&space_info->lock);
4401
4402 loops++;
4403 if (wait_ordered && !trans) {
4404 btrfs_wait_ordered_roots(root->fs_info, items);
4405 } else {
4406 time_left = schedule_timeout_killable(1);
4407 if (time_left)
4408 break;
4409 }
4410 delalloc_bytes = percpu_counter_sum_positive(
4411 &root->fs_info->delalloc_bytes);
4412 }
4413}
4414
/**
 * may_commit_transaction - possibly commit the transaction if it's ok to
 * @root - the root we're allocating for
 * @space_info - the space_info we're allocating for
 * @bytes - the number of bytes we want to reserve
 * @force - whether or not to force the commit
 *
 * This will check to make sure that committing the transaction will actually
 * get us somewhere and then commit the transaction if it does.  Otherwise it
 * will return -ENOSPC.
 */
4425static int may_commit_transaction(struct btrfs_root *root,
4426 struct btrfs_space_info *space_info,
4427 u64 bytes, int force)
4428{
4429 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4430 struct btrfs_trans_handle *trans;
4431
4432 trans = (struct btrfs_trans_handle *)current->journal_info;
4433 if (trans)
4434 return -EAGAIN;
4435
4436 if (force)
4437 goto commit;
4438
 /* See if there is enough pinned space to make this reservation */
4440 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4441 bytes) >= 0)
4442 goto commit;
4443
 /*
 * See if there is some space in the delayed insertion reservation for
 * this reservation.
 */
4448 if (space_info != delayed_rsv->space_info)
4449 return -ENOSPC;
4450
4451 spin_lock(&delayed_rsv->lock);
4452 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4453 bytes - delayed_rsv->size) >= 0) {
4454 spin_unlock(&delayed_rsv->lock);
4455 return -ENOSPC;
4456 }
4457 spin_unlock(&delayed_rsv->lock);
4458
4459commit:
4460 trans = btrfs_join_transaction(root);
4461 if (IS_ERR(trans))
4462 return -ENOSPC;
4463
4464 return btrfs_commit_transaction(trans, root);
4465}
4466
4467enum flush_state {
4468 FLUSH_DELAYED_ITEMS_NR = 1,
4469 FLUSH_DELAYED_ITEMS = 2,
4470 FLUSH_DELALLOC = 3,
4471 FLUSH_DELALLOC_WAIT = 4,
4472 ALLOC_CHUNK = 5,
4473 COMMIT_TRANS = 6,
4474};
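
/*
 * The states above form an escalation ladder: reserve_metadata_bytes()
 * walks them in order via flush_space(), from the cheapest option (running
 * a bounded number of delayed items) through delalloc writeback and chunk
 * allocation up to the most expensive step, a full transaction commit.
 */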
4475
4476static int flush_space(struct btrfs_root *root,
4477 struct btrfs_space_info *space_info, u64 num_bytes,
4478 u64 orig_bytes, int state)
4479{
4480 struct btrfs_trans_handle *trans;
4481 int nr;
4482 int ret = 0;
4483
4484 switch (state) {
4485 case FLUSH_DELAYED_ITEMS_NR:
4486 case FLUSH_DELAYED_ITEMS:
4487 if (state == FLUSH_DELAYED_ITEMS_NR)
4488 nr = calc_reclaim_items_nr(root, num_bytes) * 2;
4489 else
4490 nr = -1;
4491
4492 trans = btrfs_join_transaction(root);
4493 if (IS_ERR(trans)) {
4494 ret = PTR_ERR(trans);
4495 break;
4496 }
4497 ret = btrfs_run_delayed_items_nr(trans, root, nr);
4498 btrfs_end_transaction(trans, root);
4499 break;
4500 case FLUSH_DELALLOC:
4501 case FLUSH_DELALLOC_WAIT:
4502 shrink_delalloc(root, num_bytes * 2, orig_bytes,
4503 state == FLUSH_DELALLOC_WAIT);
4504 break;
4505 case ALLOC_CHUNK:
4506 trans = btrfs_join_transaction(root);
4507 if (IS_ERR(trans)) {
4508 ret = PTR_ERR(trans);
4509 break;
4510 }
4511 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4512 btrfs_get_alloc_profile(root, 0),
4513 CHUNK_ALLOC_NO_FORCE);
4514 btrfs_end_transaction(trans, root);
4515 if (ret == -ENOSPC)
4516 ret = 0;
4517 break;
4518 case COMMIT_TRANS:
4519 ret = may_commit_transaction(root, space_info, orig_bytes, 0);
4520 break;
4521 default:
4522 ret = -ENOSPC;
4523 break;
4524 }
4525
4526 return ret;
4527}
4528
4529static inline u64
4530btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4531 struct btrfs_space_info *space_info)
4532{
4533 u64 used;
4534 u64 expected;
4535 u64 to_reclaim;
4536
4537 to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
4538 16 * 1024 * 1024);
4539 spin_lock(&space_info->lock);
4540 if (can_overcommit(root, space_info, to_reclaim,
4541 BTRFS_RESERVE_FLUSH_ALL)) {
4542 to_reclaim = 0;
4543 goto out;
4544 }
4545
4546 used = space_info->bytes_used + space_info->bytes_reserved +
4547 space_info->bytes_pinned + space_info->bytes_readonly +
4548 space_info->bytes_may_use;
4549 if (can_overcommit(root, space_info, 1024 * 1024,
4550 BTRFS_RESERVE_FLUSH_ALL))
4551 expected = div_factor_fine(space_info->total_bytes, 95);
4552 else
4553 expected = div_factor_fine(space_info->total_bytes, 90);
4554
4555 if (used > expected)
4556 to_reclaim = used - expected;
4557 else
4558 to_reclaim = 0;
4559 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4560 space_info->bytes_reserved);
4561out:
4562 spin_unlock(&space_info->lock);
4563
4564 return to_reclaim;
4565}
4566
4567static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4568 struct btrfs_fs_info *fs_info, u64 used)
4569{
4570 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4571
 /* If we're just plain full then async reclaim just slows us down. */
4573 if (space_info->bytes_used >= thresh)
4574 return 0;
4575
4576 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4577 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4578}
4579
4580static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
4581 struct btrfs_fs_info *fs_info,
4582 int flush_state)
4583{
4584 u64 used;
4585
4586 spin_lock(&space_info->lock);
 /*
 * We ran out of space and have not gotten any free space via
 * flush_space, so don't bother doing async reclaim.
 */
4591 if (flush_state > COMMIT_TRANS && space_info->full) {
4592 spin_unlock(&space_info->lock);
4593 return 0;
4594 }
4595
4596 used = space_info->bytes_used + space_info->bytes_reserved +
4597 space_info->bytes_pinned + space_info->bytes_readonly +
4598 space_info->bytes_may_use;
4599 if (need_do_async_reclaim(space_info, fs_info, used)) {
4600 spin_unlock(&space_info->lock);
4601 return 1;
4602 }
4603 spin_unlock(&space_info->lock);
4604
4605 return 0;
4606}

static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 to_reclaim;
	int flush_state;

	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
						      space_info);
	if (!to_reclaim)
		return;

	flush_state = FLUSH_DELAYED_ITEMS_NR;
	do {
		flush_space(fs_info->fs_root, space_info, to_reclaim,
			    to_reclaim, flush_state);
		flush_state++;
		if (!btrfs_need_do_async_reclaim(space_info, fs_info,
						 flush_state))
			return;
	} while (flush_state < COMMIT_TRANS);
}

void btrfs_init_async_reclaim_work(struct work_struct *work)
{
	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}
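
/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root - the root we're allocating for
 * @block_rsv - the block_rsv we're allocating for
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt to
 * flush out space to make room.  It will do this by flushing delalloc if
 * possible or committing the transaction.  If flush is BTRFS_RESERVE_NO_FLUSH
 * then no attempts to regain reservations will be made and this will fail if
 * there is not enough space already.
 */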
static int reserve_metadata_bytes(struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;
	u64 used;
	u64 num_bytes = orig_bytes;
	int flush_state = FLUSH_DELAYED_ITEMS_NR;
	int ret = 0;
	bool flushing = false;

again:
	ret = 0;
	spin_lock(&space_info->lock);
	/*
	 * We only want to wait if somebody other than us is flushing and we
	 * are actually allowed to flush all things.
	 */
	while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
	       space_info->flush) {
		spin_unlock(&space_info->lock);
		/*
		 * If we have a trans handle we can't wait because the flusher
		 * may have to commit the transaction, which would mean we
		 * would deadlock since we are waiting for the flusher to
		 * finish the flushing.
		 */
		if (current->journal_info)
			return -EAGAIN;
		ret = wait_event_killable(space_info->wait, !space_info->flush);
		/* Must have been killed, return */
		if (ret)
			return -EINTR;

		spin_lock(&space_info->lock);
	}

	ret = -ENOSPC;
	used = space_info->bytes_used + space_info->bytes_reserved +
		space_info->bytes_pinned + space_info->bytes_readonly +
		space_info->bytes_may_use;

	/*
	 * The idea here is that if we've not already over-reserved the space
	 * info then we can go ahead and save our reservation first and then
	 * start flushing if we need to.  Otherwise if we've already
	 * overcommitted, start flushing stuff first and then come back and
	 * try to make our reservation.
	 */
	if (used <= space_info->total_bytes) {
		if (used + orig_bytes <= space_info->total_bytes) {
			space_info->bytes_may_use += orig_bytes;
			trace_btrfs_space_reservation(root->fs_info,
				"space_info", space_info->flags, orig_bytes, 1);
			ret = 0;
		} else {
			/*
			 * Ok set num_bytes to orig_bytes since we aren't
			 * overcommitted, this way we only try and reclaim what
			 * we need.
			 */
			num_bytes = orig_bytes;
		}
	} else {
		/*
		 * Ok we're over committed, set num_bytes to the overcommitted
		 * amount plus the amount of bytes that we need for this
		 * reservation.
		 */
		num_bytes = used - space_info->total_bytes +
			(orig_bytes * 2);
	}

	if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
		space_info->bytes_may_use += orig_bytes;
		trace_btrfs_space_reservation(root->fs_info, "space_info",
					      space_info->flags, orig_bytes,
					      1);
		ret = 0;
	}

	/*
	 * Couldn't make our reservation, save our place so while we're trying
	 * to reclaim space we can actually use it instead of somebody else
	 * stealing it from us.
	 *
	 * We make the other tasks wait for the flush only when we can flush
	 * all things.
	 */
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
		flushing = true;
		space_info->flush = 1;
	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		used += orig_bytes;
		/*
		 * We will do the space reservation dance during log replay,
		 * which means we won't have fs_info->fs_root set, so don't do
		 * the async reclaim as we will panic.
		 */
		if (!root->fs_info->log_root_recovering &&
		    need_do_async_reclaim(space_info, root->fs_info, used) &&
		    !work_busy(&root->fs_info->async_reclaim_work))
			queue_work(system_unbound_wq,
				   &root->fs_info->async_reclaim_work);
	}
	spin_unlock(&space_info->lock);

	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
		goto out;

	ret = flush_space(root, space_info, num_bytes, orig_bytes,
			  flush_state);
	flush_state++;

	/*
	 * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock
	 * would happen. So skip delalloc flush.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
	    (flush_state == FLUSH_DELALLOC ||
	     flush_state == FLUSH_DELALLOC_WAIT))
		flush_state = ALLOC_CHUNK;

	if (!ret)
		goto again;
	else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
		 flush_state < COMMIT_TRANS)
		goto again;
	else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
		 flush_state <= COMMIT_TRANS)
		goto again;

out:
	if (ret == -ENOSPC &&
	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
		struct btrfs_block_rsv *global_rsv =
			&root->fs_info->global_block_rsv;

		if (block_rsv != global_rsv &&
		    !block_rsv_use_bytes(global_rsv, orig_bytes))
			ret = 0;
	}
	if (ret == -ENOSPC)
		trace_btrfs_space_reservation(root->fs_info,
					      "space_info:enospc",
					      space_info->flags, orig_bytes, 1);
	if (flushing) {
		spin_lock(&space_info->lock);
		space_info->flush = 0;
		wake_up_all(&space_info->wait);
		spin_unlock(&space_info->lock);
	}
	return ret;
}

static struct btrfs_block_rsv *get_block_rsv(
					const struct btrfs_trans_handle *trans,
					const struct btrfs_root *root)
{
	struct btrfs_block_rsv *block_rsv = NULL;

	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		block_rsv = trans->block_rsv;

	if (root == root->fs_info->csum_root && trans->adding_csums)
		block_rsv = trans->block_rsv;

	if (root == root->fs_info->uuid_root)
		block_rsv = trans->block_rsv;

	if (!block_rsv)
		block_rsv = root->block_rsv;

	if (!block_rsv)
		block_rsv = &root->fs_info->empty_block_rsv;

	return block_rsv;
}

static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes)
{
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved >= num_bytes) {
		block_rsv->reserved -= num_bytes;
		if (block_rsv->reserved < block_rsv->size)
			block_rsv->full = 0;
		ret = 0;
	}
	spin_unlock(&block_rsv->lock);
	return ret;
}

static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
				u64 num_bytes, int update_size)
{
	spin_lock(&block_rsv->lock);
	block_rsv->reserved += num_bytes;
	if (update_size)
		block_rsv->size += num_bytes;
	else if (block_rsv->reserved >= block_rsv->size)
		block_rsv->full = 1;
	spin_unlock(&block_rsv->lock);
}

int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *dest, u64 num_bytes,
			     int min_factor)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != dest->space_info)
		return -ENOSPC;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, min_factor);
	if (global_rsv->reserved < min_bytes + num_bytes) {
		spin_unlock(&global_rsv->lock);
		return -ENOSPC;
	}
	global_rsv->reserved -= num_bytes;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	block_rsv_add_bytes(dest, num_bytes, 1);
	return 0;
}

static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_rsv *block_rsv,
				    struct btrfs_block_rsv *dest, u64 num_bytes)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;

	spin_lock(&block_rsv->lock);
	if (num_bytes == (u64)-1)
		num_bytes = block_rsv->size;
	block_rsv->size -= num_bytes;
	if (block_rsv->reserved >= block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	} else {
		num_bytes = 0;
	}
	spin_unlock(&block_rsv->lock);

	if (num_bytes > 0) {
		if (dest) {
			spin_lock(&dest->lock);
			if (!dest->full) {
				u64 bytes_to_add;

				bytes_to_add = dest->size - dest->reserved;
				bytes_to_add = min(num_bytes, bytes_to_add);
				dest->reserved += bytes_to_add;
				if (dest->reserved >= dest->size)
					dest->full = 1;
				num_bytes -= bytes_to_add;
			}
			spin_unlock(&dest->lock);
		}
		if (num_bytes) {
			spin_lock(&space_info->lock);
			space_info->bytes_may_use -= num_bytes;
			trace_btrfs_space_reservation(fs_info, "space_info",
					space_info->flags, num_bytes, 0);
			spin_unlock(&space_info->lock);
		}
	}
}

static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
				   struct btrfs_block_rsv *dst, u64 num_bytes)
{
	int ret;

	ret = block_rsv_use_bytes(src, num_bytes);
	if (ret)
		return ret;

	block_rsv_add_bytes(dst, num_bytes, 1);
	return 0;
}

void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
{
	memset(rsv, 0, sizeof(*rsv));
	spin_lock_init(&rsv->lock);
	rsv->type = type;
}

struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
					      unsigned short type)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_fs_info *fs_info = root->fs_info;

	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
	if (!block_rsv)
		return NULL;

	btrfs_init_block_rsv(block_rsv, type);
	block_rsv->space_info = __find_space_info(fs_info,
						  BTRFS_BLOCK_GROUP_METADATA);
	return block_rsv;
}

void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv)
{
	if (!rsv)
		return;
	btrfs_block_rsv_release(root, rsv, (u64)-1);
	kfree(rsv);
}

void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
{
	kfree(rsv);
}

int btrfs_block_rsv_add(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
			enum btrfs_reserve_flush_enum flush)
{
	int ret;

	if (num_bytes == 0)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 1);
		return 0;
	}

	return ret;
}

int btrfs_block_rsv_check(struct btrfs_root *root,
			  struct btrfs_block_rsv *block_rsv, int min_factor)
{
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	num_bytes = div_factor(block_rsv->size, min_factor);
	if (block_rsv->reserved >= num_bytes)
		ret = 0;
	spin_unlock(&block_rsv->lock);

	return ret;
}

int btrfs_block_rsv_refill(struct btrfs_root *root,
			   struct btrfs_block_rsv *block_rsv, u64 min_reserved,
			   enum btrfs_reserve_flush_enum flush)
{
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	num_bytes = min_reserved;
	if (block_rsv->reserved >= num_bytes)
		ret = 0;
	else
		num_bytes -= block_rsv->reserved;
	spin_unlock(&block_rsv->lock);

	if (!ret)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 0);
		return 0;
	}

	return ret;
}

int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes)
{
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}

void btrfs_block_rsv_release(struct btrfs_root *root,
			     struct btrfs_block_rsv *block_rsv,
			     u64 num_bytes)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;

	if (global_rsv == block_rsv ||
	    block_rsv->space_info != global_rsv->space_info)
		global_rsv = NULL;
	block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
				num_bytes);
}
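
/*
 * helper to calculate size of global block reservation.
 * the desired value is sum of space used by extent tree,
 * checksum tree and root tree
 */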
static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *sinfo;
	u64 num_bytes;
	u64 meta_used;
	u64 data_used;
	int csum_size = btrfs_super_csum_size(fs_info->super_copy);

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
	spin_lock(&sinfo->lock);
	data_used = sinfo->bytes_used;
	spin_unlock(&sinfo->lock);

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	spin_lock(&sinfo->lock);
	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
		data_used = 0;
	meta_used = sinfo->bytes_used;
	spin_unlock(&sinfo->lock);

	num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
		    csum_size * 2;
	num_bytes += div_u64(data_used + meta_used, 50);

	if (num_bytes * 3 > meta_used)
		num_bytes = div_u64(meta_used, 3);

	return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
}

static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 num_bytes;

	num_bytes = calc_global_metadata_size(fs_info);

	spin_lock(&sinfo->lock);
	spin_lock(&block_rsv->lock);

	block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);

	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
		    sinfo->bytes_reserved + sinfo->bytes_readonly +
		    sinfo->bytes_may_use;

	if (sinfo->total_bytes > num_bytes) {
		num_bytes = sinfo->total_bytes - num_bytes;
		block_rsv->reserved += num_bytes;
		sinfo->bytes_may_use += num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 1);
	}

	if (block_rsv->reserved >= block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
		sinfo->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 0);
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	}

	spin_unlock(&block_rsv->lock);
	spin_unlock(&sinfo->lock);
}

static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.space_info = space_info;

	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->global_block_rsv.space_info = space_info;
	fs_info->delalloc_block_rsv.space_info = space_info;
	fs_info->trans_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.space_info = space_info;
	fs_info->delayed_block_rsv.space_info = space_info;

	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
	if (fs_info->quota_root)
		fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;

	update_global_block_rsv(fs_info);
}

static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
				(u64)-1);
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
	WARN_ON(fs_info->delayed_block_rsv.size > 0);
	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}

void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	if (!trans->block_rsv)
		return;

	if (!trans->bytes_reserved)
		return;

	trace_btrfs_space_reservation(root->fs_info, "transaction",
				      trans->transid, trans->bytes_reserved, 0);
	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
	trans->bytes_reserved = 0;
}
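
/* Can only return 0 or -ENOSPC */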
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;

	/*
	 * We need to hold space in order to delete our orphan item once we've
	 * added it, so this takes the reservation so we can release it later
	 * when we are truly done with the orphan item.
	 */
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 1);
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}

void btrfs_orphan_release_metadata(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 0);
	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
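
/*
 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
 * @root: the root of the parent directory
 * @rsv: block reservation
 * @items: the number of items that we need do reservation
 * @qgroup_reserved: used to return the reserved size in qgroup
 *
 * Reserves the metadata space needed for snapshot/subvolume creation and
 * deletion.  If quotas are enabled, qgroup space is reserved first and freed
 * again on failure.  When use_global_rsv is set, an -ENOSPC from the normal
 * reservation is retried against the global block reserve.
 */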
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int items,
				     u64 *qgroup_reserved,
				     bool use_global_rsv)
{
	u64 num_bytes;
	int ret;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;

	if (root->fs_info->quota_enabled) {
		/* One for parent inode, two for dir entries */
		num_bytes = 3 * root->nodesize;
		ret = btrfs_qgroup_reserve(root, num_bytes);
		if (ret)
			return ret;
	} else {
		num_bytes = 0;
	}

	*qgroup_reserved = num_bytes;

	num_bytes = btrfs_calc_trans_metadata_size(root, items);
	rsv->space_info = __find_space_info(root->fs_info,
					    BTRFS_BLOCK_GROUP_METADATA);
	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);

	if (ret == -ENOSPC && use_global_rsv)
		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);

	if (ret) {
		if (*qgroup_reserved)
			btrfs_qgroup_free(root, *qgroup_reserved);
	}

	return ret;
}

void btrfs_subvolume_release_metadata(struct btrfs_root *root,
				      struct btrfs_block_rsv *rsv,
				      u64 qgroup_reserved)
{
	btrfs_block_rsv_release(root, rsv, (u64)-1);
}
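
/**
 * drop_outstanding_extent - drop an outstanding extent
 * @inode: the inode we're dropping the extent for
 * @num_bytes: the number of bytes we're releasing.
 *
 * This is called when we are freeing up an outstanding extent, either called
 * after an error or after an extent is written.  This will return the number
 * of reserved extents that need to be freed.  This must be called with
 * BTRFS_I(inode)->lock held.
 */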
static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
	unsigned drop_inode_space = 0;
	unsigned dropped_extents = 0;
	unsigned num_extents = 0;

	num_extents = (unsigned)div64_u64(num_bytes +
					  BTRFS_MAX_EXTENT_SIZE - 1,
					  BTRFS_MAX_EXTENT_SIZE);
	ASSERT(num_extents);
	ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
	BTRFS_I(inode)->outstanding_extents -= num_extents;

	if (BTRFS_I(inode)->outstanding_extents == 0 &&
	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			       &BTRFS_I(inode)->runtime_flags))
		drop_inode_space = 1;

	/*
	 * If we have more or the same amount of outstanding extents than we
	 * have reserved then we need to leave the reserved extents count
	 * alone.
	 */
	if (BTRFS_I(inode)->outstanding_extents >=
	    BTRFS_I(inode)->reserved_extents)
		return drop_inode_space;

	dropped_extents = BTRFS_I(inode)->reserved_extents -
		BTRFS_I(inode)->outstanding_extents;
	BTRFS_I(inode)->reserved_extents -= dropped_extents;
	return dropped_extents + drop_inode_space;
}
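
/**
 * calc_csum_metadata_size - return the amount of metadata space that must be
 *	reserved/freed for the given bytes.
 * @inode: the inode we're manipulating
 * @num_bytes: the number of bytes in question
 * @reserve: 1 if we are reserving space, 0 if we are freeing space
 *
 * This adjusts the number of csum_bytes in the inode and then returns the
 * correct amount of metadata that must either be reserved or freed.  We
 * calculate how many checksums we can fit into one leaf and then divide the
 * number of bytes that will need to be checksummed by this value to figure
 * out how many checksums will be required.  If we are adding bytes then the
 * number may go up and we will return the number of additional bytes that
 * must be reserved.  If it is going down we will return the number of bytes
 * that must be freed.
 *
 * This must be called with BTRFS_I(inode)->lock held.
 */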
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
				   int reserve)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 old_csums, num_csums;

	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
	    BTRFS_I(inode)->csum_bytes == 0)
		return 0;

	old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
	if (reserve)
		BTRFS_I(inode)->csum_bytes += num_bytes;
	else
		BTRFS_I(inode)->csum_bytes -= num_bytes;
	num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);

	/* No change, no need to reserve more */
	if (old_csums == num_csums)
		return 0;

	if (reserve)
		return btrfs_calc_trans_metadata_size(root,
						      num_csums - old_csums);

	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
}

int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve = 0;
	u64 csum_bytes;
	unsigned nr_extents = 0;
	int extra_reserve = 0;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
	int ret = 0;
	bool delalloc_lock = true;
	u64 to_free = 0;
	unsigned dropped;

	/*
	 * If we are a free space inode we need to not flush since we will be
	 * in the middle of a transaction commit.  We also don't need the
	 * delalloc mutex since we won't race with anybody.  We need this
	 * mostly to make lockdep shut its filthy mouth.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		flush = BTRFS_RESERVE_NO_FLUSH;
		delalloc_lock = false;
	}

	if (flush != BTRFS_RESERVE_NO_FLUSH &&
	    btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	if (delalloc_lock)
		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);

	num_bytes = ALIGN(num_bytes, root->sectorsize);

	spin_lock(&BTRFS_I(inode)->lock);
	nr_extents = (unsigned)div64_u64(num_bytes +
					 BTRFS_MAX_EXTENT_SIZE - 1,
					 BTRFS_MAX_EXTENT_SIZE);
	BTRFS_I(inode)->outstanding_extents += nr_extents;
	nr_extents = 0;

	if (BTRFS_I(inode)->outstanding_extents >
	    BTRFS_I(inode)->reserved_extents)
		nr_extents = BTRFS_I(inode)->outstanding_extents -
			BTRFS_I(inode)->reserved_extents;

	/*
	 * Add an item to reserve for updating the inode when we complete the
	 * delalloc io.
	 */
	if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
		      &BTRFS_I(inode)->runtime_flags)) {
		nr_extents++;
		extra_reserve = 1;
	}

	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
	csum_bytes = BTRFS_I(inode)->csum_bytes;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
		if (ret)
			goto out_fail;
	}

	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
	if (unlikely(ret)) {
		if (root->fs_info->quota_enabled)
			btrfs_qgroup_free(root, nr_extents * root->nodesize);
		goto out_fail;
	}

	spin_lock(&BTRFS_I(inode)->lock);
	if (extra_reserve) {
		set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			&BTRFS_I(inode)->runtime_flags);
		nr_extents--;
	}
	BTRFS_I(inode)->reserved_extents += nr_extents;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);

	if (to_reserve)
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_reserve, 1);
	block_rsv_add_bytes(block_rsv, to_reserve, 1);

	return 0;

out_fail:
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);
	/*
	 * If the inodes csum_bytes is the same as the original csum_bytes
	 * then we know we haven't raced with any free()ers so we can just
	 * reduce our inodes csum bytes and free the reserved space for the
	 * csums.
	 */
	if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
		calc_csum_metadata_size(inode, num_bytes, 0);
	} else {
		u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
		u64 bytes;

		/*
		 * This is tricky, but first we need to figure out how much we
		 * freed from any free-ers that occurred during this
		 * reservation, so we reset ->csum_bytes to the csum_bytes
		 * before we started our reservation and we can then calculate
		 * how many bytes were freed while we were reserving.
		 */
		bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
		BTRFS_I(inode)->csum_bytes = csum_bytes;
		to_free = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now we need to see how much we would have freed had we not
		 * been making this reservation and our ->csum_bytes were not
		 * artificially inflated.
		 */
		BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
		bytes = csum_bytes - orig_csum_bytes;
		bytes = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now reset ->csum_bytes to what it should be.  If bytes is
		 * more than to_free then we would have freed more space had
		 * we not had an artificially high ->csum_bytes, so we need to
		 * free the remainder.  If bytes is the same or less then we
		 * don't need to do anything, the other free-ers did the
		 * correct thing.
		 */
		BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
		if (bytes > to_free)
			to_free = bytes - to_free;
		else
			to_free = 0;
	}
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (to_free) {
		btrfs_block_rsv_release(root, block_rsv, to_free);
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_free, 0);
	}
	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
	return ret;
}
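
/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for
 * @num_bytes: the number of bytes we're releasing
 *
 * This will release the metadata reservation for an inode.  This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations.
 */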
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free = 0;
	unsigned dropped;

	num_bytes = ALIGN(num_bytes, root->sectorsize);
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);

	if (num_bytes)
		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped > 0)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (btrfs_test_is_dummy_root(root))
		return;

	trace_btrfs_space_reservation(root->fs_info, "delalloc",
				      btrfs_ino(inode), to_free, 0);

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}
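
/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
 * @inode: inode we're writing to
 * @num_bytes: the number of bytes we want to allocate
 *
 * This will do the following things
 *
 * o reserve space in the data space info for num_bytes
 * o reserve space in the metadata space info based on the number of
 *   outstanding extents and how many csums will be needed
 *
 * This will return 0 for success and -ENOSPC if there is no space left.
 */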
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
{
	int ret;

	ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
	if (ret)
		return ret;

	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
	if (ret) {
		btrfs_free_reserved_data_space(inode, num_bytes);
		return ret;
	}

	return 0;
}
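
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @num_bytes: the number of bytes we want to free up
 *
 * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
 * called in the case that we don't need the metadata AND data reservations
 * anymore, for example after an error or after an inline extent is inserted.
 */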
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
	btrfs_delalloc_release_metadata(inode, num_bytes);
	btrfs_free_reserved_data_space(inode, num_bytes);
}

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc)
{
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	int factor;

	/* block accounting for super block */
	spin_lock(&info->delalloc_root_lock);
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_root_lock);

	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -ENOENT;
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;
		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.
		 * This is because we need the unpinning stage to actually add
		 * the space back to the block group, otherwise we will leak
		 * space.
		 */
		if (!alloc && cache->cached == BTRFS_CACHE_NO)
			cache_block_group(cache, 1);

		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);

		if (btrfs_test_opt(root, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->pinned += num_bytes;
			cache->space_info->bytes_pinned += num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			set_extent_dirty(info->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
			/*
			 * No longer have used bytes in this block group, queue
			 * it for deletion.
			 */
			if (old_val == 0) {
				spin_lock(&info->unused_bgs_lock);
				if (list_empty(&cache->bg_list)) {
					btrfs_get_block_group(cache);
					list_add_tail(&cache->bg_list,
						      &info->unused_bgs);
				}
				spin_unlock(&info->unused_bgs_lock);
			}
		}

		spin_lock(&trans->transaction->dirty_bgs_lock);
		if (list_empty(&cache->dirty_list)) {
			list_add_tail(&cache->dirty_list,
				      &trans->transaction->dirty_bgs);
			trans->transaction->num_dirty_bgs++;
			btrfs_get_block_group(cache);
		}
		spin_unlock(&trans->transaction->dirty_bgs_lock);

		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}

static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
{
	struct btrfs_block_group_cache *cache;
	u64 bytenr;

	spin_lock(&root->fs_info->block_group_cache_lock);
	bytenr = root->fs_info->first_logical_byte;
	spin_unlock(&root->fs_info->block_group_cache_lock);

	if (bytenr < (u64)-1)
		return bytenr;

	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
	if (!cache)
		return 0;

	bytenr = cache->key.objectid;
	btrfs_put_block_group(cache);

	return bytenr;
}

static int pin_down_extent(struct btrfs_root *root,
			   struct btrfs_block_group_cache *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	cache->space_info->bytes_pinned += num_bytes;
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	if (reserved)
		trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
	return 0;
}
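
/*
 * this function must be called within transaction
 */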
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group_cache *cache;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	BUG_ON(!cache); /* Logic error */

	pin_down_extent(root, cache, bytenr, num_bytes, reserved);

	btrfs_put_block_group(cache);
	return 0;
}
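
/*
 * this function must be called within transaction
 */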
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group_cache *cache;
	int ret;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	if (!cache)
		return -EINVAL;

	/*
	 * pull in the free space cache (if any) so that our pin
	 * removes the free space from the cache.  We have load_only set
	 * to one because the slow code to read in the free extents does check
	 * the pinned extents.
	 */
	cache_block_group(cache, 1);

	pin_down_extent(root, cache, bytenr, num_bytes, 0);

	/* remove us from the free space cache (if we're there at all) */
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
	btrfs_put_block_group(cache);
	return ret;
}

static int __exclude_logged_extent(struct btrfs_root *root, u64 start,
				   u64 num_bytes)
{
	int ret;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_caching_control *caching_ctl;

	block_group = btrfs_lookup_block_group(root->fs_info, start);
	if (!block_group)
		return -EINVAL;

	cache_block_group(block_group, 0);
	caching_ctl = get_caching_control(block_group);

	if (!caching_ctl) {
		/* Logic error */
		BUG_ON(!block_group_cache_done(block_group));
		ret = btrfs_remove_free_space(block_group, start, num_bytes);
	} else {
		mutex_lock(&caching_ctl->mutex);

		if (start >= caching_ctl->progress) {
			ret = add_excluded_extent(root, start, num_bytes);
		} else if (start + num_bytes <= caching_ctl->progress) {
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
		} else {
			num_bytes = caching_ctl->progress - start;
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
			if (ret)
				goto out_lock;

			num_bytes = (start + num_bytes) -
				caching_ctl->progress;
			start = caching_ctl->progress;
			ret = add_excluded_extent(root, start, num_bytes);
		}
out_lock:
		mutex_unlock(&caching_ctl->mutex);
		put_caching_control(caching_ctl);
	}
	btrfs_put_block_group(block_group);
	return ret;
}

int btrfs_exclude_logged_extents(struct btrfs_root *log,
				 struct extent_buffer *eb)
{
	struct btrfs_file_extent_item *item;
	struct btrfs_key key;
	int found_type;
	int i;

	if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
		return 0;

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(eb, item);
		if (found_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
			continue;
		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
		__exclude_logged_extent(log, key.objectid, key.offset);
	}

	return 0;
}
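
/**
 * btrfs_update_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @num_bytes:	The number of bytes in question
 * @reserve:	One of the reservation enums
 * @delalloc:	The blocks are allocated for the delalloc write
 *
 * This is called by the allocator when it reserves space, or by somebody who is
 * freeing space that was never actually used on disk.  For example if you
 * reserve some space for a new leaf in transaction A and before transaction A
 * commits you free that leaf, you call this with reserve set to 0 in order to
 * clear the reservation.
 *
 * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
 * ENOSPC accounting.  For data we handle the reservation through clearing the
 * delalloc bits in the io_tree.  We have to do this since we could end up
 * allocating less disk space for the amount of data we have reserved in the
 * case of compression.
 *
 * If this is a reservation and the block group has become read only we cannot
 * make the reservation and return -EAGAIN, otherwise this function always
 * succeeds.
 */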
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (reserve != RESERVE_FREE) {
		if (cache->ro) {
			ret = -EAGAIN;
		} else {
			cache->reserved += num_bytes;
			space_info->bytes_reserved += num_bytes;
			if (reserve == RESERVE_ALLOC) {
				trace_btrfs_space_reservation(cache->fs_info,
						"space_info", space_info->flags,
						num_bytes, 0);
				space_info->bytes_may_use -= num_bytes;
			}

			if (delalloc)
				cache->delalloc_bytes += num_bytes;
		}
	} else {
		if (cache->ro)
			space_info->bytes_readonly += num_bytes;
		cache->reserved -= num_bytes;
		space_info->bytes_reserved -= num_bytes;

		if (delalloc)
			cache->delalloc_bytes -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}

void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_caching_control *next;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_block_group_cache *cache;

	down_write(&fs_info->commit_root_sem);

	list_for_each_entry_safe(caching_ctl, next,
				 &fs_info->caching_block_groups, list) {
		cache = caching_ctl->block_group;
		if (block_group_cache_done(cache)) {
			cache->last_byte_to_unpin = (u64)-1;
			list_del_init(&caching_ctl->list);
			put_caching_control(caching_ctl);
		} else {
			cache->last_byte_to_unpin = caching_ctl->progress;
		}
	}

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];

	up_write(&fs_info->commit_root_sem);

	update_global_block_rsv(fs_info);
}

static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 len;
	bool readonly;

	while (start <= end) {
		readonly = false;
		if (!cache ||
		    start >= cache->key.objectid + cache->key.offset) {
			if (cache)
				btrfs_put_block_group(cache);
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache); /* Logic error */
		}

		len = cache->key.objectid + cache->key.offset - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			if (return_free_space)
				btrfs_add_free_space(cache, start, len);
		}

		start += len;
		space_info = cache->space_info;

		spin_lock(&space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		space_info->bytes_pinned -= len;
		percpu_counter_add(&space_info->total_bytes_pinned, -len);
		if (cache->ro) {
			space_info->bytes_readonly += len;
			readonly = true;
		}
		spin_unlock(&cache->lock);
		if (!readonly && global_rsv->space_info == space_info) {
			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				len = min(len, global_rsv->size -
					  global_rsv->reserved);
				global_rsv->reserved += len;
				space_info->bytes_may_use += len;
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
			}
			spin_unlock(&global_rsv->lock);
		}
		spin_unlock(&space_info->lock);
	}

	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	if (trans->aborted)
		return 0;

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (1) {
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(root, DISCARD))
			ret = btrfs_discard_extent(root, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		unpin_extent_range(root, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		cond_resched();
	}

	return 0;
}

static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
			     u64 owner, u64 root_objectid)
{
	struct btrfs_space_info *space_info;
	u64 flags;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
			flags = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			flags = BTRFS_BLOCK_GROUP_METADATA;
	} else {
		flags = BTRFS_BLOCK_GROUP_DATA;
	}

	space_info = __find_space_info(fs_info, flags);
	BUG_ON(!space_info);
	percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
}

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extent_op,
			       int no_quota)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	int last_ref = 0;
	enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	if (!info->quota_enabled || !is_fstree(root_objectid))
		no_quota = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	BUG_ON(!is_data && refs_to_drop != 1);

	if (is_data)
		skinny_metadata = 0;

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
			btrfs_release_path(path);
			path->leave_spinning = 1;

			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				skinny_metadata = false;
				key.objectid = bytenr;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				btrfs_err(info, "umm, got %d back from search, was looking for %llu",
					ret, bytenr);
				if (ret > 0)
					btrfs_print_leaf(extent_root,
							 path->nodes[0]);
			}
			if (ret < 0) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (WARN_ON(ret == -ENOENT)) {
		btrfs_print_leaf(extent_root, path->nodes[0]);
		btrfs_err(info,
			"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
			bytenr, parent, root_objectid, owner_objectid,
			owner_offset);
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	} else {
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		if (ret < 0) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}

		btrfs_release_path(path);
		path->leave_spinning = 1;

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			btrfs_err(info, "umm, got %d back from search, was looking for %llu",
				ret, bytenr);
			btrfs_print_leaf(extent_root, path->nodes[0]);
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}

		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		btrfs_err(info, "trying to drop %d refs but we only have %Lu "
			  "for bytenr %Lu", refs_to_drop, refs, bytenr);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		type = BTRFS_QGROUP_OPER_SUB_SHARED;
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data, &last_ref);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
		}
		add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
				 root_objectid);
	} else {
		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(root, path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		last_ref = 1;
		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
		}

		ret = update_block_group(trans, root, bytenr, num_bytes, 0);
		if (ret) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}
	}
	btrfs_release_path(path);

	/* Deal with the quota accounting */
	if (!ret && last_ref && !no_quota) {
		int mod_seq = 0;

		if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    type == BTRFS_QGROUP_OPER_SUB_SHARED)
			mod_seq = 1;

		ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
					      bytenr, num_bytes, type,
					      mod_seq);
	}
out:
	btrfs_free_path(path);
	return ret;
}
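
/*
 * when we free a block, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree for
 * a given extent, and if there are no other delayed refs to be processed, it
 * removes it from the tree.
 */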
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head)
		goto out_delayed_unlock;

	spin_lock(&head->lock);
	if (rb_first(&head->ref_root))
		goto out;

	if (head->extent_op) {
		if (!head->must_insert_reserved)
			goto out;
		btrfs_free_delayed_extent_op(head->extent_op);
		head->extent_op = NULL;
	}

	/*
	 * waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway
	 */
	if (!mutex_trylock(&head->mutex))
		goto out;

	/*
	 * at this point we have a head with no other entries.  Go
	 * ahead and process it.
	 */
	head->node.in_tree = 0;
	rb_erase(&head->href_node, &delayed_refs->href_root);

	atomic_dec(&delayed_refs->num_entries);

	/*
	 * we don't take a ref on the node because we're removing it from the
	 * tree, so we just steal the ref the tree was holding.
	 */
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
	head->processing = 0;
	spin_unlock(&head->lock);
	spin_unlock(&delayed_refs->lock);

	BUG_ON(head->extent_op);
	if (head->must_insert_reserved)
		ret = 1;

	mutex_unlock(&head->mutex);
	btrfs_put_delayed_ref(&head->node);
	return ret;
out:
	spin_unlock(&head->lock);

out_delayed_unlock:
	spin_unlock(&delayed_refs->lock);
	return 0;
}

void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	int pin = 1;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
						 buf->start, buf->len,
						 parent, root->root_key.objectid,
						 btrfs_header_level(buf),
						 BTRFS_DROP_DELAYED_REF, NULL, 0);
		BUG_ON(ret); /* -ENOMEM */
	}

	if (!last_ref)
		return;

	if (btrfs_header_generation(buf) == trans->transid) {
		struct btrfs_block_group_cache *cache;

		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			ret = check_ref_cleanup(trans, root, buf->start);
			if (!ret)
				goto out;
		}

		cache = btrfs_lookup_block_group(root->fs_info, buf->start);

		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(root, cache, buf->start, buf->len, 1);
			btrfs_put_block_group(cache);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
		btrfs_put_block_group(cache);
		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
		pin = 0;
	}
out:
	if (pin)
		add_pinned_bytes(root->fs_info, buf->len,
				 btrfs_header_level(buf),
				 root->root_key.objectid);

	/*
	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
	 * anymore.
	 */
	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
}
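
/* Can return -ENOMEM */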
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
		      u64 owner, u64 offset, int no_quota)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (btrfs_test_is_dummy_root(root))
		return 0;

	add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);

	/*
	 * tree log blocks never actually go into the extent allocation
	 * tree, just accounting.  They are pinned until the transaction
	 * commits instead of being freed through the extent tree.
	 */
	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
		/* unlocks the pinned mutex */
		btrfs_pin_extent(root, bytenr, num_bytes, 1);
		ret = 0;
	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_DROP_DELAYED_REF, NULL, no_quota);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						num_bytes,
						parent, root_objectid, owner,
						offset, BTRFS_DROP_DELAYED_REF,
						NULL, no_quota);
	}
	return ret;
}
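
/*
 * when we wait for progress in the block group caching, it's because
 * our allocation attempt failed at least once.  So, we must sleep
 * and let some progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to
 * show up, and then it will check the block group free space numbers
 * for our min num_bytes.  Another option is to have it go ahead
 * and look in the rbtree for a free extent of a given size, but this
 * is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before
 * using any of the information in this block group.
 */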
static noinline void
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
				u64 num_bytes)
{
	struct btrfs_caching_control *caching_ctl;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return;

	wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
		   (cache->free_space_ctl->free_space >= num_bytes));

	put_caching_control(caching_ctl);
}

static noinline int
wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;

	wait_event(caching_ctl->wait, block_group_cache_done(cache));
	if (cache->cached == BTRFS_CACHE_ERROR)
		ret = -EIO;
	put_caching_control(caching_ctl);
	return ret;
}

int __get_raid_index(u64 flags)
{
	if (flags & BTRFS_BLOCK_GROUP_RAID10)
		return BTRFS_RAID_RAID10;
	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
		return BTRFS_RAID_RAID1;
	else if (flags & BTRFS_BLOCK_GROUP_DUP)
		return BTRFS_RAID_DUP;
	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
		return BTRFS_RAID_RAID0;
	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
		return BTRFS_RAID_RAID5;
	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
		return BTRFS_RAID_RAID6;

	return BTRFS_RAID_SINGLE;
}

int get_block_group_index(struct btrfs_block_group_cache *cache)
{
	return __get_raid_index(cache->flags);
}

static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10]	= "raid10",
	[BTRFS_RAID_RAID1]	= "raid1",
	[BTRFS_RAID_DUP]	= "dup",
	[BTRFS_RAID_RAID0]	= "raid0",
	[BTRFS_RAID_SINGLE]	= "single",
	[BTRFS_RAID_RAID5]	= "raid5",
	[BTRFS_RAID_RAID6]	= "raid6",
};

static const char *get_raid_name(enum btrfs_raid_types type)
{
	if (type >= BTRFS_NR_RAID_TYPES)
		return NULL;

	return btrfs_raid_type_names[type];
}

enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT = 0,
	LOOP_CACHING_WAIT = 1,
	LOOP_ALLOC_CHUNK = 2,
	LOOP_NO_EMPTY_SIZE = 3,
};

static inline void
btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	if (delalloc)
		down_read(&cache->data_rwsem);
}

static inline void
btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
		       int delalloc)
{
	btrfs_get_block_group(cache);
	if (delalloc)
		down_read(&cache->data_rwsem);
}

static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
		   struct btrfs_free_cluster *cluster,
		   int delalloc)
{
	struct btrfs_block_group_cache *used_bg;
	bool locked = false;
again:
	spin_lock(&cluster->refill_lock);
	if (locked) {
		if (used_bg == cluster->block_group)
			return used_bg;

		up_read(&used_bg->data_rwsem);
		btrfs_put_block_group(used_bg);
	}

	used_bg = cluster->block_group;
	if (!used_bg)
		return NULL;

	if (used_bg == block_group)
		return used_bg;

	btrfs_get_block_group(used_bg);

	if (!delalloc)
		return used_bg;

	if (down_read_trylock(&used_bg->data_rwsem))
		return used_bg;

	spin_unlock(&cluster->refill_lock);
	down_read(&used_bg->data_rwsem);
	locked = true;
	goto again;
}

static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
			  int delalloc)
{
	if (delalloc)
		up_read(&cache->data_rwsem);
	btrfs_put_block_group(cache);
}
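
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 */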
6713static noinline int find_free_extent(struct btrfs_root *orig_root,
6714 u64 num_bytes, u64 empty_size,
6715 u64 hint_byte, struct btrfs_key *ins,
6716 u64 flags, int delalloc)
6717{
6718 int ret = 0;
6719 struct btrfs_root *root = orig_root->fs_info->extent_root;
6720 struct btrfs_free_cluster *last_ptr = NULL;
6721 struct btrfs_block_group_cache *block_group = NULL;
6722 u64 search_start = 0;
6723 u64 max_extent_size = 0;
6724 int empty_cluster = 2 * 1024 * 1024;
6725 struct btrfs_space_info *space_info;
6726 int loop = 0;
6727 int index = __get_raid_index(flags);
6728 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
6729 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
6730 bool failed_cluster_refill = false;
6731 bool failed_alloc = false;
6732 bool use_cluster = true;
6733 bool have_caching_bg = false;
6734
6735 WARN_ON(num_bytes < root->sectorsize);
6736 ins->type = BTRFS_EXTENT_ITEM_KEY;
6737 ins->objectid = 0;
6738 ins->offset = 0;
6739
6740 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
6741
6742 space_info = __find_space_info(root->fs_info, flags);
6743 if (!space_info) {
6744 btrfs_err(root->fs_info, "No space info for %llu", flags);
6745 return -ENOSPC;
6746 }
6747
6748
6749
6750
6751
6752 if (btrfs_mixed_space_info(space_info))
6753 use_cluster = false;
6754
6755 if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
6756 last_ptr = &root->fs_info->meta_alloc_cluster;
6757 if (!btrfs_test_opt(root, SSD))
6758 empty_cluster = 64 * 1024;
6759 }
6760
6761 if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
6762 btrfs_test_opt(root, SSD)) {
6763 last_ptr = &root->fs_info->data_alloc_cluster;
6764 }
6765
6766 if (last_ptr) {
6767 spin_lock(&last_ptr->lock);
6768 if (last_ptr->block_group)
6769 hint_byte = last_ptr->window_start;
6770 spin_unlock(&last_ptr->lock);
6771 }
6772
6773 search_start = max(search_start, first_logical_byte(root, 0));
6774 search_start = max(search_start, hint_byte);
6775
6776 if (!last_ptr)
6777 empty_cluster = 0;
6778
6779 if (search_start == hint_byte) {
6780 block_group = btrfs_lookup_block_group(root->fs_info,
6781 search_start);
6782
6783
6784
6785
6786
6787
6788
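		/*
		 * We don't want to use the hinted block group if it doesn't
		 * match our allocation bits, or if it hasn't been cached yet.
		 */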
6789 if (block_group && block_group_bits(block_group, flags) &&
6790 block_group->cached != BTRFS_CACHE_NO) {
6791 down_read(&space_info->groups_sem);
6792 if (list_empty(&block_group->list) ||
6793 block_group->ro) {
6794
6795
6796
6797
6798
6799
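				/*
				 * Someone is removing this block group; we
				 * can't jump to the have_block_group target
				 * because our list pointers are not valid.
				 */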
6800 btrfs_put_block_group(block_group);
6801 up_read(&space_info->groups_sem);
6802 } else {
6803 index = get_block_group_index(block_group);
6804 btrfs_lock_block_group(block_group, delalloc);
6805 goto have_block_group;
6806 }
6807 } else if (block_group) {
6808 btrfs_put_block_group(block_group);
6809 }
6810 }
6811search:
6812 have_caching_bg = false;
6813 down_read(&space_info->groups_sem);
6814 list_for_each_entry(block_group, &space_info->block_groups[index],
6815 list) {
6816 u64 offset;
6817 int cached;
6818
6819 btrfs_grab_block_group(block_group, delalloc);
6820 search_start = block_group->key.objectid;
6821
6822
6823
6824
6825
6826
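		/*
		 * This can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */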
6827 if (!block_group_bits(block_group, flags)) {
6828 u64 extra = BTRFS_BLOCK_GROUP_DUP |
6829 BTRFS_BLOCK_GROUP_RAID1 |
6830 BTRFS_BLOCK_GROUP_RAID5 |
6831 BTRFS_BLOCK_GROUP_RAID6 |
6832 BTRFS_BLOCK_GROUP_RAID10;
6833
6834
6835
6836
6837
6838
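			/*
			 * If they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */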
6839 if ((flags & extra) && !(block_group->flags & extra))
6840 goto loop;
6841 }
6842
6843have_block_group:
6844 cached = block_group_cache_done(block_group);
6845 if (unlikely(!cached)) {
6846 ret = cache_block_group(block_group, 0);
6847 BUG_ON(ret < 0);
6848 ret = 0;
6849 }
6850
6851 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
6852 goto loop;
6853 if (unlikely(block_group->ro))
6854 goto loop;
6855
6856
6857
6858
6859
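		/*
		 * Try the cluster allocator first: the refill_lock keeps out
		 * other people trying to start a new cluster.
		 */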
6860 if (last_ptr) {
6861 struct btrfs_block_group_cache *used_block_group;
6862 unsigned long aligned_cluster;
6863
6864
6865
6866
6867 used_block_group = btrfs_lock_cluster(block_group,
6868 last_ptr,
6869 delalloc);
6870 if (!used_block_group)
6871 goto refill_cluster;
6872
6873 if (used_block_group != block_group &&
6874 (used_block_group->ro ||
6875 !block_group_bits(used_block_group, flags)))
6876 goto release_cluster;
6877
6878 offset = btrfs_alloc_from_cluster(used_block_group,
6879 last_ptr,
6880 num_bytes,
6881 used_block_group->key.objectid,
6882 &max_extent_size);
6883 if (offset) {
6884
6885 spin_unlock(&last_ptr->refill_lock);
6886 trace_btrfs_reserve_extent_cluster(root,
6887 used_block_group,
6888 search_start, num_bytes);
6889 if (used_block_group != block_group) {
6890 btrfs_release_block_group(block_group,
6891 delalloc);
6892 block_group = used_block_group;
6893 }
6894 goto checks;
6895 }
6896
6897 WARN_ON(last_ptr->block_group != used_block_group);
6898release_cluster:
6899
6900
6901
6902
6903
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
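			/*
			 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a
			 * new cluster, so let the unclustered allocator find
			 * whatever block it can.  If we reach this point we
			 * have tried the cluster allocator plenty of times
			 * and not found anything, so we are likely too
			 * fragmented for clustering to work.
			 *
			 * However, if the cluster is taken from the current
			 * block group, release the cluster first, so that we
			 * can allocate a new one.
			 */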
6914 if (loop >= LOOP_NO_EMPTY_SIZE &&
6915 used_block_group != block_group) {
6916 spin_unlock(&last_ptr->refill_lock);
6917 btrfs_release_block_group(used_block_group,
6918 delalloc);
6919 goto unclustered_alloc;
6920 }
6921
6922
6923
6924
6925
6926 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6927
6928 if (used_block_group != block_group)
6929 btrfs_release_block_group(used_block_group,
6930 delalloc);
6931refill_cluster:
6932 if (loop >= LOOP_NO_EMPTY_SIZE) {
6933 spin_unlock(&last_ptr->refill_lock);
6934 goto unclustered_alloc;
6935 }
6936
6937 aligned_cluster = max_t(unsigned long,
6938 empty_cluster + empty_size,
6939 block_group->full_stripe_len);
6940
6941
6942 ret = btrfs_find_space_cluster(root, block_group,
6943 last_ptr, search_start,
6944 num_bytes,
6945 aligned_cluster);
6946 if (ret == 0) {
6947
6948
6949
6950
6951 offset = btrfs_alloc_from_cluster(block_group,
6952 last_ptr,
6953 num_bytes,
6954 search_start,
6955 &max_extent_size);
6956 if (offset) {
6957
6958 spin_unlock(&last_ptr->refill_lock);
6959 trace_btrfs_reserve_extent_cluster(root,
6960 block_group, search_start,
6961 num_bytes);
6962 goto checks;
6963 }
6964 } else if (!cached && loop > LOOP_CACHING_NOWAIT
6965 && !failed_cluster_refill) {
6966 spin_unlock(&last_ptr->refill_lock);
6967
6968 failed_cluster_refill = true;
6969 wait_block_group_cache_progress(block_group,
6970 num_bytes + empty_cluster + empty_size);
6971 goto have_block_group;
6972 }
6973
6974
6975
6976
6977
6978
6979
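			/*
			 * At this point we either didn't find a cluster or we
			 * weren't able to allocate a block from our cluster.
			 * Return the cluster to the free space cache and move
			 * on to the next block group.
			 */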
6980 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6981 spin_unlock(&last_ptr->refill_lock);
6982 goto loop;
6983 }
6984
6985unclustered_alloc:
6986 spin_lock(&block_group->free_space_ctl->tree_lock);
6987 if (cached &&
6988 block_group->free_space_ctl->free_space <
6989 num_bytes + empty_cluster + empty_size) {
6990 if (block_group->free_space_ctl->free_space >
6991 max_extent_size)
6992 max_extent_size =
6993 block_group->free_space_ctl->free_space;
6994 spin_unlock(&block_group->free_space_ctl->tree_lock);
6995 goto loop;
6996 }
6997 spin_unlock(&block_group->free_space_ctl->tree_lock);
6998
6999 offset = btrfs_find_space_for_alloc(block_group, search_start,
7000 num_bytes, empty_size,
7001 &max_extent_size);
7002
7003
7004
7005
7006
7007
7008
7009
7010
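		/*
		 * If we didn't find a chunk, and we haven't failed on this
		 * block group before, and this block group is in the middle
		 * of caching and we are ok with waiting, then go ahead and
		 * wait for progress to be made, and set failed_alloc to true.
		 *
		 * If failed_alloc is true then we've already waited on this
		 * block group once and should move on to the next block group.
		 */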
7011 if (!offset && !failed_alloc && !cached &&
7012 loop > LOOP_CACHING_NOWAIT) {
7013 wait_block_group_cache_progress(block_group,
7014 num_bytes + empty_size);
7015 failed_alloc = true;
7016 goto have_block_group;
7017 } else if (!offset) {
7018 if (!cached)
7019 have_caching_bg = true;
7020 goto loop;
7021 }
7022checks:
7023 search_start = ALIGN(offset, root->stripesize);
7024
7025
7026 if (search_start + num_bytes >
7027 block_group->key.objectid + block_group->key.offset) {
7028 btrfs_add_free_space(block_group, offset, num_bytes);
7029 goto loop;
7030 }
7031
7032 if (offset < search_start)
7033 btrfs_add_free_space(block_group, offset,
7034 search_start - offset);
7035 BUG_ON(offset > search_start);
7036
7037 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
7038 alloc_type, delalloc);
7039 if (ret == -EAGAIN) {
7040 btrfs_add_free_space(block_group, offset, num_bytes);
7041 goto loop;
7042 }
7043
7044
7045 ins->objectid = search_start;
7046 ins->offset = num_bytes;
7047
7048 trace_btrfs_reserve_extent(orig_root, block_group,
7049 search_start, num_bytes);
7050 btrfs_release_block_group(block_group, delalloc);
7051 break;
7052loop:
7053 failed_cluster_refill = false;
7054 failed_alloc = false;
7055 BUG_ON(index != get_block_group_index(block_group));
7056 btrfs_release_block_group(block_group, delalloc);
7057 }
7058 up_read(&space_info->groups_sem);
7059
7060 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
7061 goto search;
7062
7063 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
7064 goto search;
7065
7066
7067
7068
7069
7070
7071
7072
7073
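	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *			again
	 */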
7074 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
7075 index = 0;
7076 loop++;
7077 if (loop == LOOP_ALLOC_CHUNK) {
7078 struct btrfs_trans_handle *trans;
7079 int exist = 0;
7080
7081 trans = current->journal_info;
7082 if (trans)
7083 exist = 1;
7084 else
7085 trans = btrfs_join_transaction(root);
7086
7087 if (IS_ERR(trans)) {
7088 ret = PTR_ERR(trans);
7089 goto out;
7090 }
7091
7092 ret = do_chunk_alloc(trans, root, flags,
7093 CHUNK_ALLOC_FORCE);
7094
7095
7096
7097
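			/*
			 * Do not bail out on ENOSPC since we can do more
			 * things.
			 */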
7098 if (ret < 0 && ret != -ENOSPC)
7099 btrfs_abort_transaction(trans,
7100 root, ret);
7101 else
7102 ret = 0;
7103 if (!exist)
7104 btrfs_end_transaction(trans, root);
7105 if (ret)
7106 goto out;
7107 }
7108
7109 if (loop == LOOP_NO_EMPTY_SIZE) {
7110 empty_size = 0;
7111 empty_cluster = 0;
7112 }
7113
7114 goto search;
	} else if (!ins->objectid) {
		ret = -ENOSPC;
	} else {
		ret = 0;
	}
7120out:
7121 if (ret == -ENOSPC)
7122 ins->offset = max_extent_size;
7123 return ret;
7124}
7125
7126static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
7127 int dump_block_groups)
7128{
7129 struct btrfs_block_group_cache *cache;
7130 int index = 0;
7131
7132 spin_lock(&info->lock);
7133 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
7134 info->flags,
7135 info->total_bytes - info->bytes_used - info->bytes_pinned -
7136 info->bytes_reserved - info->bytes_readonly,
7137 (info->full) ? "" : "not ");
7138 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
7139 "reserved=%llu, may_use=%llu, readonly=%llu\n",
7140 info->total_bytes, info->bytes_used, info->bytes_pinned,
7141 info->bytes_reserved, info->bytes_may_use,
7142 info->bytes_readonly);
7143 spin_unlock(&info->lock);
7144
7145 if (!dump_block_groups)
7146 return;
7147
7148 down_read(&info->groups_sem);
7149again:
7150 list_for_each_entry(cache, &info->block_groups[index], list) {
7151 spin_lock(&cache->lock);
7152 printk(KERN_INFO "BTRFS: "
7153 "block group %llu has %llu bytes, "
7154 "%llu used %llu pinned %llu reserved %s\n",
7155 cache->key.objectid, cache->key.offset,
7156 btrfs_block_group_used(&cache->item), cache->pinned,
7157 cache->reserved, cache->ro ? "[readonly]" : "");
7158 btrfs_dump_free_space(cache, bytes);
7159 spin_unlock(&cache->lock);
7160 }
7161 if (++index < BTRFS_NR_RAID_TYPES)
7162 goto again;
7163 up_read(&info->groups_sem);
7164}
7165
7166int btrfs_reserve_extent(struct btrfs_root *root,
7167 u64 num_bytes, u64 min_alloc_size,
7168 u64 empty_size, u64 hint_byte,
7169 struct btrfs_key *ins, int is_data, int delalloc)
7170{
7171 bool final_tried = false;
7172 u64 flags;
7173 int ret;
7174
7175 flags = btrfs_get_alloc_profile(root, is_data);
7176again:
7177 WARN_ON(num_bytes < root->sectorsize);
7178 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
7179 flags, delalloc);
7180
7181 if (ret == -ENOSPC) {
7182 if (!final_tried && ins->offset) {
7183 num_bytes = min(num_bytes >> 1, ins->offset);
7184 num_bytes = round_down(num_bytes, root->sectorsize);
7185 num_bytes = max(num_bytes, min_alloc_size);
7186 if (num_bytes == min_alloc_size)
7187 final_tried = true;
7188 goto again;
7189 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
7190 struct btrfs_space_info *sinfo;
7191
7192 sinfo = __find_space_info(root->fs_info, flags);
7193 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
7194 flags, num_bytes);
7195 if (sinfo)
7196 dump_space_info(sinfo, num_bytes, 1);
7197 }
7198 }
7199
7200 return ret;
7201}
7202
7203static int __btrfs_free_reserved_extent(struct btrfs_root *root,
7204 u64 start, u64 len,
7205 int pin, int delalloc)
7206{
7207 struct btrfs_block_group_cache *cache;
7208 int ret = 0;
7209
7210 cache = btrfs_lookup_block_group(root->fs_info, start);
7211 if (!cache) {
7212 btrfs_err(root->fs_info, "Unable to find block group for %llu",
7213 start);
7214 return -ENOSPC;
7215 }
7216
7217 if (pin)
7218 pin_down_extent(root, cache, start, len, 1);
7219 else {
7220 if (btrfs_test_opt(root, DISCARD))
7221 ret = btrfs_discard_extent(root, start, len, NULL);
7222 btrfs_add_free_space(cache, start, len);
7223 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
7224 }
7225
7226 btrfs_put_block_group(cache);
7227
7228 trace_btrfs_reserved_extent_free(root, start, len);
7229
7230 return ret;
7231}
7232
7233int btrfs_free_reserved_extent(struct btrfs_root *root,
7234 u64 start, u64 len, int delalloc)
7235{
7236 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
7237}
7238
7239int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
7240 u64 start, u64 len)
7241{
7242 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
7243}
7244
7245static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7246 struct btrfs_root *root,
7247 u64 parent, u64 root_objectid,
7248 u64 flags, u64 owner, u64 offset,
7249 struct btrfs_key *ins, int ref_mod)
7250{
7251 int ret;
7252 struct btrfs_fs_info *fs_info = root->fs_info;
7253 struct btrfs_extent_item *extent_item;
7254 struct btrfs_extent_inline_ref *iref;
7255 struct btrfs_path *path;
7256 struct extent_buffer *leaf;
7257 int type;
7258 u32 size;
7259
7260 if (parent > 0)
7261 type = BTRFS_SHARED_DATA_REF_KEY;
7262 else
7263 type = BTRFS_EXTENT_DATA_REF_KEY;
7264
7265 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
7266
7267 path = btrfs_alloc_path();
7268 if (!path)
7269 return -ENOMEM;
7270
7271 path->leave_spinning = 1;
7272 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7273 ins, size);
7274 if (ret) {
7275 btrfs_free_path(path);
7276 return ret;
7277 }
7278
7279 leaf = path->nodes[0];
7280 extent_item = btrfs_item_ptr(leaf, path->slots[0],
7281 struct btrfs_extent_item);
7282 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
7283 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
7284 btrfs_set_extent_flags(leaf, extent_item,
7285 flags | BTRFS_EXTENT_FLAG_DATA);
7286
7287 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
7288 btrfs_set_extent_inline_ref_type(leaf, iref, type);
7289 if (parent > 0) {
7290 struct btrfs_shared_data_ref *ref;
7291 ref = (struct btrfs_shared_data_ref *)(iref + 1);
7292 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
7293 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
7294 } else {
7295 struct btrfs_extent_data_ref *ref;
7296 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
7297 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
7298 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
7299 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
7300 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
7301 }
7302
7303 btrfs_mark_buffer_dirty(path->nodes[0]);
7304 btrfs_free_path(path);
7305
7306
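	/* Always set parent to 0 here since it's exclusive anyway. */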
7307 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
7308 ins->objectid, ins->offset,
7309 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
7310 if (ret)
7311 return ret;
7312
7313 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
7314 if (ret) {
7315 btrfs_err(fs_info, "update block group failed for %llu %llu",
7316 ins->objectid, ins->offset);
7317 BUG();
7318 }
7319 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
7320 return ret;
7321}
7322
7323static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7324 struct btrfs_root *root,
7325 u64 parent, u64 root_objectid,
7326 u64 flags, struct btrfs_disk_key *key,
7327 int level, struct btrfs_key *ins,
7328 int no_quota)
7329{
7330 int ret;
7331 struct btrfs_fs_info *fs_info = root->fs_info;
7332 struct btrfs_extent_item *extent_item;
7333 struct btrfs_tree_block_info *block_info;
7334 struct btrfs_extent_inline_ref *iref;
7335 struct btrfs_path *path;
7336 struct extent_buffer *leaf;
7337 u32 size = sizeof(*extent_item) + sizeof(*iref);
7338 u64 num_bytes = ins->offset;
7339 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
7340 SKINNY_METADATA);
7341
7342 if (!skinny_metadata)
7343 size += sizeof(*block_info);
7344
7345 path = btrfs_alloc_path();
7346 if (!path) {
7347 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7348 root->nodesize);
7349 return -ENOMEM;
7350 }
7351
7352 path->leave_spinning = 1;
7353 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7354 ins, size);
7355 if (ret) {
7356 btrfs_free_path(path);
7357 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7358 root->nodesize);
7359 return ret;
7360 }
7361
7362 leaf = path->nodes[0];
7363 extent_item = btrfs_item_ptr(leaf, path->slots[0],
7364 struct btrfs_extent_item);
7365 btrfs_set_extent_refs(leaf, extent_item, 1);
7366 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
7367 btrfs_set_extent_flags(leaf, extent_item,
7368 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
7369
7370 if (skinny_metadata) {
7371 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
7372 num_bytes = root->nodesize;
7373 } else {
7374 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
7375 btrfs_set_tree_block_key(leaf, block_info, key);
7376 btrfs_set_tree_block_level(leaf, block_info, level);
7377 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
7378 }
7379
7380 if (parent > 0) {
7381 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
7382 btrfs_set_extent_inline_ref_type(leaf, iref,
7383 BTRFS_SHARED_BLOCK_REF_KEY);
7384 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
7385 } else {
7386 btrfs_set_extent_inline_ref_type(leaf, iref,
7387 BTRFS_TREE_BLOCK_REF_KEY);
7388 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
7389 }
7390
7391 btrfs_mark_buffer_dirty(leaf);
7392 btrfs_free_path(path);
7393
7394 if (!no_quota) {
7395 ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
7396 ins->objectid, num_bytes,
7397 BTRFS_QGROUP_OPER_ADD_EXCL, 0);
7398 if (ret)
7399 return ret;
7400 }
7401
7402 ret = update_block_group(trans, root, ins->objectid, root->nodesize,
7403 1);
7404 if (ret) {
7405 btrfs_err(fs_info, "update block group failed for %llu %llu",
7406 ins->objectid, ins->offset);
7407 BUG();
7408 }
7409
7410 trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize);
7411 return ret;
7412}
7413
7414int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
7415 struct btrfs_root *root,
7416 u64 root_objectid, u64 owner,
7417 u64 offset, struct btrfs_key *ins)
7418{
7419 int ret;
7420
7421 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
7422
7423 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
7424 ins->offset, 0,
7425 root_objectid, owner, offset,
7426 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
7427 return ret;
7428}
7429
7430
7431
7432
7433
7434
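/*
 * This is used by the tree logging recovery code.  It records that an extent
 * has been allocated and makes sure to clear the free space cache bits as
 * well.
 */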
7435int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7436 struct btrfs_root *root,
7437 u64 root_objectid, u64 owner, u64 offset,
7438 struct btrfs_key *ins)
7439{
7440 int ret;
7441 struct btrfs_block_group_cache *block_group;
7442
7443
7444
7445
7446
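	/*
	 * Mixed block groups will exclude before processing the log so we
	 * only need to do the exclude dance if this fs isn't mixed.
	 */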
7447 if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
7448 ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
7449 if (ret)
7450 return ret;
7451 }
7452
7453 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
7454 if (!block_group)
7455 return -EINVAL;
7456
7457 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
7458 RESERVE_ALLOC_NO_ACCOUNT, 0);
7459 BUG_ON(ret);
7460 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
7461 0, owner, offset, ins, 1);
7462 btrfs_put_block_group(block_group);
7463 return ret;
7464}
7465
7466static struct extent_buffer *
7467btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7468 u64 bytenr, int level)
7469{
7470 struct extent_buffer *buf;
7471
7472 buf = btrfs_find_create_tree_block(root, bytenr);
7473 if (!buf)
7474 return ERR_PTR(-ENOMEM);
7475 btrfs_set_header_generation(buf, trans->transid);
7476 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
7477 btrfs_tree_lock(buf);
7478 clean_tree_block(trans, root->fs_info, buf);
7479 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
7480
7481 btrfs_set_lock_blocking(buf);
7482 btrfs_set_buffer_uptodate(buf);
7483
7484 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
7485 buf->log_index = root->log_transid % 2;
7486
7487
7488
7489
7490 if (buf->log_index == 0)
7491 set_extent_dirty(&root->dirty_log_pages, buf->start,
7492 buf->start + buf->len - 1, GFP_NOFS);
7493 else
7494 set_extent_new(&root->dirty_log_pages, buf->start,
7495 buf->start + buf->len - 1, GFP_NOFS);
7496 } else {
7497 buf->log_index = -1;
7498 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
7499 buf->start + buf->len - 1, GFP_NOFS);
7500 }
7501 trans->blocks_used++;
7502
7503 return buf;
7504}
7505
7506static struct btrfs_block_rsv *
7507use_block_rsv(struct btrfs_trans_handle *trans,
7508 struct btrfs_root *root, u32 blocksize)
7509{
7510 struct btrfs_block_rsv *block_rsv;
7511 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
7512 int ret;
7513 bool global_updated = false;
7514
7515 block_rsv = get_block_rsv(trans, root);
7516
7517 if (unlikely(block_rsv->size == 0))
7518 goto try_reserve;
7519again:
7520 ret = block_rsv_use_bytes(block_rsv, blocksize);
7521 if (!ret)
7522 return block_rsv;
7523
7524 if (block_rsv->failfast)
7525 return ERR_PTR(ret);
7526
7527 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
7528 global_updated = true;
7529 update_global_block_rsv(root->fs_info);
7530 goto again;
7531 }
7532
7533 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
7534 static DEFINE_RATELIMIT_STATE(_rs,
7535 DEFAULT_RATELIMIT_INTERVAL * 10,
7536 1);
7537 if (__ratelimit(&_rs))
7538 WARN(1, KERN_DEBUG
7539 "BTRFS: block rsv returned %d\n", ret);
7540 }
7541try_reserve:
7542 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
7543 BTRFS_RESERVE_NO_FLUSH);
7544 if (!ret)
7545 return block_rsv;
7546
7547
7548
7549
7550
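	/*
	 * If we couldn't reserve metadata bytes, try to steal from the global
	 * reserve as long as it accounts against the same space info.
	 */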
7551 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
7552 block_rsv->space_info == global_rsv->space_info) {
7553 ret = block_rsv_use_bytes(global_rsv, blocksize);
7554 if (!ret)
7555 return global_rsv;
7556 }
7557 return ERR_PTR(ret);
7558}
7559
7560static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
7561 struct btrfs_block_rsv *block_rsv, u32 blocksize)
7562{
7563 block_rsv_add_bytes(block_rsv, blocksize, 0);
7564 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
7565}
7566
7567
7568
7569
7570
7571
7572
7573
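/*
 * Finds a free extent and does all the dirty work required for allocation.
 * Returns the key for the extent through ins, and a tree buffer for the
 * first block of the extent through the return value, or an ERR_PTR on
 * error.
 */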
7574struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
7575 struct btrfs_root *root,
7576 u64 parent, u64 root_objectid,
7577 struct btrfs_disk_key *key, int level,
7578 u64 hint, u64 empty_size)
7579{
7580 struct btrfs_key ins;
7581 struct btrfs_block_rsv *block_rsv;
7582 struct extent_buffer *buf;
7583 struct btrfs_delayed_extent_op *extent_op;
7584 u64 flags = 0;
7585 int ret;
7586 u32 blocksize = root->nodesize;
7587 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
7588 SKINNY_METADATA);
7589
7590 if (btrfs_test_is_dummy_root(root)) {
7591 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
7592 level);
7593 if (!IS_ERR(buf))
7594 root->alloc_bytenr += blocksize;
7595 return buf;
7596 }
7597
7598 block_rsv = use_block_rsv(trans, root, blocksize);
7599 if (IS_ERR(block_rsv))
7600 return ERR_CAST(block_rsv);
7601
7602 ret = btrfs_reserve_extent(root, blocksize, blocksize,
7603 empty_size, hint, &ins, 0, 0);
7604 if (ret)
7605 goto out_unuse;
7606
7607 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
7608 if (IS_ERR(buf)) {
7609 ret = PTR_ERR(buf);
7610 goto out_free_reserved;
7611 }
7612
7613 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
7614 if (parent == 0)
7615 parent = ins.objectid;
7616 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7617 } else
7618 BUG_ON(parent > 0);
7619
7620 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
7621 extent_op = btrfs_alloc_delayed_extent_op();
7622 if (!extent_op) {
7623 ret = -ENOMEM;
7624 goto out_free_buf;
7625 }
7626 if (key)
7627 memcpy(&extent_op->key, key, sizeof(extent_op->key));
7628 else
7629 memset(&extent_op->key, 0, sizeof(extent_op->key));
7630 extent_op->flags_to_set = flags;
7631 if (skinny_metadata)
7632 extent_op->update_key = 0;
7633 else
7634 extent_op->update_key = 1;
7635 extent_op->update_flags = 1;
7636 extent_op->is_data = 0;
7637 extent_op->level = level;
7638
7639 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
7640 ins.objectid, ins.offset,
7641 parent, root_objectid, level,
7642 BTRFS_ADD_DELAYED_EXTENT,
7643 extent_op, 0);
7644 if (ret)
7645 goto out_free_delayed;
7646 }
7647 return buf;
7648
7649out_free_delayed:
7650 btrfs_free_delayed_extent_op(extent_op);
7651out_free_buf:
7652 free_extent_buffer(buf);
7653out_free_reserved:
7654 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
7655out_unuse:
7656 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
7657 return ERR_PTR(ret);
7658}
7659
7660struct walk_control {
7661 u64 refs[BTRFS_MAX_LEVEL];
7662 u64 flags[BTRFS_MAX_LEVEL];
7663 struct btrfs_key update_progress;
7664 int stage;
7665 int level;
7666 int shared_level;
7667 int update_ref;
7668 int keep_locks;
7669 int reada_slot;
7670 int reada_count;
7671 int for_reloc;
7672};
7673
7674#define DROP_REFERENCE 1
7675#define UPDATE_BACKREF 2
7676
7677static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
7678 struct btrfs_root *root,
7679 struct walk_control *wc,
7680 struct btrfs_path *path)
7681{
7682 u64 bytenr;
7683 u64 generation;
7684 u64 refs;
7685 u64 flags;
7686 u32 nritems;
7687 u32 blocksize;
7688 struct btrfs_key key;
7689 struct extent_buffer *eb;
7690 int ret;
7691 int slot;
7692 int nread = 0;
7693
7694 if (path->slots[wc->level] < wc->reada_slot) {
7695 wc->reada_count = wc->reada_count * 2 / 3;
7696 wc->reada_count = max(wc->reada_count, 2);
7697 } else {
7698 wc->reada_count = wc->reada_count * 3 / 2;
7699 wc->reada_count = min_t(int, wc->reada_count,
7700 BTRFS_NODEPTRS_PER_BLOCK(root));
7701 }
7702
7703 eb = path->nodes[wc->level];
7704 nritems = btrfs_header_nritems(eb);
7705 blocksize = root->nodesize;
7706
7707 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
7708 if (nread >= wc->reada_count)
7709 break;
7710
7711 cond_resched();
7712 bytenr = btrfs_node_blockptr(eb, slot);
7713 generation = btrfs_node_ptr_generation(eb, slot);
7714
7715 if (slot == path->slots[wc->level])
7716 goto reada;
7717
7718 if (wc->stage == UPDATE_BACKREF &&
7719 generation <= root->root_key.offset)
7720 continue;
7721
7722
7723 ret = btrfs_lookup_extent_info(trans, root, bytenr,
7724 wc->level - 1, 1, &refs,
7725 &flags);
7726
7727 if (ret < 0)
7728 continue;
7729 BUG_ON(refs == 0);
7730
7731 if (wc->stage == DROP_REFERENCE) {
7732 if (refs == 1)
7733 goto reada;
7734
7735 if (wc->level == 1 &&
7736 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7737 continue;
7738 if (!wc->update_ref ||
7739 generation <= root->root_key.offset)
7740 continue;
7741 btrfs_node_key_to_cpu(eb, &key, slot);
7742 ret = btrfs_comp_cpu_keys(&key,
7743 &wc->update_progress);
7744 if (ret < 0)
7745 continue;
7746 } else {
7747 if (wc->level == 1 &&
7748 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7749 continue;
7750 }
7751reada:
7752 readahead_tree_block(root, bytenr);
7753 nread++;
7754 }
7755 wc->reada_slot = slot;
7756}
7757
7758static int account_leaf_items(struct btrfs_trans_handle *trans,
7759 struct btrfs_root *root,
7760 struct extent_buffer *eb)
7761{
7762 int nr = btrfs_header_nritems(eb);
7763 int i, extent_type, ret;
7764 struct btrfs_key key;
7765 struct btrfs_file_extent_item *fi;
7766 u64 bytenr, num_bytes;
7767
7768 for (i = 0; i < nr; i++) {
7769 btrfs_item_key_to_cpu(eb, &key, i);
7770
7771 if (key.type != BTRFS_EXTENT_DATA_KEY)
7772 continue;
7773
7774 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
7775
7776 extent_type = btrfs_file_extent_type(eb, fi);
7777
7778 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
7779 continue;
7780
7781 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
7782 if (!bytenr)
7783 continue;
7784
7785 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
7786
7787 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7788 root->objectid,
7789 bytenr, num_bytes,
7790 BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
7791 if (ret)
7792 return ret;
7793 }
7794 return 0;
7795}
7796
7797
7798
7799
7800
7801
7802
7803
7804
7805
7806
7807
7808
7809
7810
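/*
 * Walk up the tree from the bottom, releasing leaves and any interior nodes
 * which have had all of their slots visited.  Whenever a node is released,
 * the slot of the node above it is incremented.  The root node is never
 * released.
 *
 * At the end of this function the path has every slot incremented to the
 * next position for a search; nodes that need to be read are NULL and the
 * node above them has the correct slot selected for a later read.
 *
 * Returns 1 if the root node's slot counter was incremented past the number
 * of elements, signalling completion of the walk, and 0 otherwise.
 */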
7811static int adjust_slots_upwards(struct btrfs_root *root,
7812 struct btrfs_path *path, int root_level)
7813{
7814 int level = 0;
7815 int nr, slot;
7816 struct extent_buffer *eb;
7817
7818 if (root_level == 0)
7819 return 1;
7820
7821 while (level <= root_level) {
7822 eb = path->nodes[level];
7823 nr = btrfs_header_nritems(eb);
7824 path->slots[level]++;
7825 slot = path->slots[level];
7826 if (slot >= nr || level == 0) {
7827
7828
7829
7830
7831
7832 if (level != root_level) {
7833 btrfs_tree_unlock_rw(eb, path->locks[level]);
7834 path->locks[level] = 0;
7835
7836 free_extent_buffer(eb);
7837 path->nodes[level] = NULL;
7838 path->slots[level] = 0;
7839 }
7840 } else {
7841
7842
7843
7844
7845
7846 break;
7847 }
7848
7849 level++;
7850 }
7851
7852 eb = path->nodes[root_level];
7853 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7854 return 1;
7855
7856 return 0;
7857}
7858
7859
7860
7861
7862static int account_shared_subtree(struct btrfs_trans_handle *trans,
7863 struct btrfs_root *root,
7864 struct extent_buffer *root_eb,
7865 u64 root_gen,
7866 int root_level)
7867{
7868 int ret = 0;
7869 int level;
7870 struct extent_buffer *eb = root_eb;
7871 struct btrfs_path *path = NULL;
7872
7873 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
7874 BUG_ON(root_eb == NULL);
7875
7876 if (!root->fs_info->quota_enabled)
7877 return 0;
7878
7879 if (!extent_buffer_uptodate(root_eb)) {
7880 ret = btrfs_read_buffer(root_eb, root_gen);
7881 if (ret)
7882 goto out;
7883 }
7884
7885 if (root_level == 0) {
7886 ret = account_leaf_items(trans, root, root_eb);
7887 goto out;
7888 }
7889
7890 path = btrfs_alloc_path();
7891 if (!path)
7892 return -ENOMEM;
7893
7894
7895
7896
7897
7898
7899
7900
7901
7902
7903 extent_buffer_get(root_eb);
7904 path->nodes[root_level] = root_eb;
7905 path->slots[root_level] = 0;
7906 path->locks[root_level] = 0;
7907walk_down:
7908 level = root_level;
7909 while (level >= 0) {
7910 if (path->nodes[level] == NULL) {
7911 int parent_slot;
7912 u64 child_gen;
7913 u64 child_bytenr;
7914
7915
7916
7917 eb = path->nodes[level + 1];
7918 parent_slot = path->slots[level + 1];
7919 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
7920 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
7921
			eb = read_tree_block(root, child_bytenr, child_gen);
			if (!eb || !extent_buffer_uptodate(eb)) {
				/* drop any half-read buffer before bailing */
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}
7927
7928 path->nodes[level] = eb;
7929 path->slots[level] = 0;
7930
7931 btrfs_tree_read_lock(eb);
7932 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
7933 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
7934
7935 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7936 root->objectid,
7937 child_bytenr,
7938 root->nodesize,
7939 BTRFS_QGROUP_OPER_SUB_SUBTREE,
7940 0);
7941 if (ret)
7942 goto out;
7943
7944 }
7945
7946 if (level == 0) {
7947 ret = account_leaf_items(trans, root, path->nodes[level]);
7948 if (ret)
7949 goto out;
7950
7951
7952 ret = adjust_slots_upwards(root, path, root_level);
7953 if (ret)
7954 break;
7955
7956
7957 goto walk_down;
7958 }
7959
7960 level--;
7961 }
7962
7963 ret = 0;
7964out:
7965 btrfs_free_path(path);
7966
7967 return ret;
7968}
7969
7970
7971
7972
7973
7974
7975
7976
7977
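/*
 * Helper to process a tree block while walking down the tree.
 *
 * When wc->stage == UPDATE_BACKREF, this function updates back refs for
 * pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */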
7978static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
7979 struct btrfs_root *root,
7980 struct btrfs_path *path,
7981 struct walk_control *wc, int lookup_info)
7982{
7983 int level = wc->level;
7984 struct extent_buffer *eb = path->nodes[level];
7985 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
7986 int ret;
7987
7988 if (wc->stage == UPDATE_BACKREF &&
7989 btrfs_header_owner(eb) != root->root_key.objectid)
7990 return 1;
7991
7992
7993
7994
7995
7996 if (lookup_info &&
7997 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
7998 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
7999 BUG_ON(!path->locks[level]);
8000 ret = btrfs_lookup_extent_info(trans, root,
8001 eb->start, level, 1,
8002 &wc->refs[level],
8003 &wc->flags[level]);
8004 BUG_ON(ret == -ENOMEM);
8005 if (ret)
8006 return ret;
8007 BUG_ON(wc->refs[level] == 0);
8008 }
8009
8010 if (wc->stage == DROP_REFERENCE) {
8011 if (wc->refs[level] > 1)
8012 return 1;
8013
8014 if (path->locks[level] && !wc->keep_locks) {
8015 btrfs_tree_unlock_rw(eb, path->locks[level]);
8016 path->locks[level] = 0;
8017 }
8018 return 0;
8019 }
8020
8021
8022 if (!(wc->flags[level] & flag)) {
8023 BUG_ON(!path->locks[level]);
8024 ret = btrfs_inc_ref(trans, root, eb, 1);
8025 BUG_ON(ret);
8026 ret = btrfs_dec_ref(trans, root, eb, 0);
8027 BUG_ON(ret);
8028 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
8029 eb->len, flag,
8030 btrfs_header_level(eb), 0);
8031 BUG_ON(ret);
8032 wc->flags[level] |= flag;
8033 }
8034
8035
8036
8037
8038
8039 if (path->locks[level] && level > 0) {
8040 btrfs_tree_unlock_rw(eb, path->locks[level]);
8041 path->locks[level] = 0;
8042 }
8043 return 0;
8044}
8045
8046
8047
8048
8049
8050
8051
8052
8053
8054
8055
8056
8057
8058
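/*
 * Helper to process a tree block pointer.
 *
 * When wc->stage == DROP_REFERENCE, this function checks the reference
 * count of the block pointed to.  If the block is shared and back refs do
 * not need updating, we skip it and just drop our reference; otherwise we
 * switch to the UPDATE_BACKREF stage and walk down into the block so the
 * back refs of its children get updated too.
 *
 * NOTE: return value 1 means we should stop walking down.
 */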
8059static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8060 struct btrfs_root *root,
8061 struct btrfs_path *path,
8062 struct walk_control *wc, int *lookup_info)
8063{
8064 u64 bytenr;
8065 u64 generation;
8066 u64 parent;
8067 u32 blocksize;
8068 struct btrfs_key key;
8069 struct extent_buffer *next;
8070 int level = wc->level;
8071 int reada = 0;
8072 int ret = 0;
8073 bool need_account = false;
8074
8075 generation = btrfs_node_ptr_generation(path->nodes[level],
8076 path->slots[level]);
8077
8078
8079
8080
8081
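	/*
	 * If the lower level block was created before the snapshot was
	 * created, we know there is no need to update back refs for the
	 * subtree.
	 */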
8082 if (wc->stage == UPDATE_BACKREF &&
8083 generation <= root->root_key.offset) {
8084 *lookup_info = 1;
8085 return 1;
8086 }
8087
8088 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8089 blocksize = root->nodesize;
8090
8091 next = btrfs_find_tree_block(root->fs_info, bytenr);
8092 if (!next) {
8093 next = btrfs_find_create_tree_block(root, bytenr);
8094 if (!next)
8095 return -ENOMEM;
8096 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
8097 level - 1);
8098 reada = 1;
8099 }
8100 btrfs_tree_lock(next);
8101 btrfs_set_lock_blocking(next);
8102
8103 ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
8104 &wc->refs[level - 1],
8105 &wc->flags[level - 1]);
8106 if (ret < 0) {
8107 btrfs_tree_unlock(next);
8108 return ret;
8109 }
8110
8111 if (unlikely(wc->refs[level - 1] == 0)) {
8112 btrfs_err(root->fs_info, "Missing references.");
8113 BUG();
8114 }
8115 *lookup_info = 0;
8116
8117 if (wc->stage == DROP_REFERENCE) {
8118 if (wc->refs[level - 1] > 1) {
8119 need_account = true;
8120 if (level == 1 &&
8121 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8122 goto skip;
8123
8124 if (!wc->update_ref ||
8125 generation <= root->root_key.offset)
8126 goto skip;
8127
8128 btrfs_node_key_to_cpu(path->nodes[level], &key,
8129 path->slots[level]);
8130 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
8131 if (ret < 0)
8132 goto skip;
8133
8134 wc->stage = UPDATE_BACKREF;
8135 wc->shared_level = level - 1;
8136 }
8137 } else {
8138 if (level == 1 &&
8139 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8140 goto skip;
8141 }
8142
8143 if (!btrfs_buffer_uptodate(next, generation, 0)) {
8144 btrfs_tree_unlock(next);
8145 free_extent_buffer(next);
8146 next = NULL;
8147 *lookup_info = 1;
8148 }
8149
8150 if (!next) {
8151 if (reada && level == 1)
8152 reada_walk_down(trans, root, wc, path);
8153 next = read_tree_block(root, bytenr, generation);
8154 if (!next || !extent_buffer_uptodate(next)) {
8155 free_extent_buffer(next);
8156 return -EIO;
8157 }
8158 btrfs_tree_lock(next);
8159 btrfs_set_lock_blocking(next);
8160 }
8161
8162 level--;
8163 BUG_ON(level != btrfs_header_level(next));
8164 path->nodes[level] = next;
8165 path->slots[level] = 0;
8166 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8167 wc->level = level;
8168 if (wc->level == 1)
8169 wc->reada_slot = 0;
8170 return 0;
8171skip:
8172 wc->refs[level - 1] = 0;
8173 wc->flags[level - 1] = 0;
8174 if (wc->stage == DROP_REFERENCE) {
8175 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8176 parent = path->nodes[level]->start;
8177 } else {
8178 BUG_ON(root->root_key.objectid !=
8179 btrfs_header_owner(path->nodes[level]));
8180 parent = 0;
8181 }
8182
8183 if (need_account) {
8184 ret = account_shared_subtree(trans, root, next,
8185 generation, level - 1);
8186 if (ret) {
8187 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
8188 "%d accounting shared subtree. Quota "
8189 "is out of sync, rescan required.\n",
8190 root->fs_info->sb->s_id, ret);
8191 }
8192 }
8193 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
8194 root->root_key.objectid, level - 1, 0, 0);
8195 BUG_ON(ret);
8196 }
8197 btrfs_tree_unlock(next);
8198 free_extent_buffer(next);
8199 *lookup_info = 1;
8200 return 1;
8201}
8202
8203
8204
8205
8206
8207
8208
8209
8210
8211
8212
8213
8214
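/*
 * Helper to process a tree block while walking up the tree.
 *
 * When wc->stage == DROP_REFERENCE, this function drops the reference
 * count on the block.
 *
 * When wc->stage == UPDATE_BACKREF, this function changes wc->stage back
 * to DROP_REFERENCE if we changed wc->stage to UPDATE_BACKREF previously
 * while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */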
8215static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
8216 struct btrfs_root *root,
8217 struct btrfs_path *path,
8218 struct walk_control *wc)
8219{
8220 int ret;
8221 int level = wc->level;
8222 struct extent_buffer *eb = path->nodes[level];
8223 u64 parent = 0;
8224
8225 if (wc->stage == UPDATE_BACKREF) {
8226 BUG_ON(wc->shared_level < level);
8227 if (level < wc->shared_level)
8228 goto out;
8229
8230 ret = find_next_key(path, level + 1, &wc->update_progress);
8231 if (ret > 0)
8232 wc->update_ref = 0;
8233
8234 wc->stage = DROP_REFERENCE;
8235 wc->shared_level = -1;
8236 path->slots[level] = 0;
8237
8238
8239
8240
8241
8242
8243 if (!path->locks[level]) {
8244 BUG_ON(level == 0);
8245 btrfs_tree_lock(eb);
8246 btrfs_set_lock_blocking(eb);
8247 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8248
8249 ret = btrfs_lookup_extent_info(trans, root,
8250 eb->start, level, 1,
8251 &wc->refs[level],
8252 &wc->flags[level]);
8253 if (ret < 0) {
8254 btrfs_tree_unlock_rw(eb, path->locks[level]);
8255 path->locks[level] = 0;
8256 return ret;
8257 }
8258 BUG_ON(wc->refs[level] == 0);
8259 if (wc->refs[level] == 1) {
8260 btrfs_tree_unlock_rw(eb, path->locks[level]);
8261 path->locks[level] = 0;
8262 return 1;
8263 }
8264 }
8265 }
8266
8267
8268 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
8269
8270 if (wc->refs[level] == 1) {
8271 if (level == 0) {
8272 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8273 ret = btrfs_dec_ref(trans, root, eb, 1);
8274 else
8275 ret = btrfs_dec_ref(trans, root, eb, 0);
8276 BUG_ON(ret);
8277 ret = account_leaf_items(trans, root, eb);
8278 if (ret) {
8279 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
8280 "%d accounting leaf items. Quota "
8281 "is out of sync, rescan required.\n",
8282 root->fs_info->sb->s_id, ret);
8283 }
8284 }
8285
8286 if (!path->locks[level] &&
8287 btrfs_header_generation(eb) == trans->transid) {
8288 btrfs_tree_lock(eb);
8289 btrfs_set_lock_blocking(eb);
8290 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8291 }
8292 clean_tree_block(trans, root->fs_info, eb);
8293 }
8294
8295 if (eb == root->node) {
8296 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8297 parent = eb->start;
8298 else
8299 BUG_ON(root->root_key.objectid !=
8300 btrfs_header_owner(eb));
8301 } else {
8302 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
8303 parent = path->nodes[level + 1]->start;
8304 else
8305 BUG_ON(root->root_key.objectid !=
8306 btrfs_header_owner(path->nodes[level + 1]));
8307 }
8308
8309 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
8310out:
8311 wc->refs[level] = 0;
8312 wc->flags[level] = 0;
8313 return 0;
8314}
8315
8316static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
8317 struct btrfs_root *root,
8318 struct btrfs_path *path,
8319 struct walk_control *wc)
8320{
8321 int level = wc->level;
8322 int lookup_info = 1;
8323 int ret;
8324
8325 while (level >= 0) {
8326 ret = walk_down_proc(trans, root, path, wc, lookup_info);
8327 if (ret > 0)
8328 break;
8329
8330 if (level == 0)
8331 break;
8332
8333 if (path->slots[level] >=
8334 btrfs_header_nritems(path->nodes[level]))
8335 break;
8336
8337 ret = do_walk_down(trans, root, path, wc, &lookup_info);
8338 if (ret > 0) {
8339 path->slots[level]++;
8340 continue;
8341 } else if (ret < 0)
8342 return ret;
8343 level = wc->level;
8344 }
8345 return 0;
8346}
8347
8348static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
8349 struct btrfs_root *root,
8350 struct btrfs_path *path,
8351 struct walk_control *wc, int max_level)
8352{
8353 int level = wc->level;
8354 int ret;
8355
8356 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
8357 while (level < max_level && path->nodes[level]) {
8358 wc->level = level;
8359 if (path->slots[level] + 1 <
8360 btrfs_header_nritems(path->nodes[level])) {
8361 path->slots[level]++;
8362 return 0;
8363 } else {
8364 ret = walk_up_proc(trans, root, path, wc);
8365 if (ret > 0)
8366 return 0;
8367
8368 if (path->locks[level]) {
8369 btrfs_tree_unlock_rw(path->nodes[level],
8370 path->locks[level]);
8371 path->locks[level] = 0;
8372 }
8373 free_extent_buffer(path->nodes[level]);
8374 path->nodes[level] = NULL;
8375 level++;
8376 }
8377 }
8378 return 1;
8379}
8380
8381
8382
8383
8384
8385
8386
8387
8388
8389
8390
8391
8392
8393
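/*
 * Drop a subvolume tree.
 *
 * This function traverses the tree freeing any blocks that are only
 * referenced by the tree.
 *
 * When a shared tree block is found, this function decreases its reference
 * count by one.  If update_ref is true, it also makes sure backrefs for the
 * shared block and all lower level blocks are properly updated.
 *
 * If called with for_reloc == 0, this may exit early with -EAGAIN.
 */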
8394int btrfs_drop_snapshot(struct btrfs_root *root,
8395 struct btrfs_block_rsv *block_rsv, int update_ref,
8396 int for_reloc)
8397{
8398 struct btrfs_path *path;
8399 struct btrfs_trans_handle *trans;
8400 struct btrfs_root *tree_root = root->fs_info->tree_root;
8401 struct btrfs_root_item *root_item = &root->root_item;
8402 struct walk_control *wc;
8403 struct btrfs_key key;
8404 int err = 0;
8405 int ret;
8406 int level;
8407 bool root_dropped = false;
8408
8409 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8410
8411 path = btrfs_alloc_path();
8412 if (!path) {
8413 err = -ENOMEM;
8414 goto out;
8415 }
8416
8417 wc = kzalloc(sizeof(*wc), GFP_NOFS);
8418 if (!wc) {
8419 btrfs_free_path(path);
8420 err = -ENOMEM;
8421 goto out;
8422 }
8423
8424 trans = btrfs_start_transaction(tree_root, 0);
8425 if (IS_ERR(trans)) {
8426 err = PTR_ERR(trans);
8427 goto out_free;
8428 }
8429
8430 if (block_rsv)
8431 trans->block_rsv = block_rsv;
8432
8433 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
8434 level = btrfs_header_level(root->node);
8435 path->nodes[level] = btrfs_lock_root_node(root);
8436 btrfs_set_lock_blocking(path->nodes[level]);
8437 path->slots[level] = 0;
8438 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8439 memset(&wc->update_progress, 0,
8440 sizeof(wc->update_progress));
8441 } else {
8442 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
8443 memcpy(&wc->update_progress, &key,
8444 sizeof(wc->update_progress));
8445
8446 level = root_item->drop_level;
8447 BUG_ON(level == 0);
8448 path->lowest_level = level;
8449 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8450 path->lowest_level = 0;
8451 if (ret < 0) {
8452 err = ret;
8453 goto out_end_trans;
8454 }
8455 WARN_ON(ret > 0);
8456
8457
8458
8459
8460
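		/*
		 * Unlock our path; this is safe because only this function
		 * is allowed to delete this snapshot.
		 */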
8461 btrfs_unlock_up_safe(path, 0);
8462
8463 level = btrfs_header_level(root->node);
8464 while (1) {
8465 btrfs_tree_lock(path->nodes[level]);
8466 btrfs_set_lock_blocking(path->nodes[level]);
8467 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8468
8469 ret = btrfs_lookup_extent_info(trans, root,
8470 path->nodes[level]->start,
8471 level, 1, &wc->refs[level],
8472 &wc->flags[level]);
8473 if (ret < 0) {
8474 err = ret;
8475 goto out_end_trans;
8476 }
8477 BUG_ON(wc->refs[level] == 0);
8478
8479 if (level == root_item->drop_level)
8480 break;
8481
8482 btrfs_tree_unlock(path->nodes[level]);
8483 path->locks[level] = 0;
8484 WARN_ON(wc->refs[level] != 1);
8485 level--;
8486 }
8487 }
8488
8489 wc->level = level;
8490 wc->shared_level = -1;
8491 wc->stage = DROP_REFERENCE;
8492 wc->update_ref = update_ref;
8493 wc->keep_locks = 0;
8494 wc->for_reloc = for_reloc;
8495 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
8496
8497 while (1) {
8498
8499 ret = walk_down_tree(trans, root, path, wc);
8500 if (ret < 0) {
8501 err = ret;
8502 break;
8503 }
8504
8505 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
8506 if (ret < 0) {
8507 err = ret;
8508 break;
8509 }
8510
8511 if (ret > 0) {
8512 BUG_ON(wc->stage != DROP_REFERENCE);
8513 break;
8514 }
8515
8516 if (wc->stage == DROP_REFERENCE) {
8517 level = wc->level;
8518 btrfs_node_key(path->nodes[level],
8519 &root_item->drop_progress,
8520 path->slots[level]);
8521 root_item->drop_level = level;
8522 }
8523
8524 BUG_ON(wc->level == 0);
8525 if (btrfs_should_end_transaction(trans, tree_root) ||
8526 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
8527 ret = btrfs_update_root(trans, tree_root,
8528 &root->root_key,
8529 root_item);
8530 if (ret) {
8531 btrfs_abort_transaction(trans, tree_root, ret);
8532 err = ret;
8533 goto out_end_trans;
8534 }
8535
8536
8537
8538
8539
8540
8541
8542
8543
8544
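			/*
			 * Qgroup update accounting is run from delayed ref
			 * handling.  This usually works out because delayed
			 * refs are normally the only way qgroup updates are
			 * added.  However, we may have added updates during
			 * our tree walk, so run qgroups here to make sure we
			 * don't lose those updates.
			 */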
8545 ret = btrfs_delayed_qgroup_accounting(trans,
8546 root->fs_info);
8547 if (ret)
8548 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8549 "running qgroup updates "
8550 "during snapshot delete. "
8551 "Quota is out of sync, "
8552 "rescan required.\n", ret);
8553
8554 btrfs_end_transaction_throttle(trans, tree_root);
8555 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8556 pr_debug("BTRFS: drop snapshot early exit\n");
8557 err = -EAGAIN;
8558 goto out_free;
8559 }
8560
8561 trans = btrfs_start_transaction(tree_root, 0);
8562 if (IS_ERR(trans)) {
8563 err = PTR_ERR(trans);
8564 goto out_free;
8565 }
8566 if (block_rsv)
8567 trans->block_rsv = block_rsv;
8568 }
8569 }
8570 btrfs_release_path(path);
8571 if (err)
8572 goto out_end_trans;
8573
8574 ret = btrfs_del_root(trans, tree_root, &root->root_key);
8575 if (ret) {
8576 btrfs_abort_transaction(trans, tree_root, ret);
8577 goto out_end_trans;
8578 }
8579
8580 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8581 ret = btrfs_find_root(tree_root, &root->root_key, path,
8582 NULL, NULL);
8583 if (ret < 0) {
8584 btrfs_abort_transaction(trans, tree_root, ret);
8585 err = ret;
8586 goto out_end_trans;
8587 } else if (ret > 0) {
8588
8589
8590
8591
8592
8593 btrfs_del_orphan_item(trans, tree_root,
8594 root->root_key.objectid);
8595 }
8596 }
8597
8598 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
8599 btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
8600 } else {
8601 free_extent_buffer(root->node);
8602 free_extent_buffer(root->commit_root);
8603 btrfs_put_fs_root(root);
8604 }
8605 root_dropped = true;
8606out_end_trans:
8607 ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
8608 if (ret)
8609 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8610 "running qgroup updates "
8611 "during snapshot delete. "
8612 "Quota is out of sync, "
8613 "rescan required.\n", ret);
8614
8615 btrfs_end_transaction_throttle(trans, tree_root);
8616out_free:
8617 kfree(wc);
8618 btrfs_free_path(path);
8619out:
8620
8621
8622
8623
8624
8625
8626
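	/*
	 * If we had to stop dropping the snapshot for whatever reason, add it
	 * back to the dead root list so that it is kept around and cleaned up
	 * later.
	 */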
	if (!for_reloc && !root_dropped)
8628 btrfs_add_dead_root(root);
8629 if (err && err != -EAGAIN)
8630 btrfs_std_error(root->fs_info, err);
8631 return err;
8632}
8633
8634
8635
8636
8637
8638
8639
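/*
 * Drop the subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'.
 * Only used by relocation code.
 */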
8640int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
8641 struct btrfs_root *root,
8642 struct extent_buffer *node,
8643 struct extent_buffer *parent)
8644{
8645 struct btrfs_path *path;
8646 struct walk_control *wc;
8647 int level;
8648 int parent_level;
8649 int ret = 0;
8650 int wret;
8651
8652 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
8653
8654 path = btrfs_alloc_path();
8655 if (!path)
8656 return -ENOMEM;
8657
8658 wc = kzalloc(sizeof(*wc), GFP_NOFS);
8659 if (!wc) {
8660 btrfs_free_path(path);
8661 return -ENOMEM;
8662 }
8663
8664 btrfs_assert_tree_locked(parent);
8665 parent_level = btrfs_header_level(parent);
8666 extent_buffer_get(parent);
8667 path->nodes[parent_level] = parent;
8668 path->slots[parent_level] = btrfs_header_nritems(parent);
8669
8670 btrfs_assert_tree_locked(node);
8671 level = btrfs_header_level(node);
8672 path->nodes[level] = node;
8673 path->slots[level] = 0;
8674 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8675
8676 wc->refs[parent_level] = 1;
8677 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8678 wc->level = level;
8679 wc->shared_level = -1;
8680 wc->stage = DROP_REFERENCE;
8681 wc->update_ref = 0;
8682 wc->keep_locks = 1;
8683 wc->for_reloc = 1;
8684 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
8685
8686 while (1) {
8687 wret = walk_down_tree(trans, root, path, wc);
8688 if (wret < 0) {
8689 ret = wret;
8690 break;
8691 }
8692
8693 wret = walk_up_tree(trans, root, path, wc, parent_level);
8694 if (wret < 0)
8695 ret = wret;
8696 if (wret != 0)
8697 break;
8698 }
8699
8700 kfree(wc);
8701 btrfs_free_path(path);
8702 return ret;
8703}
8704
8705static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8706{
8707 u64 num_devices;
8708 u64 stripped;
8709
8710
8711
8712
8713
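	/*
	 * If restripe for this chunk_type is on, pick the target profile and
	 * return; otherwise do the usual balance.
	 */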
8714 stripped = get_restripe_target(root->fs_info, flags);
8715 if (stripped)
8716 return extended_to_chunk(stripped);
8717
8718 num_devices = root->fs_info->fs_devices->rw_devices;
8719
8720 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8721 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
8722 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
8723
8724 if (num_devices == 1) {
8725 stripped |= BTRFS_BLOCK_GROUP_DUP;
8726 stripped = flags & ~stripped;
8727
8728
8729 if (flags & BTRFS_BLOCK_GROUP_RAID0)
8730 return stripped;
8731
8732
8733 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
8734 BTRFS_BLOCK_GROUP_RAID10))
8735 return stripped | BTRFS_BLOCK_GROUP_DUP;
8736 } else {
8737
8738 if (flags & stripped)
8739 return flags;
8740
8741 stripped |= BTRFS_BLOCK_GROUP_DUP;
8742 stripped = flags & ~stripped;
8743
8744
8745 if (flags & BTRFS_BLOCK_GROUP_DUP)
8746 return stripped | BTRFS_BLOCK_GROUP_RAID1;
8747
8748
8749 }
8750
8751 return flags;
8752}
8753
8754static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
8755{
8756 struct btrfs_space_info *sinfo = cache->space_info;
8757 u64 num_bytes;
8758 u64 min_allocable_bytes;
8759 int ret = -ENOSPC;
8760
8761
8762
8763
8764
8765
8766
8767 if ((sinfo->flags &
8768 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
8769 !force)
8770 min_allocable_bytes = 1 * 1024 * 1024;
8771 else
8772 min_allocable_bytes = 0;
8773
8774 spin_lock(&sinfo->lock);
8775 spin_lock(&cache->lock);
8776
8777 if (cache->ro) {
8778 ret = 0;
8779 goto out;
8780 }
8781
8782 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8783 cache->bytes_super - btrfs_block_group_used(&cache->item);
8784
8785 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
8786 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
8787 min_allocable_bytes <= sinfo->total_bytes) {
8788 sinfo->bytes_readonly += num_bytes;
8789 cache->ro = 1;
8790 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
8791 ret = 0;
8792 }
8793out:
8794 spin_unlock(&cache->lock);
8795 spin_unlock(&sinfo->lock);
8796 return ret;
8797}
8798
8799int btrfs_set_block_group_ro(struct btrfs_root *root,
8800 struct btrfs_block_group_cache *cache)
8801
8802{
8803 struct btrfs_trans_handle *trans;
8804 u64 alloc_flags;
8805 int ret;
8806
8807 BUG_ON(cache->ro);
8808
8809again:
8810 trans = btrfs_join_transaction(root);
8811 if (IS_ERR(trans))
8812 return PTR_ERR(trans);
8813
8814
8815
8816
8817
8818
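	/*
	 * We're not allowed to set block groups readonly after the dirty
	 * block group cache has started writing.  If it already started,
	 * back off and let this transaction commit.
	 */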
8819 mutex_lock(&root->fs_info->ro_block_group_mutex);
8820 if (trans->transaction->dirty_bg_run) {
8821 u64 transid = trans->transid;
8822
8823 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8824 btrfs_end_transaction(trans, root);
8825
8826 ret = btrfs_wait_for_commit(root, transid);
8827 if (ret)
8828 return ret;
8829 goto again;
8830 }
8831
8832
8833
8834
8835
8836 alloc_flags = update_block_group_flags(root, cache->flags);
8837 if (alloc_flags != cache->flags) {
8838 ret = do_chunk_alloc(trans, root, alloc_flags,
8839 CHUNK_ALLOC_FORCE);
8840
8841
8842
8843
8844
8845 if (ret == -ENOSPC)
8846 ret = 0;
8847 if (ret < 0)
8848 goto out;
8849 }
8850
8851 ret = set_block_group_ro(cache, 0);
8852 if (!ret)
8853 goto out;
8854 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8855 ret = do_chunk_alloc(trans, root, alloc_flags,
8856 CHUNK_ALLOC_FORCE);
8857 if (ret < 0)
8858 goto out;
8859 ret = set_block_group_ro(cache, 0);
8860out:
8861 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
8862 alloc_flags = update_block_group_flags(root, cache->flags);
8863 lock_chunks(root->fs_info->chunk_root);
8864 check_system_chunk(trans, root, alloc_flags);
8865 unlock_chunks(root->fs_info->chunk_root);
8866 }
8867 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8868
8869 btrfs_end_transaction(trans, root);
8870 return ret;
8871}
8872
8873int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8874 struct btrfs_root *root, u64 type)
8875{
8876 u64 alloc_flags = get_alloc_profile(root, type);
8877 return do_chunk_alloc(trans, root, alloc_flags,
8878 CHUNK_ALLOC_FORCE);
8879}
8880
8881
8882
8883
8884
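/*
 * Helper to account the unused space of all the readonly block groups in the
 * space_info.  Takes mirrors into account.
 */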
8885u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
8886{
8887 struct btrfs_block_group_cache *block_group;
8888 u64 free_bytes = 0;
8889 int factor;
8890
8891
8892 if (list_empty(&sinfo->ro_bgs))
8893 return 0;
8894
8895 spin_lock(&sinfo->lock);
8896 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
8897 spin_lock(&block_group->lock);
8898
8899 if (!block_group->ro) {
8900 spin_unlock(&block_group->lock);
8901 continue;
8902 }
8903
8904 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
8905 BTRFS_BLOCK_GROUP_RAID10 |
8906 BTRFS_BLOCK_GROUP_DUP))
8907 factor = 2;
8908 else
8909 factor = 1;
8910
8911 free_bytes += (block_group->key.offset -
8912 btrfs_block_group_used(&block_group->item)) *
8913 factor;
8914
8915 spin_unlock(&block_group->lock);
8916 }
8917 spin_unlock(&sinfo->lock);
8918
8919 return free_bytes;
8920}
8921
8922void btrfs_set_block_group_rw(struct btrfs_root *root,
8923 struct btrfs_block_group_cache *cache)
8924{
8925 struct btrfs_space_info *sinfo = cache->space_info;
8926 u64 num_bytes;
8927
8928 BUG_ON(!cache->ro);
8929
8930 spin_lock(&sinfo->lock);
8931 spin_lock(&cache->lock);
8932 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8933 cache->bytes_super - btrfs_block_group_used(&cache->item);
8934 sinfo->bytes_readonly -= num_bytes;
8935 cache->ro = 0;
8936 list_del_init(&cache->ro_list);
8937 spin_unlock(&cache->lock);
8938 spin_unlock(&sinfo->lock);
8939}
8940
8941
8942
8943
8944
8945
8946
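/*
 * Checks to see if it's even possible to relocate this block group.
 *
 * Returns -1 if it's not a good idea to relocate this block group, 0 if it's
 * ok to go ahead and try.
 */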
8947int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8948{
8949 struct btrfs_block_group_cache *block_group;
8950 struct btrfs_space_info *space_info;
8951 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
8952 struct btrfs_device *device;
8953 struct btrfs_trans_handle *trans;
8954 u64 min_free;
8955 u64 dev_min = 1;
8956 u64 dev_nr = 0;
8957 u64 target;
8958 int index;
8959 int full = 0;
8960 int ret = 0;
8961
8962 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
8963
8964
8965 if (!block_group)
8966 return -1;
8967
8968 min_free = btrfs_block_group_used(&block_group->item);
8969
8970
8971 if (!min_free)
8972 goto out;
8973
8974 space_info = block_group->space_info;
8975 spin_lock(&space_info->lock);
8976
8977 full = space_info->full;
8978
8979
8980
8981
8982
8983
8984
8985
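	/*
	 * If this is the last block group we have in this space, we can't
	 * relocate it unless we're able to allocate a new chunk below.
	 *
	 * Otherwise, we need to make sure we have room in the space to handle
	 * all of the extents from this block group.  If we can, we're good.
	 */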
8986 if ((space_info->total_bytes != block_group->key.offset) &&
8987 (space_info->bytes_used + space_info->bytes_reserved +
8988 space_info->bytes_pinned + space_info->bytes_readonly +
8989 min_free < space_info->total_bytes)) {
8990 spin_unlock(&space_info->lock);
8991 goto out;
8992 }
8993 spin_unlock(&space_info->lock);
8994
8995
8996
8997
8998
8999
9000
9001
9002 ret = -1;
9003
9004
9005
9006
9007
9008
9009
9010
9011
9012 target = get_restripe_target(root->fs_info, block_group->flags);
9013 if (target) {
9014 index = __get_raid_index(extended_to_chunk(target));
9015 } else {
9016
9017
9018
9019
9020 if (full)
9021 goto out;
9022
9023 index = get_block_group_index(block_group);
9024 }
9025
9026 if (index == BTRFS_RAID_RAID10) {
9027 dev_min = 4;
9028
9029 min_free >>= 1;
9030 } else if (index == BTRFS_RAID_RAID1) {
9031 dev_min = 2;
9032 } else if (index == BTRFS_RAID_DUP) {
9033
9034 min_free <<= 1;
9035 } else if (index == BTRFS_RAID_RAID0) {
9036 dev_min = fs_devices->rw_devices;
9037 min_free = div64_u64(min_free, dev_min);
9038 }
9039
9040
9041 trans = btrfs_join_transaction(root);
9042 if (IS_ERR(trans)) {
9043 ret = PTR_ERR(trans);
9044 goto out;
9045 }
9046
9047 mutex_lock(&root->fs_info->chunk_mutex);
9048 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9049 u64 dev_offset;
9050
9051
9052
9053
9054
9055 if (device->total_bytes > device->bytes_used + min_free &&
9056 !device->is_tgtdev_for_dev_replace) {
9057 ret = find_free_dev_extent(trans, device, min_free,
9058 &dev_offset, NULL);
9059 if (!ret)
9060 dev_nr++;
9061
9062 if (dev_nr >= dev_min)
9063 break;
9064
9065 ret = -1;
9066 }
9067 }
9068 mutex_unlock(&root->fs_info->chunk_mutex);
9069 btrfs_end_transaction(trans, root);
9070out:
9071 btrfs_put_block_group(block_group);
9072 return ret;
9073}
9074
9075static int find_first_block_group(struct btrfs_root *root,
9076 struct btrfs_path *path, struct btrfs_key *key)
9077{
9078 int ret = 0;
9079 struct btrfs_key found_key;
9080 struct extent_buffer *leaf;
9081 int slot;
9082
9083 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9084 if (ret < 0)
9085 goto out;
9086
9087 while (1) {
9088 slot = path->slots[0];
9089 leaf = path->nodes[0];
9090 if (slot >= btrfs_header_nritems(leaf)) {
9091 ret = btrfs_next_leaf(root, path);
9092 if (ret == 0)
9093 continue;
9094 if (ret < 0)
9095 goto out;
9096 break;
9097 }
9098 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9099
9100 if (found_key.objectid >= key->objectid &&
9101 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9102 ret = 0;
9103 goto out;
9104 }
9105 path->slots[0]++;
9106 }
9107out:
9108 return ret;
9109}
9110
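/*
 * Release the free space cache inode reference (iref) that each block group
 * may still be pinning, so the inodes can be evicted; used while tearing the
 * filesystem down.
 */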
9111void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9112{
9113 struct btrfs_block_group_cache *block_group;
9114 u64 last = 0;
9115
9116 while (1) {
9117 struct inode *inode;
9118
9119 block_group = btrfs_lookup_first_block_group(info, last);
9120 while (block_group) {
9121 spin_lock(&block_group->lock);
9122 if (block_group->iref)
9123 break;
9124 spin_unlock(&block_group->lock);
9125 block_group = next_block_group(info->tree_root,
9126 block_group);
9127 }
9128 if (!block_group) {
9129 if (last == 0)
9130 break;
9131 last = 0;
9132 continue;
9133 }
9134
9135 inode = block_group->inode;
9136 block_group->iref = 0;
9137 block_group->inode = NULL;
9138 spin_unlock(&block_group->lock);
9139 iput(inode);
9140 last = block_group->key.objectid + block_group->key.offset;
9141 btrfs_put_block_group(block_group);
9142 }
9143}
9144
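/*
 * Tear down all in-memory block group, caching control and space_info state.
 * Only called when the filesystem is going away, so nothing else can be
 * racing with us.
 */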
9145int btrfs_free_block_groups(struct btrfs_fs_info *info)
9146{
9147 struct btrfs_block_group_cache *block_group;
9148 struct btrfs_space_info *space_info;
9149 struct btrfs_caching_control *caching_ctl;
9150 struct rb_node *n;
9151
9152 down_write(&info->commit_root_sem);
9153 while (!list_empty(&info->caching_block_groups)) {
9154 caching_ctl = list_entry(info->caching_block_groups.next,
9155 struct btrfs_caching_control, list);
9156 list_del(&caching_ctl->list);
9157 put_caching_control(caching_ctl);
9158 }
9159 up_write(&info->commit_root_sem);
9160
9161 spin_lock(&info->unused_bgs_lock);
9162 while (!list_empty(&info->unused_bgs)) {
9163 block_group = list_first_entry(&info->unused_bgs,
9164 struct btrfs_block_group_cache,
9165 bg_list);
9166 list_del_init(&block_group->bg_list);
9167 btrfs_put_block_group(block_group);
9168 }
9169 spin_unlock(&info->unused_bgs_lock);
9170
9171 spin_lock(&info->block_group_cache_lock);
9172 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9173 block_group = rb_entry(n, struct btrfs_block_group_cache,
9174 cache_node);
9175 rb_erase(&block_group->cache_node,
9176 &info->block_group_cache_tree);
9177 RB_CLEAR_NODE(&block_group->cache_node);
9178 spin_unlock(&info->block_group_cache_lock);
9179
9180 down_write(&block_group->space_info->groups_sem);
9181 list_del(&block_group->list);
9182 up_write(&block_group->space_info->groups_sem);
9183
9184 if (block_group->cached == BTRFS_CACHE_STARTED)
9185 wait_block_group_cache_done(block_group);
9186
		/*
		 * We haven't cached this block group, which means we could
		 * possibly have excluded extents on this block group.
		 */
9191 if (block_group->cached == BTRFS_CACHE_NO ||
9192 block_group->cached == BTRFS_CACHE_ERROR)
9193 free_excluded_extents(info->extent_root, block_group);
9194
9195 btrfs_remove_free_space_cache(block_group);
9196 btrfs_put_block_group(block_group);
9197
9198 spin_lock(&info->block_group_cache_lock);
9199 }
9200 spin_unlock(&info->block_group_cache_lock);
9201
	/*
	 * Now that all the block groups are freed, go through and free all
	 * the space_info structs.  This is only called during the final
	 * stages of unmount, and so we know nobody is using them.  We call
	 * synchronize_rcu() once before we start, just to be on the safe
	 * side.
	 */
9208 synchronize_rcu();
9209
9210 release_global_block_rsv(info);
9211
9212 while (!list_empty(&info->space_info)) {
9213 int i;
9214
9215 space_info = list_entry(info->space_info.next,
9216 struct btrfs_space_info,
9217 list);
9218 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
9219 if (WARN_ON(space_info->bytes_pinned > 0 ||
9220 space_info->bytes_reserved > 0 ||
9221 space_info->bytes_may_use > 0)) {
9222 dump_space_info(space_info, 0, 0);
9223 }
9224 }
9225 list_del(&space_info->list);
9226 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
9227 struct kobject *kobj;
9228 kobj = space_info->block_group_kobjs[i];
9229 space_info->block_group_kobjs[i] = NULL;
9230 if (kobj) {
9231 kobject_del(kobj);
9232 kobject_put(kobj);
9233 }
9234 }
9235 kobject_del(&space_info->kobj);
9236 kobject_put(&space_info->kobj);
9237 }
9238 return 0;
9239}
9240
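/*
 * Link the block group into its space_info's list for the matching raid
 * index, creating the sysfs kobject for that raid type on first use.
 */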
9241static void __link_block_group(struct btrfs_space_info *space_info,
9242 struct btrfs_block_group_cache *cache)
9243{
9244 int index = get_block_group_index(cache);
9245 bool first = false;
9246
9247 down_write(&space_info->groups_sem);
9248 if (list_empty(&space_info->block_groups[index]))
9249 first = true;
9250 list_add_tail(&cache->list, &space_info->block_groups[index]);
9251 up_write(&space_info->groups_sem);
9252
9253 if (first) {
9254 struct raid_kobject *rkobj;
9255 int ret;
9256
9257 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
9258 if (!rkobj)
9259 goto out_err;
9260 rkobj->raid_type = index;
9261 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
9262 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
9263 "%s", get_raid_name(index));
9264 if (ret) {
9265 kobject_put(&rkobj->kobj);
9266 goto out_err;
9267 }
9268 space_info->block_group_kobjs[index] = &rkobj->kobj;
9269 }
9270
9271 return;
9272out_err:
9273 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
9274}
9275
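/*
 * Allocate and initialize an in-memory block group cache entry for the chunk
 * starting at @start with length @size.  Returns NULL if allocation fails.
 */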
9276static struct btrfs_block_group_cache *
9277btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
9278{
9279 struct btrfs_block_group_cache *cache;
9280
9281 cache = kzalloc(sizeof(*cache), GFP_NOFS);
9282 if (!cache)
9283 return NULL;
9284
9285 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
9286 GFP_NOFS);
9287 if (!cache->free_space_ctl) {
9288 kfree(cache);
9289 return NULL;
9290 }
9291
9292 cache->key.objectid = start;
9293 cache->key.offset = size;
9294 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9295
9296 cache->sectorsize = root->sectorsize;
9297 cache->fs_info = root->fs_info;
9298 cache->full_stripe_len = btrfs_full_stripe_len(root,
9299 &root->fs_info->mapping_tree,
9300 start);
9301 atomic_set(&cache->count, 1);
9302 spin_lock_init(&cache->lock);
9303 init_rwsem(&cache->data_rwsem);
9304 INIT_LIST_HEAD(&cache->list);
9305 INIT_LIST_HEAD(&cache->cluster_list);
9306 INIT_LIST_HEAD(&cache->bg_list);
9307 INIT_LIST_HEAD(&cache->ro_list);
9308 INIT_LIST_HEAD(&cache->dirty_list);
9309 INIT_LIST_HEAD(&cache->io_list);
9310 btrfs_init_free_space_ctl(cache);
9311 atomic_set(&cache->trimming, 0);
9312
9313 return cache;
9314}
9315
9316int btrfs_read_block_groups(struct btrfs_root *root)
9317{
9318 struct btrfs_path *path;
9319 int ret;
9320 struct btrfs_block_group_cache *cache;
9321 struct btrfs_fs_info *info = root->fs_info;
9322 struct btrfs_space_info *space_info;
9323 struct btrfs_key key;
9324 struct btrfs_key found_key;
9325 struct extent_buffer *leaf;
9326 int need_clear = 0;
9327 u64 cache_gen;
9328
9329 root = info->extent_root;
9330 key.objectid = 0;
9331 key.offset = 0;
9332 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9333 path = btrfs_alloc_path();
9334 if (!path)
9335 return -ENOMEM;
9336 path->reada = 1;
9337
9338 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
9339 if (btrfs_test_opt(root, SPACE_CACHE) &&
9340 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
9341 need_clear = 1;
9342 if (btrfs_test_opt(root, CLEAR_CACHE))
9343 need_clear = 1;
9344
9345 while (1) {
9346 ret = find_first_block_group(root, path, &key);
9347 if (ret > 0)
9348 break;
9349 if (ret != 0)
9350 goto error;
9351
9352 leaf = path->nodes[0];
9353 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9354
9355 cache = btrfs_create_block_group_cache(root, found_key.objectid,
9356 found_key.offset);
9357 if (!cache) {
9358 ret = -ENOMEM;
9359 goto error;
9360 }
9361
9362 if (need_clear) {
			/*
			 * When we mount with an old space cache, we need to
			 * set BTRFS_DC_CLEAR and set the dirty flag.
			 *
			 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
			 *    truncate the old free space cache inode and
			 *    set up a new one.
			 * b) Setting the 'dirty flag' makes sure that we flush
			 *    the new space cache info onto disk.
			 */
9373 if (btrfs_test_opt(root, SPACE_CACHE))
9374 cache->disk_cache_state = BTRFS_DC_CLEAR;
9375 }
9376
9377 read_extent_buffer(leaf, &cache->item,
9378 btrfs_item_ptr_offset(leaf, path->slots[0]),
9379 sizeof(cache->item));
9380 cache->flags = btrfs_block_group_flags(&cache->item);
9381
9382 key.objectid = found_key.objectid + found_key.offset;
9383 btrfs_release_path(path);
9384
		/*
		 * We need to exclude the super stripes now so that the space
		 * info has super bytes accounted for, otherwise we'll think
		 * we have more space than we actually do.
		 */
9390 ret = exclude_super_stripes(root, cache);
9391 if (ret) {
			/*
			 * We may have excluded something, so call this just in
			 * case.
			 */
9396 free_excluded_extents(root, cache);
9397 btrfs_put_block_group(cache);
9398 goto error;
9399 }
9400
		/*
		 * Check for two cases here: either we are full, and therefore
		 * don't need to bother with the caching work since we won't
		 * find any space, or we are empty, and we can just add all
		 * the space in and be done with it.  This saves us _a_lot_ of
		 * time, particularly in the full case.
		 */
9408 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
9409 cache->last_byte_to_unpin = (u64)-1;
9410 cache->cached = BTRFS_CACHE_FINISHED;
9411 free_excluded_extents(root, cache);
9412 } else if (btrfs_block_group_used(&cache->item) == 0) {
9413 cache->last_byte_to_unpin = (u64)-1;
9414 cache->cached = BTRFS_CACHE_FINISHED;
9415 add_new_free_space(cache, root->fs_info,
9416 found_key.objectid,
9417 found_key.objectid +
9418 found_key.offset);
9419 free_excluded_extents(root, cache);
9420 }
9421
9422 ret = btrfs_add_block_group_cache(root->fs_info, cache);
9423 if (ret) {
9424 btrfs_remove_free_space_cache(cache);
9425 btrfs_put_block_group(cache);
9426 goto error;
9427 }
9428
9429 ret = update_space_info(info, cache->flags, found_key.offset,
9430 btrfs_block_group_used(&cache->item),
9431 &space_info);
9432 if (ret) {
9433 btrfs_remove_free_space_cache(cache);
9434 spin_lock(&info->block_group_cache_lock);
9435 rb_erase(&cache->cache_node,
9436 &info->block_group_cache_tree);
9437 RB_CLEAR_NODE(&cache->cache_node);
9438 spin_unlock(&info->block_group_cache_lock);
9439 btrfs_put_block_group(cache);
9440 goto error;
9441 }
9442
9443 cache->space_info = space_info;
9444 spin_lock(&cache->space_info->lock);
9445 cache->space_info->bytes_readonly += cache->bytes_super;
9446 spin_unlock(&cache->space_info->lock);
9447
9448 __link_block_group(space_info, cache);
9449
9450 set_avail_alloc_bits(root->fs_info, cache->flags);
9451 if (btrfs_chunk_readonly(root, cache->key.objectid)) {
9452 set_block_group_ro(cache, 1);
9453 } else if (btrfs_block_group_used(&cache->item) == 0) {
9454 spin_lock(&info->unused_bgs_lock);
			/* Should always be true but just in case. */
9456 if (list_empty(&cache->bg_list)) {
9457 btrfs_get_block_group(cache);
9458 list_add_tail(&cache->bg_list,
9459 &info->unused_bgs);
9460 }
9461 spin_unlock(&info->unused_bgs_lock);
9462 }
9463 }
9464
9465 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
9466 if (!(get_alloc_profile(root, space_info->flags) &
9467 (BTRFS_BLOCK_GROUP_RAID10 |
9468 BTRFS_BLOCK_GROUP_RAID1 |
9469 BTRFS_BLOCK_GROUP_RAID5 |
9470 BTRFS_BLOCK_GROUP_RAID6 |
9471 BTRFS_BLOCK_GROUP_DUP)))
9472 continue;
		/*
		 * Avoid allocating from un-mirrored block groups if there are
		 * mirrored block groups.
		 */
9477 list_for_each_entry(cache,
9478 &space_info->block_groups[BTRFS_RAID_RAID0],
9479 list)
9480 set_block_group_ro(cache, 1);
9481 list_for_each_entry(cache,
9482 &space_info->block_groups[BTRFS_RAID_SINGLE],
9483 list)
9484 set_block_group_ro(cache, 1);
9485 }
9486
9487 init_global_block_rsv(info);
9488 ret = 0;
9489error:
9490 btrfs_free_path(path);
9491 return ret;
9492}
9493
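/*
 * Insert the block group items for all block groups created in the current
 * transaction and finish their chunk allocation.  Errors abort the
 * transaction; every entry is removed from the new_bgs list either way.
 */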
9494void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
9495 struct btrfs_root *root)
9496{
9497 struct btrfs_block_group_cache *block_group, *tmp;
9498 struct btrfs_root *extent_root = root->fs_info->extent_root;
9499 struct btrfs_block_group_item item;
9500 struct btrfs_key key;
9501 int ret = 0;
9502
9503 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
9504 if (ret)
9505 goto next;
9506
9507 spin_lock(&block_group->lock);
9508 memcpy(&item, &block_group->item, sizeof(item));
9509 memcpy(&key, &block_group->key, sizeof(key));
9510 spin_unlock(&block_group->lock);
9511
9512 ret = btrfs_insert_item(trans, extent_root, &key, &item,
9513 sizeof(item));
9514 if (ret)
9515 btrfs_abort_transaction(trans, extent_root, ret);
9516 ret = btrfs_finish_chunk_alloc(trans, extent_root,
9517 key.objectid, key.offset);
9518 if (ret)
9519 btrfs_abort_transaction(trans, extent_root, ret);
9520next:
9521 list_del_init(&block_group->bg_list);
9522 }
9523}
9524
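/*
 * Create the in-memory block group for a freshly allocated chunk and queue
 * it on the transaction's new_bgs list; the on-disk item is inserted later
 * by btrfs_create_pending_block_groups().
 */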
9525int btrfs_make_block_group(struct btrfs_trans_handle *trans,
9526 struct btrfs_root *root, u64 bytes_used,
9527 u64 type, u64 chunk_objectid, u64 chunk_offset,
9528 u64 size)
9529{
9530 int ret;
9531 struct btrfs_root *extent_root;
9532 struct btrfs_block_group_cache *cache;
9533
9534 extent_root = root->fs_info->extent_root;
9535
9536 btrfs_set_log_full_commit(root->fs_info, trans);
9537
9538 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
9539 if (!cache)
9540 return -ENOMEM;
9541
9542 btrfs_set_block_group_used(&cache->item, bytes_used);
9543 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
9544 btrfs_set_block_group_flags(&cache->item, type);
9545
9546 cache->flags = type;
9547 cache->last_byte_to_unpin = (u64)-1;
9548 cache->cached = BTRFS_CACHE_FINISHED;
9549 ret = exclude_super_stripes(root, cache);
9550 if (ret) {
		/*
		 * We may have excluded something, so call this just in
		 * case.
		 */
9555 free_excluded_extents(root, cache);
9556 btrfs_put_block_group(cache);
9557 return ret;
9558 }
9559
9560 add_new_free_space(cache, root->fs_info, chunk_offset,
9561 chunk_offset + size);
9562
9563 free_excluded_extents(root, cache);
9564
9565 ret = btrfs_add_block_group_cache(root->fs_info, cache);
9566 if (ret) {
9567 btrfs_remove_free_space_cache(cache);
9568 btrfs_put_block_group(cache);
9569 return ret;
9570 }
9571
9572 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
9573 &cache->space_info);
9574 if (ret) {
9575 btrfs_remove_free_space_cache(cache);
9576 spin_lock(&root->fs_info->block_group_cache_lock);
9577 rb_erase(&cache->cache_node,
9578 &root->fs_info->block_group_cache_tree);
9579 RB_CLEAR_NODE(&cache->cache_node);
9580 spin_unlock(&root->fs_info->block_group_cache_lock);
9581 btrfs_put_block_group(cache);
9582 return ret;
9583 }
9584 update_global_block_rsv(root->fs_info);
9585
9586 spin_lock(&cache->space_info->lock);
9587 cache->space_info->bytes_readonly += cache->bytes_super;
9588 spin_unlock(&cache->space_info->lock);
9589
9590 __link_block_group(cache->space_info, cache);
9591
9592 list_add_tail(&cache->bg_list, &trans->new_bgs);
9593
9594 set_avail_alloc_bits(extent_root->fs_info, type);
9595
9596 return 0;
9597}
9598
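/*
 * Drop the extended profile bits of @flags from the avail_*_alloc_bits
 * fields; called once the last block group with that profile is removed.
 */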
9599static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
9600{
9601 u64 extra_flags = chunk_to_extended(flags) &
9602 BTRFS_EXTENDED_PROFILE_MASK;
9603
9604 write_seqlock(&fs_info->profiles_lock);
9605 if (flags & BTRFS_BLOCK_GROUP_DATA)
9606 fs_info->avail_data_alloc_bits &= ~extra_flags;
9607 if (flags & BTRFS_BLOCK_GROUP_METADATA)
9608 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
9609 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
9610 fs_info->avail_system_alloc_bits &= ~extra_flags;
9611 write_sequnlock(&fs_info->profiles_lock);
9612}
9613
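/*
 * Remove a (read-only and empty) block group: drop its free space cache
 * inode, unlink it from every list and tree it lives on, fix up the
 * space_info accounting and delete its item from the extent tree.
 */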
9614int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9615 struct btrfs_root *root, u64 group_start,
9616 struct extent_map *em)
9617{
9618 struct btrfs_path *path;
9619 struct btrfs_block_group_cache *block_group;
9620 struct btrfs_free_cluster *cluster;
9621 struct btrfs_root *tree_root = root->fs_info->tree_root;
9622 struct btrfs_key key;
9623 struct inode *inode;
9624 struct kobject *kobj = NULL;
9625 int ret;
9626 int index;
9627 int factor;
9628 struct btrfs_caching_control *caching_ctl = NULL;
9629 bool remove_em;
9630
9631 root = root->fs_info->extent_root;
9632
9633 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
9634 BUG_ON(!block_group);
9635 BUG_ON(!block_group->ro);
9636
	/*
	 * Free the reserved super bytes from this block group before
	 * removing it.
	 */
9641 free_excluded_extents(root, block_group);
9642
9643 memcpy(&key, &block_group->key, sizeof(key));
9644 index = get_block_group_index(block_group);
9645 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
9646 BTRFS_BLOCK_GROUP_RAID1 |
9647 BTRFS_BLOCK_GROUP_RAID10))
9648 factor = 2;
9649 else
9650 factor = 1;
9651
	/* Make sure this block group isn't part of an allocation cluster. */
9653 cluster = &root->fs_info->data_alloc_cluster;
9654 spin_lock(&cluster->refill_lock);
9655 btrfs_return_cluster_to_free_space(block_group, cluster);
9656 spin_unlock(&cluster->refill_lock);
9657
	/*
	 * Make sure this block group isn't part of a metadata
	 * allocation cluster.
	 */
9662 cluster = &root->fs_info->meta_alloc_cluster;
9663 spin_lock(&cluster->refill_lock);
9664 btrfs_return_cluster_to_free_space(block_group, cluster);
9665 spin_unlock(&cluster->refill_lock);
9666
9667 path = btrfs_alloc_path();
9668 if (!path) {
9669 ret = -ENOMEM;
9670 goto out;
9671 }
9672
	/*
	 * Get the inode first so any iput calls done for the io_list
	 * aren't the final iput (no unlinks allowed now).
	 */
9677 inode = lookup_free_space_inode(tree_root, block_group, path);
9678
9679 mutex_lock(&trans->transaction->cache_write_mutex);
	/*
	 * Make sure our free space cache IO is done before we remove the
	 * free space inode.
	 */
9684 spin_lock(&trans->transaction->dirty_bgs_lock);
9685 if (!list_empty(&block_group->io_list)) {
9686 list_del_init(&block_group->io_list);
9687
9688 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
9689
9690 spin_unlock(&trans->transaction->dirty_bgs_lock);
9691 btrfs_wait_cache_io(root, trans, block_group,
9692 &block_group->io_ctl, path,
9693 block_group->key.objectid);
9694 btrfs_put_block_group(block_group);
9695 spin_lock(&trans->transaction->dirty_bgs_lock);
9696 }
9697
9698 if (!list_empty(&block_group->dirty_list)) {
9699 list_del_init(&block_group->dirty_list);
9700 btrfs_put_block_group(block_group);
9701 }
9702 spin_unlock(&trans->transaction->dirty_bgs_lock);
9703 mutex_unlock(&trans->transaction->cache_write_mutex);
9704
9705 if (!IS_ERR(inode)) {
9706 ret = btrfs_orphan_add(trans, inode);
9707 if (ret) {
9708 btrfs_add_delayed_iput(inode);
9709 goto out;
9710 }
9711 clear_nlink(inode);
		/* One for the block groups ref */
9713 spin_lock(&block_group->lock);
9714 if (block_group->iref) {
9715 block_group->iref = 0;
9716 block_group->inode = NULL;
9717 spin_unlock(&block_group->lock);
9718 iput(inode);
9719 } else {
9720 spin_unlock(&block_group->lock);
9721 }
		/* One for our lookup ref */
9723 btrfs_add_delayed_iput(inode);
9724 }
9725
9726 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
9727 key.offset = block_group->key.objectid;
9728 key.type = 0;
9729
9730 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
9731 if (ret < 0)
9732 goto out;
9733 if (ret > 0)
9734 btrfs_release_path(path);
9735 if (ret == 0) {
9736 ret = btrfs_del_item(trans, tree_root, path);
9737 if (ret)
9738 goto out;
9739 btrfs_release_path(path);
9740 }
9741
9742 spin_lock(&root->fs_info->block_group_cache_lock);
9743 rb_erase(&block_group->cache_node,
9744 &root->fs_info->block_group_cache_tree);
9745 RB_CLEAR_NODE(&block_group->cache_node);
9746
9747 if (root->fs_info->first_logical_byte == block_group->key.objectid)
9748 root->fs_info->first_logical_byte = (u64)-1;
9749 spin_unlock(&root->fs_info->block_group_cache_lock);
9750
9751 down_write(&block_group->space_info->groups_sem);
	/*
	 * We must use list_del_init so people can check to see if they
	 * are still on the list after taking the semaphore.
	 */
9756 list_del_init(&block_group->list);
9757 if (list_empty(&block_group->space_info->block_groups[index])) {
9758 kobj = block_group->space_info->block_group_kobjs[index];
9759 block_group->space_info->block_group_kobjs[index] = NULL;
9760 clear_avail_alloc_bits(root->fs_info, block_group->flags);
9761 }
9762 up_write(&block_group->space_info->groups_sem);
9763 if (kobj) {
9764 kobject_del(kobj);
9765 kobject_put(kobj);
9766 }
9767
9768 if (block_group->has_caching_ctl)
9769 caching_ctl = get_caching_control(block_group);
9770 if (block_group->cached == BTRFS_CACHE_STARTED)
9771 wait_block_group_cache_done(block_group);
9772 if (block_group->has_caching_ctl) {
9773 down_write(&root->fs_info->commit_root_sem);
9774 if (!caching_ctl) {
9775 struct btrfs_caching_control *ctl;
9776
9777 list_for_each_entry(ctl,
9778 &root->fs_info->caching_block_groups, list)
9779 if (ctl->block_group == block_group) {
9780 caching_ctl = ctl;
9781 atomic_inc(&caching_ctl->count);
9782 break;
9783 }
9784 }
9785 if (caching_ctl)
9786 list_del_init(&caching_ctl->list);
9787 up_write(&root->fs_info->commit_root_sem);
9788 if (caching_ctl) {
			/* Once for the caching bgs list and once for us. */
9790 put_caching_control(caching_ctl);
9791 put_caching_control(caching_ctl);
9792 }
9793 }
9794
9795 spin_lock(&trans->transaction->dirty_bgs_lock);
	WARN_ON(!list_empty(&block_group->dirty_list));
	WARN_ON(!list_empty(&block_group->io_list));
9802 spin_unlock(&trans->transaction->dirty_bgs_lock);
9803 btrfs_remove_free_space_cache(block_group);
9804
9805 spin_lock(&block_group->space_info->lock);
9806 list_del_init(&block_group->ro_list);
9807
9808 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
9809 WARN_ON(block_group->space_info->total_bytes
9810 < block_group->key.offset);
9811 WARN_ON(block_group->space_info->bytes_readonly
9812 < block_group->key.offset);
9813 WARN_ON(block_group->space_info->disk_total
9814 < block_group->key.offset * factor);
9815 }
9816 block_group->space_info->total_bytes -= block_group->key.offset;
9817 block_group->space_info->bytes_readonly -= block_group->key.offset;
9818 block_group->space_info->disk_total -= block_group->key.offset * factor;
9819
9820 spin_unlock(&block_group->space_info->lock);
9821
9822 memcpy(&key, &block_group->key, sizeof(key));
9823
9824 lock_chunks(root);
9825 if (!list_empty(&em->list)) {
		/* We're in the transaction->pending_chunks list. */
9827 free_extent_map(em);
9828 }
9829 spin_lock(&block_group->lock);
9830 block_group->removed = 1;
	/*
	 * At this point trimming can't start on this block group, because we
	 * removed the block group from the fs_info->block_group_cache_tree,
	 * so no one can find it anymore; and even if someone already got this
	 * block group before we removed it from the rbtree, they have already
	 * incremented block_group->trimming - if they didn't, they won't find
	 * any free space entries because we already removed them all when we
	 * called btrfs_remove_free_space_cache().
	 *
	 * And we must not remove the extent map from the fs_info->mapping_tree
	 * to prevent the same logical address range and physical device space
	 * ranges from being reused for a new block group. This is because our
	 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
	 * completely transactionless, so while it is trimming a range the
	 * currently running transaction might finish and a new one start,
	 * allowing for new block groups to be created that can reuse the same
	 * physical device locations unless we take this special care.
	 */
9849 remove_em = (atomic_read(&block_group->trimming) == 0);
	/*
	 * Only remove the extent map once no trim task is referencing this
	 * block group anymore (block_group->trimming == 0); otherwise it has
	 * to stay alive until the last trimming reference is dropped.
	 */
9855 if (!remove_em) {
		/*
		 * A fs trim task is still referencing this block group, so we
		 * can't free the extent map yet.  Park it on the pinned_chunks
		 * list instead; that keeps the chunk's logical and physical
		 * ranges from being reused by a new block group while the trim
		 * runs, and the task that drops the last trimming reference
		 * takes care of unpinning and freeing the em.  Note the em may
		 * currently sit on the transaction's pending_chunks list,
		 * hence the list_move_tail() instead of a plain
		 * list_add_tail().
		 */
9867 list_move_tail(&em->list, &root->fs_info->pinned_chunks);
9868 }
9869 spin_unlock(&block_group->lock);
9870
9871 if (remove_em) {
9872 struct extent_map_tree *em_tree;
9873
9874 em_tree = &root->fs_info->mapping_tree.map_tree;
9875 write_lock(&em_tree->lock);
		/*
		 * The em might be in the pending_chunks list, so make sure the
		 * chunk mutex is locked, since remove_extent_mapping() will
		 * delete us from that list.
		 */
9881 remove_extent_mapping(em_tree, em);
9882 write_unlock(&em_tree->lock);
		/* once for the tree */
9884 free_extent_map(em);
9885 }
9886
9887 unlock_chunks(root);
9888
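	/* Once for our lookup ref, once for the block group cache tree's ref. */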
9889 btrfs_put_block_group(block_group);
9890 btrfs_put_block_group(block_group);
9891
9892 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9893 if (ret > 0)
9894 ret = -EIO;
9895 if (ret < 0)
9896 goto out;
9897
9898 ret = btrfs_del_item(trans, root, path);
9899out:
9900 btrfs_free_path(path);
9901 return ret;
9902}
9903
/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 */
9908void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9909{
9910 struct btrfs_block_group_cache *block_group;
9911 struct btrfs_space_info *space_info;
9912 struct btrfs_root *root = fs_info->extent_root;
9913 struct btrfs_trans_handle *trans;
9914 int ret = 0;
9915
9916 if (!fs_info->open)
9917 return;
9918
9919 spin_lock(&fs_info->unused_bgs_lock);
9920 while (!list_empty(&fs_info->unused_bgs)) {
9921 u64 start, end;
9922
9923 block_group = list_first_entry(&fs_info->unused_bgs,
9924 struct btrfs_block_group_cache,
9925 bg_list);
9926 space_info = block_group->space_info;
9927 list_del_init(&block_group->bg_list);
9928 if (ret || btrfs_mixed_space_info(space_info)) {
9929 btrfs_put_block_group(block_group);
9930 continue;
9931 }
9932 spin_unlock(&fs_info->unused_bgs_lock);
9933
		/* Don't want to race with allocators so take the groups_sem */
9935 down_write(&space_info->groups_sem);
9936 spin_lock(&block_group->lock);
9937 if (block_group->reserved ||
9938 btrfs_block_group_used(&block_group->item) ||
9939 block_group->ro) {
			/*
			 * We want to bail if we made new allocations or have
			 * outstanding allocations in this block group.  We do
			 * the ro check in case balance is currently acting on
			 * this block group.
			 */
9946 spin_unlock(&block_group->lock);
9947 up_write(&space_info->groups_sem);
9948 goto next;
9949 }
9950 spin_unlock(&block_group->lock);
9951
		/* We don't want to force the issue, only flip it if it's ok */
9953 ret = set_block_group_ro(block_group, 0);
9954 up_write(&space_info->groups_sem);
9955 if (ret < 0) {
9956 ret = 0;
9957 goto next;
9958 }
9959
		/*
		 * Want to do this before we do anything else so we can recover
		 * properly if we fail to join the transaction.
		 */
9965 trans = btrfs_start_transaction(root, 1);
9966 if (IS_ERR(trans)) {
9967 btrfs_set_block_group_rw(root, block_group);
9968 ret = PTR_ERR(trans);
9969 goto next;
9970 }
9971
		/*
		 * We could have pending pinned extents for this block group;
		 * just delete them, we already have our pending delete.
		 */
9976 start = block_group->key.objectid;
9977 end = start + block_group->key.offset - 1;
		/*
		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
		 * btrfs_finish_extent_commit(). If we are at transaction N,
		 * another task might be running finish_extent_commit() for the
		 * previous transaction N - 1, and have seen a range belonging
		 * to the block group in freed_extents[] before we were able to
		 * clear the whole block group range from freed_extents[]. This
		 * means that task can look up the block group after we
		 * unpinned it from freed_extents[] and removed it, leading to
		 * a BUG_ON() at btrfs_unpin_extent_range().
		 */
9989 mutex_lock(&fs_info->unused_bg_unpin_mutex);
9990 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
9991 EXTENT_DIRTY, GFP_NOFS);
9992 if (ret) {
9993 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9994 btrfs_set_block_group_rw(root, block_group);
9995 goto end_trans;
9996 }
9997 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
9998 EXTENT_DIRTY, GFP_NOFS);
9999 if (ret) {
10000 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10001 btrfs_set_block_group_rw(root, block_group);
10002 goto end_trans;
10003 }
10004 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10005
		/* Reset pinned so btrfs_put_block_group() doesn't complain */
10007 spin_lock(&space_info->lock);
10008 spin_lock(&block_group->lock);
10009
10010 space_info->bytes_pinned -= block_group->pinned;
10011 space_info->bytes_readonly += block_group->pinned;
10012 percpu_counter_add(&space_info->total_bytes_pinned,
10013 -block_group->pinned);
10014 block_group->pinned = 0;
10015
10016 spin_unlock(&block_group->lock);
10017 spin_unlock(&space_info->lock);
10018
		/*
		 * btrfs_remove_chunk() will abort the transaction if things
		 * go horribly wrong.
		 */
10023 ret = btrfs_remove_chunk(trans, root,
10024 block_group->key.objectid);
10025end_trans:
10026 btrfs_end_transaction(trans, root);
10027next:
10028 btrfs_put_block_group(block_group);
10029 spin_lock(&fs_info->unused_bgs_lock);
10030 }
10031 spin_unlock(&fs_info->unused_bgs_lock);
10032}
10033
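/*
 * Create the initial empty space_info structures: system plus either a mixed
 * data+metadata one or separate metadata and data ones, depending on the
 * MIXED_GROUPS incompat feature.
 */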
10034int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10035{
10036 struct btrfs_space_info *space_info;
10037 struct btrfs_super_block *disk_super;
10038 u64 features;
10039 u64 flags;
10040 int mixed = 0;
10041 int ret;
10042
10043 disk_super = fs_info->super_copy;
10044 if (!btrfs_super_root(disk_super))
10045 return 1;
10046
10047 features = btrfs_super_incompat_flags(disk_super);
10048 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
10049 mixed = 1;
10050
10051 flags = BTRFS_BLOCK_GROUP_SYSTEM;
10052 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10053 if (ret)
10054 goto out;
10055
10056 if (mixed) {
10057 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
10058 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10059 } else {
10060 flags = BTRFS_BLOCK_GROUP_METADATA;
10061 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10062 if (ret)
10063 goto out;
10064
10065 flags = BTRFS_BLOCK_GROUP_DATA;
10066 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
10067 }
10068out:
10069 return ret;
10070}
10071
10072int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
10073{
10074 return unpin_extent_range(root, start, end, false);
10075}
10076
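/*
 * Trim free space in every block group that overlaps the given range,
 * waiting for caching to finish where needed so the free space entries are
 * known.  The number of bytes actually trimmed is returned in range->len.
 */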
10077int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
10078{
10079 struct btrfs_fs_info *fs_info = root->fs_info;
10080 struct btrfs_block_group_cache *cache = NULL;
10081 u64 group_trimmed;
10082 u64 start;
10083 u64 end;
10084 u64 trimmed = 0;
10085 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10086 int ret = 0;
10087
	/*
	 * Try to trim all FS space; our block group may start from non-zero.
	 */
10091 if (range->len == total_bytes)
10092 cache = btrfs_lookup_first_block_group(fs_info, range->start);
10093 else
10094 cache = btrfs_lookup_block_group(fs_info, range->start);
10095
10096 while (cache) {
10097 if (cache->key.objectid >= (range->start + range->len)) {
10098 btrfs_put_block_group(cache);
10099 break;
10100 }
10101
10102 start = max(range->start, cache->key.objectid);
10103 end = min(range->start + range->len,
10104 cache->key.objectid + cache->key.offset);
10105
10106 if (end - start >= range->minlen) {
10107 if (!block_group_cache_done(cache)) {
10108 ret = cache_block_group(cache, 0);
10109 if (ret) {
10110 btrfs_put_block_group(cache);
10111 break;
10112 }
10113 ret = wait_block_group_cache_done(cache);
10114 if (ret) {
10115 btrfs_put_block_group(cache);
10116 break;
10117 }
10118 }
10119 ret = btrfs_trim_block_group(cache,
10120 &group_trimmed,
10121 start,
10122 end,
10123 range->minlen);
10124
10125 trimmed += group_trimmed;
10126 if (ret) {
10127 btrfs_put_block_group(cache);
10128 break;
10129 }
10130 }
10131
10132 cache = next_block_group(fs_info->tree_root, cache);
10133 }
10134
10135 range->len = trimmed;
10136 return ret;
10137}
10138
/*
 * btrfs_{start,end}_write_no_snapshoting() are similar to
 * mnt_{want,drop}_write(): they keep tasks from starting new nocow writes
 * into the page cache once a snapshot of the subvolume is about to be taken,
 * so all outstanding nocow data is flushed to disk before the snapshot is
 * created.
 */
10147void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
10148{
10149 percpu_counter_dec(&root->subv_writers->counter);
	/*
	 * Make sure the counter is updated before we wake up
	 * waiters.
	 */
10154 smp_mb();
10155 if (waitqueue_active(&root->subv_writers->wait))
10156 wake_up(&root->subv_writers->wait);
10157}
10158
10159int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
10160{
10161 if (atomic_read(&root->will_be_snapshoted))
10162 return 0;
10163
10164 percpu_counter_inc(&root->subv_writers->counter);
	/*
	 * Make sure the counter is updated before we check for snapshot creation.
	 */
10168 smp_mb();
10169 if (atomic_read(&root->will_be_snapshoted)) {
10170 btrfs_end_write_no_snapshoting(root);
10171 return 0;
10172 }
10173 return 1;
10174}
10175