/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include "hash.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"

#undef SCRAMBLE_DELAYED_REFS

/*
 * Control flags for do_chunk_alloc()'s force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
 * chunks already allocated.  This is used as part of the clustering code to
 * help make sure we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
				    u64 ram_bytes, u64 num_bytes, int delalloc);
static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
				     u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved);
static int __reserve_metadata_bytes(struct btrfs_root *root,
				    struct btrfs_space_info *space_info,
				    u64 orig_bytes,
				    enum btrfs_reserve_flush_enum flush);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

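/*
 * This adds the block group to the fs_info rb tree for the block group
 * cache, indexed by the block group's starting offset (key.objectid).
 */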
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

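/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr.
 */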
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

static int add_excluded_extent(struct btrfs_root *root,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&root->fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE);
	set_extent_bits(&root->fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE);
	return 0;
}

static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE);
}

static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	atomic_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (atomic_dec_and_test(&ctl->count))
		kfree(ctl);
}

#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_root *root,
				struct btrfs_block_group_cache *block_group)
{
	u64 start = block_group->key.objectid;
	u64 len = block_group->key.offset;
	u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
		root->nodesize : root->sectorsize;
	u64 step = chunk << 1;

	while (len > chunk) {
		btrfs_remove_free_space(block_group, start, chunk);
		start += step;
		if (len < step)
			len = 0;
		else
			len -= step;
	}
}
#endif

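/*
 * This is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */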
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		       struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret;
	bool wakeup = true;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

#ifdef CONFIG_BTRFS_DEBUG
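	/*
	 * If we're fragmenting, don't wake up waiters: nobody should be
	 * allocating from this block group until caching has made it far
	 * enough to be valid.
	 */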
	if (btrfs_should_fragment_free_space(extent_root, block_group))
		wakeup = false;
#endif

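	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it is read-only.
	 */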
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				if (wakeup)
					caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				mutex_lock(&caching_ctl->mutex);
				down_read(&fs_info->commit_root_sem);
				goto next;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			if (wakeup)
				caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->tree_root->nodesize;
			else
				last = key.objectid + key.offset;

			if (total_found > CACHING_CTL_WAKE_UP) {
				total_found = 0;
				if (wakeup)
					wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(extent_root, block_group)) {
		u64 bytes_used;

		spin_lock(&block_group->space_info->lock);
		spin_lock(&block_group->lock);
		bytes_used = block_group->key.offset -
			btrfs_block_group_used(&block_group->item);
		block_group->space_info->bytes_used += bytes_used >> 1;
		spin_unlock(&block_group->lock);
		spin_unlock(&block_group->space_info->lock);
		fragment_free_space(extent_root, block_group);
	}
#endif

	caching_ctl->progress = (u64)-1;
out:
	btrfs_free_path(path);
	return ret;
}

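/*
 * Worker that populates a block group's free space cache, either from the
 * free space tree or by scanning the extent tree, then wakes up everybody
 * waiting on the caching control.
 */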
static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	int ret;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	mutex_lock(&caching_ctl->mutex);
	down_read(&fs_info->commit_root_sem);

	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		ret = load_free_space_tree(caching_ctl);
	else
		ret = load_extent_tree_free(caching_ctl);

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

	up_read(&fs_info->commit_root_sem);
	free_excluded_extents(fs_info->extent_root, block_group);
	mutex_unlock(&caching_ctl->mutex);

	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);
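	/*
	 * If another caller already kicked off fast caching for this block
	 * group, wait for it to settle: BTRFS_CACHE_FAST is transient, and
	 * once it resolves we can either reuse the result or start our own
	 * caching attempt below.
	 */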
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
		mutex_lock(&caching_ctl->mutex);
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
		if (ret == 1 &&
		    btrfs_should_fragment_free_space(fs_info->extent_root,
						     cache)) {
			u64 bytes_used;

			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			bytes_used = cache->key.offset -
				btrfs_block_group_used(&cache->item);
			cache->space_info->bytes_used += bytes_used >> 1;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fragment_free_space(fs_info->extent_root, cache);
		}
#endif
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	} else {
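		/*
		 * We're either using the free space tree or no caching at all.
		 * Set cached to the appropriate value and wake up any waiters.
		 */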
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}

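/*
 * Return the block group that starts at or after bytenr.
 */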
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 0);
}

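/*
 * Return the block group that contains the given bytenr.
 */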
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 1);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

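/*
 * After adding space to the filesystem, we need to clear the full flags
 * on all the space infos so allocations are retried.
 */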
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

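/*
 * Look up the reference count and flags of an extent.  The on-disk extent
 * item alone is not authoritative: any pending delayed ref head for this
 * bytenr is folded into the counts before they are returned.
 */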
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different.
	 */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == root->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and
			 * try again
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
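
/*
 * Extent back references.
 *
 * Data extents are referenced either by "normal" back refs
 * (BTRFS_EXTENT_DATA_REF_KEY), which record the owning root, inode objectid
 * and file offset, or by "shared" back refs (BTRFS_SHARED_DATA_REF_KEY),
 * which record only the bytenr of the leaf that points at the extent.  Tree
 * blocks are referenced the same two ways via BTRFS_TREE_BLOCK_REF_KEY and
 * BTRFS_SHARED_BLOCK_REF_KEY.  Back refs are stored inline in the extent
 * item while they fit; once they no longer fit they spill into separate
 * keyed items.  The helpers below look up, insert, update and remove back
 * references in both forms.
 */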
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);

	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);

		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

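/*
 * Look for an inline back ref.  On success (return 0), *ref_ret points at
 * the inline back ref.  If it is not found, *ref_ret points at the location
 * where the new back ref should be inserted and -ENOENT is returned.  If
 * insert is set but the extent item has no room left for another inline
 * ref, -EAGAIN is returned and the caller must fall back to a keyed back
 * ref item.
 */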
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owner is our parent level, so we can just add one to get the level
	 * for the block we are interested in.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

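/*
 * Helper to add a new inline back ref: lookup_inline_extent_backref() has
 * already reserved room for it, so extend the item and shift any refs that
 * sit after the insertion point.
 */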
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(root, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

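/*
 * Helper to update or remove an inline back ref.  If the ref count drops
 * to zero, the inline ref is deleted and the extent item is truncated to
 * match.
 */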
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(root, path, iref,
					     refs_to_add, extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(root, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(root, path, iref,
					     -refs_to_drop, NULL, last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

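	/*
	 * Superblock mirrors must never be discarded: trim around them,
	 * skipping any part of the range they overlap.
	 */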
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * The superblock overlaps the beginning of the range: skip
		 * past it and continue with whatever is left.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are
	 * discarding.
	 */
	btrfs_bio_counter_inc_blocked(root->fs_info);
	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);
	/* Error condition is -ENOMEM */
	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break;

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the discard error, as it is not a
			 * critical problem.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}
	btrfs_bio_counter_dec(root->fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
					num_bytes, parent, root_objectid,
					owner, offset, 0,
					BTRFS_ADD_DELAYED_REF, NULL);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}

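/*
 * Process a delayed data ref: insert the reserved file extent, add a back
 * reference, or free the extent, depending on the node's action.
 */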
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

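/*
 * Write the flag and/or key update recorded in a delayed extent op back
 * into the extent item.
 */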
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = READA_FORWARD;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			if (path->slots[0] > 0) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == node->bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == node->num_bytes)
					ret = 0;
			}
			if (ret > 0) {
				btrfs_release_path(path);
				metadata = 0;

				key.objectid = node->bytenr;
				key.offset = node->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				goto again;
			}
		} else {
			err = -EIO;
			goto out;
		}
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(node, ref, node->action);

	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	if (node->ref_mod != 1) {
		btrfs_err(root->fs_info,
	"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
			  node->bytenr, node->ref_mod, node->action, ref_root,
			  parent);
		return -EIO;
	}
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node,
					     parent, ref_root,
					     ref->level, 0, 1,
2375 extent_op);
2376 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2377 ret = __btrfs_free_extent(trans, root, node,
2378 parent, ref_root,
2379 ref->level, 0, 1, extent_op);
2380 } else {
2381 BUG();
2382 }
2383 return ret;
2384}
2385
/* helper function to actually process a single delayed ref entry */
2387static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2388 struct btrfs_root *root,
2389 struct btrfs_delayed_ref_node *node,
2390 struct btrfs_delayed_extent_op *extent_op,
2391 int insert_reserved)
2392{
2393 int ret = 0;
2394
2395 if (trans->aborted) {
2396 if (insert_reserved)
2397 btrfs_pin_extent(root, node->bytenr,
2398 node->num_bytes, 1);
2399 return 0;
2400 }
2401
2402 if (btrfs_delayed_ref_is_head(node)) {
2403 struct btrfs_delayed_ref_head *head;
2404
 /*
  * we've hit the end of the chain and we were supposed to insert this
  * extent into the tree.  But, it got deleted before we ever needed to
  * insert it, so all we have to do is clean up the accounting
  */
2410 BUG_ON(extent_op);
2411 head = btrfs_delayed_node_to_head(node);
2412 trace_run_delayed_ref_head(node, head, node->action);
2413
2414 if (insert_reserved) {
2415 btrfs_pin_extent(root, node->bytenr,
2416 node->num_bytes, 1);
2417 if (head->is_data) {
2418 ret = btrfs_del_csums(trans, root,
2419 node->bytenr,
2420 node->num_bytes);
2421 }
2422 }
2423
 /* Also free its reserved qgroup space */
2425 btrfs_qgroup_free_delayed_ref(root->fs_info,
2426 head->qgroup_ref_root,
2427 head->qgroup_reserved);
2428 return ret;
2429 }
2430
2431 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2432 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2433 ret = run_delayed_tree_ref(trans, root, node, extent_op,
2434 insert_reserved);
2435 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2436 node->type == BTRFS_SHARED_DATA_REF_KEY)
2437 ret = run_delayed_data_ref(trans, root, node, extent_op,
2438 insert_reserved);
2439 else
2440 BUG();
2441 return ret;
2442}
2443
2444static inline struct btrfs_delayed_ref_node *
2445select_delayed_ref(struct btrfs_delayed_ref_head *head)
2446{
2447 struct btrfs_delayed_ref_node *ref;
2448
2449 if (list_empty(&head->ref_list))
2450 return NULL;
2451
 /*
  * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
  * This is to prevent a ref count from going down to zero, which deletes
  * the extent item from the extent tree, when there still are references
  * to add, which would fail because they would not find the extent item.
  */
2458 if (!list_empty(&head->ref_add_list))
2459 return list_first_entry(&head->ref_add_list,
2460 struct btrfs_delayed_ref_node, add_list);
2461
2462 ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
2463 list);
2464 ASSERT(list_empty(&ref->add_list));
2465 return ref;
2466}
2467
/*
 * Returns 0 on success or if called with an already aborted transaction.
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2472static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2473 struct btrfs_root *root,
2474 unsigned long nr)
2475{
2476 struct btrfs_delayed_ref_root *delayed_refs;
2477 struct btrfs_delayed_ref_node *ref;
2478 struct btrfs_delayed_ref_head *locked_ref = NULL;
2479 struct btrfs_delayed_extent_op *extent_op;
2480 struct btrfs_fs_info *fs_info = root->fs_info;
2481 ktime_t start = ktime_get();
2482 int ret;
2483 unsigned long count = 0;
2484 unsigned long actual_count = 0;
2485 int must_insert_reserved = 0;
2486
2487 delayed_refs = &trans->transaction->delayed_refs;
2488 while (1) {
2489 if (!locked_ref) {
2490 if (count >= nr)
2491 break;
2492
2493 spin_lock(&delayed_refs->lock);
2494 locked_ref = btrfs_select_ref_head(trans);
2495 if (!locked_ref) {
2496 spin_unlock(&delayed_refs->lock);
2497 break;
2498 }
2499
 /* grab the lock that says we are going to process
  * all the refs for this head */
2502 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2503 spin_unlock(&delayed_refs->lock);
2504
 /*
  * we may have dropped the spin lock to get the head
  * mutex lock, and that might have given someone else
  * time to free the head.  If that's true, it has been
  * removed from our list and we can move on.
  */
2510 if (ret == -EAGAIN) {
2511 locked_ref = NULL;
2512 count++;
2513 continue;
2514 }
2515 }
2516
 /*
  * We need to try and merge add/drops of the same ref since we
  * can run into issues with relocate dropping the implicit ref
  * and then it being added back again before the drop can
  * finish.  If we merged anything we need to re-loop so we can
  * get a good ref.
  * Or we can get node references of the same type that weren't
  * merged when created due to bumps in the tree mod seq, and
  * we need to merge them to prevent adding an inline extent
  * backref before dropping it (triggering a BUG_ON at the time
  * of btrfs_put_delayed_ref()).
  */
2529 spin_lock(&locked_ref->lock);
2530 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2531 locked_ref);
2532
 /*
  * locked_ref is the head node, so we have to go one
  * node back for any delayed ref updates
  */
2537 ref = select_delayed_ref(locked_ref);
2538
2539 if (ref && ref->seq &&
2540 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2541 spin_unlock(&locked_ref->lock);
2542 spin_lock(&delayed_refs->lock);
2543 locked_ref->processing = 0;
2544 delayed_refs->num_heads_ready++;
2545 spin_unlock(&delayed_refs->lock);
2546 btrfs_delayed_ref_unlock(locked_ref);
2547 locked_ref = NULL;
2548 cond_resched();
2549 count++;
2550 continue;
2551 }
2552
 /*
  * Record the must_insert_reserved flag before we drop the
  * spin lock.
  */
2557 must_insert_reserved = locked_ref->must_insert_reserved;
2558 locked_ref->must_insert_reserved = 0;
2559
2560 extent_op = locked_ref->extent_op;
2561 locked_ref->extent_op = NULL;
2562
2563 if (!ref) {
 /*
  * All delayed refs have been processed, go ahead and send the
  * head node to run_one_delayed_ref, so that any accounting
  * fixes can happen
  */
2570 ref = &locked_ref->node;
2571
2572 if (extent_op && must_insert_reserved) {
2573 btrfs_free_delayed_extent_op(extent_op);
2574 extent_op = NULL;
2575 }
2576
2577 if (extent_op) {
2578 spin_unlock(&locked_ref->lock);
2579 ret = run_delayed_extent_op(trans, root,
2580 ref, extent_op);
2581 btrfs_free_delayed_extent_op(extent_op);
2582
2583 if (ret) {
 /*
  * Need to reset must_insert_reserved if there was an
  * error so the abort stuff can cleanup the reserved
  * space properly.
  */
2590 if (must_insert_reserved)
2591 locked_ref->must_insert_reserved = 1;
2592 spin_lock(&delayed_refs->lock);
2593 locked_ref->processing = 0;
2594 delayed_refs->num_heads_ready++;
2595 spin_unlock(&delayed_refs->lock);
2596 btrfs_debug(fs_info,
2597 "run_delayed_extent_op returned %d",
2598 ret);
2599 btrfs_delayed_ref_unlock(locked_ref);
2600 return ret;
2601 }
2602 continue;
2603 }
2604
 /*
  * Need to drop our head ref lock and re-acquire the delayed
  * ref lock and then re-check to make sure nobody got added.
  */
2610 spin_unlock(&locked_ref->lock);
2611 spin_lock(&delayed_refs->lock);
2612 spin_lock(&locked_ref->lock);
2613 if (!list_empty(&locked_ref->ref_list) ||
2614 locked_ref->extent_op) {
2615 spin_unlock(&locked_ref->lock);
2616 spin_unlock(&delayed_refs->lock);
2617 continue;
2618 }
2619 ref->in_tree = 0;
2620 delayed_refs->num_heads--;
2621 rb_erase(&locked_ref->href_node,
2622 &delayed_refs->href_root);
2623 spin_unlock(&delayed_refs->lock);
2624 } else {
2625 actual_count++;
2626 ref->in_tree = 0;
2627 list_del(&ref->list);
2628 if (!list_empty(&ref->add_list))
2629 list_del(&ref->add_list);
2630 }
2631 atomic_dec(&delayed_refs->num_entries);
2632
2633 if (!btrfs_delayed_ref_is_head(ref)) {
 /*
  * when we play the delayed ref, also correct the
  * ref_mod on head
  */
2638 switch (ref->action) {
2639 case BTRFS_ADD_DELAYED_REF:
2640 case BTRFS_ADD_DELAYED_EXTENT:
2641 locked_ref->node.ref_mod -= ref->ref_mod;
2642 break;
2643 case BTRFS_DROP_DELAYED_REF:
2644 locked_ref->node.ref_mod += ref->ref_mod;
2645 break;
2646 default:
2647 WARN_ON(1);
2648 }
2649 }
2650 spin_unlock(&locked_ref->lock);
2651
2652 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2653 must_insert_reserved);
2654
2655 btrfs_free_delayed_extent_op(extent_op);
2656 if (ret) {
2657 spin_lock(&delayed_refs->lock);
2658 locked_ref->processing = 0;
2659 delayed_refs->num_heads_ready++;
2660 spin_unlock(&delayed_refs->lock);
2661 btrfs_delayed_ref_unlock(locked_ref);
2662 btrfs_put_delayed_ref(ref);
2663 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2664 ret);
2665 return ret;
2666 }
2667
 /*
  * If this node is a head, that means all the refs in this head
  * have been dealt with, and we will pick the next head to deal
  * with, so we must unlock the head and drop it from the cluster
  * list before we release it.
  */
2674 if (btrfs_delayed_ref_is_head(ref)) {
2675 if (locked_ref->is_data &&
2676 locked_ref->total_ref_mod < 0) {
2677 spin_lock(&delayed_refs->lock);
2678 delayed_refs->pending_csums -= ref->num_bytes;
2679 spin_unlock(&delayed_refs->lock);
2680 }
2681 btrfs_delayed_ref_unlock(locked_ref);
2682 locked_ref = NULL;
2683 }
2684 btrfs_put_delayed_ref(ref);
2685 count++;
2686 cond_resched();
2687 }
2688
 /*
  * We don't want to include ref heads since we can have empty ref heads
  * and those will drastically skew our runtime down since we just do
  * accounting, no actual extent tree updates.
  */
2694 if (actual_count > 0) {
2695 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2696 u64 avg;
2697
 /*
  * We weigh the current average higher than our current runtime
  * to avoid large swings in the average.
  */
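 /*
  * Worked example (illustrative): with a stored average of 8000ns and a
  * new runtime of 4000ns, avg = (8000 * 3 + 4000) >> 2 = 7000ns, i.e. a
  * 3/4-weighted exponential moving average of the old value.
  */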
2702 spin_lock(&delayed_refs->lock);
2703 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2704 fs_info->avg_delayed_ref_runtime = avg >> 2;
2705 spin_unlock(&delayed_refs->lock);
2706 }
2707 return 0;
2708}
2709
2710#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning.
 */
2716static u64 find_middle(struct rb_root *root)
2717{
2718 struct rb_node *n = root->rb_node;
2719 struct btrfs_delayed_ref_node *entry;
2720 int alt = 1;
2721 u64 middle;
2722 u64 first = 0, last = 0;
2723
2724 n = rb_first(root);
2725 if (n) {
2726 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2727 first = entry->bytenr;
2728 }
2729 n = rb_last(root);
2730 if (n) {
2731 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2732 last = entry->bytenr;
2733 }
2734 n = root->rb_node;
2735
2736 while (n) {
2737 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2738 WARN_ON(!entry->in_tree);
2739
2740 middle = entry->bytenr;
2741
2742 if (alt)
2743 n = n->rb_left;
2744 else
2745 n = n->rb_right;
2746
2747 alt = 1 - alt;
2748 }
2749 return middle;
2750}
2751#endif
2752
2753static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2754{
2755 u64 num_bytes;
2756
2757 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2758 sizeof(struct btrfs_extent_inline_ref));
2759 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2760 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2761
 /*
  * Leaves are never filled all the way, so the caller
  * (btrfs_check_space_for_delayed_refs) doubles the result to get
  * closer to real-world usage.
  */
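 /*
  * Rough worked example (illustrative, assuming non-skinny metadata and
  * ~16KiB of usable leaf space): each head costs on the order of
  * 24 + 9 + 18 = ~51 bytes of item data, so a single leaf holds a few
  * hundred heads.
  */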
2766 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2767}
2768
/*
 * Takes the number of bytes to be checksummed and figures out how many
 * leaves it would require to store the csums for that many bytes.
 */
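/*
 * Worked example (illustrative, assuming 4KiB sectors, 4-byte crc32c csums
 * and ~16KiB of usable item space per leaf): num_csums_per_leaf is about
 * 4096, so 1GiB of data carries 262144 csums and needs roughly 64 leaves.
 */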
2773u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
2774{
2775 u64 csum_size;
2776 u64 num_csums_per_leaf;
2777 u64 num_csums;
2778
2779 csum_size = BTRFS_MAX_ITEM_SIZE(root);
2780 num_csums_per_leaf = div64_u64(csum_size,
2781 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2782 num_csums = div64_u64(csum_bytes, root->sectorsize);
2783 num_csums += num_csums_per_leaf - 1;
2784 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2785 return num_csums;
2786}
2787
2788int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2789 struct btrfs_root *root)
2790{
2791 struct btrfs_block_rsv *global_rsv;
2792 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2793 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2794 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2795 u64 num_bytes, num_dirty_bgs_bytes;
2796 int ret = 0;
2797
2798 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
2799 num_heads = heads_to_leaves(root, num_heads);
2800 if (num_heads > 1)
2801 num_bytes += (num_heads - 1) * root->nodesize;
2802 num_bytes <<= 1;
2803 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
2804 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2805 num_dirty_bgs);
2806 global_rsv = &root->fs_info->global_block_rsv;
2807
 /*
  * If we can't allocate any more chunks lets make sure we have _lots_ of
  * wiggle room since running delayed refs can create more delayed refs.
  */
2812 if (global_rsv->space_info->full) {
2813 num_dirty_bgs_bytes <<= 1;
2814 num_bytes <<= 1;
2815 }
2816
2817 spin_lock(&global_rsv->lock);
2818 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2819 ret = 1;
2820 spin_unlock(&global_rsv->lock);
2821 return ret;
2822}
2823
2824int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2825 struct btrfs_root *root)
2826{
2827 struct btrfs_fs_info *fs_info = root->fs_info;
2828 u64 num_entries =
2829 atomic_read(&trans->transaction->delayed_refs.num_entries);
2830 u64 avg_runtime;
2831 u64 val;
2832
2833 smp_mb();
2834 avg_runtime = fs_info->avg_delayed_ref_runtime;
2835 val = num_entries * avg_runtime;
2836 if (val >= NSEC_PER_SEC)
2837 return 1;
2838 if (val >= NSEC_PER_SEC / 2)
2839 return 2;
2840
2841 return btrfs_check_space_for_delayed_refs(trans, root);
2842}
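
/*
 * Illustrative numbers for the thresholds above: if avg_runtime is
 * 1,000,000ns (1ms), then 1000 pending entries push num_entries *
 * avg_runtime past NSEC_PER_SEC (return 1) and 500 push it past half of
 * that (return 2).
 */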
2843
2844struct async_delayed_refs {
2845 struct btrfs_root *root;
2846 u64 transid;
2847 int count;
2848 int error;
2849 int sync;
2850 struct completion wait;
2851 struct btrfs_work work;
2852};
2853
2854static void delayed_ref_async_start(struct btrfs_work *work)
2855{
2856 struct async_delayed_refs *async;
2857 struct btrfs_trans_handle *trans;
2858 int ret;
2859
2860 async = container_of(work, struct async_delayed_refs, work);
2861
 /* if the commit is already started, we don't need to wait here */
2863 if (btrfs_transaction_blocked(async->root->fs_info))
2864 goto done;
2865
2866 trans = btrfs_join_transaction(async->root);
2867 if (IS_ERR(trans)) {
2868 async->error = PTR_ERR(trans);
2869 goto done;
2870 }
2871
 /*
  * trans->sync means that when we call end_transaction, we won't
  * wait on delayed refs
  */
2876 trans->sync = true;
2877
 /* Don't bother flushing if we got into a different transaction */
2879 if (trans->transid > async->transid)
2880 goto end;
2881
2882 ret = btrfs_run_delayed_refs(trans, async->root, async->count);
2883 if (ret)
2884 async->error = ret;
2885end:
2886 ret = btrfs_end_transaction(trans, async->root);
2887 if (ret && !async->error)
2888 async->error = ret;
2889done:
2890 if (async->sync)
2891 complete(&async->wait);
2892 else
2893 kfree(async);
2894}
2895
2896int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2897 unsigned long count, u64 transid, int wait)
2898{
2899 struct async_delayed_refs *async;
2900 int ret;
2901
2902 async = kmalloc(sizeof(*async), GFP_NOFS);
2903 if (!async)
2904 return -ENOMEM;
2905
2906 async->root = root->fs_info->tree_root;
2907 async->count = count;
2908 async->error = 0;
2909 async->transid = transid;
2910 if (wait)
2911 async->sync = 1;
2912 else
2913 async->sync = 0;
2914 init_completion(&async->wait);
2915
2916 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2917 delayed_ref_async_start, NULL, NULL);
2918
2919 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2920
2921 if (wait) {
2922 wait_for_completion(&async->wait);
2923 ret = async->error;
2924 kfree(async);
2925 return ret;
2926 }
2927 return 0;
2928}
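
/*
 * Example caller pattern (illustrative only): kick off a batch of delayed
 * refs without blocking, or wait for the worker when the result matters:
 *
 *	ret = btrfs_async_run_delayed_refs(root, nr, trans->transid, 0);
 *
 * With wait == 1 the function sleeps on the completion above and returns
 * the worker's error code instead of 0.
 */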
2929
/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 *
 * Returns 0 on success or if called with an aborted transaction
 * Returns <0 on error and aborts the transaction
 */
2940int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2941 struct btrfs_root *root, unsigned long count)
2942{
2943 struct rb_node *node;
2944 struct btrfs_delayed_ref_root *delayed_refs;
2945 struct btrfs_delayed_ref_head *head;
2946 int ret;
2947 int run_all = count == (unsigned long)-1;
2948 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
2949
 /* We'll clean this up in btrfs_cleanup_transaction */
2951 if (trans->aborted)
2952 return 0;
2953
2954 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &root->fs_info->flags))
2955 return 0;
2956
2957 if (root == root->fs_info->extent_root)
2958 root = root->fs_info->tree_root;
2959
2960 delayed_refs = &trans->transaction->delayed_refs;
2961 if (count == 0)
2962 count = atomic_read(&delayed_refs->num_entries) * 2;
2963
2964again:
2965#ifdef SCRAMBLE_DELAYED_REFS
2966 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2967#endif
2968 trans->can_flush_pending_bgs = false;
2969 ret = __btrfs_run_delayed_refs(trans, root, count);
2970 if (ret < 0) {
2971 btrfs_abort_transaction(trans, root, ret);
2972 return ret;
2973 }
2974
2975 if (run_all) {
2976 if (!list_empty(&trans->new_bgs))
2977 btrfs_create_pending_block_groups(trans, root);
2978
2979 spin_lock(&delayed_refs->lock);
2980 node = rb_first(&delayed_refs->href_root);
2981 if (!node) {
2982 spin_unlock(&delayed_refs->lock);
2983 goto out;
2984 }
2985
2986 while (node) {
2987 head = rb_entry(node, struct btrfs_delayed_ref_head,
2988 href_node);
2989 if (btrfs_delayed_ref_is_head(&head->node)) {
2990 struct btrfs_delayed_ref_node *ref;
2991
2992 ref = &head->node;
2993 atomic_inc(&ref->refs);
2994
2995 spin_unlock(&delayed_refs->lock);
2996
 /*
  * Mutex was contended, block until it's
  * released and try again
  */
3000 mutex_lock(&head->mutex);
3001 mutex_unlock(&head->mutex);
3002
3003 btrfs_put_delayed_ref(ref);
3004 cond_resched();
3005 goto again;
3006 } else {
3007 WARN_ON(1);
3008 }
3009 node = rb_next(node);
3010 }
3011 spin_unlock(&delayed_refs->lock);
3012 cond_resched();
3013 goto again;
3014 }
3015out:
3016 assert_qgroups_uptodate(trans);
3017 trans->can_flush_pending_bgs = can_flush_pending_bgs;
3018 return 0;
3019}
3020
3021int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3022 struct btrfs_root *root,
3023 u64 bytenr, u64 num_bytes, u64 flags,
3024 int level, int is_data)
3025{
3026 struct btrfs_delayed_extent_op *extent_op;
3027 int ret;
3028
3029 extent_op = btrfs_alloc_delayed_extent_op();
3030 if (!extent_op)
3031 return -ENOMEM;
3032
3033 extent_op->flags_to_set = flags;
3034 extent_op->update_flags = true;
3035 extent_op->update_key = false;
3036 extent_op->is_data = is_data ? true : false;
3037 extent_op->level = level;
3038
3039 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
3040 num_bytes, extent_op);
3041 if (ret)
3042 btrfs_free_delayed_extent_op(extent_op);
3043 return ret;
3044}
3045
3046static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
3047 struct btrfs_root *root,
3048 struct btrfs_path *path,
3049 u64 objectid, u64 offset, u64 bytenr)
3050{
3051 struct btrfs_delayed_ref_head *head;
3052 struct btrfs_delayed_ref_node *ref;
3053 struct btrfs_delayed_data_ref *data_ref;
3054 struct btrfs_delayed_ref_root *delayed_refs;
3055 int ret = 0;
3056
3057 delayed_refs = &trans->transaction->delayed_refs;
3058 spin_lock(&delayed_refs->lock);
3059 head = btrfs_find_delayed_ref_head(trans, bytenr);
3060 if (!head) {
3061 spin_unlock(&delayed_refs->lock);
3062 return 0;
3063 }
3064
3065 if (!mutex_trylock(&head->mutex)) {
3066 atomic_inc(&head->node.refs);
3067 spin_unlock(&delayed_refs->lock);
3068
3069 btrfs_release_path(path);
3070
 /*
  * Mutex was contended, block until it's released and let
  * caller try again
  */
3075 mutex_lock(&head->mutex);
3076 mutex_unlock(&head->mutex);
3077 btrfs_put_delayed_ref(&head->node);
3078 return -EAGAIN;
3079 }
3080 spin_unlock(&delayed_refs->lock);
3081
3082 spin_lock(&head->lock);
3083 list_for_each_entry(ref, &head->ref_list, list) {
 /* If it's a shared ref we know a cross reference exists */
3085 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3086 ret = 1;
3087 break;
3088 }
3089
3090 data_ref = btrfs_delayed_node_to_data_ref(ref);
3091
 /*
  * If our ref doesn't match the one we're currently looking at
  * then we have a cross reference.
  */
3096 if (data_ref->root != root->root_key.objectid ||
3097 data_ref->objectid != objectid ||
3098 data_ref->offset != offset) {
3099 ret = 1;
3100 break;
3101 }
3102 }
3103 spin_unlock(&head->lock);
3104 mutex_unlock(&head->mutex);
3105 return ret;
3106}
3107
3108static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
3109 struct btrfs_root *root,
3110 struct btrfs_path *path,
3111 u64 objectid, u64 offset, u64 bytenr)
3112{
3113 struct btrfs_root *extent_root = root->fs_info->extent_root;
3114 struct extent_buffer *leaf;
3115 struct btrfs_extent_data_ref *ref;
3116 struct btrfs_extent_inline_ref *iref;
3117 struct btrfs_extent_item *ei;
3118 struct btrfs_key key;
3119 u32 item_size;
3120 int ret;
3121
3122 key.objectid = bytenr;
3123 key.offset = (u64)-1;
3124 key.type = BTRFS_EXTENT_ITEM_KEY;
3125
3126 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3127 if (ret < 0)
3128 goto out;
3129 BUG_ON(ret == 0);
3130
3131 ret = -ENOENT;
3132 if (path->slots[0] == 0)
3133 goto out;
3134
3135 path->slots[0]--;
3136 leaf = path->nodes[0];
3137 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3138
3139 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
3140 goto out;
3141
3142 ret = 1;
3143 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3144#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3145 if (item_size < sizeof(*ei)) {
3146 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3147 goto out;
3148 }
3149#endif
3150 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3151
3152 if (item_size != sizeof(*ei) +
3153 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3154 goto out;
3155
3156 if (btrfs_extent_generation(leaf, ei) <=
3157 btrfs_root_last_snapshot(&root->root_item))
3158 goto out;
3159
3160 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3161 if (btrfs_extent_inline_ref_type(leaf, iref) !=
3162 BTRFS_EXTENT_DATA_REF_KEY)
3163 goto out;
3164
3165 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3166 if (btrfs_extent_refs(leaf, ei) !=
3167 btrfs_extent_data_ref_count(leaf, ref) ||
3168 btrfs_extent_data_ref_root(leaf, ref) !=
3169 root->root_key.objectid ||
3170 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3171 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3172 goto out;
3173
3174 ret = 0;
3175out:
3176 return ret;
3177}
3178
3179int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
3180 struct btrfs_root *root,
3181 u64 objectid, u64 offset, u64 bytenr)
3182{
3183 struct btrfs_path *path;
3184 int ret;
3185 int ret2;
3186
3187 path = btrfs_alloc_path();
3188 if (!path)
3189 return -ENOENT;
3190
3191 do {
3192 ret = check_committed_ref(trans, root, path, objectid,
3193 offset, bytenr);
3194 if (ret && ret != -ENOENT)
3195 goto out;
3196
3197 ret2 = check_delayed_ref(trans, root, path, objectid,
3198 offset, bytenr);
3199 } while (ret2 == -EAGAIN);
3200
3201 if (ret2 && ret2 != -ENOENT) {
3202 ret = ret2;
3203 goto out;
3204 }
3205
3206 if (ret != -ENOENT || ret2 != -ENOENT)
3207 ret = 0;
3208out:
3209 btrfs_free_path(path);
3210 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3211 WARN_ON(ret > 0);
3212 return ret;
3213}
3214
3215static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3216 struct btrfs_root *root,
3217 struct extent_buffer *buf,
3218 int full_backref, int inc)
3219{
3220 u64 bytenr;
3221 u64 num_bytes;
3222 u64 parent;
3223 u64 ref_root;
3224 u32 nritems;
3225 struct btrfs_key key;
3226 struct btrfs_file_extent_item *fi;
3227 int i;
3228 int level;
3229 int ret = 0;
3230 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
3231 u64, u64, u64, u64, u64, u64);
3232
3233
3234 if (btrfs_is_testing(root->fs_info))
3235 return 0;
3236
3237 ref_root = btrfs_header_owner(buf);
3238 nritems = btrfs_header_nritems(buf);
3239 level = btrfs_header_level(buf);
3240
3241 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3242 return 0;
3243
3244 if (inc)
3245 process_func = btrfs_inc_extent_ref;
3246 else
3247 process_func = btrfs_free_extent;
3248
3249 if (full_backref)
3250 parent = buf->start;
3251 else
3252 parent = 0;
3253
3254 for (i = 0; i < nritems; i++) {
3255 if (level == 0) {
3256 btrfs_item_key_to_cpu(buf, &key, i);
3257 if (key.type != BTRFS_EXTENT_DATA_KEY)
3258 continue;
3259 fi = btrfs_item_ptr(buf, i,
3260 struct btrfs_file_extent_item);
3261 if (btrfs_file_extent_type(buf, fi) ==
3262 BTRFS_FILE_EXTENT_INLINE)
3263 continue;
3264 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3265 if (bytenr == 0)
3266 continue;
3267
3268 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3269 key.offset -= btrfs_file_extent_offset(buf, fi);
3270 ret = process_func(trans, root, bytenr, num_bytes,
3271 parent, ref_root, key.objectid,
3272 key.offset);
3273 if (ret)
3274 goto fail;
3275 } else {
3276 bytenr = btrfs_node_blockptr(buf, i);
3277 num_bytes = root->nodesize;
3278 ret = process_func(trans, root, bytenr, num_bytes,
3279 parent, ref_root, level - 1, 0);
3280 if (ret)
3281 goto fail;
3282 }
3283 }
3284 return 0;
3285fail:
3286 return ret;
3287}
3288
3289int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3290 struct extent_buffer *buf, int full_backref)
3291{
3292 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3293}
3294
3295int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3296 struct extent_buffer *buf, int full_backref)
3297{
3298 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3299}
3300
3301static int write_one_cache_group(struct btrfs_trans_handle *trans,
3302 struct btrfs_root *root,
3303 struct btrfs_path *path,
3304 struct btrfs_block_group_cache *cache)
3305{
3306 int ret;
3307 struct btrfs_root *extent_root = root->fs_info->extent_root;
3308 unsigned long bi;
3309 struct extent_buffer *leaf;
3310
3311 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3312 if (ret) {
3313 if (ret > 0)
3314 ret = -ENOENT;
3315 goto fail;
3316 }
3317
3318 leaf = path->nodes[0];
3319 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3320 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3321 btrfs_mark_buffer_dirty(leaf);
3322fail:
3323 btrfs_release_path(path);
3324 return ret;
}
3327
3328static struct btrfs_block_group_cache *
3329next_block_group(struct btrfs_root *root,
3330 struct btrfs_block_group_cache *cache)
3331{
3332 struct rb_node *node;
3333
3334 spin_lock(&root->fs_info->block_group_cache_lock);
3335
 /* If our block group was removed, we need a full search. */
3337 if (RB_EMPTY_NODE(&cache->cache_node)) {
3338 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3339
3340 spin_unlock(&root->fs_info->block_group_cache_lock);
3341 btrfs_put_block_group(cache);
3342 cache = btrfs_lookup_first_block_group(root->fs_info,
3343 next_bytenr);
3344 return cache;
3345 }
3346 node = rb_next(&cache->cache_node);
3347 btrfs_put_block_group(cache);
3348 if (node) {
3349 cache = rb_entry(node, struct btrfs_block_group_cache,
3350 cache_node);
3351 btrfs_get_block_group(cache);
3352 } else
3353 cache = NULL;
3354 spin_unlock(&root->fs_info->block_group_cache_lock);
3355 return cache;
3356}
3357
3358static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3359 struct btrfs_trans_handle *trans,
3360 struct btrfs_path *path)
3361{
3362 struct btrfs_root *root = block_group->fs_info->tree_root;
3363 struct inode *inode = NULL;
3364 u64 alloc_hint = 0;
3365 int dcs = BTRFS_DC_ERROR;
3366 u64 num_pages = 0;
3367 int retries = 0;
3368 int ret = 0;
3369
 /*
  * If this block group is smaller than 100 megs don't bother caching the
  * block group.
  */
3374 if (block_group->key.offset < (100 * SZ_1M)) {
3375 spin_lock(&block_group->lock);
3376 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3377 spin_unlock(&block_group->lock);
3378 return 0;
3379 }
3380
3381 if (trans->aborted)
3382 return 0;
3383again:
3384 inode = lookup_free_space_inode(root, block_group, path);
3385 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3386 ret = PTR_ERR(inode);
3387 btrfs_release_path(path);
3388 goto out;
3389 }
3390
3391 if (IS_ERR(inode)) {
3392 BUG_ON(retries);
3393 retries++;
3394
3395 if (block_group->ro)
3396 goto out_free;
3397
3398 ret = create_free_space_inode(root, trans, block_group, path);
3399 if (ret)
3400 goto out_free;
3401 goto again;
3402 }
3403
 /* We've already setup this transaction, go ahead and exit */
3405 if (block_group->cache_generation == trans->transid &&
3406 i_size_read(inode)) {
3407 dcs = BTRFS_DC_SETUP;
3408 goto out_put;
3409 }
3410
 /*
  * We want to set the generation to 0, that way if anything goes wrong
  * from here on out we know not to trust this cache when we load up next
  * time.
  */
3416 BTRFS_I(inode)->generation = 0;
3417 ret = btrfs_update_inode(trans, root, inode);
3418 if (ret) {
 /*
  * We could in theory recover from this, simply by setting the super
  * cache generation to 0 so we know to invalidate the cache, but then
  * we'd have to keep track of every block group that failed this way so
  * we know we _have_ to reset the cache before the next commit or risk
  * reading a stale cache.  To limit the chances of getting that wrong,
  * just abort the transaction here.
  */
3429 btrfs_abort_transaction(trans, root, ret);
3430 goto out_put;
3431 }
3432 WARN_ON(ret);
3433
3434 if (i_size_read(inode) > 0) {
3435 ret = btrfs_check_trunc_cache_free_space(root,
3436 &root->fs_info->global_block_rsv);
3437 if (ret)
3438 goto out_put;
3439
3440 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
3441 if (ret)
3442 goto out_put;
3443 }
3444
3445 spin_lock(&block_group->lock);
3446 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3447 !btrfs_test_opt(root->fs_info, SPACE_CACHE)) {
 /*
  * don't bother trying to write stuff out _if_
  * a) we're not cached,
  * b) we're with nospace_cache mount option.
  */
3453 dcs = BTRFS_DC_WRITTEN;
3454 spin_unlock(&block_group->lock);
3455 goto out_put;
3456 }
3457 spin_unlock(&block_group->lock);
3458
 /*
  * We hit an ENOSPC when setting up the cache in this transaction, just
  * skip doing the setup, we've already cleared the cache so we're safe.
  */
3463 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3464 ret = -ENOSPC;
3465 goto out_put;
3466 }
3467
 /*
  * Try to preallocate enough space based on how big the block group is.
  * Keep in mind this has to include any pinned space which could end up
  * taking up quite a bit since it's not folded into the other space
  * info.
  */
3474 num_pages = div_u64(block_group->key.offset, SZ_256M);
3475 if (!num_pages)
3476 num_pages = 1;
3477
3478 num_pages *= 16;
 num_pages *= PAGE_SIZE;
3480
3481 ret = btrfs_check_data_free_space(inode, 0, num_pages);
3482 if (ret)
3483 goto out_put;
3484
3485 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3486 num_pages, num_pages,
3487 &alloc_hint);
 /*
  * Our cache requires contiguous chunks so that we don't modify a bunch
  * of metadata or split extents when writing the cache out, which means
  * we can enospc if we are heavily fragmented in addition to just normal
  * out of space conditions.  So if we hit this just skip setting up any
  * other block groups for this transaction, maybe we'll unpin enough
  * space the next time around.
  */
3496 if (!ret)
3497 dcs = BTRFS_DC_SETUP;
3498 else if (ret == -ENOSPC)
3499 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3500
3501out_put:
3502 iput(inode);
3503out_free:
3504 btrfs_release_path(path);
3505out:
3506 spin_lock(&block_group->lock);
3507 if (!ret && dcs == BTRFS_DC_SETUP)
3508 block_group->cache_generation = trans->transid;
3509 block_group->disk_cache_state = dcs;
3510 spin_unlock(&block_group->lock);
3511
3512 return ret;
3513}
3514
3515int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3516 struct btrfs_root *root)
3517{
3518 struct btrfs_block_group_cache *cache, *tmp;
3519 struct btrfs_transaction *cur_trans = trans->transaction;
3520 struct btrfs_path *path;
3521
3522 if (list_empty(&cur_trans->dirty_bgs) ||
3523 !btrfs_test_opt(root->fs_info, SPACE_CACHE))
3524 return 0;
3525
3526 path = btrfs_alloc_path();
3527 if (!path)
3528 return -ENOMEM;
3529
 /* Could add new block groups, use _safe just in case */
3531 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3532 dirty_list) {
3533 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3534 cache_save_setup(cache, trans, path);
3535 }
3536
3537 btrfs_free_path(path);
3538 return 0;
3539}
3540
/*
 * transaction commit does final block group cache writeback during a
 * critical section where nothing is allowed to change the FS.  This is
 * required in order for the cache to actually match the block group,
 * but can introduce a lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group
 * cache IO.  There's a chance we'll have to redo some of it if the
 * block group changes again during the commit, but it greatly reduces
 * the commit latency by getting rid of the easy block groups while
 * we're still allowing others to join the commit.
 */
3553int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3554 struct btrfs_root *root)
3555{
3556 struct btrfs_block_group_cache *cache;
3557 struct btrfs_transaction *cur_trans = trans->transaction;
3558 int ret = 0;
3559 int should_put;
3560 struct btrfs_path *path = NULL;
3561 LIST_HEAD(dirty);
3562 struct list_head *io = &cur_trans->io_bgs;
3563 int num_started = 0;
3564 int loops = 0;
3565
3566 spin_lock(&cur_trans->dirty_bgs_lock);
3567 if (list_empty(&cur_trans->dirty_bgs)) {
3568 spin_unlock(&cur_trans->dirty_bgs_lock);
3569 return 0;
3570 }
3571 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3572 spin_unlock(&cur_trans->dirty_bgs_lock);
3573
3574again:
 /*
  * make sure all the block groups on our dirty list actually
  * exist
  */
3579 btrfs_create_pending_block_groups(trans, root);
3580
3581 if (!path) {
3582 path = btrfs_alloc_path();
3583 if (!path)
3584 return -ENOMEM;
3585 }
3586
 /*
  * cache_write_mutex is here only to save us from balance or automatic
  * removal of empty block groups deleting this block group while we are
  * writing out the cache
  */
3592 mutex_lock(&trans->transaction->cache_write_mutex);
3593 while (!list_empty(&dirty)) {
3594 cache = list_first_entry(&dirty,
3595 struct btrfs_block_group_cache,
3596 dirty_list);
3597
 /*
  * this can happen if something re-dirties a block group that
  * is already under IO.  Just wait for it to finish and then
  * do it all again
  */
3602 if (!list_empty(&cache->io_list)) {
3603 list_del_init(&cache->io_list);
3604 btrfs_wait_cache_io(root, trans, cache,
3605 &cache->io_ctl, path,
3606 cache->key.objectid);
3607 btrfs_put_block_group(cache);
3608 }
3609
 /*
  * btrfs_wait_cache_io uses the cache->dirty_list to decide if
  * it should update the block group item.  Since we're not in
  * the final commit critical section here, remove the entry
  * from the dirty list now; it will be requeued either by
  * btrfs_wait_cache_io or by us below if write_one_cache_group
  * hits -ENOENT or btrfs_run_delayed_refs dirties it again.
  */
3619 spin_lock(&cur_trans->dirty_bgs_lock);
3620 list_del_init(&cache->dirty_list);
3621 spin_unlock(&cur_trans->dirty_bgs_lock);
3622
3623 should_put = 1;
3624
3625 cache_save_setup(cache, trans, path);
3626
3627 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3628 cache->io_ctl.inode = NULL;
3629 ret = btrfs_write_out_cache(root, trans, cache, path);
3630 if (ret == 0 && cache->io_ctl.inode) {
3631 num_started++;
3632 should_put = 0;
3633
 /*
  * the cache_write_mutex is protecting
  * the io_list
  */
3638 list_add_tail(&cache->io_list, io);
3639 } else {
 /*
  * if we failed to write the cache, the
  * generation will be bad and life goes on
  */
3644 ret = 0;
3645 }
3646 }
3647 if (!ret) {
3648 ret = write_one_cache_group(trans, root, path, cache);
 /*
  * Our block group might still be attached to the list
  * of new block groups in the transaction handle of some
  * other task (struct btrfs_trans_handle->new_bgs). This
  * means its block group item isn't yet in the extent
  * tree. If this happens ignore the error, as we will
  * try again later in the critical section of the
  * transaction commit.
  */
3658 if (ret == -ENOENT) {
3659 ret = 0;
3660 spin_lock(&cur_trans->dirty_bgs_lock);
3661 if (list_empty(&cache->dirty_list)) {
3662 list_add_tail(&cache->dirty_list,
3663 &cur_trans->dirty_bgs);
3664 btrfs_get_block_group(cache);
3665 }
3666 spin_unlock(&cur_trans->dirty_bgs_lock);
3667 } else if (ret) {
3668 btrfs_abort_transaction(trans, root, ret);
3669 }
3670 }
3671
 /* if its not on the io list, we need to put the block group */
3673 if (should_put)
3674 btrfs_put_block_group(cache);
3675
3676 if (ret)
3677 break;
3678
 /*
  * Avoid blocking other tasks for too long. It might even save
  * us from writing caches for block groups that are going to be
  * removed.
  */
3684 mutex_unlock(&trans->transaction->cache_write_mutex);
3685 mutex_lock(&trans->transaction->cache_write_mutex);
3686 }
3687 mutex_unlock(&trans->transaction->cache_write_mutex);
3688
 /*
  * go through delayed refs for all the stuff we've just kicked off
  * and then loop back (just once)
  */
3693 ret = btrfs_run_delayed_refs(trans, root, 0);
3694 if (!ret && loops == 0) {
3695 loops++;
3696 spin_lock(&cur_trans->dirty_bgs_lock);
3697 list_splice_init(&cur_trans->dirty_bgs, &dirty);
 /*
  * dirty_bgs_lock protects us from concurrent block group
  * deletes too (not just cache_write_mutex).
  */
3702 if (!list_empty(&dirty)) {
3703 spin_unlock(&cur_trans->dirty_bgs_lock);
3704 goto again;
3705 }
3706 spin_unlock(&cur_trans->dirty_bgs_lock);
3707 } else if (ret < 0) {
3708 btrfs_cleanup_dirty_bgs(cur_trans, root);
3709 }
3710
3711 btrfs_free_path(path);
3712 return ret;
3713}
3714
3715int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3716 struct btrfs_root *root)
3717{
3718 struct btrfs_block_group_cache *cache;
3719 struct btrfs_transaction *cur_trans = trans->transaction;
3720 int ret = 0;
3721 int should_put;
3722 struct btrfs_path *path;
3723 struct list_head *io = &cur_trans->io_bgs;
3724 int num_started = 0;
3725
3726 path = btrfs_alloc_path();
3727 if (!path)
3728 return -ENOMEM;
3729
 /*
  * Even though we are in the critical section of the transaction commit,
  * we can still have concurrent tasks adding elements to this
  * transaction's list of dirty block groups. These tasks correspond to
  * endio free space workers started when writeback finishes for a
  * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
  * allocate new block groups as a result of COWing nodes of the root
  * tree when updating the free space inode. The writeback for the space
  * caches is triggered by an earlier call to
  * btrfs_start_dirty_block_groups() and iterations of the following
  * loop.
  * Also we want to do the cache_save_setup first and then run the
  * delayed refs to make sure we have the best chance at doing this all
  * in one shot.
  */
3745 spin_lock(&cur_trans->dirty_bgs_lock);
3746 while (!list_empty(&cur_trans->dirty_bgs)) {
3747 cache = list_first_entry(&cur_trans->dirty_bgs,
3748 struct btrfs_block_group_cache,
3749 dirty_list);
3750
 /*
  * this can happen if cache_save_setup re-dirties a block
  * group that is already under IO.  Just wait for it to
  * finish and then do it all again
  */
3756 if (!list_empty(&cache->io_list)) {
3757 spin_unlock(&cur_trans->dirty_bgs_lock);
3758 list_del_init(&cache->io_list);
3759 btrfs_wait_cache_io(root, trans, cache,
3760 &cache->io_ctl, path,
3761 cache->key.objectid);
3762 btrfs_put_block_group(cache);
3763 spin_lock(&cur_trans->dirty_bgs_lock);
3764 }
3765
 /*
  * don't remove from the dirty list until after we've waited
  * on any pending IO
  */
3770 list_del_init(&cache->dirty_list);
3771 spin_unlock(&cur_trans->dirty_bgs_lock);
3772 should_put = 1;
3773
3774 cache_save_setup(cache, trans, path);
3775
3776 if (!ret)
3777 ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
3778
3779 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3780 cache->io_ctl.inode = NULL;
3781 ret = btrfs_write_out_cache(root, trans, cache, path);
3782 if (ret == 0 && cache->io_ctl.inode) {
3783 num_started++;
3784 should_put = 0;
3785 list_add_tail(&cache->io_list, io);
3786 } else {
 /*
  * if we failed to write the cache, the
  * generation will be bad and life goes on
  */
3791 ret = 0;
3792 }
3793 }
3794 if (!ret) {
3795 ret = write_one_cache_group(trans, root, path, cache);
 /*
  * One of the free space endio workers might have
  * created a new block group while updating a free space
  * cache's inode (at inode.c:btrfs_finish_ordered_io())
  * and hasn't released its transaction handle yet, in
  * which case the new block group is still attached to
  * its transaction handle and its creation has not
  * finished yet (no block group item in the extent tree
  * yet, etc). If this is the case, wait for all the
  * current transaction handles to finish and retry; this
  * case is rare enough that the extra wait does not hurt.
  */
3809 if (ret == -ENOENT) {
3810 wait_event(cur_trans->writer_wait,
3811 atomic_read(&cur_trans->num_writers) == 1);
3812 ret = write_one_cache_group(trans, root, path,
3813 cache);
3814 }
3815 if (ret)
3816 btrfs_abort_transaction(trans, root, ret);
3817 }
3818
 /* if its not on the io list, we need to put the block group */
3820 if (should_put)
3821 btrfs_put_block_group(cache);
3822 spin_lock(&cur_trans->dirty_bgs_lock);
3823 }
3824 spin_unlock(&cur_trans->dirty_bgs_lock);
3825
3826 while (!list_empty(io)) {
3827 cache = list_first_entry(io, struct btrfs_block_group_cache,
3828 io_list);
3829 list_del_init(&cache->io_list);
3830 btrfs_wait_cache_io(root, trans, cache,
3831 &cache->io_ctl, path, cache->key.objectid);
3832 btrfs_put_block_group(cache);
3833 }
3834
3835 btrfs_free_path(path);
3836 return ret;
3837}
3838
3839int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3840{
3841 struct btrfs_block_group_cache *block_group;
3842 int readonly = 0;
3843
3844 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3845 if (!block_group || block_group->ro)
3846 readonly = 1;
3847 if (block_group)
3848 btrfs_put_block_group(block_group);
3849 return readonly;
3850}
3851
3852bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3853{
3854 struct btrfs_block_group_cache *bg;
3855 bool ret = true;
3856
3857 bg = btrfs_lookup_block_group(fs_info, bytenr);
3858 if (!bg)
3859 return false;
3860
3861 spin_lock(&bg->lock);
3862 if (bg->ro)
3863 ret = false;
3864 else
3865 atomic_inc(&bg->nocow_writers);
3866 spin_unlock(&bg->lock);
3867
 /* no put on block group, done by btrfs_dec_nocow_writers */
3869 if (!ret)
3870 btrfs_put_block_group(bg);
3871
3872 return ret;
}
3875
3876void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3877{
3878 struct btrfs_block_group_cache *bg;
3879
3880 bg = btrfs_lookup_block_group(fs_info, bytenr);
3881 ASSERT(bg);
3882 if (atomic_dec_and_test(&bg->nocow_writers))
3883 wake_up_atomic_t(&bg->nocow_writers);
 /*
  * Once for our lookup and once for the lookup done by a previous call
  * to btrfs_inc_nocow_writers()
  */
3888 btrfs_put_block_group(bg);
3889 btrfs_put_block_group(bg);
3890}
3891
3892static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
3893{
3894 schedule();
3895 return 0;
3896}
3897
3898void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3899{
3900 wait_on_atomic_t(&bg->nocow_writers,
3901 btrfs_wait_nocow_writers_atomic_t,
3902 TASK_UNINTERRUPTIBLE);
3903}
3904
3905static const char *alloc_name(u64 flags)
3906{
3907 switch (flags) {
3908 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3909 return "mixed";
3910 case BTRFS_BLOCK_GROUP_METADATA:
3911 return "metadata";
3912 case BTRFS_BLOCK_GROUP_DATA:
3913 return "data";
3914 case BTRFS_BLOCK_GROUP_SYSTEM:
3915 return "system";
3916 default:
3917 WARN_ON(1);
3918 return "invalid-combination";
 }
3920}
3921
3922static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3923 u64 total_bytes, u64 bytes_used,
3924 u64 bytes_readonly,
3925 struct btrfs_space_info **space_info)
3926{
3927 struct btrfs_space_info *found;
3928 int i;
3929 int factor;
3930 int ret;
3931
3932 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3933 BTRFS_BLOCK_GROUP_RAID10))
3934 factor = 2;
3935 else
3936 factor = 1;
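 /*
  * Example: with DUP/RAID1/RAID10 every logical byte occupies two bytes
  * on disk, so factor == 2 makes 1GiB of bytes_used account for 2GiB of
  * disk_used below.
  */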
3937
3938 found = __find_space_info(info, flags);
3939 if (found) {
3940 spin_lock(&found->lock);
3941 found->total_bytes += total_bytes;
3942 found->disk_total += total_bytes * factor;
3943 found->bytes_used += bytes_used;
3944 found->disk_used += bytes_used * factor;
3945 found->bytes_readonly += bytes_readonly;
3946 if (total_bytes > 0)
3947 found->full = 0;
3948 space_info_add_new_bytes(info, found, total_bytes -
3949 bytes_used - bytes_readonly);
3950 spin_unlock(&found->lock);
3951 *space_info = found;
3952 return 0;
3953 }
3954 found = kzalloc(sizeof(*found), GFP_NOFS);
3955 if (!found)
3956 return -ENOMEM;
3957
3958 ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
3959 if (ret) {
3960 kfree(found);
3961 return ret;
3962 }
3963
3964 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3965 INIT_LIST_HEAD(&found->block_groups[i]);
3966 init_rwsem(&found->groups_sem);
3967 spin_lock_init(&found->lock);
3968 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3969 found->total_bytes = total_bytes;
3970 found->disk_total = total_bytes * factor;
3971 found->bytes_used = bytes_used;
3972 found->disk_used = bytes_used * factor;
3973 found->bytes_pinned = 0;
3974 found->bytes_reserved = 0;
3975 found->bytes_readonly = bytes_readonly;
3976 found->bytes_may_use = 0;
3977 found->full = 0;
3978 found->max_extent_size = 0;
3979 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3980 found->chunk_alloc = 0;
3981 found->flush = 0;
3982 init_waitqueue_head(&found->wait);
3983 INIT_LIST_HEAD(&found->ro_bgs);
3984 INIT_LIST_HEAD(&found->tickets);
3985 INIT_LIST_HEAD(&found->priority_tickets);
3986
3987 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3988 info->space_info_kobj, "%s",
3989 alloc_name(found->flags));
3990 if (ret) {
3991 kfree(found);
3992 return ret;
3993 }
3994
3995 *space_info = found;
3996 list_add_rcu(&found->list, &info->space_info);
3997 if (flags & BTRFS_BLOCK_GROUP_DATA)
3998 info->data_sinfo = found;
3999
4000 return ret;
4001}
4002
4003static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
4004{
4005 u64 extra_flags = chunk_to_extended(flags) &
4006 BTRFS_EXTENDED_PROFILE_MASK;
4007
4008 write_seqlock(&fs_info->profiles_lock);
4009 if (flags & BTRFS_BLOCK_GROUP_DATA)
4010 fs_info->avail_data_alloc_bits |= extra_flags;
4011 if (flags & BTRFS_BLOCK_GROUP_METADATA)
4012 fs_info->avail_metadata_alloc_bits |= extra_flags;
4013 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4014 fs_info->avail_system_alloc_bits |= extra_flags;
4015 write_sequnlock(&fs_info->profiles_lock);
4016}
4017
/*
 * returns target flags in extended format or 0 if restripe for this
 * chunk_type is not in progress
 *
 * should be called with either volume_mutex or balance_lock held
 */
4024static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
4025{
4026 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4027 u64 target = 0;
4028
4029 if (!bctl)
4030 return 0;
4031
4032 if (flags & BTRFS_BLOCK_GROUP_DATA &&
4033 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4034 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
4035 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
4036 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4037 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
4038 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
4039 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4040 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
4041 }
4042
4043 return target;
4044}
4045
/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
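/*
 * Example (illustrative): if a paused balance left both RAID6 and RAID1
 * set in flags and enough rw devices are present, the priority chain below
 * collapses the result to RAID6 alone (returned in chunk format).
 */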
4053static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
4054{
4055 u64 num_devices = root->fs_info->fs_devices->rw_devices;
4056 u64 target;
4057 u64 raid_type;
4058 u64 allowed = 0;
4059
 /*
  * see if restripe for this chunk_type is in progress, if so
  * try to reduce to the target profile
  */
4064 spin_lock(&root->fs_info->balance_lock);
4065 target = get_restripe_target(root->fs_info, flags);
4066 if (target) {
 /* pick target profile only if it's already available */
4068 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
4069 spin_unlock(&root->fs_info->balance_lock);
4070 return extended_to_chunk(target);
4071 }
4072 }
4073 spin_unlock(&root->fs_info->balance_lock);
4074
 /* First, mask out the RAID levels which aren't possible */
4076 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
4077 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
4078 allowed |= btrfs_raid_group[raid_type];
4079 }
4080 allowed &= flags;
4081
4082 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
4083 allowed = BTRFS_BLOCK_GROUP_RAID6;
4084 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4085 allowed = BTRFS_BLOCK_GROUP_RAID5;
4086 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4087 allowed = BTRFS_BLOCK_GROUP_RAID10;
4088 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4089 allowed = BTRFS_BLOCK_GROUP_RAID1;
4090 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4091 allowed = BTRFS_BLOCK_GROUP_RAID0;
4092
4093 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
4094
4095 return extended_to_chunk(flags | allowed);
4096}
4097
4098static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
4099{
4100 unsigned seq;
4101 u64 flags;
4102
4103 do {
4104 flags = orig_flags;
4105 seq = read_seqbegin(&root->fs_info->profiles_lock);
4106
4107 if (flags & BTRFS_BLOCK_GROUP_DATA)
4108 flags |= root->fs_info->avail_data_alloc_bits;
4109 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4110 flags |= root->fs_info->avail_system_alloc_bits;
4111 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
4112 flags |= root->fs_info->avail_metadata_alloc_bits;
4113 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
4114
4115 return btrfs_reduce_alloc_profile(root, flags);
4116}
4117
4118u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
4119{
4120 u64 flags;
4121 u64 ret;
4122
4123 if (data)
4124 flags = BTRFS_BLOCK_GROUP_DATA;
4125 else if (root == root->fs_info->chunk_root)
4126 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4127 else
4128 flags = BTRFS_BLOCK_GROUP_METADATA;
4129
4130 ret = get_alloc_profile(root, flags);
4131 return ret;
4132}
4133
4134int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
4135{
4136 struct btrfs_space_info *data_sinfo;
4137 struct btrfs_root *root = BTRFS_I(inode)->root;
4138 struct btrfs_fs_info *fs_info = root->fs_info;
4139 u64 used;
4140 int ret = 0;
4141 int need_commit = 2;
4142 int have_pinned_space;
4143
 /* make sure bytes are sectorsize aligned */
4145 bytes = ALIGN(bytes, root->sectorsize);
4146
4147 if (btrfs_is_free_space_inode(inode)) {
4148 need_commit = 0;
4149 ASSERT(current->journal_info);
4150 }
4151
4152 data_sinfo = fs_info->data_sinfo;
4153 if (!data_sinfo)
4154 goto alloc;
4155
4156again:
 /* make sure we have enough space to handle the data first */
4158 spin_lock(&data_sinfo->lock);
4159 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
4160 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
4161 data_sinfo->bytes_may_use;
4162
4163 if (used + bytes > data_sinfo->total_bytes) {
4164 struct btrfs_trans_handle *trans;
4165
 /*
  * if we don't have enough free bytes in this space then we need
  * to alloc a new chunk.
  */
4170 if (!data_sinfo->full) {
4171 u64 alloc_target;
4172
4173 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4174 spin_unlock(&data_sinfo->lock);
4175alloc:
4176 alloc_target = btrfs_get_alloc_profile(root, 1);
4177
 /*
  * It looks odd that we don't use a nolock join
  * transaction for the free space inode case here, but it
  * is safe: data space for the free space cache is only
  * reserved from within a transaction context, so the
  * plain join just bumps the use count of the current
  * transaction and never takes the fs trans_lock.
  */
4187 trans = btrfs_join_transaction(root);
4188 if (IS_ERR(trans))
4189 return PTR_ERR(trans);
4190
4191 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
4192 alloc_target,
4193 CHUNK_ALLOC_NO_FORCE);
4194 btrfs_end_transaction(trans, root);
4195 if (ret < 0) {
4196 if (ret != -ENOSPC)
4197 return ret;
4198 else {
4199 have_pinned_space = 1;
4200 goto commit_trans;
4201 }
4202 }
4203
4204 if (!data_sinfo)
4205 data_sinfo = fs_info->data_sinfo;
4206
4207 goto again;
4208 }
4209
 /*
  * If we don't have enough pinned space to deal with this
  * allocation, and no removed chunk in current transaction,
  * don't bother committing the transaction.
  */
4215 have_pinned_space = percpu_counter_compare(
4216 &data_sinfo->total_bytes_pinned,
4217 used + bytes - data_sinfo->total_bytes);
4218 spin_unlock(&data_sinfo->lock);
4219
 /* commit the current transaction and try again */
4221commit_trans:
4222 if (need_commit &&
4223 !atomic_read(&root->fs_info->open_ioctl_trans)) {
4224 need_commit--;
4225
4226 if (need_commit > 0) {
4227 btrfs_start_delalloc_roots(fs_info, 0, -1);
4228 btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
4229 }
4230
4231 trans = btrfs_join_transaction(root);
4232 if (IS_ERR(trans))
4233 return PTR_ERR(trans);
4234 if (have_pinned_space >= 0 ||
4235 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4236 &trans->transaction->flags) ||
4237 need_commit > 0) {
4238 ret = btrfs_commit_transaction(trans, root);
4239 if (ret)
4240 return ret;
4241
 /*
  * The cleaner kthread might still be doing iput
  * operations. Wait for it to finish so that
  * more space is released.
  */
4246 mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
4247 mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
4248 goto again;
4249 } else {
4250 btrfs_end_transaction(trans, root);
4251 }
4252 }
4253
4254 trace_btrfs_space_reservation(root->fs_info,
4255 "space_info:enospc",
4256 data_sinfo->flags, bytes, 1);
4257 return -ENOSPC;
4258 }
4259 data_sinfo->bytes_may_use += bytes;
4260 trace_btrfs_space_reservation(root->fs_info, "space_info",
4261 data_sinfo->flags, bytes, 1);
4262 spin_unlock(&data_sinfo->lock);
4263
4264 return ret;
4265}
4266
/*
 * Check and reserve data space for a write of @len bytes starting at
 * @start, including an accurate qgroup reservation for the range.
 */
4272int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
4273{
4274 struct btrfs_root *root = BTRFS_I(inode)->root;
4275 int ret;
4276
 /* align the range */
4278 len = round_up(start + len, root->sectorsize) -
4279 round_down(start, root->sectorsize);
4280 start = round_down(start, root->sectorsize);
4281
4282 ret = btrfs_alloc_data_chunk_ondemand(inode, len);
4283 if (ret < 0)
4284 return ret;
4285
 /* Use btrfs_qgroup_reserve_data to get an accurate per-range reservation */
4287 ret = btrfs_qgroup_reserve_data(inode, start, len);
4288 if (ret)
4289 btrfs_free_reserved_data_space_noquota(inode, start, len);
4290 return ret;
4291}
4292
/*
 * Called if we need to clear a data reservation for this inode,
 * normally in an error case.
 *
 * This one does *NOT* go through the accurate qgroup reserved space API;
 * it only undoes the space_info accounting.  Use
 * btrfs_free_reserved_data_space() when the qgroup reservation must be
 * released as well.
 */
4301void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4302 u64 len)
4303{
4304 struct btrfs_root *root = BTRFS_I(inode)->root;
4305 struct btrfs_space_info *data_sinfo;
4306
 /* Make sure the range is aligned to sectorsize */
4308 len = round_up(start + len, root->sectorsize) -
4309 round_down(start, root->sectorsize);
4310 start = round_down(start, root->sectorsize);
4311
4312 data_sinfo = root->fs_info->data_sinfo;
4313 spin_lock(&data_sinfo->lock);
4314 if (WARN_ON(data_sinfo->bytes_may_use < len))
4315 data_sinfo->bytes_may_use = 0;
4316 else
4317 data_sinfo->bytes_may_use -= len;
4318 trace_btrfs_space_reservation(root->fs_info, "space_info",
4319 data_sinfo->flags, len, 0);
4320 spin_unlock(&data_sinfo->lock);
4321}
4322
/*
 * Called if we need to clear a data reservation for this inode,
 * normally in an error case.
 *
 * This one also releases the per-inode qgroup reserved space for the
 * range, via btrfs_qgroup_free_data().
 */
4330void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
4331{
4332 struct btrfs_root *root = BTRFS_I(inode)->root;
4333
 /* Make sure the range is aligned to sectorsize */
4335 len = round_up(start + len, root->sectorsize) -
4336 round_down(start, root->sectorsize);
4337 start = round_down(start, root->sectorsize);
4338
4339 btrfs_free_reserved_data_space_noquota(inode, start, len);
4340 btrfs_qgroup_free_data(inode, start, len);
4341}
4342
4343static void force_metadata_allocation(struct btrfs_fs_info *info)
4344{
4345 struct list_head *head = &info->space_info;
4346 struct btrfs_space_info *found;
4347
4348 rcu_read_lock();
4349 list_for_each_entry_rcu(found, head, list) {
4350 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4351 found->force_alloc = CHUNK_ALLOC_FORCE;
4352 }
4353 rcu_read_unlock();
4354}
4355
4356static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4357{
4358 return (global->size << 1);
4359}
4360
4361static int should_alloc_chunk(struct btrfs_root *root,
4362 struct btrfs_space_info *sinfo, int force)
4363{
4364 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4365 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
4366 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
4367 u64 thresh;
4368
4369 if (force == CHUNK_ALLOC_FORCE)
4370 return 1;
4371
 /*
  * We need to take into account the global rsv because for all intents
  * and purposes it's used space.  Don't worry about locking the
  * global_rsv, it doesn't change except when the transaction commits.
  */
4377 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
4378 num_allocated += calc_global_rsv_need_space(global_rsv);
4379
 /*
  * in limited mode, we want to have some free space up to
  * about 1% of the FS size.
  */
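 /*
  * Example (illustrative): on a 1TiB filesystem the threshold below is
  * max(64MiB, 1% of 1TiB) = ~10GiB, so CHUNK_ALLOC_LIMITED allocates once
  * less than ~10GiB of this space_info remains free.
  */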
4384 if (force == CHUNK_ALLOC_LIMITED) {
4385 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
4386 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
4387
4388 if (num_bytes - num_allocated < thresh)
4389 return 1;
4390 }
4391
4392 if (num_allocated + SZ_2M < div_factor(num_bytes, 8))
4393 return 0;
4394 return 1;
4395}
4396
4397static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
4398{
4399 u64 num_dev;
4400
4401 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4402 BTRFS_BLOCK_GROUP_RAID0 |
4403 BTRFS_BLOCK_GROUP_RAID5 |
4404 BTRFS_BLOCK_GROUP_RAID6))
4405 num_dev = root->fs_info->fs_devices->rw_devices;
4406 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4407 num_dev = 2;
4408 else
4409 num_dev = 1;
4410
4411 return num_dev;
4412}
4413
/*
 * Reserve space in the system space_info for the chunk tree updates that
 * allocating or removing a chunk of the given @type will require.
 */
4419void check_system_chunk(struct btrfs_trans_handle *trans,
4420 struct btrfs_root *root,
4421 u64 type)
4422{
4423 struct btrfs_space_info *info;
4424 u64 left;
4425 u64 thresh;
4426 int ret = 0;
4427 u64 num_devs;
4428
 /*
  * Needed because we can end up allocating a system chunk and for an
  * atomic and race free space reservation in the chunk block reserve.
  */
4433 ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
4434
4435 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4436 spin_lock(&info->lock);
4437 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
4438 info->bytes_reserved - info->bytes_readonly -
4439 info->bytes_may_use;
4440 spin_unlock(&info->lock);
4441
4442 num_devs = get_profile_num_devs(root, type);
4443
 /* num_devs device items to update and 1 chunk item to add or remove */
4445 thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
4446 btrfs_calc_trans_metadata_size(root, 1);
4447
4448 if (left < thresh && btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
4449 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
4450 left, thresh, type);
4451 dump_space_info(info, 0, 0);
4452 }
4453
4454 if (left < thresh) {
4455 u64 flags;
4456
4457 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
4458
 /*
  * Ignore failure to create system chunk. We might end up not
  * needing it, as we might not need to COW all nodes/leafs from
  * the paths we visit in the chunk tree (they were already COWed
  * or dummy objects for a mount of the chunk tree).
  */
4464 ret = btrfs_alloc_chunk(trans, root, flags);
4465 }
4466
4467 if (!ret) {
4468 ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
4469 &root->fs_info->chunk_block_rsv,
4470 thresh, BTRFS_RESERVE_NO_FLUSH);
4471 if (!ret)
4472 trans->chunk_bytes_reserved += thresh;
4473 }
4474}
4475
/*
 * If force is CHUNK_ALLOC_FORCE:
 *    - return 1 if it successfully allocates a chunk,
 *    - return errors including -ENOSPC otherwise.
 * If force is NOT CHUNK_ALLOC_FORCE:
 *    - return 0 if it doesn't need to allocate a new chunk,
 *    - return 1 if it successfully allocates a chunk,
 *    - return errors including -ENOSPC otherwise.
 */
4485static int do_chunk_alloc(struct btrfs_trans_handle *trans,
4486 struct btrfs_root *extent_root, u64 flags, int force)
4487{
4488 struct btrfs_space_info *space_info;
4489 struct btrfs_fs_info *fs_info = extent_root->fs_info;
4490 int wait_for_alloc = 0;
4491 int ret = 0;
4492
 /* Don't re-enter if we're already allocating a chunk */
4494 if (trans->allocating_chunk)
4495 return -ENOSPC;
4496
4497 space_info = __find_space_info(extent_root->fs_info, flags);
4498 if (!space_info) {
4499 ret = update_space_info(extent_root->fs_info, flags,
4500 0, 0, 0, &space_info);
4501 BUG_ON(ret);
4502 }
4503 BUG_ON(!space_info);
4504
4505again:
4506 spin_lock(&space_info->lock);
4507 if (force < space_info->force_alloc)
4508 force = space_info->force_alloc;
4509 if (space_info->full) {
4510 if (should_alloc_chunk(extent_root, space_info, force))
4511 ret = -ENOSPC;
4512 else
4513 ret = 0;
4514 spin_unlock(&space_info->lock);
4515 return ret;
4516 }
4517
4518 if (!should_alloc_chunk(extent_root, space_info, force)) {
4519 spin_unlock(&space_info->lock);
4520 return 0;
4521 } else if (space_info->chunk_alloc) {
4522 wait_for_alloc = 1;
4523 } else {
4524 space_info->chunk_alloc = 1;
4525 }
4526
4527 spin_unlock(&space_info->lock);
4528
4529 mutex_lock(&fs_info->chunk_mutex);
4530
 /*
  * The chunk_mutex is held throughout the entirety of a chunk
  * allocation, so once we've acquired the chunk_mutex we know that the
  * other guy is done and we need to recheck and see if we should
  * allocate.
  */
4537 if (wait_for_alloc) {
4538 mutex_unlock(&fs_info->chunk_mutex);
4539 wait_for_alloc = 0;
4540 goto again;
4541 }
4542
4543 trans->allocating_chunk = true;
4544
 /*
  * If we have mixed data/metadata chunks we want to make sure we keep
  * allocating mixed chunks instead of individual chunks.
  */
4549 if (btrfs_mixed_space_info(space_info))
4550 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4551
 /*
  * if we're doing a data chunk, go ahead and make sure that
  * we keep a reasonable number of metadata chunks allocated in the
  * FS as well.
  */
4557 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
4558 fs_info->data_chunk_allocations++;
4559 if (!(fs_info->data_chunk_allocations %
4560 fs_info->metadata_ratio))
4561 force_metadata_allocation(fs_info);
4562 }
4563
 /*
  * Check if we have enough space in SYSTEM chunk because we may need
  * to update devices.
  */
4568 check_system_chunk(trans, extent_root, flags);
4569
4570 ret = btrfs_alloc_chunk(trans, extent_root, flags);
4571 trans->allocating_chunk = false;
4572
4573 spin_lock(&space_info->lock);
4574 if (ret < 0 && ret != -ENOSPC)
4575 goto out;
4576 if (ret)
4577 space_info->full = 1;
4578 else
4579 ret = 1;
4580
4581 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4582out:
4583 space_info->chunk_alloc = 0;
4584 spin_unlock(&space_info->lock);
4585 mutex_unlock(&fs_info->chunk_mutex);
4586
 /*
  * When we allocate a new chunk we reserve space in the chunk block
  * reserve to make sure we can COW nodes/leafs in the chunk tree or
  * add new nodes/leafs to it if we end up needing to do it when
  * inserting the chunk item and updating device items as part of the
  * second phase of chunk allocation, performed by
  * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
  * large number of new block groups to create in our transaction
  * handle's new_bgs list to avoid exhausting the chunk block reserve
  * in extreme cases - like having a single transaction create many new
  * block groups that allocate a lot of space for their block group
  * items in the extent tree.
  */
4600 if (trans->can_flush_pending_bgs &&
4601 trans->chunk_bytes_reserved >= (u64)SZ_2M) {
4602 btrfs_create_pending_block_groups(trans, trans->root);
4603 btrfs_trans_release_chunk_metadata(trans);
4604 }
4605 return ret;
4606}
4607
4608static int can_overcommit(struct btrfs_root *root,
4609 struct btrfs_space_info *space_info, u64 bytes,
4610 enum btrfs_reserve_flush_enum flush)
4611{
4612 struct btrfs_block_rsv *global_rsv;
4613 u64 profile;
4614 u64 space_size;
4615 u64 avail;
4616 u64 used;
4617
 /* Don't overcommit data space (this also covers mixed block groups). */
4619 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4620 return 0;
4621
4622 BUG_ON(root->fs_info == NULL);
4623 global_rsv = &root->fs_info->global_block_rsv;
4624 profile = btrfs_get_alloc_profile(root, 0);
4625 used = space_info->bytes_used + space_info->bytes_reserved +
4626 space_info->bytes_pinned + space_info->bytes_readonly;
4627
4628
4629
4630
4631
4632
4633
4634 spin_lock(&global_rsv->lock);
4635 space_size = calc_global_rsv_need_space(global_rsv);
4636 spin_unlock(&global_rsv->lock);
4637 if (used + space_size >= space_info->total_bytes)
4638 return 0;
4639
4640 used += space_info->bytes_may_use;
4641
4642 spin_lock(&root->fs_info->free_chunk_lock);
4643 avail = root->fs_info->free_chunk_space;
4644 spin_unlock(&root->fs_info->free_chunk_lock);
4645
4646
4647
4648
4649
4650
4651
4652 if (profile & (BTRFS_BLOCK_GROUP_DUP |
4653 BTRFS_BLOCK_GROUP_RAID1 |
4654 BTRFS_BLOCK_GROUP_RAID10))
4655 avail >>= 1;
4656
4657
4658
4659
4660
4661
4662 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4663 avail >>= 3;
4664 else
4665 avail >>= 1;
4666
4667 if (used + bytes < space_info->total_bytes + avail)
4668 return 1;
4669 return 0;
4670}
4671
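/*
 * Kick off writeback of dirty delalloc pages: prefer the generic superblock
 * writeback path when s_umount can be taken, otherwise flush the btrfs
 * delalloc inodes directly.
 */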
static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
					 unsigned long nr_pages, int nr_items)
{
	struct super_block *sb = root->fs_info->sb;

	if (down_read_trylock(&sb->s_umount)) {
		writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
		up_read(&sb->s_umount);
	} else {
		/*
		 * We needn't worry the filesystem going from r/w to r/o though
		 * we don't acquire ->s_umount mutex, because the filesystem
		 * should guarantee the delalloc inodes list be empty after
		 * the filesystem is readonly(all dirty pages are written to
		 * the disk).
		 */
		btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
		if (!current->journal_info)
			btrfs_wait_ordered_roots(root->fs_info, nr_items,
						 0, (u64)-1);
	}
}

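/*
 * Convert a number of bytes we want to reclaim into an item count, using the
 * worst case metadata cost of a single tree item as the unit.
 */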
static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
{
	u64 bytes;
	int nr;

	bytes = btrfs_calc_trans_metadata_size(root, 1);
	nr = (int)div64_u64(to_reclaim, bytes);
	if (!nr)
		nr = 1;
	return nr;
}

#define EXTENT_SIZE_PER_ITEM	SZ_256K

/*
 * shrink metadata reservation for delalloc
 */
static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
			    bool wait_ordered)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_space_info *space_info;
	struct btrfs_trans_handle *trans;
	u64 delalloc_bytes;
	u64 max_reclaim;
	long time_left;
	unsigned long nr_pages;
	int loops;
	int items;
	enum btrfs_reserve_flush_enum flush;

	/* Calc the number of the pages we need flush for space reservation */
	items = calc_reclaim_items_nr(root, to_reclaim);
	to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;

	trans = (struct btrfs_trans_handle *)current->journal_info;
	block_rsv = &root->fs_info->delalloc_block_rsv;
	space_info = block_rsv->space_info;

	delalloc_bytes = percpu_counter_sum_positive(
						&root->fs_info->delalloc_bytes);
	if (delalloc_bytes == 0) {
		if (trans)
			return;
		if (wait_ordered)
			btrfs_wait_ordered_roots(root->fs_info, items,
						 0, (u64)-1);
		return;
	}

	loops = 0;
	while (delalloc_bytes && loops < 3) {
		max_reclaim = min(delalloc_bytes, to_reclaim);
		nr_pages = max_reclaim >> PAGE_SHIFT;
		btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
		/*
		 * We need to wait for the async pages to actually start before
		 * we do anything.
		 */
		max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
		if (!max_reclaim)
			goto skip_async;

		if (max_reclaim <= nr_pages)
			max_reclaim = 0;
		else
			max_reclaim -= nr_pages;

		wait_event(root->fs_info->async_submit_wait,
			   atomic_read(&root->fs_info->async_delalloc_pages) <=
			   (int)max_reclaim);
skip_async:
		if (!trans)
			flush = BTRFS_RESERVE_FLUSH_ALL;
		else
			flush = BTRFS_RESERVE_NO_FLUSH;
		spin_lock(&space_info->lock);
		if (can_overcommit(root, space_info, orig, flush)) {
			spin_unlock(&space_info->lock);
			break;
		}
		if (list_empty(&space_info->tickets) &&
		    list_empty(&space_info->priority_tickets)) {
			spin_unlock(&space_info->lock);
			break;
		}
		spin_unlock(&space_info->lock);

		loops++;
		if (wait_ordered && !trans) {
			btrfs_wait_ordered_roots(root->fs_info, items,
						 0, (u64)-1);
		} else {
			time_left = schedule_timeout_killable(1);
			if (time_left)
				break;
		}
		delalloc_bytes = percpu_counter_sum_positive(
						&root->fs_info->delalloc_bytes);
	}
}

/**
 * maybe_commit_transaction - possibly commit the transaction if its ok to
 * @root - the root we're allocating for
 * @bytes - the number of bytes we want to reserve
 * @force - force the commit
 *
 * This will check to make sure that committing the transaction will actually
 * get us somewhere and then commit the transaction if it does.  Otherwise it
 * will return -ENOSPC.
 */
static int may_commit_transaction(struct btrfs_root *root,
				  struct btrfs_space_info *space_info,
				  u64 bytes, int force)
{
	struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
	struct btrfs_trans_handle *trans;

	trans = (struct btrfs_trans_handle *)current->journal_info;
	if (trans)
		return -EAGAIN;

	if (force)
		goto commit;

	/* See if there is enough pinned space to make this reservation */
	if (percpu_counter_compare(&space_info->total_bytes_pinned,
				   bytes) >= 0)
		goto commit;

	/*
	 * See if there is some space in the delayed insertion reservation for
	 * this reservation.
	 */
	if (space_info != delayed_rsv->space_info)
		return -ENOSPC;

	spin_lock(&delayed_rsv->lock);
	if (percpu_counter_compare(&space_info->total_bytes_pinned,
				   bytes - delayed_rsv->size) >= 0) {
		spin_unlock(&delayed_rsv->lock);
		return -ENOSPC;
	}
	spin_unlock(&delayed_rsv->lock);

commit:
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return -ENOSPC;

	return btrfs_commit_transaction(trans, root);
}

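/*
 * One queued metadata reservation request. Reservers that could not be
 * satisfied immediately wait on a ticket; the flushing code hands freed
 * space to the oldest tickets first and wakes their owners.
 */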
struct reserve_ticket {
	u64 bytes;
	int error;
	struct list_head list;
	wait_queue_head_t wait;
};

static int flush_space(struct btrfs_root *root,
		       struct btrfs_space_info *space_info, u64 num_bytes,
		       u64 orig_bytes, int state)
{
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		if (state == FLUSH_DELAYED_ITEMS_NR)
			nr = calc_reclaim_items_nr(root, num_bytes) * 2;
		else
			nr = -1;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, root, nr);
		btrfs_end_transaction(trans, root);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
		shrink_delalloc(root, num_bytes * 2, orig_bytes,
				state == FLUSH_DELALLOC_WAIT);
		break;
	case ALLOC_CHUNK:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     btrfs_get_alloc_profile(root, 0),
				     CHUNK_ALLOC_NO_FORCE);
		btrfs_end_transaction(trans, root);
		if (ret > 0 || ret == -ENOSPC)
			ret = 0;
		break;
	case COMMIT_TRANS:
		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
				orig_bytes, state, ret);
	return ret;
}

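/*
 * How much metadata space should the flushers try to reclaim: the total of
 * all queued tickets if there are any, otherwise enough to bring us back
 * under the overcommit limits.
 */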
static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
				 struct btrfs_space_info *space_info)
{
	struct reserve_ticket *ticket;
	u64 used;
	u64 expected;
	u64 to_reclaim = 0;

	list_for_each_entry(ticket, &space_info->tickets, list)
		to_reclaim += ticket->bytes;
	list_for_each_entry(ticket, &space_info->priority_tickets, list)
		to_reclaim += ticket->bytes;
	if (to_reclaim)
		return to_reclaim;

	to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
	if (can_overcommit(root, space_info, to_reclaim,
			   BTRFS_RESERVE_FLUSH_ALL))
		return 0;

	used = space_info->bytes_used + space_info->bytes_reserved +
	       space_info->bytes_pinned + space_info->bytes_readonly +
	       space_info->bytes_may_use;
	if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
		expected = div_factor_fine(space_info->total_bytes, 95);
	else
		expected = div_factor_fine(space_info->total_bytes, 90);

	if (used > expected)
		to_reclaim = used - expected;
	else
		to_reclaim = 0;
	to_reclaim = min(to_reclaim, space_info->bytes_may_use +
				     space_info->bytes_reserved);
	return to_reclaim;
}

static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
					struct btrfs_root *root, u64 used)
{
	u64 thresh = div_factor_fine(space_info->total_bytes, 98);

	/* If we're just plain full then async reclaim just slows us down. */
	if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
		return 0;

	if (!btrfs_calc_reclaim_metadata_size(root, space_info))
		return 0;

	return (used >= thresh && !btrfs_fs_closing(root->fs_info) &&
		!test_bit(BTRFS_FS_STATE_REMOUNTING,
			  &root->fs_info->fs_state));
}

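/*
 * Fail every ticket remaining on @head with -ENOSPC and wake its waiter;
 * used once the flusher has given up making progress.
 */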
static void wake_all_tickets(struct list_head *head)
{
	struct reserve_ticket *ticket;

	while (!list_empty(head)) {
		ticket = list_first_entry(head, struct reserve_ticket, list);
		list_del_init(&ticket->list);
		ticket->error = -ENOSPC;
		wake_up(&ticket->wait);
	}
}

/*
 * This is for normal flushers, we can wait all goddamned day if we want to.  We
 * will loop and continuously try to flush as long as we are making progress.
 * We count progress as clearing off tickets each time we have to loop.
 */
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 to_reclaim;
	int flush_state;
	int commit_cycles = 0;
	u64 last_tickets_id;

	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
						      space_info);
	if (!to_reclaim) {
		space_info->flush = 0;
		spin_unlock(&space_info->lock);
		return;
	}
	last_tickets_id = space_info->tickets_id;
	spin_unlock(&space_info->lock);

	flush_state = FLUSH_DELAYED_ITEMS_NR;
	do {
		struct reserve_ticket *ticket;
		int ret;

		ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
				  to_reclaim, flush_state);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}
		to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
							      space_info);
		ticket = list_first_entry(&space_info->tickets,
					  struct reserve_ticket, list);
		if (last_tickets_id == space_info->tickets_id) {
			flush_state++;
		} else {
			last_tickets_id = space_info->tickets_id;
			flush_state = FLUSH_DELAYED_ITEMS_NR;
			if (commit_cycles)
				commit_cycles--;
		}

		if (flush_state > COMMIT_TRANS) {
			commit_cycles++;
			if (commit_cycles > 2) {
				wake_all_tickets(&space_info->tickets);
				space_info->flush = 0;
			} else {
				flush_state = FLUSH_DELAYED_ITEMS_NR;
			}
		}
		spin_unlock(&space_info->lock);
	} while (flush_state <= COMMIT_TRANS);
}

void btrfs_init_async_reclaim_work(struct work_struct *work)
{
	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
}

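/*
 * Flushing for a high priority (BTRFS_RESERVE_FLUSH_LIMIT) ticket. These
 * callers flush on their own behalf instead of waiting for the async worker
 * and skip the flush states they could deadlock on.
 */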
static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
					    struct btrfs_space_info *space_info,
					    struct reserve_ticket *ticket)
{
	u64 to_reclaim;
	int flush_state = FLUSH_DELAYED_ITEMS_NR;

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
						      space_info);
	if (!to_reclaim) {
		spin_unlock(&space_info->lock);
		return;
	}
	spin_unlock(&space_info->lock);

	do {
		flush_space(fs_info->fs_root, space_info, to_reclaim,
			    to_reclaim, flush_state);
		flush_state++;
		spin_lock(&space_info->lock);
		if (ticket->bytes == 0) {
			spin_unlock(&space_info->lock);
			return;
		}
		spin_unlock(&space_info->lock);

		/*
		 * Priority flushers can't wait on delalloc without
		 * deadlocking.
		 */
		if (flush_state == FLUSH_DELALLOC ||
		    flush_state == FLUSH_DELALLOC_WAIT)
			flush_state = ALLOC_CHUNK;
	} while (flush_state < COMMIT_TRANS);
}

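/*
 * Wait until our ticket is satisfied or failed by the flusher, then give
 * back whatever partial reservation we were granted if we still end up
 * returning an error.
 */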
static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
			       struct btrfs_space_info *space_info,
			       struct reserve_ticket *ticket, u64 orig_bytes)
{
	DEFINE_WAIT(wait);
	int ret = 0;

	spin_lock(&space_info->lock);
	while (ticket->bytes > 0 && ticket->error == 0) {
		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		prepare_to_wait(&ticket->wait, &wait, TASK_KILLABLE);
		spin_unlock(&space_info->lock);

		schedule();

		finish_wait(&ticket->wait, &wait);
		spin_lock(&space_info->lock);
	}
	if (!ret)
		ret = ticket->error;
	if (!list_empty(&ticket->list))
		list_del_init(&ticket->list);
	if (ticket->bytes && ticket->bytes < orig_bytes) {
		u64 num_bytes = orig_bytes - ticket->bytes;

		space_info->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
					      space_info->flags, num_bytes, 0);
	}
	spin_unlock(&space_info->lock);

	return ret;
}

/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root - the root we're allocating for
 * @space_info - the space info we want to allocate from
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt to
 * flush out space to make room.  It will do this by flushing delalloc if
 * possible or committing the transaction.  If flush is 0 then no attempts to
 * regain reservations will be made and this will fail if there is not enough
 * space already.
 */
static int __reserve_metadata_bytes(struct btrfs_root *root,
				    struct btrfs_space_info *space_info,
				    u64 orig_bytes,
				    enum btrfs_reserve_flush_enum flush)
{
	struct reserve_ticket ticket;
	u64 used;
	int ret = 0;

	ASSERT(orig_bytes);
	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);

	spin_lock(&space_info->lock);
	ret = -ENOSPC;
	used = space_info->bytes_used + space_info->bytes_reserved +
		space_info->bytes_pinned + space_info->bytes_readonly +
		space_info->bytes_may_use;

	/*
	 * If we have enough space then hooray, make our reservation and carry
	 * on.  If not see if we can overcommit, and if we can, hooray carry
	 * on.  If not things get more complicated.
	 */
	if (used + orig_bytes <= space_info->total_bytes) {
		space_info->bytes_may_use += orig_bytes;
		trace_btrfs_space_reservation(root->fs_info, "space_info",
					      space_info->flags, orig_bytes,
					      1);
		ret = 0;
	} else if (can_overcommit(root, space_info, orig_bytes, flush)) {
		space_info->bytes_may_use += orig_bytes;
		trace_btrfs_space_reservation(root->fs_info, "space_info",
					      space_info->flags, orig_bytes,
					      1);
		ret = 0;
	}

	/*
	 * If we couldn't make a reservation then setup our reservation ticket
	 * and kick the async worker if it's not already running.
	 *
	 * If we are a priority flusher then we just need to add our ticket to
	 * the list and we will do our own flushing further down.
	 */
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
		ticket.bytes = orig_bytes;
		ticket.error = 0;
		init_waitqueue_head(&ticket.wait);
		if (flush == BTRFS_RESERVE_FLUSH_ALL) {
			list_add_tail(&ticket.list, &space_info->tickets);
			if (!space_info->flush) {
				space_info->flush = 1;
				trace_btrfs_trigger_flush(root->fs_info,
							  space_info->flags,
							  orig_bytes, flush,
							  "enospc");
				queue_work(system_unbound_wq,
					   &root->fs_info->async_reclaim_work);
			}
		} else {
			list_add_tail(&ticket.list,
				      &space_info->priority_tickets);
		}
	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		used += orig_bytes;
		/*
		 * We will do the space reservation dance during log replay,
		 * which means we won't have fs_info->fs_root set, so don't do
		 * the async reclaim as we will panic.
		 */
		if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags) &&
		    need_do_async_reclaim(space_info, root, used) &&
		    !work_busy(&root->fs_info->async_reclaim_work)) {
			trace_btrfs_trigger_flush(root->fs_info,
						  space_info->flags,
						  orig_bytes, flush,
						  "preempt");
			queue_work(system_unbound_wq,
				   &root->fs_info->async_reclaim_work);
		}
	}
	spin_unlock(&space_info->lock);
	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
		return ret;

	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		return wait_reserve_ticket(root->fs_info, space_info, &ticket,
					   orig_bytes);

	ret = 0;
	priority_reclaim_metadata_space(root->fs_info, space_info, &ticket);
	spin_lock(&space_info->lock);
	if (ticket.bytes) {
		if (ticket.bytes < orig_bytes) {
			u64 num_bytes = orig_bytes - ticket.bytes;

			space_info->bytes_may_use -= num_bytes;
			trace_btrfs_space_reservation(root->fs_info,
						      "space_info",
						      space_info->flags,
						      num_bytes, 0);
		}
		list_del_init(&ticket.list);
		ret = -ENOSPC;
	}
	spin_unlock(&space_info->lock);
	ASSERT(list_empty(&ticket.list));
	return ret;
}

/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root - the root we're allocating for
 * @block_rsv - the block_rsv we're allocating for
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 *
 * This reserves orig_bytes from the space info associated with @block_rsv,
 * flushing as __reserve_metadata_bytes() allows.  If that fails while we are
 * in the middle of orphan cleanup it will, as a last resort, try to steal the
 * reservation from the global block reserve, since running orphan cleanup to
 * completion is more important than keeping the global reserve full.
 */
static int reserve_metadata_bytes(struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes,
				  enum btrfs_reserve_flush_enum flush)
{
	int ret;

	ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
				       flush);
	if (ret == -ENOSPC &&
	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
		struct btrfs_block_rsv *global_rsv =
			&root->fs_info->global_block_rsv;

		if (block_rsv != global_rsv &&
		    !block_rsv_use_bytes(global_rsv, orig_bytes))
			ret = 0;
	}
	if (ret == -ENOSPC)
		trace_btrfs_space_reservation(root->fs_info,
					      "space_info:enospc",
					      block_rsv->space_info->flags,
					      orig_bytes, 1);
	return ret;
}

static struct btrfs_block_rsv *get_block_rsv(
					const struct btrfs_trans_handle *trans,
					const struct btrfs_root *root)
{
	struct btrfs_block_rsv *block_rsv = NULL;

	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    (root == root->fs_info->csum_root && trans->adding_csums) ||
	    (root == root->fs_info->uuid_root))
		block_rsv = trans->block_rsv;

	if (!block_rsv)
		block_rsv = root->block_rsv;

	if (!block_rsv)
		block_rsv = &root->fs_info->empty_block_rsv;

	return block_rsv;
}

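/*
 * Consume @num_bytes of a block reserve's reserved space, or fail with
 * -ENOSPC if it doesn't currently hold that much.
 */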
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes)
{
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved >= num_bytes) {
		block_rsv->reserved -= num_bytes;
		if (block_rsv->reserved < block_rsv->size)
			block_rsv->full = 0;
		ret = 0;
	}
	spin_unlock(&block_rsv->lock);
	return ret;
}

static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
				u64 num_bytes, int update_size)
{
	spin_lock(&block_rsv->lock);
	block_rsv->reserved += num_bytes;
	if (update_size)
		block_rsv->size += num_bytes;
	else if (block_rsv->reserved >= block_rsv->size)
		block_rsv->full = 1;
	spin_unlock(&block_rsv->lock);
}

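/*
 * Migrate @num_bytes from the global reserve to @dest, but only if that
 * still leaves the global reserve filled to at least min_factor tenths of
 * its size.
 */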
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *dest, u64 num_bytes,
			     int min_factor)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != dest->space_info)
		return -ENOSPC;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, min_factor);
	if (global_rsv->reserved < min_bytes + num_bytes) {
		spin_unlock(&global_rsv->lock);
		return -ENOSPC;
	}
	global_rsv->reserved -= num_bytes;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	block_rsv_add_bytes(dest, num_bytes, 1);
	return 0;
}

/*
 * This is for space we already have accounted in space_info->bytes_may_use, so
 * basically when we're returning space from block_rsv's.
 */
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head;
	u64 used;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
	bool check_overcommit = false;

	spin_lock(&space_info->lock);
	head = &space_info->priority_tickets;

	/*
	 * If we are over our limit then we need to check and see if we can
	 * overcommit, and if we can't then we just need to free up our space
	 * and not satisfy any requests.
	 */
	used = space_info->bytes_used + space_info->bytes_reserved +
		space_info->bytes_pinned + space_info->bytes_readonly +
		space_info->bytes_may_use;
	if (used - num_bytes >= space_info->total_bytes)
		check_overcommit = true;
again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		/*
		 * We use 0 bytes because this space is already reserved, so
		 * adding the ticket space would be a double count.
		 */
		if (check_overcommit &&
		    !can_overcommit(fs_info->extent_root, space_info, 0,
				    flush))
			break;
		if (num_bytes >= ticket->bytes) {
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		flush = BTRFS_RESERVE_FLUSH_ALL;
		goto again;
	}
	space_info->bytes_may_use -= num_bytes;
	trace_btrfs_space_reservation(fs_info, "space_info",
				      space_info->flags, num_bytes, 0);
	spin_unlock(&space_info->lock);
}

/*
 * This is for newly allocated space that isn't accounted in
 * space_info->bytes_may_use yet.  So if we allocate a chunk or unpin an
 * extent we use this helper.
 */
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head = &space_info->priority_tickets;

again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		if (num_bytes >= ticket->bytes) {
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      ticket->bytes, 1);
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			space_info->bytes_may_use += ticket->bytes;
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      num_bytes, 1);
			space_info->bytes_may_use += num_bytes;
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		goto again;
	}
}

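/*
 * Shrink a block reserve by @num_bytes ((u64)-1 empties it), giving any
 * excess reserved space first to @dest and then back to the space_info so
 * waiting tickets can use it.
 */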
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_rsv *block_rsv,
				    struct btrfs_block_rsv *dest, u64 num_bytes)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;

	spin_lock(&block_rsv->lock);
	if (num_bytes == (u64)-1)
		num_bytes = block_rsv->size;
	block_rsv->size -= num_bytes;
	if (block_rsv->reserved >= block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	} else {
		num_bytes = 0;
	}
	spin_unlock(&block_rsv->lock);

	if (num_bytes > 0) {
		if (dest) {
			spin_lock(&dest->lock);
			if (!dest->full) {
				u64 bytes_to_add;

				bytes_to_add = dest->size - dest->reserved;
				bytes_to_add = min(num_bytes, bytes_to_add);
				dest->reserved += bytes_to_add;
				if (dest->reserved >= dest->size)
					dest->full = 1;
				num_bytes -= bytes_to_add;
			}
			spin_unlock(&dest->lock);
		}
		if (num_bytes)
			space_info_add_old_bytes(fs_info, space_info,
						 num_bytes);
	}
}

int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
			    struct btrfs_block_rsv *dst, u64 num_bytes,
			    int update_size)
{
	int ret;

	ret = block_rsv_use_bytes(src, num_bytes);
	if (ret)
		return ret;

	block_rsv_add_bytes(dst, num_bytes, update_size);
	return 0;
}

void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
{
	memset(rsv, 0, sizeof(*rsv));
	spin_lock_init(&rsv->lock);
	rsv->type = type;
}

struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
					      unsigned short type)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_fs_info *fs_info = root->fs_info;

	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
	if (!block_rsv)
		return NULL;

	btrfs_init_block_rsv(block_rsv, type);
	block_rsv->space_info = __find_space_info(fs_info,
						  BTRFS_BLOCK_GROUP_METADATA);
	return block_rsv;
}

void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv)
{
	if (!rsv)
		return;
	btrfs_block_rsv_release(root, rsv, (u64)-1);
	kfree(rsv);
}

void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
{
	kfree(rsv);
}

int btrfs_block_rsv_add(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, u64 num_bytes,
			enum btrfs_reserve_flush_enum flush)
{
	int ret;

	if (num_bytes == 0)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 1);
		return 0;
	}

	return ret;
}

int btrfs_block_rsv_check(struct btrfs_root *root,
			  struct btrfs_block_rsv *block_rsv, int min_factor)
{
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	num_bytes = div_factor(block_rsv->size, min_factor);
	if (block_rsv->reserved >= num_bytes)
		ret = 0;
	spin_unlock(&block_rsv->lock);

	return ret;
}

int btrfs_block_rsv_refill(struct btrfs_root *root,
			   struct btrfs_block_rsv *block_rsv, u64 min_reserved,
			   enum btrfs_reserve_flush_enum flush)
{
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	if (!block_rsv)
		return 0;

	spin_lock(&block_rsv->lock);
	num_bytes = min_reserved;
	if (block_rsv->reserved >= num_bytes)
		ret = 0;
	else
		num_bytes -= block_rsv->reserved;
	spin_unlock(&block_rsv->lock);

	if (!ret)
		return 0;

	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
	if (!ret) {
		block_rsv_add_bytes(block_rsv, num_bytes, 0);
		return 0;
	}

	return ret;
}

void btrfs_block_rsv_release(struct btrfs_root *root,
			     struct btrfs_block_rsv *block_rsv,
			     u64 num_bytes)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;

	if (global_rsv == block_rsv ||
	    block_rsv->space_info != global_rsv->space_info)
		global_rsv = NULL;
	block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
				num_bytes);
}

static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 num_bytes;

	/*
	 * The global block rsv is sized from the extent tree, the checksum
	 * tree and the tree of tree roots, since those are the trees we must
	 * be able to COW to make any forward progress, with a floor of 16M
	 * and a cap of 512M.
	 */
	num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
		btrfs_root_used(&fs_info->csum_root->root_item) +
		btrfs_root_used(&fs_info->tree_root->root_item);
	num_bytes = max_t(u64, num_bytes, SZ_16M);

	spin_lock(&sinfo->lock);
	spin_lock(&block_rsv->lock);

	block_rsv->size = min_t(u64, num_bytes, SZ_512M);

	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
			    sinfo->bytes_reserved + sinfo->bytes_readonly +
			    sinfo->bytes_may_use;
		if (sinfo->total_bytes > num_bytes) {
			num_bytes = sinfo->total_bytes - num_bytes;
			num_bytes = min(num_bytes,
					block_rsv->size - block_rsv->reserved);
			block_rsv->reserved += num_bytes;
			sinfo->bytes_may_use += num_bytes;
			trace_btrfs_space_reservation(fs_info, "space_info",
						      sinfo->flags, num_bytes,
						      1);
		}
	} else if (block_rsv->reserved > block_rsv->size) {
		num_bytes = block_rsv->reserved - block_rsv->size;
		sinfo->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
					      sinfo->flags, num_bytes, 0);
		block_rsv->reserved = block_rsv->size;
	}

	if (block_rsv->reserved == block_rsv->size)
		block_rsv->full = 1;
	else
		block_rsv->full = 0;

	spin_unlock(&block_rsv->lock);
	spin_unlock(&sinfo->lock);
}

static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.space_info = space_info;

	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->global_block_rsv.space_info = space_info;
	fs_info->delalloc_block_rsv.space_info = space_info;
	fs_info->trans_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.space_info = space_info;
	fs_info->delayed_block_rsv.space_info = space_info;

	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
	if (fs_info->quota_root)
		fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;

	update_global_block_rsv(fs_info);
}

static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
				(u64)-1);
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
	WARN_ON(fs_info->delayed_block_rsv.size > 0);
	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}

void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	if (!trans->block_rsv)
		return;

	if (!trans->bytes_reserved)
		return;

	trace_btrfs_space_reservation(root->fs_info, "transaction",
				      trans->transid, trans->bytes_reserved, 0);
	btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
	trans->bytes_reserved = 0;
}

/*
 * To be called after all the new block groups attached to the transaction
 * handle have been created (btrfs_create_pending_block_groups()).
 */
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->root->fs_info;

	if (!trans->chunk_bytes_reserved)
		return;

	WARN_ON_ONCE(!list_empty(&trans->new_bgs));

	block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
				trans->chunk_bytes_reserved);
	trans->chunk_bytes_reserved = 0;
}

/* Can only return 0 or -ENOSPC */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	/*
	 * We always use trans->block_rsv here as we will have reserved space
	 * for our orphan when starting the transaction, using get_block_rsv()
	 * here will sometimes make us choose the wrong block rsv as we could
	 * otherwise use the root's orphan block rsv.
	 */
	struct btrfs_block_rsv *src_rsv = trans->block_rsv;
	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;

	/*
	 * We need to hold space in order to delete our orphan item once we've
	 * added it, so this takes the reservation so we can release it later
	 * when we are truly done with the orphan item.
	 */
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);

	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 1);
	return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
}

void btrfs_orphan_release_metadata(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);

	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 0);
	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}

/*
 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
 * root: the root of the parent directory
 * rsv: block reservation
 * items: the number of items that we need do reservation
 * qgroup_reserved: used to return the reserved size in qgroup
 *
 * This function is used to reserve the space for snapshot/subvolume
 * creation and deletion. Those operations are different with the
 * common file/directory operations, they change two fs/file trees
 * and root tree, the number of items that the qgroup reserves is
 * different with the free space reservation. So we can not use
 * the space reservation mechanism in start_transaction().
 */
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int items,
				     u64 *qgroup_reserved,
				     bool use_global_rsv)
{
	u64 num_bytes;
	int ret;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) {
		/* One for parent inode, two for dir entries */
		num_bytes = 3 * root->nodesize;
		ret = btrfs_qgroup_reserve_meta(root, num_bytes);
		if (ret)
			return ret;
	} else {
		num_bytes = 0;
	}

	*qgroup_reserved = num_bytes;

	num_bytes = btrfs_calc_trans_metadata_size(root, items);
	rsv->space_info = __find_space_info(root->fs_info,
					    BTRFS_BLOCK_GROUP_METADATA);
	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);

	if (ret == -ENOSPC && use_global_rsv)
		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);

	if (ret && *qgroup_reserved)
		btrfs_qgroup_free_meta(root, *qgroup_reserved);

	return ret;
}

void btrfs_subvolume_release_metadata(struct btrfs_root *root,
				      struct btrfs_block_rsv *rsv,
				      u64 qgroup_reserved)
{
	btrfs_block_rsv_release(root, rsv, (u64)-1);
}

/**
 * drop_outstanding_extent - drop an outstanding extent
 * @inode: the inode we're dropping the extent for
 * @num_bytes: the number of bytes we're releasing.
 *
 * This is called when we are freeing up an outstanding extent, either called
 * after an error or after an extent is written.  This will return the number
 * of reserved extents that need to be freed.  This must be called with
 * BTRFS_I(inode)->lock held.
 */
static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
	unsigned drop_inode_space = 0;
	unsigned dropped_extents = 0;
	unsigned num_extents = 0;

	num_extents = (unsigned)div64_u64(num_bytes +
					  BTRFS_MAX_EXTENT_SIZE - 1,
					  BTRFS_MAX_EXTENT_SIZE);
	ASSERT(num_extents);
	ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
	BTRFS_I(inode)->outstanding_extents -= num_extents;

	if (BTRFS_I(inode)->outstanding_extents == 0 &&
	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			       &BTRFS_I(inode)->runtime_flags))
		drop_inode_space = 1;

	/*
	 * If we have more or the same amount of outstanding extents than we
	 * have reserved then we need to leave the reserved extents count
	 * alone.
	 */
	if (BTRFS_I(inode)->outstanding_extents >=
	    BTRFS_I(inode)->reserved_extents)
		return drop_inode_space;

	dropped_extents = BTRFS_I(inode)->reserved_extents -
		BTRFS_I(inode)->outstanding_extents;
	BTRFS_I(inode)->reserved_extents -= dropped_extents;
	return dropped_extents + drop_inode_space;
}

/**
 * calc_csum_metadata_size - return the amount of metadata space that must be
 *	reserved/freed for the given bytes.
 * @inode: the inode we're manipulating
 * @num_bytes: the number of bytes in question
 * @reserve: 1 if we are reserving space, 0 if we are freeing space
 *
 * This adjusts the number of csum_bytes in the inode and then returns the
 * correct amount of metadata that must either be reserved or freed.  We
 * calculate how many checksums we can fit into one leaf and then divide the
 * number of bytes that will need to be checksumed by this value to figure out
 * how many checksums will be required.  If we are adding bytes then the number
 * may go up and we will return the number of additional bytes that must be
 * reserved.  If it is going down we will return the number of bytes that must
 * be freed.
 *
 * This must be called with BTRFS_I(inode)->lock held.
 */
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
				   int reserve)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 old_csums, num_csums;

	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
	    BTRFS_I(inode)->csum_bytes == 0)
		return 0;

	old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
	if (reserve)
		BTRFS_I(inode)->csum_bytes += num_bytes;
	else
		BTRFS_I(inode)->csum_bytes -= num_bytes;
	num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);

	/* No change, no need to reserve more */
	if (old_csums == num_csums)
		return 0;

	if (reserve)
		return btrfs_calc_trans_metadata_size(root,
						      num_csums - old_csums);

	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
}

int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve = 0;
	u64 csum_bytes;
	unsigned nr_extents = 0;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
	int ret = 0;
	bool delalloc_lock = true;
	u64 to_free = 0;
	unsigned dropped;
	bool release_extra = false;

	/*
	 * If we are a free space inode we need to not flush since we will be
	 * in the middle of a transaction commit.  We also don't need the
	 * delalloc mutex since we won't race with anybody.
	 *
	 * If we have a transaction open (can happen if we call truncate_block
	 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		flush = BTRFS_RESERVE_NO_FLUSH;
		delalloc_lock = false;
	} else if (current->journal_info) {
		flush = BTRFS_RESERVE_FLUSH_LIMIT;
	}

	if (flush != BTRFS_RESERVE_NO_FLUSH &&
	    btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	if (delalloc_lock)
		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);

	num_bytes = ALIGN(num_bytes, root->sectorsize);

	spin_lock(&BTRFS_I(inode)->lock);
	nr_extents = (unsigned)div64_u64(num_bytes +
					 BTRFS_MAX_EXTENT_SIZE - 1,
					 BTRFS_MAX_EXTENT_SIZE);
	BTRFS_I(inode)->outstanding_extents += nr_extents;

	nr_extents = 0;
	if (BTRFS_I(inode)->outstanding_extents >
	    BTRFS_I(inode)->reserved_extents)
		nr_extents += BTRFS_I(inode)->outstanding_extents -
			BTRFS_I(inode)->reserved_extents;

	/* We always want to reserve a slot for updating the inode. */
	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents + 1);
	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
	csum_bytes = BTRFS_I(inode)->csum_bytes;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags)) {
		ret = btrfs_qgroup_reserve_meta(root,
						nr_extents * root->nodesize);
		if (ret)
			goto out_fail;
	}

	ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush);
	if (unlikely(ret)) {
		btrfs_qgroup_free_meta(root, nr_extents * root->nodesize);
		goto out_fail;
	}

	spin_lock(&BTRFS_I(inode)->lock);
	if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			     &BTRFS_I(inode)->runtime_flags)) {
		to_reserve -= btrfs_calc_trans_metadata_size(root, 1);
		release_extra = true;
	}
	BTRFS_I(inode)->reserved_extents += nr_extents;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);

	if (to_reserve)
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_reserve, 1);
	if (release_extra)
		btrfs_block_rsv_release(root, block_rsv,
					btrfs_calc_trans_metadata_size(root,
								       1));
	return 0;

out_fail:
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);
	/*
	 * If the inodes csum_bytes is the same as the original
	 * csum_bytes then we know we haven't raced with any free()ers
	 * so we can just reduce our inodes csum bytes and carry on.
	 */
	if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
		calc_csum_metadata_size(inode, num_bytes, 0);
	} else {
		u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
		u64 bytes;

		/*
		 * This is tricky, but first we need to figure out how much we
		 * freed from any free-ers that occurred during this
		 * reservation, so we reset ->csum_bytes to the csum_bytes
		 * before we dropped our lock, and then call the free for the
		 * number of bytes that were freed while we were trying our
		 * reservation.
		 */
		bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
		BTRFS_I(inode)->csum_bytes = csum_bytes;
		to_free = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now we need to see how much we would have freed had we not
		 * been racing with anybody, so set csum_bytes to the amount
		 * that was dropped while we were reserving and calculate the
		 * new amount to free.
		 */
		BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
		bytes = csum_bytes - orig_csum_bytes;
		bytes = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now reset ->csum_bytes to what it should be.  If bytes is
		 * more than to_free then we would have freed more space had we
		 * not had an artificially high ->csum_bytes, so we need to
		 * free the remainder.  If bytes is the same or less then we
		 * don't need to do anything, the other free-ers did the
		 * correct thing.
		 */
		BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
		if (bytes > to_free)
			to_free = bytes - to_free;
		else
			to_free = 0;
	}
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (to_free) {
		btrfs_block_rsv_release(root, block_rsv, to_free);
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_free, 0);
	}
	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
	return ret;
}

/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for
 * @num_bytes: the number of bytes we're releasing
 *
 * This will release the metadata reservation for an inode.  This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations.
 */
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free = 0;
	unsigned dropped;

	num_bytes = ALIGN(num_bytes, root->sectorsize);
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode, num_bytes);

	if (num_bytes)
		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped > 0)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (btrfs_is_testing(root->fs_info))
		return;

	trace_btrfs_space_reservation(root->fs_info, "delalloc",
				      btrfs_ino(inode), to_free, 0);

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}

/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for
 * delalloc
 * @inode: inode we're writing to
 * @start: start range we are writing to
 * @len: how long the range we are writing to
 *
 * This will do the following things
 *
 * o reserve space in data space info for num bytes
 *   and reserve precious corresponding qgroup space
 *   (Done in check_data_free_space)
 *
 * o reserve space for metadata space, based on the number of outstanding
 *   extents and how much csums will be needed
 *   also reserve metadata space in a per root over-reserve method.
 * o add to the inodes->delalloc_bytes
 * o add it to the fs_info's delalloc inodes list.
 *   (Above 3 all done in delalloc_reserve_metadata)
 *
 * Return 0 for success
 * Return <0 for error(-ENOSPC or -EQUOT)
 */
int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
{
	int ret;

	ret = btrfs_check_data_free_space(inode, start, len);
	if (ret < 0)
		return ret;
	ret = btrfs_delalloc_reserve_metadata(inode, len);
	if (ret < 0)
		btrfs_free_reserved_data_space(inode, start, len);
	return ret;
}

/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @start: start position of the space already reserved
 * @len: the len of the space already reserved
 *
 * This must be matched with a call to btrfs_delalloc_reserve_space.  This is
 * called in the case that we don't need the metadata AND data reservations
 * anymore.  So if there is an error or we insert an inline extent.
 *
 * This function will release the metadata space that was not used and will
 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
 * list if there are no delalloc bytes left.
 * Also it will handle the qgroup reserved space.
 */
void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len)
{
	btrfs_delalloc_release_metadata(inode, len);
	btrfs_free_reserved_data_space(inode, start, len);
}

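/*
 * Adjust the used-bytes accounting of every block group that intersects
 * [bytenr, bytenr + num_bytes) for an allocation (alloc != 0) or a free.
 * Freed ranges are pinned and stay unavailable until the transaction commits.
 */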
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, u64 bytenr,
			      u64 num_bytes, int alloc)
{
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	int factor;

	/* block accounting for super block */
	spin_lock(&info->delalloc_root_lock);
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_root_lock);

	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -ENOENT;
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;

		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.  This
		 * is because we need the unpinning stage to actually add the
		 * space back to the block group, otherwise we will leak space.
		 */
		if (!alloc && cache->cached == BTRFS_CACHE_NO)
			cache_block_group(cache, 1);

		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);

		if (btrfs_test_opt(root->fs_info, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->pinned += num_bytes;
			cache->space_info->bytes_pinned += num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			trace_btrfs_space_reservation(root->fs_info, "pinned",
						      cache->space_info->flags,
						      num_bytes, 1);
			set_extent_dirty(info->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
		}

		spin_lock(&trans->transaction->dirty_bgs_lock);
		if (list_empty(&cache->dirty_list)) {
			list_add_tail(&cache->dirty_list,
				      &trans->transaction->dirty_bgs);
			trans->transaction->num_dirty_bgs++;
			btrfs_get_block_group(cache);
		}
		spin_unlock(&trans->transaction->dirty_bgs_lock);

		/*
		 * No longer have used bytes in this block group, queue it for
		 * deletion. We do this after adding the block group to the
		 * dirty list to avoid races between cleaner kthread and space
		 * cache writeout.
		 */
		if (!alloc && old_val == 0) {
			spin_lock(&info->unused_bgs_lock);
			if (list_empty(&cache->bg_list)) {
				btrfs_get_block_group(cache);
				list_add_tail(&cache->bg_list,
					      &info->unused_bgs);
			}
			spin_unlock(&info->unused_bgs_lock);
		}

		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}

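/*
 * Return the logical start of the first block group in the filesystem or,
 * when that isn't cached, of the first block group at or after @search_start.
 */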
static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
{
	struct btrfs_block_group_cache *cache;
	u64 bytenr;

	spin_lock(&root->fs_info->block_group_cache_lock);
	bytenr = root->fs_info->first_logical_byte;
	spin_unlock(&root->fs_info->block_group_cache_lock);

	if (bytenr < (u64)-1)
		return bytenr;

	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
	if (!cache)
		return 0;

	bytenr = cache->key.objectid;
	btrfs_put_block_group(cache);

	return bytenr;
}

static int pin_down_extent(struct btrfs_root *root,
			   struct btrfs_block_group_cache *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	cache->space_info->bytes_pinned += num_bytes;
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	trace_btrfs_space_reservation(root->fs_info, "pinned",
				      cache->space_info->flags, num_bytes, 1);
	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	return 0;
}

/*
 * this function must be called within transaction
 */
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group_cache *cache;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	BUG_ON(!cache); /* Logic error */

	pin_down_extent(root, cache, bytenr, num_bytes, reserved);

	btrfs_put_block_group(cache);
	return 0;
}

/*
 * this function must be called within transaction
 */
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group_cache *cache;
	int ret;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	if (!cache)
		return -EINVAL;

	/*
	 * pull in the free space cache (if any) so that our pin
	 * removes the free space from the cache.  We have load_only set
	 * to one because the slow code to read in the free extents does check
	 * the pinned extents.
	 */
	cache_block_group(cache, 1);

	pin_down_extent(root, cache, bytenr, num_bytes, 0);

	/* remove us from the free space cache (if we're there at all) */
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
	btrfs_put_block_group(cache);
	return ret;
}

static int __exclude_logged_extent(struct btrfs_root *root, u64 start,
				   u64 num_bytes)
{
	int ret;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_caching_control *caching_ctl;

	block_group = btrfs_lookup_block_group(root->fs_info, start);
	if (!block_group)
		return -EINVAL;

	cache_block_group(block_group, 0);
	caching_ctl = get_caching_control(block_group);

	if (!caching_ctl) {
		/* Logic error */
		BUG_ON(!block_group_cache_done(block_group));
		ret = btrfs_remove_free_space(block_group, start, num_bytes);
	} else {
		mutex_lock(&caching_ctl->mutex);

		if (start >= caching_ctl->progress) {
			ret = add_excluded_extent(root, start, num_bytes);
		} else if (start + num_bytes <= caching_ctl->progress) {
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
		} else {
			num_bytes = caching_ctl->progress - start;
			ret = btrfs_remove_free_space(block_group,
						      start, num_bytes);
			if (ret)
				goto out_lock;

			num_bytes = (start + num_bytes) -
				caching_ctl->progress;
			start = caching_ctl->progress;
			ret = add_excluded_extent(root, start, num_bytes);
		}
out_lock:
		mutex_unlock(&caching_ctl->mutex);
		put_caching_control(caching_ctl);
	}
	btrfs_put_block_group(block_group);
	return ret;
}

int btrfs_exclude_logged_extents(struct btrfs_root *log,
				 struct extent_buffer *eb)
{
	struct btrfs_file_extent_item *item;
	struct btrfs_key key;
	int found_type;
	int i;

	if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
		return 0;

	for (i = 0; i < btrfs_header_nritems(eb); i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		found_type = btrfs_file_extent_type(eb, item);
		if (found_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
			continue;
		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
		__exclude_logged_extent(log, key.objectid, key.offset);
	}

	return 0;
}

static void
btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
{
	atomic_inc(&bg->reservations);
}

void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
					const u64 start)
{
	struct btrfs_block_group_cache *bg;

	bg = btrfs_lookup_block_group(fs_info, start);
	ASSERT(bg);
	if (atomic_dec_and_test(&bg->reservations))
		wake_up_atomic_t(&bg->reservations);
	btrfs_put_block_group(bg);
}

static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
{
	schedule();
	return 0;
}

void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
{
	struct btrfs_space_info *space_info = bg->space_info;

	ASSERT(bg->ro);

	if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
		return;

	/*
	 * Our block group is read only but before we set it to read only,
	 * some task might have had allocated an extent from it already, but it
	 * has not yet created a respective ordered extent (and added it to a
	 * root's list of ordered extents).
	 * Therefore wait for any task currently allocating extents, since the
	 * block group's reservations counter is incremented while a read lock
	 * on the groups' semaphore is held and decremented after releasing
	 * the read access on that semaphore and creating the ordered extent.
	 */
	down_write(&space_info->groups_sem);
	up_write(&space_info->groups_sem);

	wait_on_atomic_t(&bg->reservations,
			 btrfs_wait_bg_reservations_atomic_t,
			 TASK_UNINTERRUPTIBLE);
}

/**
 * btrfs_add_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @ram_bytes:  The number of bytes of file content, and will be same to
 *              @num_bytes except for the compress path.
 * @num_bytes:	The number of bytes in question
 * @delalloc:   The blocks are allocated for the delalloc write
 *
 * This is called by the allocator when it reserves space.  For data we handle
 * the reservation through clearing the delalloc bits in the io_tree.  We have
 * to do this since we could end up allocating less disk space for the amount
 * of data we have reserved in the case of compression.
 *
 * If this is a reservation and the block group has become read only we cannot
 * make the reservation and return -EAGAIN, otherwise this function always
 * succeeds.
 */
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
				    u64 ram_bytes, u64 num_bytes, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (cache->ro) {
		ret = -EAGAIN;
	} else {
		cache->reserved += num_bytes;
		space_info->bytes_reserved += num_bytes;

		trace_btrfs_space_reservation(cache->fs_info,
					      "space_info", space_info->flags,
					      ram_bytes, 0);
		space_info->bytes_may_use -= ram_bytes;
		if (delalloc)
			cache->delalloc_bytes += num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}

/**
 * btrfs_free_reserved_bytes - update the block_group and space info counters
 * @cache:      The cache we are manipulating
 * @num_bytes:  The number of bytes in question
 * @delalloc:   The blocks are allocated for the delalloc write
 *
 * This is called by somebody who is freeing space that was never actually
 * used on disk.  For example if you reserve some space for a new leaf in
 * transaction A and before transaction A commits you free that leaf, you
 * call this with reserve set to 0 in order to clear the reservation.
 */
static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
				     u64 num_bytes, int delalloc)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (cache->ro)
		space_info->bytes_readonly += num_bytes;
	cache->reserved -= num_bytes;
	space_info->bytes_reserved -= num_bytes;

	if (delalloc)
		cache->delalloc_bytes -= num_bytes;
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}

void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_caching_control *next;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_block_group_cache *cache;

	down_write(&fs_info->commit_root_sem);

	list_for_each_entry_safe(caching_ctl, next,
				 &fs_info->caching_block_groups, list) {
		cache = caching_ctl->block_group;
		if (block_group_cache_done(cache)) {
			cache->last_byte_to_unpin = (u64)-1;
			list_del_init(&caching_ctl->list);
			put_caching_control(caching_ctl);
		} else {
			cache->last_byte_to_unpin = caching_ctl->progress;
		}
	}

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];

	up_write(&fs_info->commit_root_sem);

	update_global_block_rsv(fs_info);
}

/*
 * Returns the free cluster for the given space info and sets empty_cluster to
 * what it should be based on the mount options.
 */
static struct btrfs_free_cluster *
fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info,
		   u64 *empty_cluster)
{
	struct btrfs_free_cluster *ret = NULL;
	bool ssd = btrfs_test_opt(root->fs_info, SSD);

	*empty_cluster = 0;
	if (btrfs_mixed_space_info(space_info))
		return ret;

	if (ssd)
		*empty_cluster = SZ_2M;
	if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		ret = &root->fs_info->meta_alloc_cluster;
		if (!ssd)
			*empty_cluster = SZ_64K;
	} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
		ret = &root->fs_info->data_alloc_cluster;
	}

	return ret;
}

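/*
 * Return the pinned extents in [start, end] to the free space accounting,
 * topping up the global reserve first and then feeding what is left to any
 * waiting reservation tickets.
 */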
static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_free_cluster *cluster = NULL;
	u64 len;
	u64 total_unpinned = 0;
	u64 empty_cluster = 0;
	bool readonly;

	while (start <= end) {
		readonly = false;
		if (!cache ||
		    start >= cache->key.objectid + cache->key.offset) {
			if (cache)
				btrfs_put_block_group(cache);
			total_unpinned = 0;
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache); /* Logic error */

			cluster = fetch_cluster_info(root,
						     cache->space_info,
						     &empty_cluster);
			empty_cluster <<= 1;
		}

		len = cache->key.objectid + cache->key.offset - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			if (return_free_space)
				btrfs_add_free_space(cache, start, len);
		}

		start += len;
		total_unpinned += len;
		space_info = cache->space_info;

		/*
		 * If this space cluster has been marked as fragmented and we've
		 * unpinned enough in this block group to potentially allow a
		 * cluster to be created inside of it go ahead and clear the
		 * fragmented check.
		 */
		if (cluster && cluster->fragmented &&
		    total_unpinned > empty_cluster) {
			spin_lock(&cluster->lock);
			cluster->fragmented = 0;
			spin_unlock(&cluster->lock);
		}

		spin_lock(&space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		space_info->bytes_pinned -= len;

		trace_btrfs_space_reservation(fs_info, "pinned",
					      space_info->flags, len, 0);
		space_info->max_extent_size = 0;
		percpu_counter_add(&space_info->total_bytes_pinned, -len);
		if (cache->ro) {
			space_info->bytes_readonly += len;
			readonly = true;
		}
		spin_unlock(&cache->lock);
		if (!readonly && return_free_space &&
		    global_rsv->space_info == space_info) {
			u64 to_add = len;

			WARN_ON(!return_free_space);
			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				to_add = min(len, global_rsv->size -
					     global_rsv->reserved);
				global_rsv->reserved += to_add;
				space_info->bytes_may_use += to_add;
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
				trace_btrfs_space_reservation(fs_info,
							      "space_info",
							      space_info->flags,
							      to_add, 1);
				len -= to_add;
			}
			spin_unlock(&global_rsv->lock);

			if (len)
				space_info_add_new_bytes(fs_info, space_info,
							 len);
		}
		spin_unlock(&space_info->lock);
	}

	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *block_group, *tmp;
	struct list_head *deleted_bgs;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (!trans->aborted) {
		mutex_lock(&fs_info->unused_bg_unpin_mutex);
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret) {
			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
			break;
		}

		if (btrfs_test_opt(root->fs_info, DISCARD))
			ret = btrfs_discard_extent(root, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end);
		unpin_extent_range(root, start, end, true);
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
		cond_resched();
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
	deleted_bgs = &trans->transaction->deleted_bgs;
	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
		u64 trimmed = 0;

		ret = -EROFS;
		if (!trans->aborted)
			ret = btrfs_discard_extent(root,
						   block_group->key.objectid,
						   block_group->key.offset,
						   &trimmed);

		list_del_init(&block_group->bg_list);
		btrfs_put_block_group_trimming(block_group);
		btrfs_put_block_group(block_group);

		if (ret) {
			const char *errstr = btrfs_decode_error(ret);

			btrfs_warn(fs_info,
				   "discard failed while removing blockgroup: errno=%d %s",
				   ret, errstr);
		}
	}

	return 0;
}

6812static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
6813 u64 owner, u64 root_objectid)
6814{
6815 struct btrfs_space_info *space_info;
6816 u64 flags;
6817
6818 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
6819 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
6820 flags = BTRFS_BLOCK_GROUP_SYSTEM;
6821 else
6822 flags = BTRFS_BLOCK_GROUP_METADATA;
6823 } else {
6824 flags = BTRFS_BLOCK_GROUP_DATA;
6825 }
6826
6827 space_info = __find_space_info(fs_info, flags);
6828 BUG_ON(!space_info);
6829 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
6830}
6831
6832
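/*
 * Drop @refs_to_drop references to the extent described by @node.  This
 * locates the backref (inline or keyed) and the extent item, decrements
 * the reference count, and when the last reference goes away it deletes
 * the extent item itself, removes csum items for data extents, returns
 * the range to the free space tree and updates the block group counters.
 */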
6833static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6834 struct btrfs_root *root,
6835 struct btrfs_delayed_ref_node *node, u64 parent,
6836 u64 root_objectid, u64 owner_objectid,
6837 u64 owner_offset, int refs_to_drop,
6838 struct btrfs_delayed_extent_op *extent_op)
6839{
6840 struct btrfs_key key;
6841 struct btrfs_path *path;
6842 struct btrfs_fs_info *info = root->fs_info;
6843 struct btrfs_root *extent_root = info->extent_root;
6844 struct extent_buffer *leaf;
6845 struct btrfs_extent_item *ei;
6846 struct btrfs_extent_inline_ref *iref;
6847 int ret;
6848 int is_data;
6849 int extent_slot = 0;
6850 int found_extent = 0;
6851 int num_to_del = 1;
6852 u32 item_size;
6853 u64 refs;
6854 u64 bytenr = node->bytenr;
6855 u64 num_bytes = node->num_bytes;
6856 int last_ref = 0;
6857 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6858 SKINNY_METADATA);
6859
6860 path = btrfs_alloc_path();
6861 if (!path)
6862 return -ENOMEM;
6863
6864 path->reada = READA_FORWARD;
6865 path->leave_spinning = 1;
6866
6867 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6868 BUG_ON(!is_data && refs_to_drop != 1);
6869
6870 if (is_data)
6871 skinny_metadata = 0;
6872
6873 ret = lookup_extent_backref(trans, extent_root, path, &iref,
6874 bytenr, num_bytes, parent,
6875 root_objectid, owner_objectid,
6876 owner_offset);
6877 if (ret == 0) {
6878 extent_slot = path->slots[0];
6879 while (extent_slot >= 0) {
6880 btrfs_item_key_to_cpu(path->nodes[0], &key,
6881 extent_slot);
6882 if (key.objectid != bytenr)
6883 break;
6884 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6885 key.offset == num_bytes) {
6886 found_extent = 1;
6887 break;
6888 }
6889 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6890 key.offset == owner_objectid) {
6891 found_extent = 1;
6892 break;
6893 }
6894 if (path->slots[0] - extent_slot > 5)
6895 break;
6896 extent_slot--;
6897 }
6898#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6899 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
6900 if (found_extent && item_size < sizeof(*ei))
6901 found_extent = 0;
6902#endif
6903 if (!found_extent) {
6904 BUG_ON(iref);
6905 ret = remove_extent_backref(trans, extent_root, path,
6906 NULL, refs_to_drop,
6907 is_data, &last_ref);
6908 if (ret) {
6909 btrfs_abort_transaction(trans, extent_root, ret);
6910 goto out;
6911 }
6912 btrfs_release_path(path);
6913 path->leave_spinning = 1;
6914
6915 key.objectid = bytenr;
6916 key.type = BTRFS_EXTENT_ITEM_KEY;
6917 key.offset = num_bytes;
6918
6919 if (!is_data && skinny_metadata) {
6920 key.type = BTRFS_METADATA_ITEM_KEY;
6921 key.offset = owner_objectid;
6922 }
6923
6924 ret = btrfs_search_slot(trans, extent_root,
6925 &key, path, -1, 1);
6926 if (ret > 0 && skinny_metadata && path->slots[0]) {
 /*
 * Couldn't find our skinny metadata item,
 * see if we have ye olde extent item.
 */
6931 path->slots[0]--;
6932 btrfs_item_key_to_cpu(path->nodes[0], &key,
6933 path->slots[0]);
6934 if (key.objectid == bytenr &&
6935 key.type == BTRFS_EXTENT_ITEM_KEY &&
6936 key.offset == num_bytes)
6937 ret = 0;
6938 }
6939
6940 if (ret > 0 && skinny_metadata) {
6941 skinny_metadata = false;
6942 key.objectid = bytenr;
6943 key.type = BTRFS_EXTENT_ITEM_KEY;
6944 key.offset = num_bytes;
6945 btrfs_release_path(path);
6946 ret = btrfs_search_slot(trans, extent_root,
6947 &key, path, -1, 1);
6948 }
6949
6950 if (ret) {
6951 btrfs_err(info,
6952 "umm, got %d back from search, was looking for %llu",
6953 ret, bytenr);
6954 if (ret > 0)
6955 btrfs_print_leaf(extent_root,
6956 path->nodes[0]);
6957 }
6958 if (ret < 0) {
6959 btrfs_abort_transaction(trans, extent_root, ret);
6960 goto out;
6961 }
6962 extent_slot = path->slots[0];
6963 }
6964 } else if (WARN_ON(ret == -ENOENT)) {
6965 btrfs_print_leaf(extent_root, path->nodes[0]);
6966 btrfs_err(info,
6967 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
6968 bytenr, parent, root_objectid, owner_objectid,
6969 owner_offset);
6970 btrfs_abort_transaction(trans, extent_root, ret);
6971 goto out;
6972 } else {
6973 btrfs_abort_transaction(trans, extent_root, ret);
6974 goto out;
6975 }
6976
6977 leaf = path->nodes[0];
6978 item_size = btrfs_item_size_nr(leaf, extent_slot);
6979#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6980 if (item_size < sizeof(*ei)) {
6981 BUG_ON(found_extent || extent_slot != path->slots[0]);
6982 ret = convert_extent_item_v0(trans, extent_root, path,
6983 owner_objectid, 0);
6984 if (ret < 0) {
6985 btrfs_abort_transaction(trans, extent_root, ret);
6986 goto out;
6987 }
6988
6989 btrfs_release_path(path);
6990 path->leave_spinning = 1;
6991
6992 key.objectid = bytenr;
6993 key.type = BTRFS_EXTENT_ITEM_KEY;
6994 key.offset = num_bytes;
6995
6996 ret = btrfs_search_slot(trans, extent_root, &key, path,
6997 -1, 1);
6998 if (ret) {
6999 btrfs_err(info,
7000 "umm, got %d back from search, was looking for %llu",
7001 ret, bytenr);
7002 btrfs_print_leaf(extent_root, path->nodes[0]);
7003 }
7004 if (ret < 0) {
7005 btrfs_abort_transaction(trans, extent_root, ret);
7006 goto out;
7007 }
7008
7009 extent_slot = path->slots[0];
7010 leaf = path->nodes[0];
7011 item_size = btrfs_item_size_nr(leaf, extent_slot);
7012 }
7013#endif
7014 BUG_ON(item_size < sizeof(*ei));
7015 ei = btrfs_item_ptr(leaf, extent_slot,
7016 struct btrfs_extent_item);
7017 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
7018 key.type == BTRFS_EXTENT_ITEM_KEY) {
7019 struct btrfs_tree_block_info *bi;
7020 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
7021 bi = (struct btrfs_tree_block_info *)(ei + 1);
7022 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
7023 }
7024
7025 refs = btrfs_extent_refs(leaf, ei);
7026 if (refs < refs_to_drop) {
7027 btrfs_err(info,
7028 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
7029 refs_to_drop, refs, bytenr);
7030 ret = -EINVAL;
7031 btrfs_abort_transaction(trans, extent_root, ret);
7032 goto out;
7033 }
7034 refs -= refs_to_drop;
7035
7036 if (refs > 0) {
7037 if (extent_op)
7038 __run_delayed_extent_op(extent_op, leaf, ei);
 /*
 * In the case of an inline back ref, the reference count will
 * be updated by remove_extent_backref
 */
7043 if (iref) {
7044 BUG_ON(!found_extent);
7045 } else {
7046 btrfs_set_extent_refs(leaf, ei, refs);
7047 btrfs_mark_buffer_dirty(leaf);
7048 }
7049 if (found_extent) {
7050 ret = remove_extent_backref(trans, extent_root, path,
7051 iref, refs_to_drop,
7052 is_data, &last_ref);
7053 if (ret) {
7054 btrfs_abort_transaction(trans, extent_root, ret);
7055 goto out;
7056 }
7057 }
7058 add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
7059 root_objectid);
7060 } else {
7061 if (found_extent) {
7062 BUG_ON(is_data && refs_to_drop !=
7063 extent_data_ref_count(path, iref));
7064 if (iref) {
7065 BUG_ON(path->slots[0] != extent_slot);
7066 } else {
7067 BUG_ON(path->slots[0] != extent_slot + 1);
7068 path->slots[0] = extent_slot;
7069 num_to_del = 2;
7070 }
7071 }
7072
7073 last_ref = 1;
7074 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
7075 num_to_del);
7076 if (ret) {
7077 btrfs_abort_transaction(trans, extent_root, ret);
7078 goto out;
7079 }
7080 btrfs_release_path(path);
7081
7082 if (is_data) {
7083 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
7084 if (ret) {
7085 btrfs_abort_transaction(trans, extent_root, ret);
7086 goto out;
7087 }
7088 }
7089
7090 ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
7091 num_bytes);
7092 if (ret) {
7093 btrfs_abort_transaction(trans, extent_root, ret);
7094 goto out;
7095 }
7096
7097 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
7098 if (ret) {
7099 btrfs_abort_transaction(trans, extent_root, ret);
7100 goto out;
7101 }
7102 }
7103 btrfs_release_path(path);
7104
7105out:
7106 btrfs_free_path(path);
7107 return ret;
7108}
7109
/*
 * when we free an extent, it is likely (and possible) that we free the
 * last delayed ref for that extent as well.  This searches the delayed
 * ref tree for a given extent, and if there are no other delayed refs to
 * be processed, it removes it from the tree.
 */
7116static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
7117 struct btrfs_root *root, u64 bytenr)
7118{
7119 struct btrfs_delayed_ref_head *head;
7120 struct btrfs_delayed_ref_root *delayed_refs;
7121 int ret = 0;
7122
7123 delayed_refs = &trans->transaction->delayed_refs;
7124 spin_lock(&delayed_refs->lock);
7125 head = btrfs_find_delayed_ref_head(trans, bytenr);
7126 if (!head)
7127 goto out_delayed_unlock;
7128
7129 spin_lock(&head->lock);
7130 if (!list_empty(&head->ref_list))
7131 goto out;
7132
7133 if (head->extent_op) {
7134 if (!head->must_insert_reserved)
7135 goto out;
7136 btrfs_free_delayed_extent_op(head->extent_op);
7137 head->extent_op = NULL;
7138 }
7139
 /*
 * waiting for the lock here would deadlock.  If someone else has it
 * locked they are already in the process of dropping it anyway
 */
7144 if (!mutex_trylock(&head->mutex))
7145 goto out;
7146
 /*
 * at this point we have a head with no other entries.  Go
 * ahead and process it.
 */
7151 head->node.in_tree = 0;
7152 rb_erase(&head->href_node, &delayed_refs->href_root);
7153
7154 atomic_dec(&delayed_refs->num_entries);
7155
 /*
 * we don't take a ref on the node because we're removing it from the
 * tree, so we just steal the ref the tree was holding.
 */
7160 delayed_refs->num_heads--;
7161 if (head->processing == 0)
7162 delayed_refs->num_heads_ready--;
7163 head->processing = 0;
7164 spin_unlock(&head->lock);
7165 spin_unlock(&delayed_refs->lock);
7166
7167 BUG_ON(head->extent_op);
7168 if (head->must_insert_reserved)
7169 ret = 1;
7170
7171 mutex_unlock(&head->mutex);
7172 btrfs_put_delayed_ref(&head->node);
7173 return ret;
7174out:
7175 spin_unlock(&head->lock);
7176
7177out_delayed_unlock:
7178 spin_unlock(&delayed_refs->lock);
7179 return 0;
7180}
7181
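/*
 * Called when a tree block is COWed or deleted.  For non-log trees this
 * queues a delayed ref to drop our reference.  If this was the last
 * reference and the block was allocated in the running transaction, we
 * try to reclaim the space right away: via check_ref_cleanup(), or by
 * pinning the extent if the block was already written out.
 */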
7182void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7183 struct btrfs_root *root,
7184 struct extent_buffer *buf,
7185 u64 parent, int last_ref)
7186{
7187 int pin = 1;
7188 int ret;
7189
7190 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7191 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
7192 buf->start, buf->len,
7193 parent, root->root_key.objectid,
7194 btrfs_header_level(buf),
7195 BTRFS_DROP_DELAYED_REF, NULL);
7196 BUG_ON(ret);
7197 }
7198
7199 if (!last_ref)
7200 return;
7201
7202 if (btrfs_header_generation(buf) == trans->transid) {
7203 struct btrfs_block_group_cache *cache;
7204
7205 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7206 ret = check_ref_cleanup(trans, root, buf->start);
7207 if (!ret)
7208 goto out;
7209 }
7210
7211 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
7212
7213 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
7214 pin_down_extent(root, cache, buf->start, buf->len, 1);
7215 btrfs_put_block_group(cache);
7216 goto out;
7217 }
7218
7219 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7220
7221 btrfs_add_free_space(cache, buf->start, buf->len);
7222 btrfs_free_reserved_bytes(cache, buf->len, 0);
7223 btrfs_put_block_group(cache);
7224 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
7225 pin = 0;
7226 }
7227out:
7228 if (pin)
7229 add_pinned_bytes(root->fs_info, buf->len,
7230 btrfs_header_level(buf),
7231 root->root_key.objectid);
7232
 /*
 * Deleting the buffer, clear the corrupt flag since it doesn't
 * matter anymore.
 */
7237 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
7238}
7239

/* Can return -ENOMEM */
7241int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7242 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
7243 u64 owner, u64 offset)
7244{
7245 int ret;
7246 struct btrfs_fs_info *fs_info = root->fs_info;
7247
7248 if (btrfs_is_testing(fs_info))
7249 return 0;
7250
7251 add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
7252
 /*
 * tree log blocks never actually go into the extent allocation
 * tree, just tree-log them.
 */
7257 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
7258 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
 /* unlocks the pinned mutex */
7260 btrfs_pin_extent(root, bytenr, num_bytes, 1);
7261 ret = 0;
7262 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
7263 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
7264 num_bytes,
7265 parent, root_objectid, (int)owner,
7266 BTRFS_DROP_DELAYED_REF, NULL);
7267 } else {
7268 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
7269 num_bytes,
7270 parent, root_objectid, owner,
7271 offset, 0,
7272 BTRFS_DROP_DELAYED_REF, NULL);
7273 }
7274 return ret;
7275}
7276
/*
 * when we wait for progress in the block group caching, its because
 * our allocation attempt failed at least once.  So, we must sleep
 * and let some progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to
 * show up, and then it will check the block group free space numbers
 * for our min num_bytes.  Another option is to have it go ahead
 * and look in the rbtree for a free extent of a given size, but this
 * is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before
 * using any of the information in this block group.
 */
7291static noinline void
7292wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
7293 u64 num_bytes)
7294{
7295 struct btrfs_caching_control *caching_ctl;
7296
7297 caching_ctl = get_caching_control(cache);
7298 if (!caching_ctl)
7299 return;
7300
7301 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
7302 (cache->free_space_ctl->free_space >= num_bytes));
7303
7304 put_caching_control(caching_ctl);
7305}
7306
7307static noinline int
7308wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7309{
7310 struct btrfs_caching_control *caching_ctl;
7311 int ret = 0;
7312
7313 caching_ctl = get_caching_control(cache);
7314 if (!caching_ctl)
7315 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
7316
7317 wait_event(caching_ctl->wait, block_group_cache_done(cache));
7318 if (cache->cached == BTRFS_CACHE_ERROR)
7319 ret = -EIO;
7320 put_caching_control(caching_ctl);
7321 return ret;
7322}
7323
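/*
 * Map block group type flags to a btrfs_raid_types index; block groups
 * with no replication profile bit set count as "single".
 */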
7324int __get_raid_index(u64 flags)
7325{
7326 if (flags & BTRFS_BLOCK_GROUP_RAID10)
7327 return BTRFS_RAID_RAID10;
7328 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
7329 return BTRFS_RAID_RAID1;
7330 else if (flags & BTRFS_BLOCK_GROUP_DUP)
7331 return BTRFS_RAID_DUP;
7332 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
7333 return BTRFS_RAID_RAID0;
7334 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
7335 return BTRFS_RAID_RAID5;
7336 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
7337 return BTRFS_RAID_RAID6;
7338
7339 return BTRFS_RAID_SINGLE;
7340}
7341
7342int get_block_group_index(struct btrfs_block_group_cache *cache)
7343{
7344 return __get_raid_index(cache->flags);
7345}
7346
7347static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
7348 [BTRFS_RAID_RAID10] = "raid10",
7349 [BTRFS_RAID_RAID1] = "raid1",
7350 [BTRFS_RAID_DUP] = "dup",
7351 [BTRFS_RAID_RAID0] = "raid0",
7352 [BTRFS_RAID_SINGLE] = "single",
7353 [BTRFS_RAID_RAID5] = "raid5",
7354 [BTRFS_RAID_RAID6] = "raid6",
7355};
7356
7357static const char *get_raid_name(enum btrfs_raid_types type)
7358{
7359 if (type >= BTRFS_NR_RAID_TYPES)
7360 return NULL;
7361
7362 return btrfs_raid_type_names[type];
7363}
7364
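/*
 * Stages for find_free_extent().  Each restart of the search moves to the
 * next, more expensive, stage; see the comment above the retry logic near
 * the bottom of find_free_extent() for what each stage does.
 */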
7365enum btrfs_loop_type {
7366 LOOP_CACHING_NOWAIT = 0,
7367 LOOP_CACHING_WAIT = 1,
7368 LOOP_ALLOC_CHUNK = 2,
7369 LOOP_NO_EMPTY_SIZE = 3,
7370};
7371
7372static inline void
7373btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
7374 int delalloc)
7375{
7376 if (delalloc)
7377 down_read(&cache->data_rwsem);
7378}
7379
7380static inline void
7381btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
7382 int delalloc)
7383{
7384 btrfs_get_block_group(cache);
7385 if (delalloc)
7386 down_read(&cache->data_rwsem);
7387}
7388
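/*
 * Grab the block group currently backing @cluster, with refill_lock held
 * on return.  In the delalloc case we also need the group's data_rwsem,
 * which may force us to drop refill_lock and retry if the cluster was
 * moved to another block group while we blocked on the semaphore.
 */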
7389static struct btrfs_block_group_cache *
7390btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7391 struct btrfs_free_cluster *cluster,
7392 int delalloc)
7393{
7394 struct btrfs_block_group_cache *used_bg = NULL;
7395
7396 spin_lock(&cluster->refill_lock);
7397 while (1) {
7398 used_bg = cluster->block_group;
7399 if (!used_bg)
7400 return NULL;
7401
7402 if (used_bg == block_group)
7403 return used_bg;
7404
7405 btrfs_get_block_group(used_bg);
7406
7407 if (!delalloc)
7408 return used_bg;
7409
7410 if (down_read_trylock(&used_bg->data_rwsem))
7411 return used_bg;
7412
7413 spin_unlock(&cluster->refill_lock);
7414
 /* We should only have one-level nested. */
7416 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
7417
7418 spin_lock(&cluster->refill_lock);
7419 if (used_bg == cluster->block_group)
7420 return used_bg;
7421
7422 up_read(&used_bg->data_rwsem);
7423 btrfs_put_block_group(used_bg);
7424 }
7425}
7426
7427static inline void
7428btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7429 int delalloc)
7430{
7431 if (delalloc)
7432 up_read(&cache->data_rwsem);
7433 btrfs_put_block_group(cache);
7434}
7435
/*
 * walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 */
7447static noinline int find_free_extent(struct btrfs_root *orig_root,
7448 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7449 u64 hint_byte, struct btrfs_key *ins,
7450 u64 flags, int delalloc)
7451{
7452 int ret = 0;
7453 struct btrfs_root *root = orig_root->fs_info->extent_root;
7454 struct btrfs_free_cluster *last_ptr = NULL;
7455 struct btrfs_block_group_cache *block_group = NULL;
7456 u64 search_start = 0;
7457 u64 max_extent_size = 0;
7458 u64 empty_cluster = 0;
7459 struct btrfs_space_info *space_info;
7460 int loop = 0;
7461 int index = __get_raid_index(flags);
7462 bool failed_cluster_refill = false;
7463 bool failed_alloc = false;
7464 bool use_cluster = true;
7465 bool have_caching_bg = false;
7466 bool orig_have_caching_bg = false;
7467 bool full_search = false;
7468
7469 WARN_ON(num_bytes < root->sectorsize);
7470 ins->type = BTRFS_EXTENT_ITEM_KEY;
7471 ins->objectid = 0;
7472 ins->offset = 0;
7473
7474 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
7475
7476 space_info = __find_space_info(root->fs_info, flags);
7477 if (!space_info) {
7478 btrfs_err(root->fs_info, "No space info for %llu", flags);
7479 return -ENOSPC;
7480 }
7481

 /*
 * If our free space is heavily fragmented we may not be able to make
 * big contiguous allocations, so instead of doing the expensive search
 * for free space, simply return ENOSPC with our max_extent_size so we
 * can go ahead and search for a more manageable chunk.
 *
 * If our max_extent_size is large enough for our allocation simply
 * disable clustering since we will likely not be able to find enough
 * space to create a cluster and induce latency trying.
 */
7492 if (unlikely(space_info->max_extent_size)) {
7493 spin_lock(&space_info->lock);
7494 if (space_info->max_extent_size &&
7495 num_bytes > space_info->max_extent_size) {
7496 ins->offset = space_info->max_extent_size;
7497 spin_unlock(&space_info->lock);
7498 return -ENOSPC;
7499 } else if (space_info->max_extent_size) {
7500 use_cluster = false;
7501 }
7502 spin_unlock(&space_info->lock);
7503 }
7504
7505 last_ptr = fetch_cluster_info(orig_root, space_info, &empty_cluster);
7506 if (last_ptr) {
7507 spin_lock(&last_ptr->lock);
7508 if (last_ptr->block_group)
7509 hint_byte = last_ptr->window_start;
7510 if (last_ptr->fragmented) {
 /*
 * We still set window_start so we can keep track of the
 * last place we found an allocation to try and save
 * some time.
 */
7516 hint_byte = last_ptr->window_start;
7517 use_cluster = false;
7518 }
7519 spin_unlock(&last_ptr->lock);
7520 }
7521
7522 search_start = max(search_start, first_logical_byte(root, 0));
7523 search_start = max(search_start, hint_byte);
7524 if (search_start == hint_byte) {
7525 block_group = btrfs_lookup_block_group(root->fs_info,
7526 search_start);
 /*
 * we don't want to use the block group if it doesn't match our
 * allocation bits, or if its not cached.
 *
 * However if we are re-searching with an ideal block group
 * picked out then we don't care that the block group is cached.
 */
7534 if (block_group && block_group_bits(block_group, flags) &&
7535 block_group->cached != BTRFS_CACHE_NO) {
7536 down_read(&space_info->groups_sem);
7537 if (list_empty(&block_group->list) ||
7538 block_group->ro) {
 /*
 * someone is removing this block group,
 * we can't jump into the have_block_group
 * target because our list pointers are not
 * valid
 */
7545 btrfs_put_block_group(block_group);
7546 up_read(&space_info->groups_sem);
7547 } else {
7548 index = get_block_group_index(block_group);
7549 btrfs_lock_block_group(block_group, delalloc);
7550 goto have_block_group;
7551 }
7552 } else if (block_group) {
7553 btrfs_put_block_group(block_group);
7554 }
7555 }
7556search:
7557 have_caching_bg = false;
7558 if (index == 0 || index == __get_raid_index(flags))
7559 full_search = true;
7560 down_read(&space_info->groups_sem);
7561 list_for_each_entry(block_group, &space_info->block_groups[index],
7562 list) {
7563 u64 offset;
7564 int cached;
7565
7566 btrfs_grab_block_group(block_group, delalloc);
7567 search_start = block_group->key.objectid;
7568
 /*
 * this can happen if we end up cycling through all the
 * raid types, but we want to make sure we only allocate
 * for the proper type.
 */
7574 if (!block_group_bits(block_group, flags)) {
7575 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7576 BTRFS_BLOCK_GROUP_RAID1 |
7577 BTRFS_BLOCK_GROUP_RAID5 |
7578 BTRFS_BLOCK_GROUP_RAID6 |
7579 BTRFS_BLOCK_GROUP_RAID10;
7580
 /*
 * if they asked for extra copies and this block group
 * doesn't provide them, bail.  This does allow us to
 * fill raid0 from raid1.
 */
7586 if ((flags & extra) && !(block_group->flags & extra))
7587 goto loop;
7588 }
7589
7590have_block_group:
7591 cached = block_group_cache_done(block_group);
7592 if (unlikely(!cached)) {
7593 have_caching_bg = true;
7594 ret = cache_block_group(block_group, 0);
7595 BUG_ON(ret < 0);
7596 ret = 0;
7597 }
7598
7599 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
7600 goto loop;
7601 if (unlikely(block_group->ro))
7602 goto loop;
7603
 /*
 * Ok we want to try and use the cluster allocator, so
 * lets look there
 */
7608 if (last_ptr && use_cluster) {
7609 struct btrfs_block_group_cache *used_block_group;
7610 unsigned long aligned_cluster;
 /*
 * the refill lock keeps out other
 * people trying to start a new cluster
 */
7615 used_block_group = btrfs_lock_cluster(block_group,
7616 last_ptr,
7617 delalloc);
7618 if (!used_block_group)
7619 goto refill_cluster;
7620
7621 if (used_block_group != block_group &&
7622 (used_block_group->ro ||
7623 !block_group_bits(used_block_group, flags)))
7624 goto release_cluster;
7625
7626 offset = btrfs_alloc_from_cluster(used_block_group,
7627 last_ptr,
7628 num_bytes,
7629 used_block_group->key.objectid,
7630 &max_extent_size);
7631 if (offset) {
 /* we have a block, we're done */
7633 spin_unlock(&last_ptr->refill_lock);
7634 trace_btrfs_reserve_extent_cluster(root,
7635 used_block_group,
7636 search_start, num_bytes);
7637 if (used_block_group != block_group) {
7638 btrfs_release_block_group(block_group,
7639 delalloc);
7640 block_group = used_block_group;
7641 }
7642 goto checks;
7643 }
7644
7645 WARN_ON(last_ptr->block_group != used_block_group);
7646release_cluster:
 /*
 * If we are on LOOP_NO_EMPTY_SIZE, we can't
 * set up a new cluster, so lets just skip it
 * and let the allocator find whatever block
 * it can find.  If we reach this point, we
 * will have tried the cluster allocator
 * plenty of times and not have found
 * anything, so we are likely way too
 * fragmented for the clustering stuff to find
 * anything.
 *
 * However, if the cluster is taken from the
 * current block group, release the cluster
 * first, so that we stand a better chance of
 * succeeding in the unclustered allocation.
 */
7662 if (loop >= LOOP_NO_EMPTY_SIZE &&
7663 used_block_group != block_group) {
7664 spin_unlock(&last_ptr->refill_lock);
7665 btrfs_release_block_group(used_block_group,
7666 delalloc);
7667 goto unclustered_alloc;
7668 }
7669
 /*
 * This cluster didn't work out, free it and
 * start over
 */
7674 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7675
7676 if (used_block_group != block_group)
7677 btrfs_release_block_group(used_block_group,
7678 delalloc);
7679refill_cluster:
7680 if (loop >= LOOP_NO_EMPTY_SIZE) {
7681 spin_unlock(&last_ptr->refill_lock);
7682 goto unclustered_alloc;
7683 }
7684
7685 aligned_cluster = max_t(unsigned long,
7686 empty_cluster + empty_size,
7687 block_group->full_stripe_len);
7688
 /* allocate a cluster in this block group */
7690 ret = btrfs_find_space_cluster(root, block_group,
7691 last_ptr, search_start,
7692 num_bytes,
7693 aligned_cluster);
7694 if (ret == 0) {
 /*
 * now pull our allocation out of this
 * cluster
 */
7699 offset = btrfs_alloc_from_cluster(block_group,
7700 last_ptr,
7701 num_bytes,
7702 search_start,
7703 &max_extent_size);
7704 if (offset) {
 /* we found one, proceed */
7706 spin_unlock(&last_ptr->refill_lock);
7707 trace_btrfs_reserve_extent_cluster(root,
7708 block_group, search_start,
7709 num_bytes);
7710 goto checks;
7711 }
7712 } else if (!cached && loop > LOOP_CACHING_NOWAIT
7713 && !failed_cluster_refill) {
7714 spin_unlock(&last_ptr->refill_lock);
7715
7716 failed_cluster_refill = true;
7717 wait_block_group_cache_progress(block_group,
7718 num_bytes + empty_cluster + empty_size);
7719 goto have_block_group;
7720 }
7721
 /*
 * at this point we either didn't find a cluster
 * or we weren't able to allocate a block from our
 * cluster.  Free the cluster we've been trying
 * to use, and go to the next block group
 */
7728 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7729 spin_unlock(&last_ptr->refill_lock);
7730 goto loop;
7731 }
7732
7733unclustered_alloc:
 /*
 * We are doing an unclustered alloc, set the fragmented flag so
 * we don't bother trying to setup a cluster again until we get
 * more space.
 */
7739 if (unlikely(last_ptr)) {
7740 spin_lock(&last_ptr->lock);
7741 last_ptr->fragmented = 1;
7742 spin_unlock(&last_ptr->lock);
7743 }
7744 spin_lock(&block_group->free_space_ctl->tree_lock);
7745 if (cached &&
7746 block_group->free_space_ctl->free_space <
7747 num_bytes + empty_cluster + empty_size) {
7748 if (block_group->free_space_ctl->free_space >
7749 max_extent_size)
7750 max_extent_size =
7751 block_group->free_space_ctl->free_space;
7752 spin_unlock(&block_group->free_space_ctl->tree_lock);
7753 goto loop;
7754 }
7755 spin_unlock(&block_group->free_space_ctl->tree_lock);
7756
7757 offset = btrfs_find_space_for_alloc(block_group, search_start,
7758 num_bytes, empty_size,
7759 &max_extent_size);
 /*
 * If we didn't find a chunk, and we haven't failed on this
 * block group before, and this block group is in the middle of
 * caching and we are ok with waiting, then go ahead and wait
 * for progress to be made, and set failed_alloc to true.
 *
 * If failed_alloc is true then we've already waited on this
 * block group once and should move on to the next block group.
 */
7769 if (!offset && !failed_alloc && !cached &&
7770 loop > LOOP_CACHING_NOWAIT) {
7771 wait_block_group_cache_progress(block_group,
7772 num_bytes + empty_size);
7773 failed_alloc = true;
7774 goto have_block_group;
7775 } else if (!offset) {
7776 goto loop;
7777 }
7778checks:
7779 search_start = ALIGN(offset, root->stripesize);
7780
 /* move on to the next group */
7782 if (search_start + num_bytes >
7783 block_group->key.objectid + block_group->key.offset) {
7784 btrfs_add_free_space(block_group, offset, num_bytes);
7785 goto loop;
7786 }
7787
7788 if (offset < search_start)
7789 btrfs_add_free_space(block_group, offset,
7790 search_start - offset);
7791 BUG_ON(offset > search_start);
7792
7793 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
7794 num_bytes, delalloc);
7795 if (ret == -EAGAIN) {
7796 btrfs_add_free_space(block_group, offset, num_bytes);
7797 goto loop;
7798 }
7799 btrfs_inc_block_group_reservations(block_group);
7800
 /* we are all good, lets return */
7802 ins->objectid = search_start;
7803 ins->offset = num_bytes;
7804
7805 trace_btrfs_reserve_extent(orig_root, block_group,
7806 search_start, num_bytes);
7807 btrfs_release_block_group(block_group, delalloc);
7808 break;
7809loop:
7810 failed_cluster_refill = false;
7811 failed_alloc = false;
7812 BUG_ON(index != get_block_group_index(block_group));
7813 btrfs_release_block_group(block_group, delalloc);
7814 }
7815 up_read(&space_info->groups_sem);
7816
7817 if ((loop == LOOP_CACHING_NOWAIT) && have_caching_bg
7818 && !orig_have_caching_bg)
7819 orig_have_caching_bg = true;
7820
7821 if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
7822 goto search;
7823
7824 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
7825 goto search;
7826
 /*
 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
 *			caching kthreads as we move along
 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
 *			again
 */
7835 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
7836 index = 0;
7837 if (loop == LOOP_CACHING_NOWAIT) {
 /*
 * We want to skip the LOOP_CACHING_WAIT step if we
 * don't have any uncached bgs and we've already done a
 * full search through.
 */
7843 if (orig_have_caching_bg || !full_search)
7844 loop = LOOP_CACHING_WAIT;
7845 else
7846 loop = LOOP_ALLOC_CHUNK;
7847 } else {
7848 loop++;
7849 }
7850
7851 if (loop == LOOP_ALLOC_CHUNK) {
7852 struct btrfs_trans_handle *trans;
7853 int exist = 0;
7854
7855 trans = current->journal_info;
7856 if (trans)
7857 exist = 1;
7858 else
7859 trans = btrfs_join_transaction(root);
7860
7861 if (IS_ERR(trans)) {
7862 ret = PTR_ERR(trans);
7863 goto out;
7864 }
7865
7866 ret = do_chunk_alloc(trans, root, flags,
7867 CHUNK_ALLOC_FORCE);
7868
 /*
 * If we can't allocate a new chunk we've already looped
 * through at least once, move on to the NO_EMPTY_SIZE
 * case.
 */
7874 if (ret == -ENOSPC)
7875 loop = LOOP_NO_EMPTY_SIZE;
7876
 /*
 * Do not bail out on ENOSPC since we
 * can do more things.
 */
7881 if (ret < 0 && ret != -ENOSPC)
7882 btrfs_abort_transaction(trans,
7883 root, ret);
7884 else
7885 ret = 0;
7886 if (!exist)
7887 btrfs_end_transaction(trans, root);
7888 if (ret)
7889 goto out;
7890 }
7891
7892 if (loop == LOOP_NO_EMPTY_SIZE) {
 /*
 * Don't loop again if we already have no empty_size and
 * no empty_cluster.
 */
7897 if (empty_size == 0 &&
7898 empty_cluster == 0) {
7899 ret = -ENOSPC;
7900 goto out;
7901 }
7902 empty_size = 0;
7903 empty_cluster = 0;
7904 }
7905
7906 goto search;
7907 } else if (!ins->objectid) {
7908 ret = -ENOSPC;
7909 } else if (ins->objectid) {
7910 if (!use_cluster && last_ptr) {
7911 spin_lock(&last_ptr->lock);
7912 last_ptr->window_start = ins->objectid;
7913 spin_unlock(&last_ptr->lock);
7914 }
7915 ret = 0;
7916 }
7917out:
7918 if (ret == -ENOSPC) {
7919 spin_lock(&space_info->lock);
7920 space_info->max_extent_size = max_extent_size;
7921 spin_unlock(&space_info->lock);
7922 ins->offset = max_extent_size;
7923 }
7924 return ret;
7925}
7926
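/*
 * Debugging helper for -ENOSPC: print the counters of a space_info and,
 * optionally, of every block group in it (enabled by -o enospc_debug).
 */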
7927static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
7928 int dump_block_groups)
7929{
7930 struct btrfs_block_group_cache *cache;
7931 int index = 0;
7932
7933 spin_lock(&info->lock);
7934 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
7935 info->flags,
7936 info->total_bytes - info->bytes_used - info->bytes_pinned -
7937 info->bytes_reserved - info->bytes_readonly -
7938 info->bytes_may_use, (info->full) ? "" : "not ");
7939 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu\n",
7940 info->total_bytes, info->bytes_used, info->bytes_pinned,
7941 info->bytes_reserved, info->bytes_may_use,
7942 info->bytes_readonly);
7943 spin_unlock(&info->lock);
7944
7945 if (!dump_block_groups)
7946 return;
7947
7948 down_read(&info->groups_sem);
7949again:
7950 list_for_each_entry(cache, &info->block_groups[index], list) {
7951 spin_lock(&cache->lock);
7952 printk(KERN_INFO "BTRFS: block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
7953 cache->key.objectid, cache->key.offset,
7954 btrfs_block_group_used(&cache->item), cache->pinned,
7955 cache->reserved, cache->ro ? "[readonly]" : "");
7956 btrfs_dump_free_space(cache, bytes);
7957 spin_unlock(&cache->lock);
7958 }
7959 if (++index < BTRFS_NR_RAID_TYPES)
7960 goto again;
7961 up_read(&info->groups_sem);
7962}
7963
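/*
 * Main entry point for extent allocation.  On -ENOSPC we retry with
 * progressively smaller sizes (halved each round, but never below
 * min_alloc_size), using the max_extent_size hint returned in
 * ins->offset by the failed search.
 */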
7964int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
7965 u64 num_bytes, u64 min_alloc_size,
7966 u64 empty_size, u64 hint_byte,
7967 struct btrfs_key *ins, int is_data, int delalloc)
7968{
7969 bool final_tried = num_bytes == min_alloc_size;
7970 u64 flags;
7971 int ret;
7972
7973 flags = btrfs_get_alloc_profile(root, is_data);
7974again:
7975 WARN_ON(num_bytes < root->sectorsize);
7976 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
7977 hint_byte, ins, flags, delalloc);
7978 if (!ret && !is_data) {
7979 btrfs_dec_block_group_reservations(root->fs_info,
7980 ins->objectid);
7981 } else if (ret == -ENOSPC) {
7982 if (!final_tried && ins->offset) {
7983 num_bytes = min(num_bytes >> 1, ins->offset);
7984 num_bytes = round_down(num_bytes, root->sectorsize);
7985 num_bytes = max(num_bytes, min_alloc_size);
7986 ram_bytes = num_bytes;
7987 if (num_bytes == min_alloc_size)
7988 final_tried = true;
7989 goto again;
7990 } else if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
7991 struct btrfs_space_info *sinfo;
7992
7993 sinfo = __find_space_info(root->fs_info, flags);
7994 btrfs_err(root->fs_info,
7995 "allocation failed flags %llu, wanted %llu",
7996 flags, num_bytes);
7997 if (sinfo)
7998 dump_space_info(sinfo, num_bytes, 1);
7999 }
8000 }
8001
8002 return ret;
8003}
8004
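/*
 * Return a reserved but unused extent to the free space cache.  If @pin
 * is set the range is pinned instead, so it can't be reused before the
 * running transaction commits.
 */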
8005static int __btrfs_free_reserved_extent(struct btrfs_root *root,
8006 u64 start, u64 len,
8007 int pin, int delalloc)
8008{
8009 struct btrfs_block_group_cache *cache;
8010 int ret = 0;
8011
8012 cache = btrfs_lookup_block_group(root->fs_info, start);
8013 if (!cache) {
8014 btrfs_err(root->fs_info, "Unable to find block group for %llu",
8015 start);
8016 return -ENOSPC;
8017 }
8018
8019 if (pin)
8020 pin_down_extent(root, cache, start, len, 1);
8021 else {
8022 if (btrfs_test_opt(root->fs_info, DISCARD))
8023 ret = btrfs_discard_extent(root, start, len, NULL);
8024 btrfs_add_free_space(cache, start, len);
8025 btrfs_free_reserved_bytes(cache, len, delalloc);
8026 trace_btrfs_reserved_extent_free(root, start, len);
8027 }
8028
8029 btrfs_put_block_group(cache);
8030 return ret;
8031}
8032
8033int btrfs_free_reserved_extent(struct btrfs_root *root,
8034 u64 start, u64 len, int delalloc)
8035{
8036 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
8037}
8038
8039int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
8040 u64 start, u64 len)
8041{
8042 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
8043}
8044
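/*
 * Insert the extent item plus an inline backref (shared if @parent is
 * set) for a newly allocated data extent, then update the free space
 * tree and the block group counters.
 */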
8045static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8046 struct btrfs_root *root,
8047 u64 parent, u64 root_objectid,
8048 u64 flags, u64 owner, u64 offset,
8049 struct btrfs_key *ins, int ref_mod)
8050{
8051 int ret;
8052 struct btrfs_fs_info *fs_info = root->fs_info;
8053 struct btrfs_extent_item *extent_item;
8054 struct btrfs_extent_inline_ref *iref;
8055 struct btrfs_path *path;
8056 struct extent_buffer *leaf;
8057 int type;
8058 u32 size;
8059
8060 if (parent > 0)
8061 type = BTRFS_SHARED_DATA_REF_KEY;
8062 else
8063 type = BTRFS_EXTENT_DATA_REF_KEY;
8064
8065 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
8066
8067 path = btrfs_alloc_path();
8068 if (!path)
8069 return -ENOMEM;
8070
8071 path->leave_spinning = 1;
8072 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8073 ins, size);
8074 if (ret) {
8075 btrfs_free_path(path);
8076 return ret;
8077 }
8078
8079 leaf = path->nodes[0];
8080 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8081 struct btrfs_extent_item);
8082 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
8083 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8084 btrfs_set_extent_flags(leaf, extent_item,
8085 flags | BTRFS_EXTENT_FLAG_DATA);
8086
8087 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8088 btrfs_set_extent_inline_ref_type(leaf, iref, type);
8089 if (parent > 0) {
8090 struct btrfs_shared_data_ref *ref;
8091 ref = (struct btrfs_shared_data_ref *)(iref + 1);
8092 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8093 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
8094 } else {
8095 struct btrfs_extent_data_ref *ref;
8096 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
8097 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
8098 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
8099 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
8100 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
8101 }
8102
8103 btrfs_mark_buffer_dirty(path->nodes[0]);
8104 btrfs_free_path(path);
8105
8106 ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
8107 ins->offset);
8108 if (ret)
8109 return ret;
8110
8111 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
8112 if (ret) {
8113 btrfs_err(fs_info, "update block group failed for %llu %llu",
8114 ins->objectid, ins->offset);
8115 BUG();
8116 }
8117 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
8118 return ret;
8119}
8120
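/*
 * Metadata counterpart of alloc_reserved_file_extent().  With the skinny
 * metadata feature the block level lives in the key offset, otherwise a
 * btrfs_tree_block_info is embedded after the extent item.
 */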
8121static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
8122 struct btrfs_root *root,
8123 u64 parent, u64 root_objectid,
8124 u64 flags, struct btrfs_disk_key *key,
8125 int level, struct btrfs_key *ins)
8126{
8127 int ret;
8128 struct btrfs_fs_info *fs_info = root->fs_info;
8129 struct btrfs_extent_item *extent_item;
8130 struct btrfs_tree_block_info *block_info;
8131 struct btrfs_extent_inline_ref *iref;
8132 struct btrfs_path *path;
8133 struct extent_buffer *leaf;
8134 u32 size = sizeof(*extent_item) + sizeof(*iref);
8135 u64 num_bytes = ins->offset;
8136 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
8137 SKINNY_METADATA);
8138
8139 if (!skinny_metadata)
8140 size += sizeof(*block_info);
8141
8142 path = btrfs_alloc_path();
8143 if (!path) {
8144 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
8145 root->nodesize);
8146 return -ENOMEM;
8147 }
8148
8149 path->leave_spinning = 1;
8150 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8151 ins, size);
8152 if (ret) {
8153 btrfs_free_path(path);
8154 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
8155 root->nodesize);
8156 return ret;
8157 }
8158
8159 leaf = path->nodes[0];
8160 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8161 struct btrfs_extent_item);
8162 btrfs_set_extent_refs(leaf, extent_item, 1);
8163 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8164 btrfs_set_extent_flags(leaf, extent_item,
8165 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
8166
8167 if (skinny_metadata) {
8168 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8169 num_bytes = root->nodesize;
8170 } else {
8171 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
8172 btrfs_set_tree_block_key(leaf, block_info, key);
8173 btrfs_set_tree_block_level(leaf, block_info, level);
8174 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
8175 }
8176
8177 if (parent > 0) {
8178 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
8179 btrfs_set_extent_inline_ref_type(leaf, iref,
8180 BTRFS_SHARED_BLOCK_REF_KEY);
8181 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8182 } else {
8183 btrfs_set_extent_inline_ref_type(leaf, iref,
8184 BTRFS_TREE_BLOCK_REF_KEY);
8185 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
8186 }
8187
8188 btrfs_mark_buffer_dirty(leaf);
8189 btrfs_free_path(path);
8190
8191 ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
8192 num_bytes);
8193 if (ret)
8194 return ret;
8195
8196 ret = update_block_group(trans, root, ins->objectid, root->nodesize,
8197 1);
8198 if (ret) {
8199 btrfs_err(fs_info, "update block group failed for %llu %llu",
8200 ins->objectid, ins->offset);
8201 BUG();
8202 }
8203
8204 trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize);
8205 return ret;
8206}
8207
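/*
 * Record a newly allocated file extent as a delayed ref; the extent item
 * itself is inserted later, when the delayed refs are run.
 */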
8208int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8209 struct btrfs_root *root,
8210 u64 root_objectid, u64 owner,
8211 u64 offset, u64 ram_bytes,
8212 struct btrfs_key *ins)
8213{
8214 int ret;
8215
8216 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
8217
8218 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
8219 ins->offset, 0,
8220 root_objectid, owner, offset,
8221 ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
8222 NULL);
8223 return ret;
8224}
8225
/*
 * this is used by the tree logging recovery code.  It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
8231int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8232 struct btrfs_root *root,
8233 u64 root_objectid, u64 owner, u64 offset,
8234 struct btrfs_key *ins)
8235{
8236 int ret;
8237 struct btrfs_block_group_cache *block_group;
8238 struct btrfs_space_info *space_info;
8239
 /*
 * Mixed block groups will exclude before processing the log so we only
 * need to do the exclude dance if this fs isn't mixed.
 */
8244 if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
8245 ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
8246 if (ret)
8247 return ret;
8248 }
8249
8250 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
8251 if (!block_group)
8252 return -EINVAL;
8253
8254 space_info = block_group->space_info;
8255 spin_lock(&space_info->lock);
8256 spin_lock(&block_group->lock);
8257 space_info->bytes_reserved += ins->offset;
8258 block_group->reserved += ins->offset;
8259 spin_unlock(&block_group->lock);
8260 spin_unlock(&space_info->lock);
8261
8262 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
8263 0, owner, offset, ins, 1);
8264 btrfs_put_block_group(block_group);
8265 return ret;
8266}
8267
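/*
 * Initialize a newly allocated tree block: stamp the transaction id, take
 * the tree lock and mark the buffer dirty in the right io tree (the log
 * trees use dirty_log_pages, everything else the transaction's
 * dirty_pages).
 */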
8268static struct extent_buffer *
8269btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8270 u64 bytenr, int level)
8271{
8272 struct extent_buffer *buf;
8273
8274 buf = btrfs_find_create_tree_block(root, bytenr);
8275 if (IS_ERR(buf))
8276 return buf;
8277
8278 btrfs_set_header_generation(buf, trans->transid);
8279 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
8280 btrfs_tree_lock(buf);
8281 clean_tree_block(trans, root->fs_info, buf);
8282 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
8283
8284 btrfs_set_lock_blocking(buf);
8285 set_extent_buffer_uptodate(buf);
8286
8287 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
8288 buf->log_index = root->log_transid % 2;
 /*
 * we allow two log transactions at a time, use different
 * EXTENT bits to differentiate dirty pages.
 */
8293 if (buf->log_index == 0)
8294 set_extent_dirty(&root->dirty_log_pages, buf->start,
8295 buf->start + buf->len - 1, GFP_NOFS);
8296 else
8297 set_extent_new(&root->dirty_log_pages, buf->start,
8298 buf->start + buf->len - 1);
8299 } else {
8300 buf->log_index = -1;
8301 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
8302 buf->start + buf->len - 1, GFP_NOFS);
8303 }
8304 trans->dirty = true;
8305
8306 return buf;
8307}
8308
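/*
 * Pick the block reserve backing this allocation and take one blocksize
 * worth of space from it.  If that fails we recalculate the global
 * reserve (when that is the one in use) and retry, then fall back to a
 * BTRFS_RESERVE_NO_FLUSH reservation, and as a last resort steal from
 * the global reserve when the space types match.
 */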
8309static struct btrfs_block_rsv *
8310use_block_rsv(struct btrfs_trans_handle *trans,
8311 struct btrfs_root *root, u32 blocksize)
8312{
8313 struct btrfs_block_rsv *block_rsv;
8314 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
8315 int ret;
8316 bool global_updated = false;
8317
8318 block_rsv = get_block_rsv(trans, root);
8319
8320 if (unlikely(block_rsv->size == 0))
8321 goto try_reserve;
8322again:
8323 ret = block_rsv_use_bytes(block_rsv, blocksize);
8324 if (!ret)
8325 return block_rsv;
8326
8327 if (block_rsv->failfast)
8328 return ERR_PTR(ret);
8329
8330 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8331 global_updated = true;
8332 update_global_block_rsv(root->fs_info);
8333 goto again;
8334 }
8335
8336 if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
8337 static DEFINE_RATELIMIT_STATE(_rs,
8338 DEFAULT_RATELIMIT_INTERVAL * 10,
8339 1);
8340 if (__ratelimit(&_rs))
8341 WARN(1, KERN_DEBUG
8342 "BTRFS: block rsv returned %d\n", ret);
8343 }
8344try_reserve:
8345 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8346 BTRFS_RESERVE_NO_FLUSH);
8347 if (!ret)
8348 return block_rsv;
 /*
 * If we couldn't reserve metadata bytes try and use some from
 * the global reserve if its space type is the same as the global
 * reservation.
 */
8354 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8355 block_rsv->space_info == global_rsv->space_info) {
8356 ret = block_rsv_use_bytes(global_rsv, blocksize);
8357 if (!ret)
8358 return global_rsv;
8359 }
8360 return ERR_PTR(ret);
8361}
8362
8363static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8364 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8365{
8366 block_rsv_add_bytes(block_rsv, blocksize, 0);
8367 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
8368}
8369
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 */
8374struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8375 struct btrfs_root *root,
8376 u64 parent, u64 root_objectid,
8377 struct btrfs_disk_key *key, int level,
8378 u64 hint, u64 empty_size)
8379{
8380 struct btrfs_key ins;
8381 struct btrfs_block_rsv *block_rsv;
8382 struct extent_buffer *buf;
8383 struct btrfs_delayed_extent_op *extent_op;
8384 u64 flags = 0;
8385 int ret;
8386 u32 blocksize = root->nodesize;
8387 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
8388 SKINNY_METADATA);
8389
8390#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
8391 if (btrfs_is_testing(root->fs_info)) {
8392 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
8393 level);
8394 if (!IS_ERR(buf))
8395 root->alloc_bytenr += blocksize;
8396 return buf;
8397 }
8398#endif
8399
8400 block_rsv = use_block_rsv(trans, root, blocksize);
8401 if (IS_ERR(block_rsv))
8402 return ERR_CAST(block_rsv);
8403
8404 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
8405 empty_size, hint, &ins, 0, 0);
8406 if (ret)
8407 goto out_unuse;
8408
8409 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level);
8410 if (IS_ERR(buf)) {
8411 ret = PTR_ERR(buf);
8412 goto out_free_reserved;
8413 }
8414
8415 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
8416 if (parent == 0)
8417 parent = ins.objectid;
8418 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8419 } else
8420 BUG_ON(parent > 0);
8421
8422 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
8423 extent_op = btrfs_alloc_delayed_extent_op();
8424 if (!extent_op) {
8425 ret = -ENOMEM;
8426 goto out_free_buf;
8427 }
8428 if (key)
8429 memcpy(&extent_op->key, key, sizeof(extent_op->key));
8430 else
8431 memset(&extent_op->key, 0, sizeof(extent_op->key));
8432 extent_op->flags_to_set = flags;
8433 extent_op->update_key = skinny_metadata ? false : true;
8434 extent_op->update_flags = true;
8435 extent_op->is_data = false;
8436 extent_op->level = level;
8437
8438 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
8439 ins.objectid, ins.offset,
8440 parent, root_objectid, level,
8441 BTRFS_ADD_DELAYED_EXTENT,
8442 extent_op);
8443 if (ret)
8444 goto out_free_delayed;
8445 }
8446 return buf;
8447
8448out_free_delayed:
8449 btrfs_free_delayed_extent_op(extent_op);
8450out_free_buf:
8451 free_extent_buffer(buf);
8452out_free_reserved:
8453 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 0);
8454out_unuse:
8455 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
8456 return ERR_PTR(ret);
8457}
8458
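/*
 * State carried through the snapshot deletion walk in
 * btrfs_drop_snapshot(): cached reference counts and flags per level, the
 * current stage (DROP_REFERENCE or UPDATE_BACKREF) and readahead
 * bookkeeping.
 */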
8459struct walk_control {
8460 u64 refs[BTRFS_MAX_LEVEL];
8461 u64 flags[BTRFS_MAX_LEVEL];
8462 struct btrfs_key update_progress;
8463 int stage;
8464 int level;
8465 int shared_level;
8466 int update_ref;
8467 int keep_locks;
8468 int reada_slot;
8469 int reada_count;
8470 int for_reloc;
8471};
8472
8473#define DROP_REFERENCE 1
8474#define UPDATE_BACKREF 2
8475
8476static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
8477 struct btrfs_root *root,
8478 struct walk_control *wc,
8479 struct btrfs_path *path)
8480{
8481 u64 bytenr;
8482 u64 generation;
8483 u64 refs;
8484 u64 flags;
8485 u32 nritems;
8486 struct btrfs_key key;
8487 struct extent_buffer *eb;
8488 int ret;
8489 int slot;
8490 int nread = 0;
8491
8492 if (path->slots[wc->level] < wc->reada_slot) {
8493 wc->reada_count = wc->reada_count * 2 / 3;
8494 wc->reada_count = max(wc->reada_count, 2);
8495 } else {
8496 wc->reada_count = wc->reada_count * 3 / 2;
8497 wc->reada_count = min_t(int, wc->reada_count,
8498 BTRFS_NODEPTRS_PER_BLOCK(root));
8499 }
8500
8501 eb = path->nodes[wc->level];
8502 nritems = btrfs_header_nritems(eb);
8503
8504 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
8505 if (nread >= wc->reada_count)
8506 break;
8507
8508 cond_resched();
8509 bytenr = btrfs_node_blockptr(eb, slot);
8510 generation = btrfs_node_ptr_generation(eb, slot);
8511
8512 if (slot == path->slots[wc->level])
8513 goto reada;
8514
8515 if (wc->stage == UPDATE_BACKREF &&
8516 generation <= root->root_key.offset)
8517 continue;
8518
 /* We don't lock the tree block, it's fine to be racy here */
8520 ret = btrfs_lookup_extent_info(trans, root, bytenr,
8521 wc->level - 1, 1, &refs,
8522 &flags);
8523
8524 if (ret < 0)
8525 continue;
8526 BUG_ON(refs == 0);
8527
8528 if (wc->stage == DROP_REFERENCE) {
8529 if (refs == 1)
8530 goto reada;
8531
8532 if (wc->level == 1 &&
8533 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8534 continue;
8535 if (!wc->update_ref ||
8536 generation <= root->root_key.offset)
8537 continue;
8538 btrfs_node_key_to_cpu(eb, &key, slot);
8539 ret = btrfs_comp_cpu_keys(&key,
8540 &wc->update_progress);
8541 if (ret < 0)
8542 continue;
8543 } else {
8544 if (wc->level == 1 &&
8545 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8546 continue;
8547 }
8548reada:
8549 readahead_tree_block(root, bytenr);
8550 nread++;
8551 }
8552 wc->reada_slot = slot;
8553}
8554
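/*
 * Mark every data extent referenced by the leaf as dirty for qgroup
 * accounting.  Inline extents and holes carry no extent backrefs and are
 * skipped.
 */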
8555static int account_leaf_items(struct btrfs_trans_handle *trans,
8556 struct btrfs_root *root,
8557 struct extent_buffer *eb)
8558{
8559 int nr = btrfs_header_nritems(eb);
8560 int i, extent_type, ret;
8561 struct btrfs_key key;
8562 struct btrfs_file_extent_item *fi;
8563 u64 bytenr, num_bytes;
8564
 /* We can be called directly from walk_up_proc() */
8566 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags))
8567 return 0;
8568
8569 for (i = 0; i < nr; i++) {
8570 btrfs_item_key_to_cpu(eb, &key, i);
8571
8572 if (key.type != BTRFS_EXTENT_DATA_KEY)
8573 continue;
8574
8575 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
8576
8577 extent_type = btrfs_file_extent_type(eb, fi);
8578
8579 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
8580 continue;
8581
8582 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8583 if (!bytenr)
8584 continue;
8585
8586 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8587
8588 ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
8589 bytenr, num_bytes, GFP_NOFS);
8590 if (ret)
8591 return ret;
8592 }
8593 return 0;
8594}
8595
/*
 * Walk up the tree from the bottom, freeing leaves and any interior
 * nodes which have had all slots visited. If a node (leaf or
 * interior) is freed, the node above it will have its slot
 * incremented. The root node will never be freed.
 *
 * At the end of this function, we should have a path which has all
 * slots incremented to the next position for a search. If we need to
 * read a new node it will be NULL and the node above it will have the
 * correct slot selected for a later read.
 *
 * If we increment the root nodes slot counter past the number of
 * elements, 1 is returned to signal completion of the search.
 */
8610static int adjust_slots_upwards(struct btrfs_root *root,
8611 struct btrfs_path *path, int root_level)
8612{
8613 int level = 0;
8614 int nr, slot;
8615 struct extent_buffer *eb;
8616
8617 if (root_level == 0)
8618 return 1;
8619
8620 while (level <= root_level) {
8621 eb = path->nodes[level];
8622 nr = btrfs_header_nritems(eb);
8623 path->slots[level]++;
8624 slot = path->slots[level];
8625 if (slot >= nr || level == 0) {
 /*
 * Don't free the root - we will detect this
 * condition after our loop and return a
 * positive value for caller to stop walking the tree.
 */
8631 if (level != root_level) {
8632 btrfs_tree_unlock_rw(eb, path->locks[level]);
8633 path->locks[level] = 0;
8634
8635 free_extent_buffer(eb);
8636 path->nodes[level] = NULL;
8637 path->slots[level] = 0;
8638 }
8639 } else {
 /*
 * We have a valid slot to walk back down
 * from. Stop here so caller can process these
 * new nodes.
 */
8645 break;
8646 }
8647
8648 level++;
8649 }
8650
8651 eb = path->nodes[root_level];
8652 if (path->slots[root_level] >= btrfs_header_nritems(eb))
8653 return 1;
8654
8655 return 0;
8656}
8657
/*
 * root_eb is the subtree root and is locked before this function is called.
 */
8661static int account_shared_subtree(struct btrfs_trans_handle *trans,
8662 struct btrfs_root *root,
8663 struct extent_buffer *root_eb,
8664 u64 root_gen,
8665 int root_level)
8666{
8667 int ret = 0;
8668 int level;
8669 struct extent_buffer *eb = root_eb;
8670 struct btrfs_path *path = NULL;
8671
8672 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
8673 BUG_ON(root_eb == NULL);
8674
8675 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags))
8676 return 0;
8677
8678 if (!extent_buffer_uptodate(root_eb)) {
8679 ret = btrfs_read_buffer(root_eb, root_gen);
8680 if (ret)
8681 goto out;
8682 }
8683
8684 if (root_level == 0) {
8685 ret = account_leaf_items(trans, root, root_eb);
8686 goto out;
8687 }
8688
8689 path = btrfs_alloc_path();
8690 if (!path)
8691 return -ENOMEM;
8692
 /*
 * Walk down the tree.  Missing extent blocks are filled in as
 * we go. Metadata is accounted every time we read a new
 * extent block.
 *
 * When we reach a leaf, we account for file extent items in it,
 * walk back up the tree (adjusting slot pointers as we go)
 * and restart the search process.
 */
8702 extent_buffer_get(root_eb);
8703 path->nodes[root_level] = root_eb;
8704 path->slots[root_level] = 0;
8705 path->locks[root_level] = 0;
8706walk_down:
8707 level = root_level;
8708 while (level >= 0) {
8709 if (path->nodes[level] == NULL) {
8710 int parent_slot;
8711 u64 child_gen;
8712 u64 child_bytenr;
8713
 /* We need to get child blockptr/gen from
 * parent before we can read it. */
8716 eb = path->nodes[level + 1];
8717 parent_slot = path->slots[level + 1];
8718 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
8719 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
8720
8721 eb = read_tree_block(root, child_bytenr, child_gen);
8722 if (IS_ERR(eb)) {
8723 ret = PTR_ERR(eb);
8724 goto out;
8725 } else if (!extent_buffer_uptodate(eb)) {
8726 free_extent_buffer(eb);
8727 ret = -EIO;
8728 goto out;
8729 }
8730
8731 path->nodes[level] = eb;
8732 path->slots[level] = 0;
8733
8734 btrfs_tree_read_lock(eb);
8735 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
8736 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
8737
8738 ret = btrfs_qgroup_insert_dirty_extent(trans,
8739 root->fs_info, child_bytenr,
8740 root->nodesize, GFP_NOFS);
8741 if (ret)
8742 goto out;
8743 }
8744
8745 if (level == 0) {
8746 ret = account_leaf_items(trans, root, path->nodes[level]);
8747 if (ret)
8748 goto out;
8749
 /* Nonzero return here means we completed our search */
8751 ret = adjust_slots_upwards(root, path, root_level);
8752 if (ret)
8753 break;
8754
 /* Restart search with new slots */
8756 goto walk_down;
8757 }
8758
8759 level--;
8760 }
8761
8762 ret = 0;
8763out:
8764 btrfs_free_path(path);
8765
8766 return ret;
8767}
8768
/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8777static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8778 struct btrfs_root *root,
8779 struct btrfs_path *path,
8780 struct walk_control *wc, int lookup_info)
8781{
8782 int level = wc->level;
8783 struct extent_buffer *eb = path->nodes[level];
8784 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8785 int ret;
8786
8787 if (wc->stage == UPDATE_BACKREF &&
8788 btrfs_header_owner(eb) != root->root_key.objectid)
8789 return 1;
8790
 /*
 * when the reference count of a tree block is 1, it won't increase
 * again. once the full backref flag is set, we never clear it.
 */
8795 if (lookup_info &&
8796 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
8797 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
8798 BUG_ON(!path->locks[level]);
8799 ret = btrfs_lookup_extent_info(trans, root,
8800 eb->start, level, 1,
8801 &wc->refs[level],
8802 &wc->flags[level]);
8803 BUG_ON(ret == -ENOMEM);
8804 if (ret)
8805 return ret;
8806 BUG_ON(wc->refs[level] == 0);
8807 }
8808
8809 if (wc->stage == DROP_REFERENCE) {
8810 if (wc->refs[level] > 1)
8811 return 1;
8812
8813 if (path->locks[level] && !wc->keep_locks) {
8814 btrfs_tree_unlock_rw(eb, path->locks[level]);
8815 path->locks[level] = 0;
8816 }
8817 return 0;
8818 }
8819
 /* wc->stage == UPDATE_BACKREF */
8821 if (!(wc->flags[level] & flag)) {
8822 BUG_ON(!path->locks[level]);
8823 ret = btrfs_inc_ref(trans, root, eb, 1);
8824 BUG_ON(ret);
8825 ret = btrfs_dec_ref(trans, root, eb, 0);
8826 BUG_ON(ret);
8827 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
8828 eb->len, flag,
8829 btrfs_header_level(eb), 0);
8830 BUG_ON(ret);
8831 wc->flags[level] |= flag;
8832 }
8833
 /*
 * the block is shared by multiple trees, so it's not good to
 * keep the tree lock
 */
8838 if (path->locks[level] && level > 0) {
8839 btrfs_tree_unlock_rw(eb, path->locks[level]);
8840 path->locks[level] = 0;
8841 }
8842 return 0;
8843}
8844
/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, here the walking can be skipped.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8858static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8859 struct btrfs_root *root,
8860 struct btrfs_path *path,
8861 struct walk_control *wc, int *lookup_info)
8862{
8863 u64 bytenr;
8864 u64 generation;
8865 u64 parent;
8866 u32 blocksize;
8867 struct btrfs_key key;
8868 struct extent_buffer *next;
8869 int level = wc->level;
8870 int reada = 0;
8871 int ret = 0;
8872 bool need_account = false;
8873
8874 generation = btrfs_node_ptr_generation(path->nodes[level],
8875 path->slots[level]);
8876
 /*
 * if the lower level block was created before the snapshot
 * was created, we know there is no need to update back refs
 * for the subtree
 */
8881 if (wc->stage == UPDATE_BACKREF &&
8882 generation <= root->root_key.offset) {
8883 *lookup_info = 1;
8884 return 1;
8885 }
8886
8887 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8888 blocksize = root->nodesize;
8889
8890 next = btrfs_find_tree_block(root->fs_info, bytenr);
8891 if (!next) {
8892 next = btrfs_find_create_tree_block(root, bytenr);
8893 if (IS_ERR(next))
8894 return PTR_ERR(next);
8895
8896 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
8897 level - 1);
8898 reada = 1;
8899 }
8900 btrfs_tree_lock(next);
8901 btrfs_set_lock_blocking(next);
8902
8903 ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
8904 &wc->refs[level - 1],
8905 &wc->flags[level - 1]);
8906 if (ret < 0)
8907 goto out_unlock;
8908
8909 if (unlikely(wc->refs[level - 1] == 0)) {
8910 btrfs_err(root->fs_info, "Missing references.");
8911 ret = -EIO;
8912 goto out_unlock;
8913 }
8914 *lookup_info = 0;
8915
8916 if (wc->stage == DROP_REFERENCE) {
8917 if (wc->refs[level - 1] > 1) {
8918 need_account = true;
8919 if (level == 1 &&
8920 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8921 goto skip;
8922
8923 if (!wc->update_ref ||
8924 generation <= root->root_key.offset)
8925 goto skip;
8926
8927 btrfs_node_key_to_cpu(path->nodes[level], &key,
8928 path->slots[level]);
8929 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
8930 if (ret < 0)
8931 goto skip;
8932
8933 wc->stage = UPDATE_BACKREF;
8934 wc->shared_level = level - 1;
8935 }
8936 } else {
8937 if (level == 1 &&
8938 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8939 goto skip;
8940 }
8941
8942 if (!btrfs_buffer_uptodate(next, generation, 0)) {
8943 btrfs_tree_unlock(next);
8944 free_extent_buffer(next);
8945 next = NULL;
8946 *lookup_info = 1;
8947 }
8948
8949 if (!next) {
8950 if (reada && level == 1)
8951 reada_walk_down(trans, root, wc, path);
8952 next = read_tree_block(root, bytenr, generation);
8953 if (IS_ERR(next)) {
8954 return PTR_ERR(next);
8955 } else if (!extent_buffer_uptodate(next)) {
8956 free_extent_buffer(next);
8957 return -EIO;
8958 }
8959 btrfs_tree_lock(next);
8960 btrfs_set_lock_blocking(next);
8961 }
8962
8963 level--;
8964 ASSERT(level == btrfs_header_level(next));
8965 if (level != btrfs_header_level(next)) {
8966 btrfs_err(root->fs_info, "mismatched level");
8967 ret = -EIO;
8968 goto out_unlock;
8969 }
8970 path->nodes[level] = next;
8971 path->slots[level] = 0;
8972 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8973 wc->level = level;
8974 if (wc->level == 1)
8975 wc->reada_slot = 0;
8976 return 0;
8977skip:
8978 wc->refs[level - 1] = 0;
8979 wc->flags[level - 1] = 0;
8980 if (wc->stage == DROP_REFERENCE) {
8981 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8982 parent = path->nodes[level]->start;
8983 } else {
8984 ASSERT(root->root_key.objectid ==
8985 btrfs_header_owner(path->nodes[level]));
8986 if (root->root_key.objectid !=
8987 btrfs_header_owner(path->nodes[level])) {
8988 btrfs_err(root->fs_info,
8989 "mismatched block owner");
8990 ret = -EIO;
8991 goto out_unlock;
8992 }
8993 parent = 0;
8994 }
8995
8996 if (need_account) {
8997 ret = account_shared_subtree(trans, root, next,
8998 generation, level - 1);
8999 if (ret) {
9000 btrfs_err_rl(root->fs_info,
9001 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
9002 ret);
9003 }
9004 }
9005 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
9006 root->root_key.objectid, level - 1, 0);
9007 if (ret)
9008 goto out_unlock;
9009 }
9010
9011 *lookup_info = 1;
9012 ret = 1;
9013
9014out_unlock:
9015 btrfs_tree_unlock(next);
9016 free_extent_buffer(next);
9017
9018 return ret;
9019}
9020
/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
9033static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
9034 struct btrfs_root *root,
9035 struct btrfs_path *path,
9036 struct walk_control *wc)
9037{
9038 int ret;
9039 int level = wc->level;
9040 struct extent_buffer *eb = path->nodes[level];
9041 u64 parent = 0;
9042
9043 if (wc->stage == UPDATE_BACKREF) {
9044 BUG_ON(wc->shared_level < level);
9045 if (level < wc->shared_level)
9046 goto out;
9047
9048 ret = find_next_key(path, level + 1, &wc->update_progress);
9049 if (ret > 0)
9050 wc->update_ref = 0;
9051
9052 wc->stage = DROP_REFERENCE;
9053 wc->shared_level = -1;
9054 path->slots[level] = 0;
9055
 /*
 * check reference count again if the block isn't locked.
 * we should start walking down the tree again if the reference
 * count is one.
 */
9061 if (!path->locks[level]) {
9062 BUG_ON(level == 0);
9063 btrfs_tree_lock(eb);
9064 btrfs_set_lock_blocking(eb);
9065 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9066
9067 ret = btrfs_lookup_extent_info(trans, root,
9068 eb->start, level, 1,
9069 &wc->refs[level],
9070 &wc->flags[level]);
9071 if (ret < 0) {
9072 btrfs_tree_unlock_rw(eb, path->locks[level]);
9073 path->locks[level] = 0;
9074 return ret;
9075 }
9076 BUG_ON(wc->refs[level] == 0);
9077 if (wc->refs[level] == 1) {
9078 btrfs_tree_unlock_rw(eb, path->locks[level]);
9079 path->locks[level] = 0;
9080 return 1;
9081 }
9082 }
9083 }
9084
	/* wc->stage == DROP_REFERENCE */
9086 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
9087
9088 if (wc->refs[level] == 1) {
9089 if (level == 0) {
9090 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9091 ret = btrfs_dec_ref(trans, root, eb, 1);
9092 else
9093 ret = btrfs_dec_ref(trans, root, eb, 0);
9094 BUG_ON(ret);
9095 ret = account_leaf_items(trans, root, eb);
9096 if (ret) {
9097 btrfs_err_rl(root->fs_info,
9098 "error %d accounting leaf items. Quota is out of sync, rescan required.",
9099 ret);
9100 }
9101 }
9102
9103 if (!path->locks[level] &&
9104 btrfs_header_generation(eb) == trans->transid) {
9105 btrfs_tree_lock(eb);
9106 btrfs_set_lock_blocking(eb);
9107 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9108 }
9109 clean_tree_block(trans, root->fs_info, eb);
9110 }
9111
9112 if (eb == root->node) {
9113 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9114 parent = eb->start;
9115 else
9116 BUG_ON(root->root_key.objectid !=
9117 btrfs_header_owner(eb));
9118 } else {
9119 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9120 parent = path->nodes[level + 1]->start;
9121 else
9122 BUG_ON(root->root_key.objectid !=
9123 btrfs_header_owner(path->nodes[level + 1]));
9124 }
9125
9126 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
9127out:
9128 wc->refs[level] = 0;
9129 wc->flags[level] = 0;
9130 return 0;
9131}
9132
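/*
 * walk down the tree from wc->level towards the leaves, processing the
 * current block with walk_down_proc() and using do_walk_down() to decide
 * whether each child must be descended into or can be skipped (with its
 * reference dropped).  Stops at a leaf or when the current node has no
 * more slots to process.
 */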
9133static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
9134 struct btrfs_root *root,
9135 struct btrfs_path *path,
9136 struct walk_control *wc)
9137{
9138 int level = wc->level;
9139 int lookup_info = 1;
9140 int ret;
9141
9142 while (level >= 0) {
9143 ret = walk_down_proc(trans, root, path, wc, lookup_info);
9144 if (ret > 0)
9145 break;
9146
9147 if (level == 0)
9148 break;
9149
9150 if (path->slots[level] >=
9151 btrfs_header_nritems(path->nodes[level]))
9152 break;
9153
9154 ret = do_walk_down(trans, root, path, wc, &lookup_info);
9155 if (ret > 0) {
9156 path->slots[level]++;
9157 continue;
9158 } else if (ret < 0)
9159 return ret;
9160 level = wc->level;
9161 }
9162 return 0;
9163}
9164
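/*
 * walk back up the tree, advancing to the next slot of each node on the
 * path.  Returns 0 when there is another slot to walk down into, and 1
 * once everything below max_level has been processed.
 */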
9165static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
9166 struct btrfs_root *root,
9167 struct btrfs_path *path,
9168 struct walk_control *wc, int max_level)
9169{
9170 int level = wc->level;
9171 int ret;
9172
9173 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
9174 while (level < max_level && path->nodes[level]) {
9175 wc->level = level;
9176 if (path->slots[level] + 1 <
9177 btrfs_header_nritems(path->nodes[level])) {
9178 path->slots[level]++;
9179 return 0;
9180 } else {
9181 ret = walk_up_proc(trans, root, path, wc);
9182 if (ret > 0)
9183 return 0;
9184
9185 if (path->locks[level]) {
9186 btrfs_tree_unlock_rw(path->nodes[level],
9187 path->locks[level]);
9188 path->locks[level] = 0;
9189 }
9190 free_extent_buffer(path->nodes[level]);
9191 path->nodes[level] = NULL;
9192 level++;
9193 }
9194 }
9195 return 1;
9196}
9197

/*
 * drop a subvolume tree.
 *
 * this function traverses the tree, freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN
 */
9211int btrfs_drop_snapshot(struct btrfs_root *root,
9212 struct btrfs_block_rsv *block_rsv, int update_ref,
9213 int for_reloc)
9214{
9215 struct btrfs_path *path;
9216 struct btrfs_trans_handle *trans;
9217 struct btrfs_root *tree_root = root->fs_info->tree_root;
9218 struct btrfs_root_item *root_item = &root->root_item;
9219 struct walk_control *wc;
9220 struct btrfs_key key;
9221 int err = 0;
9222 int ret;
9223 int level;
9224 bool root_dropped = false;
9225
9226 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
9227
9228 path = btrfs_alloc_path();
9229 if (!path) {
9230 err = -ENOMEM;
9231 goto out;
9232 }
9233
9234 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9235 if (!wc) {
9236 btrfs_free_path(path);
9237 err = -ENOMEM;
9238 goto out;
9239 }
9240
9241 trans = btrfs_start_transaction(tree_root, 0);
9242 if (IS_ERR(trans)) {
9243 err = PTR_ERR(trans);
9244 goto out_free;
9245 }
9246
9247 if (block_rsv)
9248 trans->block_rsv = block_rsv;
9249
9250 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
9251 level = btrfs_header_level(root->node);
9252 path->nodes[level] = btrfs_lock_root_node(root);
9253 btrfs_set_lock_blocking(path->nodes[level]);
9254 path->slots[level] = 0;
9255 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9256 memset(&wc->update_progress, 0,
9257 sizeof(wc->update_progress));
9258 } else {
9259 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
9260 memcpy(&wc->update_progress, &key,
9261 sizeof(wc->update_progress));
9262
9263 level = root_item->drop_level;
9264 BUG_ON(level == 0);
9265 path->lowest_level = level;
9266 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9267 path->lowest_level = 0;
9268 if (ret < 0) {
9269 err = ret;
9270 goto out_end_trans;
9271 }
9272 WARN_ON(ret > 0);
9273
		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
9278 btrfs_unlock_up_safe(path, 0);
9279
9280 level = btrfs_header_level(root->node);
9281 while (1) {
9282 btrfs_tree_lock(path->nodes[level]);
9283 btrfs_set_lock_blocking(path->nodes[level]);
9284 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9285
9286 ret = btrfs_lookup_extent_info(trans, root,
9287 path->nodes[level]->start,
9288 level, 1, &wc->refs[level],
9289 &wc->flags[level]);
9290 if (ret < 0) {
9291 err = ret;
9292 goto out_end_trans;
9293 }
9294 BUG_ON(wc->refs[level] == 0);
9295
9296 if (level == root_item->drop_level)
9297 break;
9298
9299 btrfs_tree_unlock(path->nodes[level]);
9300 path->locks[level] = 0;
9301 WARN_ON(wc->refs[level] != 1);
9302 level--;
9303 }
9304 }
9305
9306 wc->level = level;
9307 wc->shared_level = -1;
9308 wc->stage = DROP_REFERENCE;
9309 wc->update_ref = update_ref;
9310 wc->keep_locks = 0;
9311 wc->for_reloc = for_reloc;
9312 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
9313
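	/*
	 * Alternate walking down to the leaves and back up, dropping
	 * references as we go.  walk_up_tree() returning > 0 means the
	 * whole tree has been processed.
	 */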
9314 while (1) {
9315
9316 ret = walk_down_tree(trans, root, path, wc);
9317 if (ret < 0) {
9318 err = ret;
9319 break;
9320 }
9321
9322 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9323 if (ret < 0) {
9324 err = ret;
9325 break;
9326 }
9327
9328 if (ret > 0) {
9329 BUG_ON(wc->stage != DROP_REFERENCE);
9330 break;
9331 }
9332
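		/*
		 * Record how far we've walked in the root item, so an
		 * interrupted drop can resume from drop_progress and
		 * drop_level after a remount.
		 */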
9333 if (wc->stage == DROP_REFERENCE) {
9334 level = wc->level;
9335 btrfs_node_key(path->nodes[level],
9336 &root_item->drop_progress,
9337 path->slots[level]);
9338 root_item->drop_level = level;
9339 }
9340
9341 BUG_ON(wc->level == 0);
9342 if (btrfs_should_end_transaction(trans, tree_root) ||
9343 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
9344 ret = btrfs_update_root(trans, tree_root,
9345 &root->root_key,
9346 root_item);
9347 if (ret) {
9348 btrfs_abort_transaction(trans, tree_root, ret);
9349 err = ret;
9350 goto out_end_trans;
9351 }
9352
9353 btrfs_end_transaction_throttle(trans, tree_root);
9354 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
9355 pr_debug("BTRFS: drop snapshot early exit\n");
9356 err = -EAGAIN;
9357 goto out_free;
9358 }
9359
9360 trans = btrfs_start_transaction(tree_root, 0);
9361 if (IS_ERR(trans)) {
9362 err = PTR_ERR(trans);
9363 goto out_free;
9364 }
9365 if (block_rsv)
9366 trans->block_rsv = block_rsv;
9367 }
9368 }
9369 btrfs_release_path(path);
9370 if (err)
9371 goto out_end_trans;
9372
9373 ret = btrfs_del_root(trans, tree_root, &root->root_key);
9374 if (ret) {
9375 btrfs_abort_transaction(trans, tree_root, ret);
9376 goto out_end_trans;
9377 }
9378
9379 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9380 ret = btrfs_find_root(tree_root, &root->root_key, path,
9381 NULL, NULL);
9382 if (ret < 0) {
9383 btrfs_abort_transaction(trans, tree_root, ret);
9384 err = ret;
9385 goto out_end_trans;
9386 } else if (ret > 0) {
			/*
			 * if we fail to delete the orphan item this time
			 * around, it'll get picked up the next time.
			 *
			 * The most common failure here is just -ENOENT.
			 */
9392 btrfs_del_orphan_item(trans, tree_root,
9393 root->root_key.objectid);
9394 }
9395 }
9396
9397 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9398 btrfs_add_dropped_root(trans, root);
9399 } else {
9400 free_extent_buffer(root->node);
9401 free_extent_buffer(root->commit_root);
9402 btrfs_put_fs_root(root);
9403 }
9404 root_dropped = true;
9405out_end_trans:
9406 btrfs_end_transaction_throttle(trans, tree_root);
9407out_free:
9408 kfree(wc);
9409 btrfs_free_path(path);
9410out:
	/*
	 * So if we need to stop dropping the snapshot for whatever reason we
	 * need to make sure to add it back to the dead root list so that we
	 * keep trying to do the work later.  This also cleans up roots if we
	 * don't have it in the radix (like when we recover after a power fail
	 * or unmount) so we don't leak memory.
	 */
	if (!for_reloc && !root_dropped)
9419 btrfs_add_dead_root(root);
9420 if (err && err != -EAGAIN)
9421 btrfs_handle_fs_error(root->fs_info, err, NULL);
9422 return err;
9423}
9424

/*
 * drop subtree rooted at tree block 'node'.
 *
 * NOTE: this function will unlock and release tree block 'node'.
 * only used by relocation code.
 */
9431int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9432 struct btrfs_root *root,
9433 struct extent_buffer *node,
9434 struct extent_buffer *parent)
9435{
9436 struct btrfs_path *path;
9437 struct walk_control *wc;
9438 int level;
9439 int parent_level;
9440 int ret = 0;
9441 int wret;
9442
9443 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9444
9445 path = btrfs_alloc_path();
9446 if (!path)
9447 return -ENOMEM;
9448
9449 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9450 if (!wc) {
9451 btrfs_free_path(path);
9452 return -ENOMEM;
9453 }
9454
9455 btrfs_assert_tree_locked(parent);
9456 parent_level = btrfs_header_level(parent);
9457 extent_buffer_get(parent);
9458 path->nodes[parent_level] = parent;
9459 path->slots[parent_level] = btrfs_header_nritems(parent);
9460
9461 btrfs_assert_tree_locked(node);
9462 level = btrfs_header_level(node);
9463 path->nodes[level] = node;
9464 path->slots[level] = 0;
9465 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9466
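	/*
	 * Seed the walk control as if the parent had already been
	 * processed: one reference with FULL_BACKREF set, so the walk
	 * stays within the subtree rooted at 'node'.
	 */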
9467 wc->refs[parent_level] = 1;
9468 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9469 wc->level = level;
9470 wc->shared_level = -1;
9471 wc->stage = DROP_REFERENCE;
9472 wc->update_ref = 0;
9473 wc->keep_locks = 1;
9474 wc->for_reloc = 1;
9475 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
9476
9477 while (1) {
9478 wret = walk_down_tree(trans, root, path, wc);
9479 if (wret < 0) {
9480 ret = wret;
9481 break;
9482 }
9483
9484 wret = walk_up_tree(trans, root, path, wc, parent_level);
9485 if (wret < 0)
9486 ret = wret;
9487 if (wret != 0)
9488 break;
9489 }
9490
9491 kfree(wc);
9492 btrfs_free_path(path);
9493 return ret;
9494}
9495
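/*
 * helper to compute the block group flags to use after a balance or
 * device removal: pick the restripe target if one is set, otherwise
 * degrade to a profile the current number of rw devices can sustain
 * (e.g. raid1 -> dup on a single device).
 */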
9496static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
9497{
9498 u64 num_devices;
9499 u64 stripped;
9500
	/*
	 * if restripe for this chunk_type is on, pick target profile and
	 * return, otherwise do the usual balance
	 */
9505 stripped = get_restripe_target(root->fs_info, flags);
9506 if (stripped)
9507 return extended_to_chunk(stripped);
9508
9509 num_devices = root->fs_info->fs_devices->rw_devices;
9510
9511 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9512 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9513 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9514
9515 if (num_devices == 1) {
9516 stripped |= BTRFS_BLOCK_GROUP_DUP;
9517 stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
9520 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9521 return stripped;

		/* turn mirroring into duplication */
9524 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9525 BTRFS_BLOCK_GROUP_RAID10))
9526 return stripped | BTRFS_BLOCK_GROUP_DUP;
9527 } else {
		/* they already had raid on here, just return */
9529 if (flags & stripped)
9530 return flags;
9531
9532 stripped |= BTRFS_BLOCK_GROUP_DUP;
9533 stripped = flags & ~stripped;

		/* switch duplicated blocks with raid1 */
9536 if (flags & BTRFS_BLOCK_GROUP_DUP)
9537 return stripped | BTRFS_BLOCK_GROUP_RAID1;

		/* this is drive concat, leave it alone */
9540 }
9541
9542 return flags;
9543}
9544
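/*
 * mark a block group read-only, accounting its unused space as
 * bytes_readonly; fails with -ENOSPC if doing so would leave the
 * space_info without room for further allocations (unless forced).
 */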
9545static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9546{
9547 struct btrfs_space_info *sinfo = cache->space_info;
9548 u64 num_bytes;
9549 u64 min_allocable_bytes;
9550 int ret = -ENOSPC;
9551
	/*
	 * We need some metadata space and system metadata space for
	 * allocating chunks in some corner cases, so keep a minimum
	 * reserve unless the caller forces the block group read-only.
	 */
9557 if ((sinfo->flags &
9558 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9559 !force)
9560 min_allocable_bytes = SZ_1M;
9561 else
9562 min_allocable_bytes = 0;
9563
9564 spin_lock(&sinfo->lock);
9565 spin_lock(&cache->lock);
9566
9567 if (cache->ro) {
9568 cache->ro++;
9569 ret = 0;
9570 goto out;
9571 }
9572
9573 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9574 cache->bytes_super - btrfs_block_group_used(&cache->item);
9575
9576 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
9577 sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
9578 min_allocable_bytes <= sinfo->total_bytes) {
9579 sinfo->bytes_readonly += num_bytes;
9580 cache->ro++;
9581 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9582 ret = 0;
9583 }
9584out:
9585 spin_unlock(&cache->lock);
9586 spin_unlock(&sinfo->lock);
9587 return ret;
9588}
9589
9590int btrfs_inc_block_group_ro(struct btrfs_root *root,
9591 struct btrfs_block_group_cache *cache)
9592
9593{
9594 struct btrfs_trans_handle *trans;
9595 u64 alloc_flags;
9596 int ret;
9597
9598again:
9599 trans = btrfs_join_transaction(root);
9600 if (IS_ERR(trans))
9601 return PTR_ERR(trans);
9602
	/*
	 * we're not allowed to set block groups readonly after the dirty
	 * block groups cache has started writing.  If it already started,
	 * back off and let this transaction commit.
	 */
9608 mutex_lock(&root->fs_info->ro_block_group_mutex);
9609 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9610 u64 transid = trans->transid;
9611
9612 mutex_unlock(&root->fs_info->ro_block_group_mutex);
9613 btrfs_end_transaction(trans, root);
9614
9615 ret = btrfs_wait_for_commit(root, transid);
9616 if (ret)
9617 return ret;
9618 goto again;
9619 }
9620
	/*
	 * if we are changing raid levels, try to allocate a corresponding
	 * block group with the new raid level.
	 */
9625 alloc_flags = update_block_group_flags(root, cache->flags);
9626 if (alloc_flags != cache->flags) {
9627 ret = do_chunk_alloc(trans, root, alloc_flags,
9628 CHUNK_ALLOC_FORCE);
		/*
		 * ENOSPC is allowed here, we may have enough space
		 * already allocated at the new raid level to carry on.
		 */
9634 if (ret == -ENOSPC)
9635 ret = 0;
9636 if (ret < 0)
9637 goto out;
9638 }
9639
9640 ret = inc_block_group_ro(cache, 0);
9641 if (!ret)
9642 goto out;
9643 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
9644 ret = do_chunk_alloc(trans, root, alloc_flags,
9645 CHUNK_ALLOC_FORCE);
9646 if (ret < 0)
9647 goto out;
9648 ret = inc_block_group_ro(cache, 0);
9649out:
9650 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9651 alloc_flags = update_block_group_flags(root, cache->flags);
9652 lock_chunks(root->fs_info->chunk_root);
9653 check_system_chunk(trans, root, alloc_flags);
9654 unlock_chunks(root->fs_info->chunk_root);
9655 }
9656 mutex_unlock(&root->fs_info->ro_block_group_mutex);
9657
9658 btrfs_end_transaction(trans, root);
9659 return ret;
9660}
9661
9662int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
9663 struct btrfs_root *root, u64 type)
9664{
9665 u64 alloc_flags = get_alloc_profile(root, type);
9666 return do_chunk_alloc(trans, root, alloc_flags,
9667 CHUNK_ALLOC_FORCE);
9668}
9669
/*
 * helper to account the unused space of all the readonly block groups in
 * the space_info. takes mirrors into account.
 */
9674u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9675{
9676 struct btrfs_block_group_cache *block_group;
9677 u64 free_bytes = 0;
9678 int factor;
9679
	/* It's df, we don't care if it's racy */
9681 if (list_empty(&sinfo->ro_bgs))
9682 return 0;
9683
9684 spin_lock(&sinfo->lock);
9685 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9686 spin_lock(&block_group->lock);
9687
9688 if (!block_group->ro) {
9689 spin_unlock(&block_group->lock);
9690 continue;
9691 }
9692
9693 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
9694 BTRFS_BLOCK_GROUP_RAID10 |
9695 BTRFS_BLOCK_GROUP_DUP))
9696 factor = 2;
9697 else
9698 factor = 1;
9699
9700 free_bytes += (block_group->key.offset -
9701 btrfs_block_group_used(&block_group->item)) *
9702 factor;
9703
9704 spin_unlock(&block_group->lock);
9705 }
9706 spin_unlock(&sinfo->lock);
9707
9708 return free_bytes;
9709}
9710
9711void btrfs_dec_block_group_ro(struct btrfs_root *root,
9712 struct btrfs_block_group_cache *cache)
9713{
9714 struct btrfs_space_info *sinfo = cache->space_info;
9715 u64 num_bytes;
9716
9717 BUG_ON(!cache->ro);
9718
9719 spin_lock(&sinfo->lock);
9720 spin_lock(&cache->lock);
9721 if (!--cache->ro) {
9722 num_bytes = cache->key.offset - cache->reserved -
9723 cache->pinned - cache->bytes_super -
9724 btrfs_block_group_used(&cache->item);
9725 sinfo->bytes_readonly -= num_bytes;
9726 list_del_init(&cache->ro_list);
9727 }
9728 spin_unlock(&cache->lock);
9729 spin_unlock(&sinfo->lock);
9730}
9731
/*
 * checks to see if it's even possible to relocate this block group.
 *
 * @return - -1 if it's not a good idea to relocate this block group, 0 if
 * it's ok to go ahead and try.
 */
9738int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
9739{
9740 struct btrfs_block_group_cache *block_group;
9741 struct btrfs_space_info *space_info;
9742 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
9743 struct btrfs_device *device;
9744 struct btrfs_trans_handle *trans;
9745 u64 min_free;
9746 u64 dev_min = 1;
9747 u64 dev_nr = 0;
9748 u64 target;
9749 int debug;
9750 int index;
9751 int full = 0;
9752 int ret = 0;
9753
9754 debug = btrfs_test_opt(root->fs_info, ENOSPC_DEBUG);
9755
9756 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
9757
	/* odd, couldn't find the block group, leave it alone */
9759 if (!block_group) {
9760 if (debug)
9761 btrfs_warn(root->fs_info,
9762 "can't find block group for bytenr %llu",
9763 bytenr);
9764 return -1;
9765 }
9766
9767 min_free = btrfs_block_group_used(&block_group->item);
9768
	/* no bytes used, we're good */
9770 if (!min_free)
9771 goto out;
9772
9773 space_info = block_group->space_info;
9774 spin_lock(&space_info->lock);
9775
9776 full = space_info->full;
9777
	/*
	 * if this is the last block group we have in this space, we can't
	 * relocate it unless we're able to allocate a new chunk below.
	 *
	 * Otherwise, we need to make sure we have room in the space to handle
	 * all of the extents from this block group.  If we can, we're good.
	 */
9785 if ((space_info->total_bytes != block_group->key.offset) &&
9786 (space_info->bytes_used + space_info->bytes_reserved +
9787 space_info->bytes_pinned + space_info->bytes_readonly +
9788 min_free < space_info->total_bytes)) {
9789 spin_unlock(&space_info->lock);
9790 goto out;
9791 }
9792 spin_unlock(&space_info->lock);
9793
	/*
	 * ok we don't have enough space, but maybe we have free space on our
	 * devices to allocate new chunks for relocation, so loop through our
	 * alloc devices and guess if we have enough space.  if this block
	 * group is going to be restriped, run checks against the target
	 * profile instead of the current one.
	 */
9801 ret = -1;

	/*
	 * index:
	 *      0: raid10
	 *      1: raid1
	 *      2: dup
	 *      3: raid0
	 *      4: single
	 */
9811 target = get_restripe_target(root->fs_info, block_group->flags);
9812 if (target) {
9813 index = __get_raid_index(extended_to_chunk(target));
9814 } else {
		/*
		 * this is just a balance, so if we were marked as full
		 * we know there is no space for a new chunk
		 */
9819 if (full) {
9820 if (debug)
9821 btrfs_warn(root->fs_info,
9822 "no space to alloc new chunk for block group %llu",
9823 block_group->key.objectid);
9824 goto out;
9825 }
9826
9827 index = get_block_group_index(block_group);
9828 }
9829
9830 if (index == BTRFS_RAID_RAID10) {
9831 dev_min = 4;
		/* Divide by 2 */
9833 min_free >>= 1;
9834 } else if (index == BTRFS_RAID_RAID1) {
9835 dev_min = 2;
9836 } else if (index == BTRFS_RAID_DUP) {
		/* Multiply by 2 */
9838 min_free <<= 1;
9839 } else if (index == BTRFS_RAID_RAID0) {
9840 dev_min = fs_devices->rw_devices;
9841 min_free = div64_u64(min_free, dev_min);
9842 }
9843
	/* We need to do this so that we can look at pending chunks */
9845 trans = btrfs_join_transaction(root);
9846 if (IS_ERR(trans)) {
9847 ret = PTR_ERR(trans);
9848 goto out;
9849 }
9850
9851 mutex_lock(&root->fs_info->chunk_mutex);
9852 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9853 u64 dev_offset;
9854
		/*
		 * check to make sure we can actually find a chunk with enough
		 * space to fit our block group in.
		 */
9859 if (device->total_bytes > device->bytes_used + min_free &&
9860 !device->is_tgtdev_for_dev_replace) {
9861 ret = find_free_dev_extent(trans, device, min_free,
9862 &dev_offset, NULL);
9863 if (!ret)
9864 dev_nr++;
9865
9866 if (dev_nr >= dev_min)
9867 break;
9868
9869 ret = -1;
9870 }
9871 }
9872 if (debug && ret == -1)
9873 btrfs_warn(root->fs_info,
9874 "no space to allocate a new chunk for block group %llu",
9875 block_group->key.objectid);
9876 mutex_unlock(&root->fs_info->chunk_mutex);
9877 btrfs_end_transaction(trans, root);
9878out:
9879 btrfs_put_block_group(block_group);
9880 return ret;
9881}
9882
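/*
 * find the first block group item at or after *key, verifying that a
 * matching chunk mapping exists for it in the mapping tree.
 */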
9883static int find_first_block_group(struct btrfs_root *root,
9884 struct btrfs_path *path, struct btrfs_key *key)
9885{
9886 int ret = 0;
9887 struct btrfs_key found_key;
9888 struct extent_buffer *leaf;
9889 int slot;
9890
9891 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9892 if (ret < 0)
9893 goto out;
9894
9895 while (1) {
9896 slot = path->slots[0];
9897 leaf = path->nodes[0];
9898 if (slot >= btrfs_header_nritems(leaf)) {
9899 ret = btrfs_next_leaf(root, path);
9900 if (ret == 0)
9901 continue;
9902 if (ret < 0)
9903 goto out;
9904 break;
9905 }
9906 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9907
9908 if (found_key.objectid >= key->objectid &&
9909 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9910 struct extent_map_tree *em_tree;
9911 struct extent_map *em;
9912
9913 em_tree = &root->fs_info->mapping_tree.map_tree;
9914 read_lock(&em_tree->lock);
9915 em = lookup_extent_mapping(em_tree, found_key.objectid,
9916 found_key.offset);
9917 read_unlock(&em_tree->lock);
9918 if (!em) {
9919 btrfs_err(root->fs_info,
9920 "logical %llu len %llu found bg but no related chunk",
9921 found_key.objectid, found_key.offset);
9922 ret = -ENOENT;
9923 } else {
9924 ret = 0;
9925 }
9926 free_extent_map(em);
9927 goto out;
9928 }
9929 path->slots[0]++;
9930 }
9931out:
9932 return ret;
9933}
9934
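/*
 * release the cached free space inode held by every block group; used on
 * unmount so these inodes don't hold references past the point where the
 * filesystem is torn down.
 */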
9935void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
9936{
9937 struct btrfs_block_group_cache *block_group;
9938 u64 last = 0;
9939
9940 while (1) {
9941 struct inode *inode;
9942
9943 block_group = btrfs_lookup_first_block_group(info, last);
9944 while (block_group) {
9945 spin_lock(&block_group->lock);
9946 if (block_group->iref)
9947 break;
9948 spin_unlock(&block_group->lock);
9949 block_group = next_block_group(info->tree_root,
9950 block_group);
9951 }
9952 if (!block_group) {
9953 if (last == 0)
9954 break;
9955 last = 0;
9956 continue;
9957 }
9958
9959 inode = block_group->inode;
9960 block_group->iref = 0;
9961 block_group->inode = NULL;
9962 spin_unlock(&block_group->lock);
9963 ASSERT(block_group->io_ctl.inode == NULL);
9964 iput(inode);
9965 last = block_group->key.objectid + block_group->key.offset;
9966 btrfs_put_block_group(block_group);
9967 }
9968}
9969
9970int btrfs_free_block_groups(struct btrfs_fs_info *info)
9971{
9972 struct btrfs_block_group_cache *block_group;
9973 struct btrfs_space_info *space_info;
9974 struct btrfs_caching_control *caching_ctl;
9975 struct rb_node *n;
9976
9977 down_write(&info->commit_root_sem);
9978 while (!list_empty(&info->caching_block_groups)) {
9979 caching_ctl = list_entry(info->caching_block_groups.next,
9980 struct btrfs_caching_control, list);
9981 list_del(&caching_ctl->list);
9982 put_caching_control(caching_ctl);
9983 }
9984 up_write(&info->commit_root_sem);
9985
9986 spin_lock(&info->unused_bgs_lock);
9987 while (!list_empty(&info->unused_bgs)) {
9988 block_group = list_first_entry(&info->unused_bgs,
9989 struct btrfs_block_group_cache,
9990 bg_list);
9991 list_del_init(&block_group->bg_list);
9992 btrfs_put_block_group(block_group);
9993 }
9994 spin_unlock(&info->unused_bgs_lock);
9995
9996 spin_lock(&info->block_group_cache_lock);
9997 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
9998 block_group = rb_entry(n, struct btrfs_block_group_cache,
9999 cache_node);
10000 rb_erase(&block_group->cache_node,
10001 &info->block_group_cache_tree);
10002 RB_CLEAR_NODE(&block_group->cache_node);
10003 spin_unlock(&info->block_group_cache_lock);
10004
10005 down_write(&block_group->space_info->groups_sem);
10006 list_del(&block_group->list);
10007 up_write(&block_group->space_info->groups_sem);
10008
10009 if (block_group->cached == BTRFS_CACHE_STARTED)
10010 wait_block_group_cache_done(block_group);
10011
		/*
		 * We haven't cached this block group, which means we could
		 * possibly have excluded extents on this block group.
		 */
10016 if (block_group->cached == BTRFS_CACHE_NO ||
10017 block_group->cached == BTRFS_CACHE_ERROR)
10018 free_excluded_extents(info->extent_root, block_group);
10019
10020 btrfs_remove_free_space_cache(block_group);
10021 ASSERT(list_empty(&block_group->dirty_list));
10022 ASSERT(list_empty(&block_group->io_list));
10023 ASSERT(list_empty(&block_group->bg_list));
10024 ASSERT(atomic_read(&block_group->count) == 1);
10025 btrfs_put_block_group(block_group);
10026
10027 spin_lock(&info->block_group_cache_lock);
10028 }
10029 spin_unlock(&info->block_group_cache_lock);
10030
	/*
	 * now that all the block groups are freed, go through and free all the
	 * space_info structs.  This is only called during the final stages of
	 * unmount, and so we know nobody is using them.  We call
	 * synchronize_rcu() once before we start, just to be on the safe side.
	 */
10037 synchronize_rcu();
10038
10039 release_global_block_rsv(info);
10040
10041 while (!list_empty(&info->space_info)) {
10042 int i;
10043
10044 space_info = list_entry(info->space_info.next,
10045 struct btrfs_space_info,
10046 list);
10047
		/*
		 * Do not hide this behind enospc_debug, this is actually
		 * important and indicates a real bug if this happens.
		 */
10052 if (WARN_ON(space_info->bytes_pinned > 0 ||
10053 space_info->bytes_reserved > 0 ||
10054 space_info->bytes_may_use > 0))
10055 dump_space_info(space_info, 0, 0);
10056 list_del(&space_info->list);
10057 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
10058 struct kobject *kobj;
10059 kobj = space_info->block_group_kobjs[i];
10060 space_info->block_group_kobjs[i] = NULL;
10061 if (kobj) {
10062 kobject_del(kobj);
10063 kobject_put(kobj);
10064 }
10065 }
10066 kobject_del(&space_info->kobj);
10067 kobject_put(&space_info->kobj);
10068 }
10069 return 0;
10070}
10071
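/*
 * add the block group to its space_info's list for the matching raid
 * index; the first group of a given raid type also gets a sysfs kobject
 * published under the space_info.
 */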
10072static void __link_block_group(struct btrfs_space_info *space_info,
10073 struct btrfs_block_group_cache *cache)
10074{
10075 int index = get_block_group_index(cache);
10076 bool first = false;
10077
10078 down_write(&space_info->groups_sem);
10079 if (list_empty(&space_info->block_groups[index]))
10080 first = true;
10081 list_add_tail(&cache->list, &space_info->block_groups[index]);
10082 up_write(&space_info->groups_sem);
10083
10084 if (first) {
10085 struct raid_kobject *rkobj;
10086 int ret;
10087
10088 rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
10089 if (!rkobj)
10090 goto out_err;
10091 rkobj->raid_type = index;
10092 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
10093 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
10094 "%s", get_raid_name(index));
10095 if (ret) {
10096 kobject_put(&rkobj->kobj);
10097 goto out_err;
10098 }
10099 space_info->block_group_kobjs[index] = &rkobj->kobj;
10100 }
10101
10102 return;
10103out_err:
10104 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
10105}
10106
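/*
 * allocate and initialize an in-memory block group descriptor for the
 * range [start, start + size); callers fill in the on-disk item and
 * insert the group into the block group cache rbtree.
 */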
10107static struct btrfs_block_group_cache *
10108btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
10109{
10110 struct btrfs_block_group_cache *cache;
10111
10112 cache = kzalloc(sizeof(*cache), GFP_NOFS);
10113 if (!cache)
10114 return NULL;
10115
10116 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
10117 GFP_NOFS);
10118 if (!cache->free_space_ctl) {
10119 kfree(cache);
10120 return NULL;
10121 }
10122
10123 cache->key.objectid = start;
10124 cache->key.offset = size;
10125 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10126
10127 cache->sectorsize = root->sectorsize;
10128 cache->fs_info = root->fs_info;
10129 cache->full_stripe_len = btrfs_full_stripe_len(root,
10130 &root->fs_info->mapping_tree,
10131 start);
10132 set_free_space_tree_thresholds(cache);
10133
10134 atomic_set(&cache->count, 1);
10135 spin_lock_init(&cache->lock);
10136 init_rwsem(&cache->data_rwsem);
10137 INIT_LIST_HEAD(&cache->list);
10138 INIT_LIST_HEAD(&cache->cluster_list);
10139 INIT_LIST_HEAD(&cache->bg_list);
10140 INIT_LIST_HEAD(&cache->ro_list);
10141 INIT_LIST_HEAD(&cache->dirty_list);
10142 INIT_LIST_HEAD(&cache->io_list);
10143 btrfs_init_free_space_ctl(cache);
10144 atomic_set(&cache->trimming, 0);
10145 mutex_init(&cache->free_space_lock);
10146
10147 return cache;
10148}
10149
10150int btrfs_read_block_groups(struct btrfs_root *root)
10151{
10152 struct btrfs_path *path;
10153 int ret;
10154 struct btrfs_block_group_cache *cache;
10155 struct btrfs_fs_info *info = root->fs_info;
10156 struct btrfs_space_info *space_info;
10157 struct btrfs_key key;
10158 struct btrfs_key found_key;
10159 struct extent_buffer *leaf;
10160 int need_clear = 0;
10161 u64 cache_gen;
10162 u64 feature;
10163 int mixed;
10164
10165 feature = btrfs_super_incompat_flags(info->super_copy);
10166 mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
10167
10168 root = info->extent_root;
10169 key.objectid = 0;
10170 key.offset = 0;
10171 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10172 path = btrfs_alloc_path();
10173 if (!path)
10174 return -ENOMEM;
10175 path->reada = READA_FORWARD;
10176
10177 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
10178 if (btrfs_test_opt(root->fs_info, SPACE_CACHE) &&
10179 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
10180 need_clear = 1;
10181 if (btrfs_test_opt(root->fs_info, CLEAR_CACHE))
10182 need_clear = 1;
10183
10184 while (1) {
10185 ret = find_first_block_group(root, path, &key);
10186 if (ret > 0)
10187 break;
10188 if (ret != 0)
10189 goto error;
10190
10191 leaf = path->nodes[0];
10192 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10193
10194 cache = btrfs_create_block_group_cache(root, found_key.objectid,
10195 found_key.offset);
10196 if (!cache) {
10197 ret = -ENOMEM;
10198 goto error;
10199 }
10200
10201 if (need_clear) {
			/*
			 * When we mount with an old space cache, we need to
			 * set BTRFS_DC_CLEAR and set dirty flag.
			 *
			 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
			 *    truncate the old free space cache inode and
			 *    setup a new one.
			 * b) Setting 'dirty flag' makes sure that we flush
			 *    the new space cache info onto disk.
			 */
10212 if (btrfs_test_opt(root->fs_info, SPACE_CACHE))
10213 cache->disk_cache_state = BTRFS_DC_CLEAR;
10214 }
10215
10216 read_extent_buffer(leaf, &cache->item,
10217 btrfs_item_ptr_offset(leaf, path->slots[0]),
10218 sizeof(cache->item));
10219 cache->flags = btrfs_block_group_flags(&cache->item);
10220 if (!mixed &&
10221 ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
10222 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
10223 btrfs_err(info,
10224"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
10225 cache->key.objectid);
10226 ret = -EINVAL;
10227 goto error;
10228 }
10229
10230 key.objectid = found_key.objectid + found_key.offset;
10231 btrfs_release_path(path);
10232
		/*
		 * We need to exclude the super stripes now so that the space
		 * info has super bytes accounted for, otherwise we'll think
		 * we have more space than we actually do.
		 */
10238 ret = exclude_super_stripes(root, cache);
10239 if (ret) {
			/*
			 * We may have excluded something, so call this just in
			 * case.
			 */
10244 free_excluded_extents(root, cache);
10245 btrfs_put_block_group(cache);
10246 goto error;
10247 }
10248
		/*
		 * check for two cases, either we are full, and therefore
		 * don't need to bother with the caching work since we won't
		 * find any space, or we are empty, and we can just add all
		 * the space in and be done with it.  This saves us _a lot_ of
		 * time, particularly in the full case.
		 */
10256 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
10257 cache->last_byte_to_unpin = (u64)-1;
10258 cache->cached = BTRFS_CACHE_FINISHED;
10259 free_excluded_extents(root, cache);
10260 } else if (btrfs_block_group_used(&cache->item) == 0) {
10261 cache->last_byte_to_unpin = (u64)-1;
10262 cache->cached = BTRFS_CACHE_FINISHED;
10263 add_new_free_space(cache, root->fs_info,
10264 found_key.objectid,
10265 found_key.objectid +
10266 found_key.offset);
10267 free_excluded_extents(root, cache);
10268 }
10269
10270 ret = btrfs_add_block_group_cache(root->fs_info, cache);
10271 if (ret) {
10272 btrfs_remove_free_space_cache(cache);
10273 btrfs_put_block_group(cache);
10274 goto error;
10275 }
10276
10277 trace_btrfs_add_block_group(root->fs_info, cache, 0);
10278 ret = update_space_info(info, cache->flags, found_key.offset,
10279 btrfs_block_group_used(&cache->item),
10280 cache->bytes_super, &space_info);
10281 if (ret) {
10282 btrfs_remove_free_space_cache(cache);
10283 spin_lock(&info->block_group_cache_lock);
10284 rb_erase(&cache->cache_node,
10285 &info->block_group_cache_tree);
10286 RB_CLEAR_NODE(&cache->cache_node);
10287 spin_unlock(&info->block_group_cache_lock);
10288 btrfs_put_block_group(cache);
10289 goto error;
10290 }
10291
10292 cache->space_info = space_info;
10293
10294 __link_block_group(space_info, cache);
10295
10296 set_avail_alloc_bits(root->fs_info, cache->flags);
10297 if (btrfs_chunk_readonly(root, cache->key.objectid)) {
10298 inc_block_group_ro(cache, 1);
10299 } else if (btrfs_block_group_used(&cache->item) == 0) {
10300 spin_lock(&info->unused_bgs_lock);
			/* Should always be true but just in case. */
10302 if (list_empty(&cache->bg_list)) {
10303 btrfs_get_block_group(cache);
10304 list_add_tail(&cache->bg_list,
10305 &info->unused_bgs);
10306 }
10307 spin_unlock(&info->unused_bgs_lock);
10308 }
10309 }
10310
10311 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
10312 if (!(get_alloc_profile(root, space_info->flags) &
10313 (BTRFS_BLOCK_GROUP_RAID10 |
10314 BTRFS_BLOCK_GROUP_RAID1 |
10315 BTRFS_BLOCK_GROUP_RAID5 |
10316 BTRFS_BLOCK_GROUP_RAID6 |
10317 BTRFS_BLOCK_GROUP_DUP)))
10318 continue;
		/*
		 * avoid allocating from un-mirrored block group if there are
		 * mirrored block groups.
		 */
10323 list_for_each_entry(cache,
10324 &space_info->block_groups[BTRFS_RAID_RAID0],
10325 list)
10326 inc_block_group_ro(cache, 1);
10327 list_for_each_entry(cache,
10328 &space_info->block_groups[BTRFS_RAID_SINGLE],
10329 list)
10330 inc_block_group_ro(cache, 1);
10331 }
10332
10333 init_global_block_rsv(info);
10334 ret = 0;
10335error:
10336 btrfs_free_path(path);
10337 return ret;
10338}
10339
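/*
 * insert the block group items for all block groups created in the
 * running transaction (trans->new_bgs).  Doing this here, rather than at
 * chunk allocation time, keeps extent tree modifications out of the
 * allocation path.
 */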
10340void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
10341 struct btrfs_root *root)
10342{
10343 struct btrfs_block_group_cache *block_group, *tmp;
10344 struct btrfs_root *extent_root = root->fs_info->extent_root;
10345 struct btrfs_block_group_item item;
10346 struct btrfs_key key;
10347 int ret = 0;
10348 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
10349
10350 trans->can_flush_pending_bgs = false;
10351 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
10352 if (ret)
10353 goto next;
10354
10355 spin_lock(&block_group->lock);
10356 memcpy(&item, &block_group->item, sizeof(item));
10357 memcpy(&key, &block_group->key, sizeof(key));
10358 spin_unlock(&block_group->lock);
10359
10360 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10361 sizeof(item));
10362 if (ret)
10363 btrfs_abort_transaction(trans, extent_root, ret);
10364 ret = btrfs_finish_chunk_alloc(trans, extent_root,
10365 key.objectid, key.offset);
10366 if (ret)
10367 btrfs_abort_transaction(trans, extent_root, ret);
10368 add_block_group_free_space(trans, root->fs_info, block_group);
10369
10370next:
10371 list_del_init(&block_group->bg_list);
10372 }
10373 trans->can_flush_pending_bgs = can_flush_pending_bgs;
10374}
10375
10376int btrfs_make_block_group(struct btrfs_trans_handle *trans,
10377 struct btrfs_root *root, u64 bytes_used,
10378 u64 type, u64 chunk_objectid, u64 chunk_offset,
10379 u64 size)
10380{
10381 int ret;
10382 struct btrfs_root *extent_root;
10383 struct btrfs_block_group_cache *cache;
10384 extent_root = root->fs_info->extent_root;
10385
10386 btrfs_set_log_full_commit(root->fs_info, trans);
10387
10388 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
10389 if (!cache)
10390 return -ENOMEM;
10391
10392 btrfs_set_block_group_used(&cache->item, bytes_used);
10393 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
10394 btrfs_set_block_group_flags(&cache->item, type);
10395
10396 cache->flags = type;
10397 cache->last_byte_to_unpin = (u64)-1;
10398 cache->cached = BTRFS_CACHE_FINISHED;
10399 cache->needs_free_space = 1;
10400 ret = exclude_super_stripes(root, cache);
10401 if (ret) {
		/*
		 * We may have excluded something, so call this just in
		 * case.
		 */
10406 free_excluded_extents(root, cache);
10407 btrfs_put_block_group(cache);
10408 return ret;
10409 }
10410
10411 add_new_free_space(cache, root->fs_info, chunk_offset,
10412 chunk_offset + size);
10413
10414 free_excluded_extents(root, cache);
10415
10416#ifdef CONFIG_BTRFS_DEBUG
10417 if (btrfs_should_fragment_free_space(root, cache)) {
10418 u64 new_bytes_used = size - bytes_used;
10419
10420 bytes_used += new_bytes_used >> 1;
10421 fragment_free_space(root, cache);
10422 }
10423#endif
10424
	/*
	 * Call to ensure the corresponding space_info object is created and
	 * assigned to our block group, but don't update its counters just yet.
	 * We want our bg to be added to the rbtree with its ->space_info set.
	 */
10429 ret = update_space_info(root->fs_info, cache->flags, 0, 0, 0,
10430 &cache->space_info);
10431 if (ret) {
10432 btrfs_remove_free_space_cache(cache);
10433 btrfs_put_block_group(cache);
10434 return ret;
10435 }
10436
10437 ret = btrfs_add_block_group_cache(root->fs_info, cache);
10438 if (ret) {
10439 btrfs_remove_free_space_cache(cache);
10440 btrfs_put_block_group(cache);
10441 return ret;
10442 }
10443
	/*
	 * Now that our block group has its ->space_info set and is inserted in
	 * the rbtree, update the space info's counters.
	 */
10448 trace_btrfs_add_block_group(root->fs_info, cache, 1);
10449 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
10450 cache->bytes_super, &cache->space_info);
10451 if (ret) {
10452 btrfs_remove_free_space_cache(cache);
10453 spin_lock(&root->fs_info->block_group_cache_lock);
10454 rb_erase(&cache->cache_node,
10455 &root->fs_info->block_group_cache_tree);
10456 RB_CLEAR_NODE(&cache->cache_node);
10457 spin_unlock(&root->fs_info->block_group_cache_lock);
10458 btrfs_put_block_group(cache);
10459 return ret;
10460 }
10461 update_global_block_rsv(root->fs_info);
10462
10463 __link_block_group(cache->space_info, cache);
10464
10465 list_add_tail(&cache->bg_list, &trans->new_bgs);
10466
10467 set_avail_alloc_bits(extent_root->fs_info, type);
10468 return 0;
10469}
10470
10471static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10472{
10473 u64 extra_flags = chunk_to_extended(flags) &
10474 BTRFS_EXTENDED_PROFILE_MASK;
10475
10476 write_seqlock(&fs_info->profiles_lock);
10477 if (flags & BTRFS_BLOCK_GROUP_DATA)
10478 fs_info->avail_data_alloc_bits &= ~extra_flags;
10479 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10480 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10481 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10482 fs_info->avail_system_alloc_bits &= ~extra_flags;
10483 write_sequnlock(&fs_info->profiles_lock);
10484}
10485
10486int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10487 struct btrfs_root *root, u64 group_start,
10488 struct extent_map *em)
10489{
10490 struct btrfs_path *path;
10491 struct btrfs_block_group_cache *block_group;
10492 struct btrfs_free_cluster *cluster;
10493 struct btrfs_root *tree_root = root->fs_info->tree_root;
10494 struct btrfs_key key;
10495 struct inode *inode;
10496 struct kobject *kobj = NULL;
10497 int ret;
10498 int index;
10499 int factor;
10500 struct btrfs_caching_control *caching_ctl = NULL;
10501 bool remove_em;
10502
10503 root = root->fs_info->extent_root;
10504
10505 block_group = btrfs_lookup_block_group(root->fs_info, group_start);
10506 BUG_ON(!block_group);
10507 BUG_ON(!block_group->ro);
10508
	/*
	 * Free the reserved super bytes from this block group before
	 * removing it.
	 */
10513 free_excluded_extents(root, block_group);
10514
10515 memcpy(&key, &block_group->key, sizeof(key));
10516 index = get_block_group_index(block_group);
10517 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
10518 BTRFS_BLOCK_GROUP_RAID1 |
10519 BTRFS_BLOCK_GROUP_RAID10))
10520 factor = 2;
10521 else
10522 factor = 1;
10523
	/* make sure this block group isn't part of an allocation cluster */
10525 cluster = &root->fs_info->data_alloc_cluster;
10526 spin_lock(&cluster->refill_lock);
10527 btrfs_return_cluster_to_free_space(block_group, cluster);
10528 spin_unlock(&cluster->refill_lock);
10529
	/*
	 * make sure this block group isn't part of a metadata
	 * allocation cluster
	 */
10534 cluster = &root->fs_info->meta_alloc_cluster;
10535 spin_lock(&cluster->refill_lock);
10536 btrfs_return_cluster_to_free_space(block_group, cluster);
10537 spin_unlock(&cluster->refill_lock);
10538
10539 path = btrfs_alloc_path();
10540 if (!path) {
10541 ret = -ENOMEM;
10542 goto out;
10543 }
10544
	/*
	 * get the inode first so any iput calls done for the io_list
	 * aren't the final iput (no unlinks allowed now)
	 */
10549 inode = lookup_free_space_inode(tree_root, block_group, path);
10550
10551 mutex_lock(&trans->transaction->cache_write_mutex);
	/*
	 * make sure our free space cache IO is done before removing the
	 * free space inode
	 */
10556 spin_lock(&trans->transaction->dirty_bgs_lock);
10557 if (!list_empty(&block_group->io_list)) {
10558 list_del_init(&block_group->io_list);
10559
10560 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10561
10562 spin_unlock(&trans->transaction->dirty_bgs_lock);
10563 btrfs_wait_cache_io(root, trans, block_group,
10564 &block_group->io_ctl, path,
10565 block_group->key.objectid);
10566 btrfs_put_block_group(block_group);
10567 spin_lock(&trans->transaction->dirty_bgs_lock);
10568 }
10569
10570 if (!list_empty(&block_group->dirty_list)) {
10571 list_del_init(&block_group->dirty_list);
10572 btrfs_put_block_group(block_group);
10573 }
10574 spin_unlock(&trans->transaction->dirty_bgs_lock);
10575 mutex_unlock(&trans->transaction->cache_write_mutex);
10576
10577 if (!IS_ERR(inode)) {
10578 ret = btrfs_orphan_add(trans, inode);
10579 if (ret) {
10580 btrfs_add_delayed_iput(inode);
10581 goto out;
10582 }
10583 clear_nlink(inode);
10584
10585 spin_lock(&block_group->lock);
10586 if (block_group->iref) {
10587 block_group->iref = 0;
10588 block_group->inode = NULL;
10589 spin_unlock(&block_group->lock);
10590 iput(inode);
10591 } else {
10592 spin_unlock(&block_group->lock);
10593 }
10594
10595 btrfs_add_delayed_iput(inode);
10596 }
10597
10598 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10599 key.offset = block_group->key.objectid;
10600 key.type = 0;
10601
10602 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10603 if (ret < 0)
10604 goto out;
10605 if (ret > 0)
10606 btrfs_release_path(path);
10607 if (ret == 0) {
10608 ret = btrfs_del_item(trans, tree_root, path);
10609 if (ret)
10610 goto out;
10611 btrfs_release_path(path);
10612 }
10613
10614 spin_lock(&root->fs_info->block_group_cache_lock);
10615 rb_erase(&block_group->cache_node,
10616 &root->fs_info->block_group_cache_tree);
10617 RB_CLEAR_NODE(&block_group->cache_node);
10618
10619 if (root->fs_info->first_logical_byte == block_group->key.objectid)
10620 root->fs_info->first_logical_byte = (u64)-1;
10621 spin_unlock(&root->fs_info->block_group_cache_lock);
10622
10623 down_write(&block_group->space_info->groups_sem);
	/*
	 * we must use list_del_init so people can check to see if they
	 * are still on the list after taking the semaphore
	 */
10628 list_del_init(&block_group->list);
10629 if (list_empty(&block_group->space_info->block_groups[index])) {
10630 kobj = block_group->space_info->block_group_kobjs[index];
10631 block_group->space_info->block_group_kobjs[index] = NULL;
10632 clear_avail_alloc_bits(root->fs_info, block_group->flags);
10633 }
10634 up_write(&block_group->space_info->groups_sem);
10635 if (kobj) {
10636 kobject_del(kobj);
10637 kobject_put(kobj);
10638 }
10639
10640 if (block_group->has_caching_ctl)
10641 caching_ctl = get_caching_control(block_group);
10642 if (block_group->cached == BTRFS_CACHE_STARTED)
10643 wait_block_group_cache_done(block_group);
10644 if (block_group->has_caching_ctl) {
10645 down_write(&root->fs_info->commit_root_sem);
10646 if (!caching_ctl) {
10647 struct btrfs_caching_control *ctl;
10648
10649 list_for_each_entry(ctl,
10650 &root->fs_info->caching_block_groups, list)
10651 if (ctl->block_group == block_group) {
10652 caching_ctl = ctl;
10653 atomic_inc(&caching_ctl->count);
10654 break;
10655 }
10656 }
10657 if (caching_ctl)
10658 list_del_init(&caching_ctl->list);
10659 up_write(&root->fs_info->commit_root_sem);
10660 if (caching_ctl) {
			/* Once for the caching bgs list and once for us. */
10662 put_caching_control(caching_ctl);
10663 put_caching_control(caching_ctl);
10664 }
10665 }
10666
	spin_lock(&trans->transaction->dirty_bgs_lock);
	WARN_ON(!list_empty(&block_group->dirty_list));
	WARN_ON(!list_empty(&block_group->io_list));
	spin_unlock(&trans->transaction->dirty_bgs_lock);
10675 btrfs_remove_free_space_cache(block_group);
10676
10677 spin_lock(&block_group->space_info->lock);
10678 list_del_init(&block_group->ro_list);
10679
10680 if (btrfs_test_opt(root->fs_info, ENOSPC_DEBUG)) {
10681 WARN_ON(block_group->space_info->total_bytes
10682 < block_group->key.offset);
10683 WARN_ON(block_group->space_info->bytes_readonly
10684 < block_group->key.offset);
10685 WARN_ON(block_group->space_info->disk_total
10686 < block_group->key.offset * factor);
10687 }
10688 block_group->space_info->total_bytes -= block_group->key.offset;
10689 block_group->space_info->bytes_readonly -= block_group->key.offset;
10690 block_group->space_info->disk_total -= block_group->key.offset * factor;
10691
10692 spin_unlock(&block_group->space_info->lock);
10693
10694 memcpy(&key, &block_group->key, sizeof(key));
10695
10696 lock_chunks(root);
10697 if (!list_empty(&em->list)) {
		/* We're in the transaction->pending_chunks list. */
10699 free_extent_map(em);
10700 }
10701 spin_lock(&block_group->lock);
10702 block_group->removed = 1;
	/*
	 * At this point trimming can't start on this block group, because we
	 * removed the block group from the tree fs_info->block_group_cache_tree
	 * so no one can find it anymore and even if someone already got this
	 * block group before we removed it from the rbtree, they have already
	 * incremented block_group->trimming - if they didn't, they won't find
	 * any free space entries because we already removed them all when we
	 * called btrfs_remove_free_space_cache().
	 *
	 * And we must not remove the extent map from the fs_info->mapping_tree
	 * to prevent the same logical address range and physical device space
	 * ranges from being reused for a new block group. This is because our
	 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
	 * completely transactionless, so while it is trimming a range the
	 * currently running transaction might finish and a new one start,
	 * allowing for new block groups to be created that can reuse the same
	 * physical device locations unless we take this special care.
	 *
	 * There may also be an implicit trim operation if the file system
	 * is mounted with -odiscard. The same protections must remain
	 * in place until the extents have been discarded completely when
	 * the transaction commit has completed.
	 */
10726 remove_em = (atomic_read(&block_group->trimming) == 0);
	/*
	 * Make sure a trimmer task always sees the em in the pinned_chunks
	 * list if it sees block_group->removed == 1 (needs several memory
	 * barriers, but no additional barriers are needed here because we
	 * hold the block group's lock).
	 */
10732 if (!remove_em) {
		/*
		 * Trimming is still in progress on this block group, so we
		 * can't remove the extent map yet.  Park it on the
		 * pinned_chunks list; it is dropped once trimming finishes
		 * (see btrfs_put_block_group_trimming()).
		 */
10744 list_move_tail(&em->list, &root->fs_info->pinned_chunks);
10745 }
10746 spin_unlock(&block_group->lock);
10747
10748 if (remove_em) {
10749 struct extent_map_tree *em_tree;
10750
10751 em_tree = &root->fs_info->mapping_tree.map_tree;
10752 write_lock(&em_tree->lock);
		/*
		 * The em might be in the pending_chunks list, so make sure the
		 * chunk mutex is locked, since remove_extent_mapping() will
		 * delete us from that list.
		 */
10758 remove_extent_mapping(em_tree, em);
10759 write_unlock(&em_tree->lock);
		/* once for the tree */
10761 free_extent_map(em);
10762 }
10763
10764 unlock_chunks(root);
10765
10766 ret = remove_block_group_free_space(trans, root->fs_info, block_group);
10767 if (ret)
10768 goto out;
10769
10770 btrfs_put_block_group(block_group);
10771 btrfs_put_block_group(block_group);
10772
10773 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10774 if (ret > 0)
10775 ret = -EIO;
10776 if (ret < 0)
10777 goto out;
10778
10779 ret = btrfs_del_item(trans, root, path);
10780out:
10781 btrfs_free_path(path);
10782 return ret;
10783}
10784
10785struct btrfs_trans_handle *
10786btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10787 const u64 chunk_offset)
10788{
10789 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10790 struct extent_map *em;
10791 struct map_lookup *map;
10792 unsigned int num_items;
10793
10794 read_lock(&em_tree->lock);
10795 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10796 read_unlock(&em_tree->lock);
10797 ASSERT(em && em->start == chunk_offset);
10798
	/*
	 * We need to reserve 3 + N units from the metadata space info in order
	 * to remove a block group (done at btrfs_remove_chunk() and at
	 * btrfs_remove_block_group()), which are used for:
	 *
	 * 1 unit for adding the free space inode's orphan (located in the tree
	 * of tree roots).
	 * 1 unit for deleting the block group item (located in the extent
	 * tree).
	 * 1 unit for deleting the free space item (located in tree of tree
	 * roots).
	 * N units for deleting N device extent items corresponding to each
	 * stripe (located in the device tree).
	 *
	 * In order to remove a block group we also need to reserve units in
	 * the system space info in order to update the chunk tree (update one
	 * or more device items and remove one chunk item), but this is done at
	 * btrfs_remove_chunk() through a call to check_system_chunk().
	 */
10818 map = em->map_lookup;
10819 num_items = 3 + map->num_stripes;
10820 free_extent_map(em);
10821
10822 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10823 num_items, 1);
10824}
10825
/*
 * Process the unused_bgs list and remove any that don't have any allocated
 * space inside of them.
 */
10830void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10831{
10832 struct btrfs_block_group_cache *block_group;
10833 struct btrfs_space_info *space_info;
10834 struct btrfs_root *root = fs_info->extent_root;
10835 struct btrfs_trans_handle *trans;
10836 int ret = 0;
10837
10838 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
10839 return;
10840
10841 spin_lock(&fs_info->unused_bgs_lock);
10842 while (!list_empty(&fs_info->unused_bgs)) {
10843 u64 start, end;
10844 int trimming;
10845
10846 block_group = list_first_entry(&fs_info->unused_bgs,
10847 struct btrfs_block_group_cache,
10848 bg_list);
10849 list_del_init(&block_group->bg_list);
10850
10851 space_info = block_group->space_info;
10852
10853 if (ret || btrfs_mixed_space_info(space_info)) {
10854 btrfs_put_block_group(block_group);
10855 continue;
10856 }
10857 spin_unlock(&fs_info->unused_bgs_lock);
10858
10859 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10860
		/* Don't want to race with allocators so take the groups_sem */
10862 down_write(&space_info->groups_sem);
10863 spin_lock(&block_group->lock);
10864 if (block_group->reserved ||
10865 btrfs_block_group_used(&block_group->item) ||
10866 block_group->ro ||
10867 list_is_singular(&block_group->list)) {
			/*
			 * We want to bail if we made new allocations or have
			 * outstanding allocations in this block group.  We do
			 * the ro check in case balance is currently acting on
			 * this block group.
			 */
10874 spin_unlock(&block_group->lock);
10875 up_write(&space_info->groups_sem);
10876 goto next;
10877 }
10878 spin_unlock(&block_group->lock);
10879
		/* We don't want to force the issue, only flip if it's ok. */
10881 ret = inc_block_group_ro(block_group, 0);
10882 up_write(&space_info->groups_sem);
10883 if (ret < 0) {
10884 ret = 0;
10885 goto next;
10886 }
10887
		/*
		 * Want to do this before we do anything else so we can recover
		 * properly if we fail to join the transaction.
		 */
10892 trans = btrfs_start_trans_remove_block_group(fs_info,
10893 block_group->key.objectid);
10894 if (IS_ERR(trans)) {
10895 btrfs_dec_block_group_ro(root, block_group);
10896 ret = PTR_ERR(trans);
10897 goto next;
10898 }
10899
		/*
		 * We could have pending pinned extents for this block group,
		 * just delete them, we already have our pinned space accounted
		 * for at this point.
		 */
10904 start = block_group->key.objectid;
10905 end = start + block_group->key.offset - 1;
10906
		/*
		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
		 * btrfs_finish_extent_commit().  A commit of the previous
		 * transaction could still be unpinning ranges that belong to
		 * this block group; without the mutex it could re-add free
		 * space entries after we cleared them from freed_extents[]
		 * below.
		 */
10917 mutex_lock(&fs_info->unused_bg_unpin_mutex);
10918 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
10919 EXTENT_DIRTY);
10920 if (ret) {
10921 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10922 btrfs_dec_block_group_ro(root, block_group);
10923 goto end_trans;
10924 }
10925 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
10926 EXTENT_DIRTY);
10927 if (ret) {
10928 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10929 btrfs_dec_block_group_ro(root, block_group);
10930 goto end_trans;
10931 }
10932 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
10933
		/* Reset pinned so btrfs_put_block_group doesn't complain */
10935 spin_lock(&space_info->lock);
10936 spin_lock(&block_group->lock);
10937
10938 space_info->bytes_pinned -= block_group->pinned;
10939 space_info->bytes_readonly += block_group->pinned;
10940 percpu_counter_add(&space_info->total_bytes_pinned,
10941 -block_group->pinned);
10942 block_group->pinned = 0;
10943
10944 spin_unlock(&block_group->lock);
10945 spin_unlock(&space_info->lock);
10946
		/* DISCARD can flip during remount */
10948 trimming = btrfs_test_opt(root->fs_info, DISCARD);
10949
		/* Implicit trim during transaction commit. */
10951 if (trimming)
10952 btrfs_get_block_group_trimming(block_group);
10953
		/*
		 * btrfs_remove_chunk() will abort the transaction if things
		 * go horribly wrong.
		 */
10958 ret = btrfs_remove_chunk(trans, root,
10959 block_group->key.objectid);
10960
10961 if (ret) {
10962 if (trimming)
10963 btrfs_put_block_group_trimming(block_group);
10964 goto end_trans;
10965 }
10966
		/*
		 * If we're not mounted with -odiscard, we can just forget
		 * about this block group. Otherwise we have to wait until
		 * transaction commit to do the actual discard.
		 */
10972 if (trimming) {
10973 spin_lock(&fs_info->unused_bgs_lock);
			/*
			 * A concurrent scrub might have added us back to the
			 * list fs_info->unused_bgs, so use a list_move
			 * operation to add the block group to the
			 * deleted_bgs list.
			 */
10979 list_move(&block_group->bg_list,
10980 &trans->transaction->deleted_bgs);
10981 spin_unlock(&fs_info->unused_bgs_lock);
10982 btrfs_get_block_group(block_group);
10983 }
10984end_trans:
10985 btrfs_end_transaction(trans, root);
10986next:
10987 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
10988 btrfs_put_block_group(block_group);
10989 spin_lock(&fs_info->unused_bgs_lock);
10990 }
10991 spin_unlock(&fs_info->unused_bgs_lock);
10992}
10993
10994int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
10995{
10996 struct btrfs_space_info *space_info;
10997 struct btrfs_super_block *disk_super;
10998 u64 features;
10999 u64 flags;
11000 int mixed = 0;
11001 int ret;
11002
11003 disk_super = fs_info->super_copy;
11004 if (!btrfs_super_root(disk_super))
11005 return -EINVAL;
11006
11007 features = btrfs_super_incompat_flags(disk_super);
11008 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
11009 mixed = 1;
11010
11011 flags = BTRFS_BLOCK_GROUP_SYSTEM;
11012 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
11013 if (ret)
11014 goto out;
11015
11016 if (mixed) {
11017 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
11018 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
11019 } else {
11020 flags = BTRFS_BLOCK_GROUP_METADATA;
11021 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
11022 if (ret)
11023 goto out;
11024
11025 flags = BTRFS_BLOCK_GROUP_DATA;
11026 ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
11027 }
11028out:
11029 return ret;
11030}
11031
11032int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
11033{
11034 return unpin_extent_range(root, start, end, false);
11035}
11036

/*
 * Trim the unallocated space on a device.
 *
 * Since unused block groups are removed automatically, their chunk space
 * returns to the device as unallocated holes, so trimming only the free
 * space inside block groups is not enough.  Walk the gaps between device
 * extents (via find_free_dev_extent_start()) and discard each one.
 */
11055static int btrfs_trim_free_extents(struct btrfs_device *device,
11056 u64 minlen, u64 *trimmed)
11057{
11058 u64 start = 0, len = 0;
11059 int ret;
11060
11061 *trimmed = 0;
11062
	/* Not writeable = nothing to do. */
11064 if (!device->writeable)
11065 return 0;
11066
	/* No free space = nothing to trim */
11068 if (device->total_bytes <= device->bytes_used)
11069 return 0;
11070
11071 ret = 0;
11072
11073 while (1) {
11074 struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
11075 struct btrfs_transaction *trans;
11076 u64 bytes;
11077
11078 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
11079 if (ret)
11080 return ret;
11081
11082 down_read(&fs_info->commit_root_sem);
11083
11084 spin_lock(&fs_info->trans_lock);
11085 trans = fs_info->running_transaction;
11086 if (trans)
11087 atomic_inc(&trans->use_count);
11088 spin_unlock(&fs_info->trans_lock);
11089
11090 ret = find_free_dev_extent_start(trans, device, minlen, start,
11091 &start, &len);
11092 if (trans)
11093 btrfs_put_transaction(trans);
11094
11095 if (ret) {
11096 up_read(&fs_info->commit_root_sem);
11097 mutex_unlock(&fs_info->chunk_mutex);
11098 if (ret == -ENOSPC)
11099 ret = 0;
11100 break;
11101 }
11102
11103 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
11104 up_read(&fs_info->commit_root_sem);
11105 mutex_unlock(&fs_info->chunk_mutex);
11106
11107 if (ret)
11108 break;
11109
11110 start += len;
11111 *trimmed += bytes;
11112
11113 if (fatal_signal_pending(current)) {
11114 ret = -ERESTARTSYS;
11115 break;
11116 }
11117
11118 cond_resched();
11119 }
11120
11121 return ret;
11122}
11123
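/*
 * FITRIM entry point: trim the free space of every block group that
 * intersects the requested range, then trim the unallocated space on
 * each device.  range->len is updated with the total bytes trimmed.
 */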
11124int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
11125{
11126 struct btrfs_fs_info *fs_info = root->fs_info;
11127 struct btrfs_block_group_cache *cache = NULL;
11128 struct btrfs_device *device;
11129 struct list_head *devices;
11130 u64 group_trimmed;
11131 u64 start;
11132 u64 end;
11133 u64 trimmed = 0;
11134 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11135 int ret = 0;
11136
	/*
	 * try to trim all FS space, our block group may start from non-zero.
	 */
11140 if (range->len == total_bytes)
11141 cache = btrfs_lookup_first_block_group(fs_info, range->start);
11142 else
11143 cache = btrfs_lookup_block_group(fs_info, range->start);
11144
11145 while (cache) {
11146 if (cache->key.objectid >= (range->start + range->len)) {
11147 btrfs_put_block_group(cache);
11148 break;
11149 }
11150
11151 start = max(range->start, cache->key.objectid);
11152 end = min(range->start + range->len,
11153 cache->key.objectid + cache->key.offset);
11154
11155 if (end - start >= range->minlen) {
11156 if (!block_group_cache_done(cache)) {
11157 ret = cache_block_group(cache, 0);
11158 if (ret) {
11159 btrfs_put_block_group(cache);
11160 break;
11161 }
11162 ret = wait_block_group_cache_done(cache);
11163 if (ret) {
11164 btrfs_put_block_group(cache);
11165 break;
11166 }
11167 }
11168 ret = btrfs_trim_block_group(cache,
11169 &group_trimmed,
11170 start,
11171 end,
11172 range->minlen);
11173
11174 trimmed += group_trimmed;
11175 if (ret) {
11176 btrfs_put_block_group(cache);
11177 break;
11178 }
11179 }
11180
11181 cache = next_block_group(fs_info->tree_root, cache);
11182 }
11183
11184 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
11185 devices = &root->fs_info->fs_devices->alloc_list;
11186 list_for_each_entry(device, devices, dev_alloc_list) {
11187 ret = btrfs_trim_free_extents(device, range->minlen,
11188 &group_trimmed);
11189 if (ret)
11190 break;
11191
11192 trimmed += group_trimmed;
11193 }
11194 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
11195
11196 range->len = trimmed;
11197 return ret;
11198}
11199
/*
 * btrfs_{start,end}_write_no_snapshoting() are similar to
 * mnt_{want,drop}_write(): they are used to keep writers out while a
 * snapshot of the subvolume is being created.  A writer that got in
 * before the snapshot started is counted in subv_writers; snapshot
 * creation waits for that counter to drain.
 */
11208void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
11209{
11210 percpu_counter_dec(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we wake up waiters.
	 */
11214 smp_mb();
11215 if (waitqueue_active(&root->subv_writers->wait))
11216 wake_up(&root->subv_writers->wait);
11217}
11218
11219int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
11220{
11221 if (atomic_read(&root->will_be_snapshoted))
11222 return 0;
11223
11224 percpu_counter_inc(&root->subv_writers->counter);
	/*
	 * Make sure counter is updated before we check for snapshot creation.
	 */
11228 smp_mb();
11229 if (atomic_read(&root->will_be_snapshoted)) {
11230 btrfs_end_write_no_snapshoting(root);
11231 return 0;
11232 }
11233 return 1;
11234}
11235
11236static int wait_snapshoting_atomic_t(atomic_t *a)
11237{
11238 schedule();
11239 return 0;
11240}
11241
11242void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11243{
11244 while (true) {
11245 int ret;
11246
11247 ret = btrfs_start_write_no_snapshoting(root);
11248 if (ret)
11249 break;
11250 wait_on_atomic_t(&root->will_be_snapshoted,
11251 wait_snapshoting_atomic_t,
11252 TASK_UNINTERRUPTIBLE);
11253 }
11254}
11255