#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"

#undef SCRAMBLE_DELAYED_REFS

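/*
 * Control flags for do_chunk_alloc()'s force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
 * chunks already allocated, to help keep a reasonable pool of storage
 * available for clustering without filling the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 */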
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};

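/*
 * Generate a helper that updates a space_info counter, clamping at zero and
 * warning on underflow instead of wrapping around.
 */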
#define DECLARE_SPACE_INFO_UPDATE(name)					\
static inline void update_##name(struct btrfs_space_info *sinfo,	\
				 s64 bytes)				\
{									\
	if (bytes < 0 && sinfo->name < -bytes) {			\
		WARN_ON(1);						\
		sinfo->name = 0;					\
		return;							\
	}								\
	sinfo->name += bytes;						\
}

DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
DECLARE_SPACE_INFO_UPDATE(bytes_pinned);

static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_node *node,
				     struct btrfs_delayed_extent_op *extent_op);
static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
			  int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_fs_info *fs_info,
			    struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
			       u64 num_bytes);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);
static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     u64 num_bytes);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
		cache->cached == BTRFS_CACHE_ERROR;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);

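		/*
		 * A non-empty full_stripe_locks_root here means some user of
		 * the block group is still holding a full stripe lock, which
		 * becomes a use-after-free once the cache is freed.  There is
		 * no good way to resolve that at this point, so warn.
		 */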
		WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

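/*
 * This adds the block group to the fs_info rb tree for the block group cache.
 */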
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				       struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

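/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr.
 */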
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

static int add_excluded_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE);
	set_extent_bits(&fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE);
	return 0;
}

static void free_excluded_extents(struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE);
	clear_extent_bits(&fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE);
}

static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(fs_info, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(fs_info, cache->key.objectid,
				       bytenr, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(fs_info, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	refcount_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (refcount_dec_and_test(&ctl->count))
		kfree(ctl);
}

#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 start = block_group->key.objectid;
	u64 len = block_group->key.offset;
	u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
		fs_info->nodesize : fs_info->sectorsize;
	u64 step = chunk << 1;

	while (len > chunk) {
		btrfs_remove_free_space(block_group, start, chunk);
		start += step;
		if (len < step)
			len = 0;
		else
			len -= step;
	}
}
#endif

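/*
 * Add the free space between 'start' and 'end' to the block group's free
 * space cache, skipping over any ranges that are still recorded as pinned
 * in pinned_extents: freed extents can't be reused until the transaction
 * that freed them commits.
 */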
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
		       u64 start, u64 end)
{
	struct btrfs_fs_info *info = block_group->fs_info;
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret;
	bool wakeup = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

#ifdef CONFIG_BTRFS_DEBUG
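	/*
	 * If we're fragmenting we don't want to make anybody think we can
	 * allocate from this block group until we've had a chance to fragment
	 * the free space.
	 */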
	if (btrfs_should_fragment_free_space(block_group))
		wakeup = false;
#endif

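	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space, so we skip locking and search the commit
	 * root, which is read-only.
	 */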
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;

next:
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    rwsem_is_contended(&fs_info->commit_root_sem)) {
				if (wakeup)
					caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->commit_root_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				mutex_lock(&caching_ctl->mutex);
				down_read(&fs_info->commit_root_sem);
				goto next;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < last) {
			key.objectid = last;
			key.offset = 0;
			key.type = BTRFS_EXTENT_ITEM_KEY;

			if (wakeup)
				caching_ctl->progress = last;
			btrfs_release_path(path);
			goto next;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			total_found += add_new_free_space(block_group, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				last = key.objectid +
					fs_info->nodesize;
			else
				last = key.objectid + key.offset;

			if (total_found > CACHING_CTL_WAKE_UP) {
				total_found = 0;
				if (wakeup)
					wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

out:
	btrfs_free_path(path);
	return ret;
}

static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;

	mutex_lock(&caching_ctl->mutex);
	down_read(&fs_info->commit_root_sem);

	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		ret = load_free_space_tree(caching_ctl);
	else
		ret = load_extent_tree_free(caching_ctl);

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group)) {
		u64 bytes_used;

		spin_lock(&block_group->space_info->lock);
		spin_lock(&block_group->lock);
		bytes_used = block_group->key.offset -
			btrfs_block_group_used(&block_group->item);
		block_group->space_info->bytes_used += bytes_used >> 1;
		spin_unlock(&block_group->lock);
		spin_unlock(&block_group->space_info->lock);
		fragment_free_space(block_group);
	}
#endif

	caching_ctl->progress = (u64)-1;

	up_read(&fs_info->commit_root_sem);
	free_excluded_extents(block_group);
	mutex_unlock(&caching_ctl->mutex);

	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	refcount_set(&caching_ctl->count, 1);
	btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
			caching_thread, NULL, NULL);

	spin_lock(&cache->lock);

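	/*
	 * While some other task has this block group in the BTRFS_CACHE_FAST
	 * state it owns the caching_ctl, so wait here (dropping cache->lock
	 * around the schedule) until it either finishes or falls back to the
	 * slow path, then re-check the cached state under the lock.
	 */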
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		refcount_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
		mutex_lock(&caching_ctl->mutex);
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
			caching_ctl->progress = (u64)-1;
		} else {
			if (load_cache_only) {
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				cache->cached = BTRFS_CACHE_STARTED;
				cache->has_caching_ctl = 1;
			}
		}
		spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
		if (ret == 1 &&
		    btrfs_should_fragment_free_space(cache)) {
			u64 bytes_used;

			spin_lock(&cache->space_info->lock);
			spin_lock(&cache->lock);
			bytes_used = cache->key.offset -
				btrfs_block_group_used(&cache->item);
			cache->space_info->bytes_used += bytes_used >> 1;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			fragment_free_space(cache);
		}
#endif
		mutex_unlock(&caching_ctl->mutex);

		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(cache);
			return 0;
		}
	} else {
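		/*
		 * We're either using the free space tree or no caching at all.
		 * Set cached to the appropriate value and wake up any waiters.
		 */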
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
			cache->has_caching_ctl = 1;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	down_write(&fs_info->commit_root_sem);
	refcount_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->commit_root_sem);

	btrfs_get_block_group(cache);

	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);

	return ret;
}

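/*
 * Return the block group that starts at or after bytenr.
 */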
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 0);
}

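/*
 * Return the block group that contains the given bytenr.
 */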
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	return block_group_cache_tree_search(info, bytenr, 1);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
			     bool metadata, u64 root_objectid)
{
	struct btrfs_space_info *space_info;
	u64 flags;

	if (metadata) {
		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
			flags = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			flags = BTRFS_BLOCK_GROUP_METADATA;
	} else {
		flags = BTRFS_BLOCK_GROUP_DATA;
	}

	space_info = __find_space_info(fs_info, flags);
	ASSERT(space_info);
	percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes,
				 BTRFS_TOTAL_BYTES_PINNED_BATCH);
}

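/*
 * After adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */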
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}

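/*
 * Helper function to look up the reference count and flags of an extent.
 *
 * The head node of a delayed ref stores the sum of all the reference count
 * modifications queued up in the rbtree, and may also carry extent flags to
 * set.  Folding that in lets the caller see what the reference count and
 * extent flags would be once all of the delayed refs are processed.
 */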
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

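	/*
	 * If we don't have skinny metadata, don't bother doing anything
	 * different.
	 */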
	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		offset = fs_info->nodesize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}

search_again:
	key.objectid = bytenr;
	key.offset = offset;
	if (metadata)
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == fs_info->nodesize)
				ret = 0;
		}
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
			ret = -EINVAL;
			btrfs_print_v0_err(fs_info);
			if (trans)
				btrfs_abort_transaction(trans, ret);
			else
				btrfs_handle_fs_error(fs_info, ret, NULL);

			goto out_free;
		}

		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			refcount_inc(&head->refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

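			/*
			 * The mutex was contended, block until it's released
			 * and try again.
			 */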
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref_head(head);
			goto search_again;
		}
		spin_lock(&head->lock);
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->ref_mod;
		spin_unlock(&head->lock);
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}

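/*
 * Back references in the extent tree come in two forms.
 *
 * Implicit back refs name the root objectid plus, for data extents, the
 * inode objectid and file offset that point at the extent.  They are cheap
 * to maintain because they don't change as the referencing tree is COWed,
 * but resolving them requires walking that tree.
 *
 * Full back refs record the bytenr of the parent tree block (or, for shared
 * data extents, the parent leaf) directly.  They are exact but must be
 * updated whenever the parent block is relocated or COWed.
 *
 * Both kinds can be stored inline in the extent item.  When the extent item
 * runs out of room, additional references spill into separate keyed items
 * (TREE_BLOCK_REF/SHARED_BLOCK_REF for tree blocks, EXTENT_DATA_REF/
 * SHARED_DATA_REF for data) that follow the extent item in the tree.
 */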
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
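				/*
				 * Every shared ref names a parent tree block,
				 * which must be aligned to the nodesize.
				 */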
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
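				/*
				 * Every shared ref names a parent tree block,
				 * which must be aligned to the nodesize.
				 */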
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->nodesize))
					return type;
			}
		} else {
			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
			return type;
		}
	}

	btrfs_print_leaf((struct extent_buffer *)eb);
	btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
		  eb->start, type);
	WARN_ON(1);

	return BTRFS_REF_TYPE_INVALID;
}

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_root *root = trans->fs_info->extent_root;
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_root *root = trans->fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_path *path,
					   int refs_to_drop, int *last_ref)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
		btrfs_print_v0_err(trans->fs_info);
		btrfs_abort_transaction(trans, -EINVAL);
		return -EINVAL;
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
		*last_ref = 1;
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;
	int type;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
	if (iref) {
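		/*
		 * If the type here is invalid, we should have bailed out
		 * earlier than this call.
		 */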
		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
		ASSERT(type != BTRFS_REF_TYPE_INVALID);
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_root *root = trans->fs_info->extent_root;
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root,
				      path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

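/*
 * Look for an inline back ref.  If one is found, *ref_ret is set to the
 * address of the inline back ref and 0 is returned.
 *
 * If no back ref is found, *ref_ret is set to the address where it should
 * be inserted and -ENOENT is returned.
 *
 * If insert is true and there is no room for more inline back refs, the
 * path points to the extent item and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref items
 *	 in the tree are ordered.
 */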
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
	int needed;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

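	/*
	 * For skinny metadata the owner passed in is the level of the block,
	 * which becomes the offset of the METADATA_ITEM key.
	 */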
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

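	/*
	 * We may be a newly converted file system which still has the old fat
	 * extent entries for metadata, so try and see if we have one of those.
	 */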
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (WARN_ON(ret)) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	if (unlikely(item_size < sizeof(*ei))) {
		err = -EINVAL;
		btrfs_print_v0_err(fs_info);
		btrfs_abort_transaction(trans, err);
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
		needed = BTRFS_REF_TYPE_DATA;
	else
		needed = BTRFS_REF_TYPE_BLOCK;

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
		if (type == BTRFS_REF_TYPE_INVALID) {
			err = -EUCLEAN;
			goto out;
		}

		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
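		/*
		 * To add a new inline back ref we have to make sure there is
		 * no corresponding back ref item.  For simplicity, we just do
		 * not add a new inline back ref if there is any kind of item
		 * for this block.
		 */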
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

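/*
 * Helper to add a new inline back ref.
 */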
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(fs_info, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

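/*
 * Helper to update or remove an inline back ref.
 */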
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op,
				  int *last_ref)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

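	/*
	 * If the type here is invalid, we should have bailed out after
	 * lookup_inline_extent_backref().
	 */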
	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	ASSERT(type != BTRFS_REF_TYPE_INVALID);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		*last_ref = 1;
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(fs_info, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
					   num_bytes, parent, root_objectid,
					   owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		update_inline_extent_backref(path, iref, refs_to_add,
					     extent_op, NULL);
	} else if (ret == -ENOENT) {
		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
					    root_objectid, owner, offset,
					    refs_to_add, extent_op);
		ret = 0;
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset,
					     refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data, int *last_ref)
{
	int ret = 0;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
					     last_ref);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, path, refs_to_drop,
					     last_ref);
	} else {
		*last_ref = 1;
		ret = btrfs_del_item(trans, trans->fs_info->extent_root, path);
	}
	return ret;
}

#define in_range(b, first, len)	((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << 9);

	if (WARN_ON(start != aligned_start)) {
		len -= aligned_start - start;
		len = round_down(len, 1 << 9);
		start = aligned_start;
	}

	*discarded_bytes = 0;

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

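	/* Skip any superblocks on this device. */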
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

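		/*
		 * Superblock spans the beginning of the range.  Adjust the
		 * start and submit.
		 */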
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		if (size) {
			ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
						   GFP_NOFS, 0);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
					   GFP_NOFS, 0);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}

int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;

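	/*
	 * Avoid races with device replace and make sure our bbio has devices
	 * associated to its stripes that don't go away while we are
	 * discarding.
	 */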
	btrfs_bio_counter_inc_blocked(fs_info);

	ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
			      &bbio, 0);

	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;

		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			struct request_queue *req_q;

			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}
			req_q = bdev_get_queue(stripe->dev->bdev);
			if (!blk_queue_discard(req_q))
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret)
				discarded_bytes += bytes;
			else if (ret != -EOPNOTSUPP)
				break;

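			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */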
			ret = 0;
		}
		btrfs_put_bbio(bbio);
	}
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

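/* Can return -ENOMEM */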
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int old_ref_mod, new_ref_mod;
	int ret;

	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid,
			   owner, offset, BTRFS_ADD_DELAYED_REF);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(trans, bytenr,
						 num_bytes, parent,
						 root_objectid, (int)owner,
						 BTRFS_ADD_DELAYED_REF, NULL,
						 &old_ref_mod, &new_ref_mod);
	} else {
		ret = btrfs_add_delayed_data_ref(trans, bytenr,
						 num_bytes, parent,
						 root_objectid, owner, offset,
						 0, BTRFS_ADD_DELAYED_REF,
						 &old_ref_mod, &new_ref_mod);
	}

	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
		bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

		add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
	}

	return ret;
}

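/*
 * __btrfs_inc_extent_ref - insert a backref for a given extent
 *
 * The counterpart of __btrfs_free_extent().  It first tries to add the new
 * reference inline in the extent item; if there is no room (-EAGAIN), it
 * falls back to bumping the reference count on the extent item and inserting
 * a separate keyed backref item.  @node carries the bytenr and size of the
 * extent, @parent is non-zero for shared (full) backrefs, @refs_to_add is
 * the number of references to add, and @extent_op, when set, is applied to
 * the extent item while the leaf is dirty.
 */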
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	path->leave_spinning = 1;

	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
					   parent, root_objectid, owner,
					   offset, refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

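	/*
	 * Ok we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */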
2150 leaf = path->nodes[0];
2151 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2152 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2153 refs = btrfs_extent_refs(leaf, item);
2154 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
2155 if (extent_op)
2156 __run_delayed_extent_op(extent_op, leaf, item);
2157
2158 btrfs_mark_buffer_dirty(leaf);
2159 btrfs_release_path(path);
2160
2161 path->reada = READA_FORWARD;
2162 path->leave_spinning = 1;
2163
2164 ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
2165 owner, offset, refs_to_add);
2166 if (ret)
2167 btrfs_abort_transaction(trans, ret);
2168out:
2169 btrfs_free_path(path);
2170 return ret;
2171}
2172
2173static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2174 struct btrfs_delayed_ref_node *node,
2175 struct btrfs_delayed_extent_op *extent_op,
2176 int insert_reserved)
2177{
2178 int ret = 0;
2179 struct btrfs_delayed_data_ref *ref;
2180 struct btrfs_key ins;
2181 u64 parent = 0;
2182 u64 ref_root = 0;
2183 u64 flags = 0;
2184
2185 ins.objectid = node->bytenr;
2186 ins.offset = node->num_bytes;
2187 ins.type = BTRFS_EXTENT_ITEM_KEY;
2188
2189 ref = btrfs_delayed_node_to_data_ref(node);
2190 trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
2191
2192 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2193 parent = ref->parent;
2194 ref_root = ref->root;
2195
2196 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2197 if (extent_op)
2198 flags |= extent_op->flags_to_set;
2199 ret = alloc_reserved_file_extent(trans, parent, ref_root,
2200 flags, ref->objectid,
2201 ref->offset, &ins,
2202 node->ref_mod);
2203 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2204 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2205 ref->objectid, ref->offset,
2206 node->ref_mod, extent_op);
2207 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2208 ret = __btrfs_free_extent(trans, node, parent,
2209 ref_root, ref->objectid,
2210 ref->offset, node->ref_mod,
2211 extent_op);
2212 } else {
2213 BUG();
2214 }
2215 return ret;
2216}
2217
2218static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
2219 struct extent_buffer *leaf,
2220 struct btrfs_extent_item *ei)
2221{
2222 u64 flags = btrfs_extent_flags(leaf, ei);
2223 if (extent_op->update_flags) {
2224 flags |= extent_op->flags_to_set;
2225 btrfs_set_extent_flags(leaf, ei, flags);
2226 }
2227
2228 if (extent_op->update_key) {
2229 struct btrfs_tree_block_info *bi;
2230 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2231 bi = (struct btrfs_tree_block_info *)(ei + 1);
2232 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
2233 }
2234}
2235
2236static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2237 struct btrfs_delayed_ref_head *head,
2238 struct btrfs_delayed_extent_op *extent_op)
2239{
2240 struct btrfs_fs_info *fs_info = trans->fs_info;
2241 struct btrfs_key key;
2242 struct btrfs_path *path;
2243 struct btrfs_extent_item *ei;
2244 struct extent_buffer *leaf;
2245 u32 item_size;
2246 int ret;
2247 int err = 0;
2248 int metadata = !extent_op->is_data;
2249
2250 if (trans->aborted)
2251 return 0;
2252
2253 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2254 metadata = 0;
2255
2256 path = btrfs_alloc_path();
2257 if (!path)
2258 return -ENOMEM;
2259
2260 key.objectid = head->bytenr;
2261
2262 if (metadata) {
2263 key.type = BTRFS_METADATA_ITEM_KEY;
2264 key.offset = extent_op->level;
2265 } else {
2266 key.type = BTRFS_EXTENT_ITEM_KEY;
2267 key.offset = head->num_bytes;
2268 }
2269
2270again:
2271 path->reada = READA_FORWARD;
2272 path->leave_spinning = 1;
2273 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
2274 if (ret < 0) {
2275 err = ret;
2276 goto out;
2277 }
2278 if (ret > 0) {
2279 if (metadata) {
2280 if (path->slots[0] > 0) {
2281 path->slots[0]--;
2282 btrfs_item_key_to_cpu(path->nodes[0], &key,
2283 path->slots[0]);
2284 if (key.objectid == head->bytenr &&
2285 key.type == BTRFS_EXTENT_ITEM_KEY &&
2286 key.offset == head->num_bytes)
2287 ret = 0;
2288 }
2289 if (ret > 0) {
2290 btrfs_release_path(path);
2291 metadata = 0;
2292
2293 key.objectid = head->bytenr;
2294 key.offset = head->num_bytes;
2295 key.type = BTRFS_EXTENT_ITEM_KEY;
2296 goto again;
2297 }
2298 } else {
2299 err = -EIO;
2300 goto out;
2301 }
2302 }
2303
2304 leaf = path->nodes[0];
2305 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2306
2307 if (unlikely(item_size < sizeof(*ei))) {
2308 err = -EINVAL;
2309 btrfs_print_v0_err(fs_info);
2310 btrfs_abort_transaction(trans, err);
2311 goto out;
2312 }
2313
2314 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2315 __run_delayed_extent_op(extent_op, leaf, ei);
2316
2317 btrfs_mark_buffer_dirty(leaf);
2318out:
2319 btrfs_free_path(path);
2320 return err;
2321}
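
/*
 * A sketch of the two key shapes searched by run_delayed_extent_op() above,
 * assuming a tree block at bytenr 12582912, level 1, on a filesystem with a
 * 16KiB nodesize:
 *
 *   skinny (SKINNY_METADATA): (12582912, BTRFS_METADATA_ITEM_KEY, 1)
 *   legacy fallback:          (12582912, BTRFS_EXTENT_ITEM_KEY, 16384)
 *
 * A filesystem converted after it already had legacy extent items can hold
 * both forms, which is why a miss on the skinny key retries with the legacy
 * key.
 */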
2322
2323static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2324 struct btrfs_delayed_ref_node *node,
2325 struct btrfs_delayed_extent_op *extent_op,
2326 int insert_reserved)
2327{
2328 int ret = 0;
2329 struct btrfs_delayed_tree_ref *ref;
2330 u64 parent = 0;
2331 u64 ref_root = 0;
2332
2333 ref = btrfs_delayed_node_to_tree_ref(node);
2334 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
2335
2336 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2337 parent = ref->parent;
2338 ref_root = ref->root;
2339
2340 if (node->ref_mod != 1) {
2341 btrfs_err(trans->fs_info,
2342 "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
2343 node->bytenr, node->ref_mod, node->action, ref_root,
2344 parent);
2345 return -EIO;
2346 }
2347 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2348 BUG_ON(!extent_op || !extent_op->update_flags);
2349 ret = alloc_reserved_tree_block(trans, node, extent_op);
2350 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
2351 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
2352 ref->level, 0, 1, extent_op);
2353 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
2354 ret = __btrfs_free_extent(trans, node, parent, ref_root,
2355 ref->level, 0, 1, extent_op);
2356 } else {
2357 BUG();
2358 }
2359 return ret;
2360}
2361
2363static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2364 struct btrfs_delayed_ref_node *node,
2365 struct btrfs_delayed_extent_op *extent_op,
2366 int insert_reserved)
2367{
2368 int ret = 0;
2369
2370 if (trans->aborted) {
2371 if (insert_reserved)
2372 btrfs_pin_extent(trans->fs_info, node->bytenr,
2373 node->num_bytes, 1);
2374 return 0;
2375 }
2376
2377 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2378 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2379 ret = run_delayed_tree_ref(trans, node, extent_op,
2380 insert_reserved);
2381 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2382 node->type == BTRFS_SHARED_DATA_REF_KEY)
2383 ret = run_delayed_data_ref(trans, node, extent_op,
2384 insert_reserved);
2385 else
2386 BUG();
2387 if (ret && insert_reserved)
2388 btrfs_pin_extent(trans->fs_info, node->bytenr,
2389 node->num_bytes, 1);
2390 return ret;
2391}
2392
2393static inline struct btrfs_delayed_ref_node *
2394select_delayed_ref(struct btrfs_delayed_ref_head *head)
2395{
2396 struct btrfs_delayed_ref_node *ref;
2397
2398 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
2399 return NULL;
2400
	/*
	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * This is to prevent a ref count from going down to zero, which deletes
	 * the extent item from the extent tree, when there still are references
	 * to add, which would fail because they would not find the extent item.
	 */
2407 if (!list_empty(&head->ref_add_list))
2408 return list_first_entry(&head->ref_add_list,
2409 struct btrfs_delayed_ref_node, add_list);
2410
2411 ref = rb_entry(rb_first_cached(&head->ref_tree),
2412 struct btrfs_delayed_ref_node, ref_node);
2413 ASSERT(list_empty(&ref->add_list));
2414 return ref;
2415}
2416
2417static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
2418 struct btrfs_delayed_ref_head *head)
2419{
2420 spin_lock(&delayed_refs->lock);
2421 head->processing = 0;
2422 delayed_refs->num_heads_ready++;
2423 spin_unlock(&delayed_refs->lock);
2424 btrfs_delayed_ref_unlock(head);
2425}
2426
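/*
 * Return the extent op pending on @head, or NULL if there is nothing to run.
 * When the head still has to insert a reserved extent, any flag update is
 * folded into creating the extent item itself, so a pending extent op is
 * freed and dropped here instead of being returned.
 */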
2427static struct btrfs_delayed_extent_op *cleanup_extent_op(
2428 struct btrfs_delayed_ref_head *head)
2429{
2430 struct btrfs_delayed_extent_op *extent_op = head->extent_op;
2431
2432 if (!extent_op)
2433 return NULL;
2434
2435 if (head->must_insert_reserved) {
2436 head->extent_op = NULL;
2437 btrfs_free_delayed_extent_op(extent_op);
2438 return NULL;
2439 }
2440 return extent_op;
2441}
2442
2443static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
2444 struct btrfs_delayed_ref_head *head)
2445{
2446 struct btrfs_delayed_extent_op *extent_op;
2447 int ret;
2448
2449 extent_op = cleanup_extent_op(head);
2450 if (!extent_op)
2451 return 0;
2452 head->extent_op = NULL;
2453 spin_unlock(&head->lock);
2454 ret = run_delayed_extent_op(trans, head, extent_op);
2455 btrfs_free_delayed_extent_op(extent_op);
2456 return ret ? ret : 1;
2457}
2458
2459void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2460 struct btrfs_delayed_ref_root *delayed_refs,
2461 struct btrfs_delayed_ref_head *head)
2462{
2463 int nr_items = 1;
2464
2465 if (head->total_ref_mod < 0) {
2466 struct btrfs_space_info *space_info;
2467 u64 flags;
2468
2469 if (head->is_data)
2470 flags = BTRFS_BLOCK_GROUP_DATA;
2471 else if (head->is_system)
2472 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2473 else
2474 flags = BTRFS_BLOCK_GROUP_METADATA;
2475 space_info = __find_space_info(fs_info, flags);
2476 ASSERT(space_info);
2477 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2478 -head->num_bytes,
2479 BTRFS_TOTAL_BYTES_PINNED_BATCH);

		/*
		 * We had csum deletions accounted for in our delayed refs rsv,
		 * we need to drop the csum leaves for this update from our
		 * delayed_refs_rsv.
		 */
2486 if (head->is_data) {
2487 spin_lock(&delayed_refs->lock);
2488 delayed_refs->pending_csums -= head->num_bytes;
2489 spin_unlock(&delayed_refs->lock);
2490 nr_items += btrfs_csum_bytes_to_leaves(fs_info,
2491 head->num_bytes);
2492 }
2493 }

	/* Also free its reserved qgroup space */
2496 btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
2497 head->qgroup_reserved);
2498 btrfs_delayed_refs_rsv_release(fs_info, nr_items);
2499}
2500
2501static int cleanup_ref_head(struct btrfs_trans_handle *trans,
2502 struct btrfs_delayed_ref_head *head)
2503{
2505 struct btrfs_fs_info *fs_info = trans->fs_info;
2506 struct btrfs_delayed_ref_root *delayed_refs;
2507 int ret;
2508
2509 delayed_refs = &trans->transaction->delayed_refs;
2510
2511 ret = run_and_cleanup_extent_op(trans, head);
2512 if (ret < 0) {
2513 unselect_delayed_ref_head(delayed_refs, head);
2514 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2515 return ret;
2516 } else if (ret) {
2517 return ret;
2518 }
2519
	/*
	 * Need to drop our head ref lock and re-acquire the delayed ref lock
	 * and then re-check to make sure nobody got added.
	 */
2524 spin_unlock(&head->lock);
2525 spin_lock(&delayed_refs->lock);
2526 spin_lock(&head->lock);
2527 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
2528 spin_unlock(&head->lock);
2529 spin_unlock(&delayed_refs->lock);
2530 return 1;
2531 }
2532 btrfs_delete_ref_head(delayed_refs, head);
2533 spin_unlock(&head->lock);
2534 spin_unlock(&delayed_refs->lock);
2535
2536 if (head->must_insert_reserved) {
2537 btrfs_pin_extent(fs_info, head->bytenr,
2538 head->num_bytes, 1);
2539 if (head->is_data) {
2540 ret = btrfs_del_csums(trans, fs_info, head->bytenr,
2541 head->num_bytes);
2542 }
2543 }
2544
2545 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
2546
2547 trace_run_delayed_ref_head(fs_info, head, 0);
2548 btrfs_delayed_ref_unlock(head);
2549 btrfs_put_delayed_ref_head(head);
2550 return 0;
2551}
2552
2553static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
2554 struct btrfs_trans_handle *trans)
2555{
2556 struct btrfs_delayed_ref_root *delayed_refs =
2557 &trans->transaction->delayed_refs;
2558 struct btrfs_delayed_ref_head *head = NULL;
2559 int ret;
2560
2561 spin_lock(&delayed_refs->lock);
2562 head = btrfs_select_ref_head(delayed_refs);
2563 if (!head) {
2564 spin_unlock(&delayed_refs->lock);
2565 return head;
2566 }

	/*
	 * Grab the lock that says we are going to process all the refs for
	 * this head
	 */
2572 ret = btrfs_delayed_ref_lock(delayed_refs, head);
2573 spin_unlock(&delayed_refs->lock);

	/*
	 * We may have dropped the spin lock to get the head mutex lock, and
	 * that might have given someone else time to free the head.  If that's
	 * true, it has been removed from our list and we can move on.
	 */
2580 if (ret == -EAGAIN)
2581 head = ERR_PTR(-EAGAIN);
2582
2583 return head;
2584}
2585
2586static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
2587 struct btrfs_delayed_ref_head *locked_ref,
2588 unsigned long *run_refs)
2589{
2590 struct btrfs_fs_info *fs_info = trans->fs_info;
2591 struct btrfs_delayed_ref_root *delayed_refs;
2592 struct btrfs_delayed_extent_op *extent_op;
2593 struct btrfs_delayed_ref_node *ref;
2594 int must_insert_reserved = 0;
2595 int ret;
2596
2597 delayed_refs = &trans->transaction->delayed_refs;
2598
2599 lockdep_assert_held(&locked_ref->mutex);
2600 lockdep_assert_held(&locked_ref->lock);
2601
2602 while ((ref = select_delayed_ref(locked_ref))) {
2603 if (ref->seq &&
2604 btrfs_check_delayed_seq(fs_info, ref->seq)) {
2605 spin_unlock(&locked_ref->lock);
2606 unselect_delayed_ref_head(delayed_refs, locked_ref);
2607 return -EAGAIN;
2608 }
2609
2610 (*run_refs)++;
2611 ref->in_tree = 0;
2612 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
2613 RB_CLEAR_NODE(&ref->ref_node);
2614 if (!list_empty(&ref->add_list))
2615 list_del(&ref->add_list);
		/*
		 * When we play the delayed ref, also correct the ref_mod on
		 * head
		 */
2620 switch (ref->action) {
2621 case BTRFS_ADD_DELAYED_REF:
2622 case BTRFS_ADD_DELAYED_EXTENT:
2623 locked_ref->ref_mod -= ref->ref_mod;
2624 break;
2625 case BTRFS_DROP_DELAYED_REF:
2626 locked_ref->ref_mod += ref->ref_mod;
2627 break;
2628 default:
2629 WARN_ON(1);
2630 }
2631 atomic_dec(&delayed_refs->num_entries);

		/*
		 * Record the must_insert_reserved flag before we drop the
		 * spin lock.
		 */
2637 must_insert_reserved = locked_ref->must_insert_reserved;
2638 locked_ref->must_insert_reserved = 0;
2639
2640 extent_op = locked_ref->extent_op;
2641 locked_ref->extent_op = NULL;
2642 spin_unlock(&locked_ref->lock);
2643
2644 ret = run_one_delayed_ref(trans, ref, extent_op,
2645 must_insert_reserved);
2646
2647 btrfs_free_delayed_extent_op(extent_op);
2648 if (ret) {
2649 unselect_delayed_ref_head(delayed_refs, locked_ref);
2650 btrfs_put_delayed_ref(ref);
2651 btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
2652 ret);
2653 return ret;
2654 }
2655
2656 btrfs_put_delayed_ref(ref);
2657 cond_resched();
2658
2659 spin_lock(&locked_ref->lock);
2660 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2661 }
2662
2663 return 0;
2664}
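
/*
 * An example of the ref_mod bookkeeping above: a head with one ADD of
 * ref_mod 2 and one DROP of ref_mod 1 carries a net head->ref_mod of +1.
 * After the ADD is run, head->ref_mod becomes 1 - 2 = -1, which is exactly
 * the net effect of the DROP still queued on the head.
 */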

/*
 * Returns 0 on success or if called with an aborted transaction
 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
 */
2670static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2671 unsigned long nr)
2672{
2673 struct btrfs_fs_info *fs_info = trans->fs_info;
2674 struct btrfs_delayed_ref_root *delayed_refs;
2675 struct btrfs_delayed_ref_head *locked_ref = NULL;
2676 ktime_t start = ktime_get();
2677 int ret;
2678 unsigned long count = 0;
2679 unsigned long actual_count = 0;
2680
2681 delayed_refs = &trans->transaction->delayed_refs;
2682 do {
2683 if (!locked_ref) {
2684 locked_ref = btrfs_obtain_ref_head(trans);
2685 if (IS_ERR_OR_NULL(locked_ref)) {
2686 if (PTR_ERR(locked_ref) == -EAGAIN) {
2687 continue;
2688 } else {
2689 break;
2690 }
2691 }
2692 count++;
2693 }

		/*
		 * We need to try and merge add/drops of the same ref since we
		 * can run into issues with relocate dropping the implicit ref
		 * and then it being added back again before the drop can
		 * finish.  If we merged anything we need to re-loop so we can
		 * get a good ref.
		 * Or we can get node references of the same type that weren't
		 * merged when created due to bumps in the tree mod seq, and
		 * we need to merge them to prevent adding an inline extent
		 * backref before dropping it (triggering a BUG_ON at
		 * insert_inline_extent_backref()).
		 */
2706 spin_lock(&locked_ref->lock);
2707 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
2708
2709 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref,
2710 &actual_count);
2711 if (ret < 0 && ret != -EAGAIN) {
			/*
			 * Error, btrfs_run_delayed_refs_for_head already
			 * unlocked everything so just bail out
			 */
2716 return ret;
2717 } else if (!ret) {
			/*
			 * Success, perform the usual cleanup of a processed
			 * head
			 */
2722 ret = cleanup_ref_head(trans, locked_ref);
			if (ret > 0) {
				/* We dropped our lock, we need to loop. */
2725 ret = 0;
2726 continue;
2727 } else if (ret) {
2728 return ret;
2729 }
2730 }
2731
		/*
		 * Either success case or btrfs_run_delayed_refs_for_head
		 * returned -EAGAIN, meaning we need to select another head
		 */

2737 locked_ref = NULL;
2738 cond_resched();
2739 } while ((nr != -1 && count < nr) || locked_ref);
2740

	/*
	 * We don't want to include ref heads since we can have empty ref heads
	 * and those will drastically skew our runtime down since we just do
	 * accounting, no actual extent tree updates.
	 */
2746 if (actual_count > 0) {
2747 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2748 u64 avg;
2749

		/*
		 * We weigh the current average higher than our current runtime
		 * to avoid large swings in the average.
		 */
2754 spin_lock(&delayed_refs->lock);
2755 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2756 fs_info->avg_delayed_ref_runtime = avg >> 2;
2757 spin_unlock(&delayed_refs->lock);
2758 }
2759 return 0;
2760}
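
/*
 * The average delayed ref runtime above is an exponential moving average
 * weighted 3:1 toward the previous value: for example, a stored average of
 * 800ns and a measured runtime of 1200ns give (800 * 3 + 1200) >> 2 = 900ns.
 */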
2761
2762#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning
 */
2768static u64 find_middle(struct rb_root *root)
2769{
2770 struct rb_node *n = root->rb_node;
2771 struct btrfs_delayed_ref_node *entry;
2772 int alt = 1;
2773 u64 middle;
2774 u64 first = 0, last = 0;
2775
2776 n = rb_first(root);
2777 if (n) {
2778 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2779 first = entry->bytenr;
2780 }
2781 n = rb_last(root);
2782 if (n) {
2783 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2784 last = entry->bytenr;
2785 }
2786 n = root->rb_node;
2787
2788 while (n) {
2789 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2790 WARN_ON(!entry->in_tree);
2791
2792 middle = entry->bytenr;
2793
2794 if (alt)
2795 n = n->rb_left;
2796 else
2797 n = n->rb_right;
2798
2799 alt = 1 - alt;
2800 }
2801 return middle;
2802}
2803#endif
2804
2805static inline u64 heads_to_leaves(struct btrfs_fs_info *fs_info, u64 heads)
2806{
2807 u64 num_bytes;
2808
2809 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2810 sizeof(struct btrfs_extent_inline_ref));
2811 if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2812 num_bytes += heads * sizeof(struct btrfs_tree_block_info);

	/*
	 * This is only a heuristic used for sizing reservations; leaves are
	 * rarely packed completely full, so the result errs on the low side.
	 */
2818 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(fs_info));
2819}
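
/*
 * A rough worked example for heads_to_leaves(), assuming SKINNY_METADATA,
 * 24-byte extent items and 9-byte inline refs: each head costs about 33
 * bytes, so a leaf with roughly 16KiB of item space covers on the order of
 * 490 heads.
 */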
2820
/*
 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
 * would require to store the csums for that many bytes.
 */
2825u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2826{
2827 u64 csum_size;
2828 u64 num_csums_per_leaf;
2829 u64 num_csums;
2830
2831 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info);
2832 num_csums_per_leaf = div64_u64(csum_size,
2833 (u64)btrfs_super_csum_size(fs_info->super_copy));
2834 num_csums = div64_u64(csum_bytes, fs_info->sectorsize);
2835 num_csums += num_csums_per_leaf - 1;
2836 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2837 return num_csums;
2838}
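
/*
 * A rough worked example, assuming 4-byte crc32c checksums over 4KiB sectors
 * and a max item size a little under 16KiB: one leaf holds about 4000 csums,
 * so 1GiB of data (262144 sectors) needs roughly 262144 / 4000 ~= 66 leaves.
 */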
2839
2840bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
2841{
2842 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
2843 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2844 bool ret = false;
2845 u64 reserved;
2846
2847 spin_lock(&global_rsv->lock);
2848 reserved = global_rsv->reserved;
2849 spin_unlock(&global_rsv->lock);
2850

	/*
	 * Since the global rsv can be stolen from to satisfy delayed refs,
	 * compare the size of the delayed refs rsv against what is actually
	 * reserved in the delayed refs rsv plus the global rsv; if the
	 * reservations don't cover it, it's time to throttle.
	 */
2857 spin_lock(&delayed_refs_rsv->lock);
2858 reserved += delayed_refs_rsv->reserved;
2859 if (delayed_refs_rsv->size >= reserved)
2860 ret = true;
2861 spin_unlock(&delayed_refs_rsv->lock);
2862 return ret;
2863}
2864
2865int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
2866{
2867 u64 num_entries =
2868 atomic_read(&trans->transaction->delayed_refs.num_entries);
2869 u64 avg_runtime;
2870 u64 val;
2871
2872 smp_mb();
2873 avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
2874 val = num_entries * avg_runtime;
2875 if (val >= NSEC_PER_SEC)
2876 return 1;
2877 if (val >= NSEC_PER_SEC / 2)
2878 return 2;
2879
2880 return btrfs_check_space_for_delayed_refs(trans->fs_info);
2881}
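
/*
 * A worked example of the throttling heuristic above: with an average ref
 * runtime of 1000ns, 600000 queued entries estimate 0.6s of work, which is
 * over NSEC_PER_SEC / 2 but under NSEC_PER_SEC, so the function returns 2;
 * past a full second of estimated work it returns 1.
 */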
2882
2883struct async_delayed_refs {
2884 struct btrfs_root *root;
2885 u64 transid;
2886 int count;
2887 int error;
2888 int sync;
2889 struct completion wait;
2890 struct btrfs_work work;
2891};
2892
2893static inline struct async_delayed_refs *
2894to_async_delayed_refs(struct btrfs_work *work)
2895{
2896 return container_of(work, struct async_delayed_refs, work);
2897}
2898
2899static void delayed_ref_async_start(struct btrfs_work *work)
2900{
2901 struct async_delayed_refs *async = to_async_delayed_refs(work);
2902 struct btrfs_trans_handle *trans;
2903 struct btrfs_fs_info *fs_info = async->root->fs_info;
2904 int ret;
2905
	/* if the commit is already started, we don't need to wait here */
2907 if (btrfs_transaction_blocked(fs_info))
2908 goto done;
2909
2910 trans = btrfs_join_transaction(async->root);
2911 if (IS_ERR(trans)) {
2912 async->error = PTR_ERR(trans);
2913 goto done;
2914 }
2915
	/*
	 * trans->sync means that when we call end_transaction, we won't
	 * wait on delayed refs
	 */
2920 trans->sync = true;
2921
	/* Don't bother flushing if we got into a different transaction */
2923 if (trans->transid > async->transid)
2924 goto end;
2925
2926 ret = btrfs_run_delayed_refs(trans, async->count);
2927 if (ret)
2928 async->error = ret;
2929end:
2930 ret = btrfs_end_transaction(trans);
2931 if (ret && !async->error)
2932 async->error = ret;
2933done:
2934 if (async->sync)
2935 complete(&async->wait);
2936 else
2937 kfree(async);
2938}
2939
2940int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
2941 unsigned long count, u64 transid, int wait)
2942{
2943 struct async_delayed_refs *async;
2944 int ret;
2945
2946 async = kmalloc(sizeof(*async), GFP_NOFS);
2947 if (!async)
2948 return -ENOMEM;
2949
2950 async->root = fs_info->tree_root;
2951 async->count = count;
2952 async->error = 0;
2953 async->transid = transid;
2954 if (wait)
2955 async->sync = 1;
2956 else
2957 async->sync = 0;
2958 init_completion(&async->wait);
2959
2960 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2961 delayed_ref_async_start, NULL, NULL);
2962
2963 btrfs_queue_work(fs_info->extent_workers, &async->work);
2964
2965 if (wait) {
2966 wait_for_completion(&async->wait);
2967 ret = async->error;
2968 kfree(async);
2969 return ret;
2970 }
2971 return 0;
2972}
2973

/*
 * this starts processing the delayed reference count updates and
 * extent allocations we've been waiting on that show up in the
 * dirty_extents list.
 *
 * count == 0 means to run all currently pending delayed refs (twice the
 * number of entries at the time of the call, to cover refs added while we
 * run).  count == (unsigned long)-1 means to keep going until every delayed
 * ref head is processed, including heads added while running.
 */
2984int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2985 unsigned long count)
2986{
2987 struct btrfs_fs_info *fs_info = trans->fs_info;
2988 struct rb_node *node;
2989 struct btrfs_delayed_ref_root *delayed_refs;
2990 struct btrfs_delayed_ref_head *head;
2991 int ret;
2992 int run_all = count == (unsigned long)-1;
2993
	/* We'll clean this up in btrfs_cleanup_transaction */
2995 if (trans->aborted)
2996 return 0;
2997
2998 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2999 return 0;
3000
3001 delayed_refs = &trans->transaction->delayed_refs;
3002 if (count == 0)
3003 count = atomic_read(&delayed_refs->num_entries) * 2;
3004
3005again:
3006#ifdef SCRAMBLE_DELAYED_REFS
3007 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
3008#endif
3009 ret = __btrfs_run_delayed_refs(trans, count);
3010 if (ret < 0) {
3011 btrfs_abort_transaction(trans, ret);
3012 return ret;
3013 }
3014
3015 if (run_all) {
3016 if (!list_empty(&trans->new_bgs))
3017 btrfs_create_pending_block_groups(trans);
3018
3019 spin_lock(&delayed_refs->lock);
3020 node = rb_first_cached(&delayed_refs->href_root);
3021 if (!node) {
3022 spin_unlock(&delayed_refs->lock);
3023 goto out;
3024 }
3025 head = rb_entry(node, struct btrfs_delayed_ref_head,
3026 href_node);
3027 refcount_inc(&head->refs);
3028 spin_unlock(&delayed_refs->lock);
3029
		/* Mutex was contended, block until it's released and retry. */
3031 mutex_lock(&head->mutex);
3032 mutex_unlock(&head->mutex);
3033
3034 btrfs_put_delayed_ref_head(head);
3035 cond_resched();
3036 goto again;
3037 }
3038out:
3039 return 0;
3040}
3041
3042int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3043 struct btrfs_fs_info *fs_info,
3044 u64 bytenr, u64 num_bytes, u64 flags,
3045 int level, int is_data)
3046{
3047 struct btrfs_delayed_extent_op *extent_op;
3048 int ret;
3049
3050 extent_op = btrfs_alloc_delayed_extent_op();
3051 if (!extent_op)
3052 return -ENOMEM;
3053
3054 extent_op->flags_to_set = flags;
3055 extent_op->update_flags = true;
3056 extent_op->update_key = false;
3057 extent_op->is_data = is_data ? true : false;
3058 extent_op->level = level;
3059
3060 ret = btrfs_add_delayed_extent_op(fs_info, trans, bytenr,
3061 num_bytes, extent_op);
3062 if (ret)
3063 btrfs_free_delayed_extent_op(extent_op);
3064 return ret;
3065}
3066
3067static noinline int check_delayed_ref(struct btrfs_root *root,
3068 struct btrfs_path *path,
3069 u64 objectid, u64 offset, u64 bytenr)
3070{
3071 struct btrfs_delayed_ref_head *head;
3072 struct btrfs_delayed_ref_node *ref;
3073 struct btrfs_delayed_data_ref *data_ref;
3074 struct btrfs_delayed_ref_root *delayed_refs;
3075 struct btrfs_transaction *cur_trans;
3076 struct rb_node *node;
3077 int ret = 0;
3078
3079 spin_lock(&root->fs_info->trans_lock);
3080 cur_trans = root->fs_info->running_transaction;
3081 if (cur_trans)
3082 refcount_inc(&cur_trans->use_count);
3083 spin_unlock(&root->fs_info->trans_lock);
3084 if (!cur_trans)
3085 return 0;
3086
3087 delayed_refs = &cur_trans->delayed_refs;
3088 spin_lock(&delayed_refs->lock);
3089 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
3090 if (!head) {
3091 spin_unlock(&delayed_refs->lock);
3092 btrfs_put_transaction(cur_trans);
3093 return 0;
3094 }
3095
3096 if (!mutex_trylock(&head->mutex)) {
3097 refcount_inc(&head->refs);
3098 spin_unlock(&delayed_refs->lock);
3099
3100 btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let
		 * caller try again
		 */
3106 mutex_lock(&head->mutex);
3107 mutex_unlock(&head->mutex);
3108 btrfs_put_delayed_ref_head(head);
3109 btrfs_put_transaction(cur_trans);
3110 return -EAGAIN;
3111 }
3112 spin_unlock(&delayed_refs->lock);
3113
3114 spin_lock(&head->lock);
	/*
	 * XXX: We should replace this with a proper search function in the
	 * future.
	 */
3119 for (node = rb_first_cached(&head->ref_tree); node;
3120 node = rb_next(node)) {
3121 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
3122
3123 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
3124 ret = 1;
3125 break;
3126 }
3127
3128 data_ref = btrfs_delayed_node_to_data_ref(ref);

		/*
		 * If our ref doesn't match the one we're currently looking at
		 * then we have a cross reference.
		 */
3134 if (data_ref->root != root->root_key.objectid ||
3135 data_ref->objectid != objectid ||
3136 data_ref->offset != offset) {
3137 ret = 1;
3138 break;
3139 }
3140 }
3141 spin_unlock(&head->lock);
3142 mutex_unlock(&head->mutex);
3143 btrfs_put_transaction(cur_trans);
3144 return ret;
3145}
3146
3147static noinline int check_committed_ref(struct btrfs_root *root,
3148 struct btrfs_path *path,
3149 u64 objectid, u64 offset, u64 bytenr)
3150{
3151 struct btrfs_fs_info *fs_info = root->fs_info;
3152 struct btrfs_root *extent_root = fs_info->extent_root;
3153 struct extent_buffer *leaf;
3154 struct btrfs_extent_data_ref *ref;
3155 struct btrfs_extent_inline_ref *iref;
3156 struct btrfs_extent_item *ei;
3157 struct btrfs_key key;
3158 u32 item_size;
3159 int type;
3160 int ret;
3161
3162 key.objectid = bytenr;
3163 key.offset = (u64)-1;
3164 key.type = BTRFS_EXTENT_ITEM_KEY;
3165
3166 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3167 if (ret < 0)
3168 goto out;
3169 BUG_ON(ret == 0);
3170
3171 ret = -ENOENT;
3172 if (path->slots[0] == 0)
3173 goto out;
3174
3175 path->slots[0]--;
3176 leaf = path->nodes[0];
3177 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3178
3179 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
3180 goto out;
3181
3182 ret = 1;
3183 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3184 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
3185
3186 if (item_size != sizeof(*ei) +
3187 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
3188 goto out;
3189
3190 if (btrfs_extent_generation(leaf, ei) <=
3191 btrfs_root_last_snapshot(&root->root_item))
3192 goto out;
3193
3194 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
3195
3196 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
3197 if (type != BTRFS_EXTENT_DATA_REF_KEY)
3198 goto out;
3199
3200 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3201 if (btrfs_extent_refs(leaf, ei) !=
3202 btrfs_extent_data_ref_count(leaf, ref) ||
3203 btrfs_extent_data_ref_root(leaf, ref) !=
3204 root->root_key.objectid ||
3205 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
3206 btrfs_extent_data_ref_offset(leaf, ref) != offset)
3207 goto out;
3208
3209 ret = 0;
3210out:
3211 return ret;
3212}
3213
3214int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
3215 u64 bytenr)
3216{
3217 struct btrfs_path *path;
3218 int ret;
3219
3220 path = btrfs_alloc_path();
3221 if (!path)
3222 return -ENOMEM;
3223
3224 do {
3225 ret = check_committed_ref(root, path, objectid,
3226 offset, bytenr);
3227 if (ret && ret != -ENOENT)
3228 goto out;
3229
3230 ret = check_delayed_ref(root, path, objectid, offset, bytenr);
3231 } while (ret == -EAGAIN);
3232
3233out:
3234 btrfs_free_path(path);
3235 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3236 WARN_ON(ret > 0);
3237 return ret;
3238}
3239
3240static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3241 struct btrfs_root *root,
3242 struct extent_buffer *buf,
3243 int full_backref, int inc)
3244{
3245 struct btrfs_fs_info *fs_info = root->fs_info;
3246 u64 bytenr;
3247 u64 num_bytes;
3248 u64 parent;
3249 u64 ref_root;
3250 u32 nritems;
3251 struct btrfs_key key;
3252 struct btrfs_file_extent_item *fi;
3253 int i;
3254 int level;
3255 int ret = 0;
3256 int (*process_func)(struct btrfs_trans_handle *,
3257 struct btrfs_root *,
3258 u64, u64, u64, u64, u64, u64);
3259
3260
3261 if (btrfs_is_testing(fs_info))
3262 return 0;
3263
3264 ref_root = btrfs_header_owner(buf);
3265 nritems = btrfs_header_nritems(buf);
3266 level = btrfs_header_level(buf);
3267
3268 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
3269 return 0;
3270
3271 if (inc)
3272 process_func = btrfs_inc_extent_ref;
3273 else
3274 process_func = btrfs_free_extent;
3275
3276 if (full_backref)
3277 parent = buf->start;
3278 else
3279 parent = 0;
3280
3281 for (i = 0; i < nritems; i++) {
3282 if (level == 0) {
3283 btrfs_item_key_to_cpu(buf, &key, i);
3284 if (key.type != BTRFS_EXTENT_DATA_KEY)
3285 continue;
3286 fi = btrfs_item_ptr(buf, i,
3287 struct btrfs_file_extent_item);
3288 if (btrfs_file_extent_type(buf, fi) ==
3289 BTRFS_FILE_EXTENT_INLINE)
3290 continue;
3291 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3292 if (bytenr == 0)
3293 continue;
3294
3295 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3296 key.offset -= btrfs_file_extent_offset(buf, fi);
3297 ret = process_func(trans, root, bytenr, num_bytes,
3298 parent, ref_root, key.objectid,
3299 key.offset);
3300 if (ret)
3301 goto fail;
3302 } else {
3303 bytenr = btrfs_node_blockptr(buf, i);
3304 num_bytes = fs_info->nodesize;
3305 ret = process_func(trans, root, bytenr, num_bytes,
3306 parent, ref_root, level - 1, 0);
3307 if (ret)
3308 goto fail;
3309 }
3310 }
3311 return 0;
3312fail:
3313 return ret;
3314}
3315
3316int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3317 struct extent_buffer *buf, int full_backref)
3318{
3319 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3320}
3321
3322int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3323 struct extent_buffer *buf, int full_backref)
3324{
3325 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3326}
3327
3328static int write_one_cache_group(struct btrfs_trans_handle *trans,
3329 struct btrfs_fs_info *fs_info,
3330 struct btrfs_path *path,
3331 struct btrfs_block_group_cache *cache)
3332{
3333 int ret;
3334 struct btrfs_root *extent_root = fs_info->extent_root;
3335 unsigned long bi;
3336 struct extent_buffer *leaf;
3337
3338 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3339 if (ret) {
3340 if (ret > 0)
3341 ret = -ENOENT;
3342 goto fail;
3343 }
3344
3345 leaf = path->nodes[0];
3346 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3347 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3348 btrfs_mark_buffer_dirty(leaf);
3349fail:
3350 btrfs_release_path(path);
3351 return ret;
3353}
3354
3355static struct btrfs_block_group_cache *
3356next_block_group(struct btrfs_fs_info *fs_info,
3357 struct btrfs_block_group_cache *cache)
3358{
3359 struct rb_node *node;
3360
3361 spin_lock(&fs_info->block_group_cache_lock);

	/* If our block group was removed, we need a full search. */
3364 if (RB_EMPTY_NODE(&cache->cache_node)) {
3365 const u64 next_bytenr = cache->key.objectid + cache->key.offset;
3366
3367 spin_unlock(&fs_info->block_group_cache_lock);
3368 btrfs_put_block_group(cache);
		cache = btrfs_lookup_first_block_group(fs_info, next_bytenr);
		return cache;
3370 }
3371 node = rb_next(&cache->cache_node);
3372 btrfs_put_block_group(cache);
3373 if (node) {
3374 cache = rb_entry(node, struct btrfs_block_group_cache,
3375 cache_node);
3376 btrfs_get_block_group(cache);
3377 } else
3378 cache = NULL;
3379 spin_unlock(&fs_info->block_group_cache_lock);
3380 return cache;
3381}
3382
3383static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3384 struct btrfs_trans_handle *trans,
3385 struct btrfs_path *path)
3386{
3387 struct btrfs_fs_info *fs_info = block_group->fs_info;
3388 struct btrfs_root *root = fs_info->tree_root;
3389 struct inode *inode = NULL;
3390 struct extent_changeset *data_reserved = NULL;
3391 u64 alloc_hint = 0;
3392 int dcs = BTRFS_DC_ERROR;
3393 u64 num_pages = 0;
3394 int retries = 0;
3395 int ret = 0;
3396
	/*
	 * If this block group is smaller than 100 megs don't bother caching
	 * the block group.
	 */
3401 if (block_group->key.offset < (100 * SZ_1M)) {
3402 spin_lock(&block_group->lock);
3403 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
3404 spin_unlock(&block_group->lock);
3405 return 0;
3406 }
3407
3408 if (trans->aborted)
3409 return 0;
3410again:
3411 inode = lookup_free_space_inode(fs_info, block_group, path);
3412 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
3413 ret = PTR_ERR(inode);
3414 btrfs_release_path(path);
3415 goto out;
3416 }
3417
3418 if (IS_ERR(inode)) {
3419 BUG_ON(retries);
3420 retries++;
3421
3422 if (block_group->ro)
3423 goto out_free;
3424
3425 ret = create_free_space_inode(fs_info, trans, block_group,
3426 path);
3427 if (ret)
3428 goto out_free;
3429 goto again;
3430 }
3431
	/*
	 * We want to set the generation to 0, that way if anything goes wrong
	 * from here on out we know not to trust this cache when we load up
	 * next time.
	 */
3437 BTRFS_I(inode)->generation = 0;
3438 ret = btrfs_update_inode(trans, root, inode);
3439 if (ret) {
		/*
		 * So theoretically we could recover from this, simply set the
		 * super cache generation to 0 so we know to invalidate the
		 * cache, but then we'd have to keep track of the block groups
		 * that fail this way so we know we _have_ to reset this cache
		 * before the next commit or risk reading stale cache.  So to
		 * limit our exposure to horrible edge cases lets just abort
		 * the transaction, this only happens in really bad situations
		 * anyway.
		 */
3450 btrfs_abort_transaction(trans, ret);
3451 goto out_put;
3452 }
3453 WARN_ON(ret);
3454
	/* We've already setup this transaction, go ahead and exit */
3456 if (block_group->cache_generation == trans->transid &&
3457 i_size_read(inode)) {
3458 dcs = BTRFS_DC_SETUP;
3459 goto out_put;
3460 }
3461
3462 if (i_size_read(inode) > 0) {
3463 ret = btrfs_check_trunc_cache_free_space(fs_info,
3464 &fs_info->global_block_rsv);
3465 if (ret)
3466 goto out_put;
3467
3468 ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
3469 if (ret)
3470 goto out_put;
3471 }
3472
3473 spin_lock(&block_group->lock);
3474 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3475 !btrfs_test_opt(fs_info, SPACE_CACHE)) {
		/*
		 * don't bother trying to write stuff out _if_
		 * a) we're not cached,
		 * b) we're with nospace_cache mount option,
		 * c) we're with v2 space_cache (FREE_SPACE_TREE).
		 */
3482 dcs = BTRFS_DC_WRITTEN;
3483 spin_unlock(&block_group->lock);
3484 goto out_put;
3485 }
3486 spin_unlock(&block_group->lock);
3487
	/*
	 * We hit an ENOSPC when setting up the cache in this transaction, just
	 * skip doing the setup, we've already cleared the cache so we're safe.
	 */
3492 if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
3493 ret = -ENOSPC;
3494 goto out_put;
3495 }
3496
	/*
	 * Try to preallocate enough space based on how big the block group is.
	 * Keep in mind this has to include any pinned space which could end up
	 * taking up quite a bit since it's not folded into the other space
	 * cache.
	 */
3503 num_pages = div_u64(block_group->key.offset, SZ_256M);
3504 if (!num_pages)
3505 num_pages = 1;
3506
3507 num_pages *= 16;
3508 num_pages *= PAGE_SIZE;
3509
3510 ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
3511 if (ret)
3512 goto out_put;
3513
3514 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
3515 num_pages, num_pages,
3516 &alloc_hint);
	/*
	 * Our cache requires contiguous chunks so that we don't modify a bunch
	 * of metadata or split extents when writing the cache out, which means
	 * we can enospc if we are heavily fragmented in addition to just normal
	 * out of space conditions.  So if we hit this just skip setting up any
	 * other block groups for this transaction, maybe we'll unpin enough
	 * space the next time around.
	 */
3525 if (!ret)
3526 dcs = BTRFS_DC_SETUP;
3527 else if (ret == -ENOSPC)
3528 set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
3529
3530out_put:
3531 iput(inode);
3532out_free:
3533 btrfs_release_path(path);
3534out:
3535 spin_lock(&block_group->lock);
3536 if (!ret && dcs == BTRFS_DC_SETUP)
3537 block_group->cache_generation = trans->transid;
3538 block_group->disk_cache_state = dcs;
3539 spin_unlock(&block_group->lock);
3540
3541 extent_changeset_free(data_reserved);
3542 return ret;
3543}
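
/*
 * A sizing example for the preallocation in cache_save_setup(): a 1GiB
 * block group gives num_pages = 1GiB / 256MiB = 4, scaled to 4 * 16 = 64
 * pages, i.e. 256KiB of space cache assuming 4KiB pages.
 */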
3544
3545int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3546 struct btrfs_fs_info *fs_info)
3547{
3548 struct btrfs_block_group_cache *cache, *tmp;
3549 struct btrfs_transaction *cur_trans = trans->transaction;
3550 struct btrfs_path *path;
3551
3552 if (list_empty(&cur_trans->dirty_bgs) ||
3553 !btrfs_test_opt(fs_info, SPACE_CACHE))
3554 return 0;
3555
3556 path = btrfs_alloc_path();
3557 if (!path)
3558 return -ENOMEM;
3559
	/* Could add new block groups, use _safe just in case */
3561 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3562 dirty_list) {
3563 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3564 cache_save_setup(cache, trans, path);
3565 }
3566
3567 btrfs_free_path(path);
3568 return 0;
3569}
3570

/*
 * Transaction commit does final block group cache writeback during a
 * critical section where nothing is allowed to change the FS.  This is
 * required in order for the cache to actually match the block group,
 * but can introduce a lot of latency into the commit.
 *
 * So, btrfs_start_dirty_block_groups is here to kick off block group
 * cache IO.  There's a chance we'll have to redo some of it if the
 * block group changes again during the commit, but it greatly reduces
 * the commit latency by getting rid of the easy block groups while
 * we're still allowing others to join the commit.
 */
3583int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
3584{
3585 struct btrfs_fs_info *fs_info = trans->fs_info;
3586 struct btrfs_block_group_cache *cache;
3587 struct btrfs_transaction *cur_trans = trans->transaction;
3588 int ret = 0;
3589 int should_put;
3590 struct btrfs_path *path = NULL;
3591 LIST_HEAD(dirty);
3592 struct list_head *io = &cur_trans->io_bgs;
3593 int num_started = 0;
3594 int loops = 0;
3595
3596 spin_lock(&cur_trans->dirty_bgs_lock);
3597 if (list_empty(&cur_trans->dirty_bgs)) {
3598 spin_unlock(&cur_trans->dirty_bgs_lock);
3599 return 0;
3600 }
3601 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3602 spin_unlock(&cur_trans->dirty_bgs_lock);
3603
3604again:
	/*
	 * make sure all the block groups on our dirty list actually
	 * exist
	 */
3609 btrfs_create_pending_block_groups(trans);
3610
3611 if (!path) {
3612 path = btrfs_alloc_path();
3613 if (!path)
3614 return -ENOMEM;
3615 }
3616
	/*
	 * cache_write_mutex is here only to save us from balance or automatic
	 * removal of empty block groups deleting this block group while we are
	 * writing out the cache
	 */
3622 mutex_lock(&trans->transaction->cache_write_mutex);
3623 while (!list_empty(&dirty)) {
3624 bool drop_reserve = true;
3625
3626 cache = list_first_entry(&dirty,
3627 struct btrfs_block_group_cache,
3628 dirty_list);
3629
		/*
		 * this can happen if something re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
3634 if (!list_empty(&cache->io_list)) {
3635 list_del_init(&cache->io_list);
3636 btrfs_wait_cache_io(trans, cache, path);
3637 btrfs_put_block_group(cache);
3638 }
3639

		/*
		 * btrfs_wait_cache_io uses the cache->dirty_list to decide
		 * if it should update the cache_state.  Don't delete
		 * until after we wait.
		 *
		 * Since we're not running in the commit critical section
		 * we need the dirty_bgs_lock to protect from update_block_group
		 */
3649 spin_lock(&cur_trans->dirty_bgs_lock);
3650 list_del_init(&cache->dirty_list);
3651 spin_unlock(&cur_trans->dirty_bgs_lock);
3652
3653 should_put = 1;
3654
3655 cache_save_setup(cache, trans, path);
3656
3657 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3658 cache->io_ctl.inode = NULL;
3659 ret = btrfs_write_out_cache(fs_info, trans,
3660 cache, path);
3661 if (ret == 0 && cache->io_ctl.inode) {
3662 num_started++;
3663 should_put = 0;

				/*
				 * The cache_write_mutex is protecting the
				 * io_list, also refer to the definition of
				 * btrfs_transaction::io_bgs for more details
				 */
3670 list_add_tail(&cache->io_list, io);
3671 } else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
3676 ret = 0;
3677 }
3678 }
3679 if (!ret) {
3680 ret = write_one_cache_group(trans, fs_info,
3681 path, cache);
			/*
			 * Our block group might still be attached to the list
			 * of new block groups in the transaction handle of some
			 * other task (struct btrfs_trans_handle->new_bgs). This
			 * means its block group item isn't yet in the extent
			 * tree. If this happens ignore the error, as we will
			 * try again later in the critical section of the
			 * transaction commit.
			 */
3691 if (ret == -ENOENT) {
3692 ret = 0;
3693 spin_lock(&cur_trans->dirty_bgs_lock);
3694 if (list_empty(&cache->dirty_list)) {
3695 list_add_tail(&cache->dirty_list,
3696 &cur_trans->dirty_bgs);
3697 btrfs_get_block_group(cache);
3698 drop_reserve = false;
3699 }
3700 spin_unlock(&cur_trans->dirty_bgs_lock);
3701 } else if (ret) {
3702 btrfs_abort_transaction(trans, ret);
3703 }
3704 }
3705
		/* if it's not on the io list, we need to put the block group */
3707 if (should_put)
3708 btrfs_put_block_group(cache);
3709 if (drop_reserve)
3710 btrfs_delayed_refs_rsv_release(fs_info, 1);
3711
3712 if (ret)
3713 break;

		/*
		 * Avoid blocking other tasks for too long. It might even save
		 * us from writing caches for block groups that are going to be
		 * removed.
		 */
3720 mutex_unlock(&trans->transaction->cache_write_mutex);
3721 mutex_lock(&trans->transaction->cache_write_mutex);
3722 }
3723 mutex_unlock(&trans->transaction->cache_write_mutex);
3724
	/*
	 * go through delayed refs for all the stuff we've just kicked off
	 * and then loop back (just once)
	 */
3729 ret = btrfs_run_delayed_refs(trans, 0);
3730 if (!ret && loops == 0) {
3731 loops++;
3732 spin_lock(&cur_trans->dirty_bgs_lock);
3733 list_splice_init(&cur_trans->dirty_bgs, &dirty);
		/*
		 * dirty_bgs_lock protects us from concurrent block group
		 * deletes too (not just cache_write_mutex).
		 */
3738 if (!list_empty(&dirty)) {
3739 spin_unlock(&cur_trans->dirty_bgs_lock);
3740 goto again;
3741 }
3742 spin_unlock(&cur_trans->dirty_bgs_lock);
3743 } else if (ret < 0) {
3744 btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
3745 }
3746
3747 btrfs_free_path(path);
3748 return ret;
3749}
3750
3751int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3752 struct btrfs_fs_info *fs_info)
3753{
3754 struct btrfs_block_group_cache *cache;
3755 struct btrfs_transaction *cur_trans = trans->transaction;
3756 int ret = 0;
3757 int should_put;
3758 struct btrfs_path *path;
3759 struct list_head *io = &cur_trans->io_bgs;
3760 int num_started = 0;
3761
3762 path = btrfs_alloc_path();
3763 if (!path)
3764 return -ENOMEM;
3765
	/*
	 * Even though we are in the critical section of the transaction commit,
	 * we can still have concurrent tasks adding elements to this
	 * transaction's list of dirty block groups. These tasks correspond to
	 * endio free space workers started when writeback finishes for a
	 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
	 * allocate new block groups as a result of COWing nodes of the root
	 * tree when updating the free space inode. The writeback for the space
	 * caches is triggered by an earlier call to
	 * btrfs_start_dirty_block_groups() and iterations of the following
	 * loop.
	 * Also we want to do the cache_save_setup first and then run the
	 * delayed refs to make sure we have the best chance at doing this all
	 * in one shot.
	 */
3781 spin_lock(&cur_trans->dirty_bgs_lock);
3782 while (!list_empty(&cur_trans->dirty_bgs)) {
3783 cache = list_first_entry(&cur_trans->dirty_bgs,
3784 struct btrfs_block_group_cache,
3785 dirty_list);
3786
		/*
		 * this can happen if cache_save_setup re-dirties a block
		 * group that is already under IO.  Just wait for it to
		 * finish and then do it all again
		 */
3792 if (!list_empty(&cache->io_list)) {
3793 spin_unlock(&cur_trans->dirty_bgs_lock);
3794 list_del_init(&cache->io_list);
3795 btrfs_wait_cache_io(trans, cache, path);
3796 btrfs_put_block_group(cache);
3797 spin_lock(&cur_trans->dirty_bgs_lock);
3798 }
3799
		/*
		 * don't remove from the dirty list until after we've waited
		 * on any pending IO
		 */
3804 list_del_init(&cache->dirty_list);
3805 spin_unlock(&cur_trans->dirty_bgs_lock);
3806 should_put = 1;
3807
3808 cache_save_setup(cache, trans, path);
3809
3810 if (!ret)
3811 ret = btrfs_run_delayed_refs(trans,
3812 (unsigned long) -1);
3813
3814 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3815 cache->io_ctl.inode = NULL;
3816 ret = btrfs_write_out_cache(fs_info, trans,
3817 cache, path);
3818 if (ret == 0 && cache->io_ctl.inode) {
3819 num_started++;
3820 should_put = 0;
3821 list_add_tail(&cache->io_list, io);
3822 } else {
				/*
				 * if we failed to write the cache, the
				 * generation will be bad and life goes on
				 */
3827 ret = 0;
3828 }
3829 }
3830 if (!ret) {
3831 ret = write_one_cache_group(trans, fs_info,
3832 path, cache);
			/*
			 * One of the free space endio workers might have
			 * created a new block group while updating a free space
			 * cache's inode (at inode.c:btrfs_finish_ordered_io())
			 * and hasn't released its transaction handle yet, in
			 * which case the new block group is still attached to
			 * its transaction handle and its creation has not
			 * finished yet (no block group item in the extent tree
			 * yet, etc). If this is the case, wait for all the
			 * current transaction handles to finish and retry,
			 * because the new block group may be in the new_bgs
			 * list of one of those handles.
			 */
3846 if (ret == -ENOENT) {
3847 wait_event(cur_trans->writer_wait,
3848 atomic_read(&cur_trans->num_writers) == 1);
3849 ret = write_one_cache_group(trans, fs_info,
3850 path, cache);
3851 }
3852 if (ret)
3853 btrfs_abort_transaction(trans, ret);
3854 }
3855
		/* if it's not on the io list, we need to put the block group */
3857 if (should_put)
3858 btrfs_put_block_group(cache);
3859 btrfs_delayed_refs_rsv_release(fs_info, 1);
3860 spin_lock(&cur_trans->dirty_bgs_lock);
3861 }
3862 spin_unlock(&cur_trans->dirty_bgs_lock);
3863
	/*
	 * Refer to the definition of io_bgs member for details why it's safe
	 * to use it without any locking
	 */
3868 while (!list_empty(io)) {
3869 cache = list_first_entry(io, struct btrfs_block_group_cache,
3870 io_list);
3871 list_del_init(&cache->io_list);
3872 btrfs_wait_cache_io(trans, cache, path);
3873 btrfs_put_block_group(cache);
3874 }
3875
3876 btrfs_free_path(path);
3877 return ret;
3878}
3879
3880int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
3881{
3882 struct btrfs_block_group_cache *block_group;
3883 int readonly = 0;
3884
3885 block_group = btrfs_lookup_block_group(fs_info, bytenr);
3886 if (!block_group || block_group->ro)
3887 readonly = 1;
3888 if (block_group)
3889 btrfs_put_block_group(block_group);
3890 return readonly;
3891}
3892
3893bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3894{
3895 struct btrfs_block_group_cache *bg;
3896 bool ret = true;
3897
3898 bg = btrfs_lookup_block_group(fs_info, bytenr);
3899 if (!bg)
3900 return false;
3901
3902 spin_lock(&bg->lock);
3903 if (bg->ro)
3904 ret = false;
3905 else
3906 atomic_inc(&bg->nocow_writers);
3907 spin_unlock(&bg->lock);
3908
	/* no put on block group, done by btrfs_dec_nocow_writers */
3910 if (!ret)
3911 btrfs_put_block_group(bg);
3912
3913 return ret;
3915}
3916
3917void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
3918{
3919 struct btrfs_block_group_cache *bg;
3920
3921 bg = btrfs_lookup_block_group(fs_info, bytenr);
3922 ASSERT(bg);
3923 if (atomic_dec_and_test(&bg->nocow_writers))
3924 wake_up_var(&bg->nocow_writers);
	/*
	 * Once for our lookup done in btrfs_inc_nocow_writers() and once for
	 * the lookup done above.
	 */
3929 btrfs_put_block_group(bg);
3930 btrfs_put_block_group(bg);
3931}
3932
3933void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3934{
3935 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
3936}
3937
3938static const char *alloc_name(u64 flags)
3939{
3940 switch (flags) {
3941 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3942 return "mixed";
3943 case BTRFS_BLOCK_GROUP_METADATA:
3944 return "metadata";
3945 case BTRFS_BLOCK_GROUP_DATA:
3946 return "data";
3947 case BTRFS_BLOCK_GROUP_SYSTEM:
3948 return "system";
3949 default:
3950 WARN_ON(1);
3951 return "invalid-combination";
	}
3953}
3954
3955static int create_space_info(struct btrfs_fs_info *info, u64 flags)
3956{
3958 struct btrfs_space_info *space_info;
3959 int i;
3960 int ret;
3961
3962 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
3963 if (!space_info)
3964 return -ENOMEM;
3965
3966 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
3967 GFP_KERNEL);
3968 if (ret) {
3969 kfree(space_info);
3970 return ret;
3971 }
3972
3973 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3974 INIT_LIST_HEAD(&space_info->block_groups[i]);
3975 init_rwsem(&space_info->groups_sem);
3976 spin_lock_init(&space_info->lock);
3977 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3978 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3979 init_waitqueue_head(&space_info->wait);
3980 INIT_LIST_HEAD(&space_info->ro_bgs);
3981 INIT_LIST_HEAD(&space_info->tickets);
3982 INIT_LIST_HEAD(&space_info->priority_tickets);
3983
3984 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
3985 info->space_info_kobj, "%s",
3986 alloc_name(space_info->flags));
3987 if (ret) {
3988 percpu_counter_destroy(&space_info->total_bytes_pinned);
3989 kfree(space_info);
3990 return ret;
3991 }
3992
3993 list_add_rcu(&space_info->list, &info->space_info);
3994 if (flags & BTRFS_BLOCK_GROUP_DATA)
3995 info->data_sinfo = space_info;
3996
3997 return ret;
3998}
3999
4000static void update_space_info(struct btrfs_fs_info *info, u64 flags,
4001 u64 total_bytes, u64 bytes_used,
4002 u64 bytes_readonly,
4003 struct btrfs_space_info **space_info)
4004{
4005 struct btrfs_space_info *found;
4006 int factor;
4007
4008 factor = btrfs_bg_type_to_factor(flags);
4009
4010 found = __find_space_info(info, flags);
4011 ASSERT(found);
4012 spin_lock(&found->lock);
4013 found->total_bytes += total_bytes;
4014 found->disk_total += total_bytes * factor;
4015 found->bytes_used += bytes_used;
4016 found->disk_used += bytes_used * factor;
4017 found->bytes_readonly += bytes_readonly;
4018 if (total_bytes > 0)
4019 found->full = 0;
4020 space_info_add_new_bytes(info, found, total_bytes -
4021 bytes_used - bytes_readonly);
4022 spin_unlock(&found->lock);
4023 *space_info = found;
4024}
4025
4026static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
4027{
4028 u64 extra_flags = chunk_to_extended(flags) &
4029 BTRFS_EXTENDED_PROFILE_MASK;
4030
4031 write_seqlock(&fs_info->profiles_lock);
4032 if (flags & BTRFS_BLOCK_GROUP_DATA)
4033 fs_info->avail_data_alloc_bits |= extra_flags;
4034 if (flags & BTRFS_BLOCK_GROUP_METADATA)
4035 fs_info->avail_metadata_alloc_bits |= extra_flags;
4036 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4037 fs_info->avail_system_alloc_bits |= extra_flags;
4038 write_sequnlock(&fs_info->profiles_lock);
4039}
4040

/*
 * returns target flags in extended format or 0 if restripe for this
 * chunk_type is not in progress
 *
 * should be called with balance_lock held
 */
4047static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
4048{
4049 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4050 u64 target = 0;
4051
4052 if (!bctl)
4053 return 0;
4054
4055 if (flags & BTRFS_BLOCK_GROUP_DATA &&
4056 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4057 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
4058 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
4059 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4060 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
4061 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
4062 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
4063 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
4064 }
4065
4066 return target;
4067}
4068

/*
 * @flags: available profiles in extended format (see ctree.h)
 *
 * Returns reduced profile in chunk format.  If profile changing is in
 * progress (either running or paused) picks the target profile (if it's
 * already available), otherwise falls back to plain reducing.
 */
4076static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
4077{
4078 u64 num_devices = fs_info->fs_devices->rw_devices;
4079 u64 target;
4080 u64 raid_type;
4081 u64 allowed = 0;
4082
	/*
	 * see if restripe for this chunk_type is in progress, if so
	 * try to reduce to the target profile
	 */
4087 spin_lock(&fs_info->balance_lock);
4088 target = get_restripe_target(fs_info, flags);
4089 if (target) {
		/* pick target profile only if it's already available */
4091 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
4092 spin_unlock(&fs_info->balance_lock);
4093 return extended_to_chunk(target);
4094 }
4095 }
4096 spin_unlock(&fs_info->balance_lock);
4097
	/* First, mask out the RAID levels which aren't possible */
4099 for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
4100 if (num_devices >= btrfs_raid_array[raid_type].devs_min)
4101 allowed |= btrfs_raid_array[raid_type].bg_flag;
4102 }
4103 allowed &= flags;
4104
4105 if (allowed & BTRFS_BLOCK_GROUP_RAID6)
4106 allowed = BTRFS_BLOCK_GROUP_RAID6;
4107 else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
4108 allowed = BTRFS_BLOCK_GROUP_RAID5;
4109 else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
4110 allowed = BTRFS_BLOCK_GROUP_RAID10;
4111 else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
4112 allowed = BTRFS_BLOCK_GROUP_RAID1;
4113 else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
4114 allowed = BTRFS_BLOCK_GROUP_RAID0;
4115
4116 flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
4117
4118 return extended_to_chunk(flags | allowed);
4119}
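
/*
 * A reduction example: if flags carries DATA | RAID1 | RAID0 (e.g. left
 * over from an interrupted balance) and there are enough read-write devices
 * for both profiles, the preference chain above selects RAID1 and the
 * function returns DATA | RAID1 in chunk format.
 */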
4120
4121static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
4122{
4123 unsigned seq;
4124 u64 flags;
4125
4126 do {
4127 flags = orig_flags;
4128 seq = read_seqbegin(&fs_info->profiles_lock);
4129
4130 if (flags & BTRFS_BLOCK_GROUP_DATA)
4131 flags |= fs_info->avail_data_alloc_bits;
4132 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
4133 flags |= fs_info->avail_system_alloc_bits;
4134 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
4135 flags |= fs_info->avail_metadata_alloc_bits;
4136 } while (read_seqretry(&fs_info->profiles_lock, seq));
4137
4138 return btrfs_reduce_alloc_profile(fs_info, flags);
4139}
4140
4141static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
4142{
4143 struct btrfs_fs_info *fs_info = root->fs_info;
4144 u64 flags;
4145 u64 ret;
4146
4147 if (data)
4148 flags = BTRFS_BLOCK_GROUP_DATA;
4149 else if (root == fs_info->chunk_root)
4150 flags = BTRFS_BLOCK_GROUP_SYSTEM;
4151 else
4152 flags = BTRFS_BLOCK_GROUP_METADATA;
4153
4154 ret = get_alloc_profile(fs_info, flags);
4155 return ret;
4156}
4157
4158u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
4159{
4160 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
4161}
4162
4163u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
4164{
4165 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4166}
4167
4168u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4169{
4170 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4171}
4172
4173static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4174 bool may_use_included)
4175{
4176 ASSERT(s_info);
4177 return s_info->bytes_used + s_info->bytes_reserved +
4178 s_info->bytes_pinned + s_info->bytes_readonly +
4179 (may_use_included ? s_info->bytes_may_use : 0);
4180}
4181
4182int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
4183{
4184 struct btrfs_root *root = inode->root;
4185 struct btrfs_fs_info *fs_info = root->fs_info;
4186 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
4187 u64 used;
4188 int ret = 0;
4189 int need_commit = 2;
4190 int have_pinned_space;
4191
	/* make sure bytes are sectorsize aligned */
4193 bytes = ALIGN(bytes, fs_info->sectorsize);
4194
4195 if (btrfs_is_free_space_inode(inode)) {
4196 need_commit = 0;
4197 ASSERT(current->journal_info);
4198 }
4199
4200again:
	/* make sure we have enough space to handle the data first */
4202 spin_lock(&data_sinfo->lock);
4203 used = btrfs_space_info_used(data_sinfo, true);
4204
4205 if (used + bytes > data_sinfo->total_bytes) {
4206 struct btrfs_trans_handle *trans;
4207
		/*
		 * if we don't have enough free bytes in this space then we need
		 * to alloc a new chunk.
		 */
4212 if (!data_sinfo->full) {
4213 u64 alloc_target;
4214
4215 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4216 spin_unlock(&data_sinfo->lock);
4217
4218 alloc_target = btrfs_data_alloc_profile(fs_info);
4219
			/*
			 * Allocating a chunk needs a transaction handle;
			 * join any running transaction rather than starting
			 * a new one.  force_alloc was set to
			 * CHUNK_ALLOC_FORCE above, so do_chunk_alloc() will
			 * pick that up even though we pass NO_FORCE here.
			 */
4229 trans = btrfs_join_transaction(root);
4230 if (IS_ERR(trans))
4231 return PTR_ERR(trans);
4232
4233 ret = do_chunk_alloc(trans, alloc_target,
4234 CHUNK_ALLOC_NO_FORCE);
4235 btrfs_end_transaction(trans);
4236 if (ret < 0) {
4237 if (ret != -ENOSPC)
4238 return ret;
4239 else {
4240 have_pinned_space = 1;
4241 goto commit_trans;
4242 }
4243 }
4244
4245 goto again;
4246 }
4247
		/*
		 * If we don't have enough pinned space to deal with this
		 * allocation, and no removed chunk in current transaction,
		 * don't bother committing the transaction.
		 */
4253 have_pinned_space = __percpu_counter_compare(
4254 &data_sinfo->total_bytes_pinned,
4255 used + bytes - data_sinfo->total_bytes,
4256 BTRFS_TOTAL_BYTES_PINNED_BATCH);
4257 spin_unlock(&data_sinfo->lock);
4258
		/* commit the current transaction and try again */
4260commit_trans:
4261 if (need_commit) {
4262 need_commit--;
4263
4264 if (need_commit > 0) {
4265 btrfs_start_delalloc_roots(fs_info, -1);
4266 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
4267 (u64)-1);
4268 }
4269
4270 trans = btrfs_join_transaction(root);
4271 if (IS_ERR(trans))
4272 return PTR_ERR(trans);
4273 if (have_pinned_space >= 0 ||
4274 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4275 &trans->transaction->flags) ||
4276 need_commit > 0) {
4277 ret = btrfs_commit_transaction(trans);
4278 if (ret)
4279 return ret;

				/*
				 * The cleaner kthread might still be doing iput
				 * operations. Wait for it to finish so that
				 * more space is released.
				 */
4285 mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
4286 mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
4287 goto again;
4288 } else {
4289 btrfs_end_transaction(trans);
4290 }
4291 }
4292
4293 trace_btrfs_space_reservation(fs_info,
4294 "space_info:enospc",
4295 data_sinfo->flags, bytes, 1);
4296 return -ENOSPC;
4297 }
4298 update_bytes_may_use(data_sinfo, bytes);
4299 trace_btrfs_space_reservation(fs_info, "space_info",
4300 data_sinfo->flags, bytes, 1);
4301 spin_unlock(&data_sinfo->lock);
4302
4303 return 0;
4304}
4305
4306int btrfs_check_data_free_space(struct inode *inode,
4307 struct extent_changeset **reserved, u64 start, u64 len)
4308{
4309 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4310 int ret;
4311
	/* align the range */
4313 len = round_up(start + len, fs_info->sectorsize) -
4314 round_down(start, fs_info->sectorsize);
4315 start = round_down(start, fs_info->sectorsize);
4316
4317 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
4318 if (ret < 0)
4319 return ret;
4320
	/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
4322 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
4323 if (ret < 0)
4324 btrfs_free_reserved_data_space_noquota(inode, start, len);
4325 else
4326 ret = 0;
4327 return ret;
4328}
4329

/*
 * Called if we need to clear a data reservation for this inode
 * Normally in a error case.
 *
 * This one will *NOT* use accurate qgroup reserved space API, only for case
 * which we can't sleep and is sure it won't affect qgroup reserved space.
 * Like clear_bit_hook().
 */
4338void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4339 u64 len)
4340{
4341 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4342 struct btrfs_space_info *data_sinfo;
4343
	/* Make sure the range is aligned to sectorsize */
4345 len = round_up(start + len, fs_info->sectorsize) -
4346 round_down(start, fs_info->sectorsize);
4347 start = round_down(start, fs_info->sectorsize);
4348
4349 data_sinfo = fs_info->data_sinfo;
4350 spin_lock(&data_sinfo->lock);
4351 update_bytes_may_use(data_sinfo, -len);
4352 trace_btrfs_space_reservation(fs_info, "space_info",
4353 data_sinfo->flags, len, 0);
4354 spin_unlock(&data_sinfo->lock);
4355}
4356

/*
 * Called if we need to clear a data reservation for this inode
 * Normally in a error case.
 *
 * This one will handle the per-inode data rsv map for accurate reserved
 * space framework.
 */
4364void btrfs_free_reserved_data_space(struct inode *inode,
4365 struct extent_changeset *reserved, u64 start, u64 len)
4366{
4367 struct btrfs_root *root = BTRFS_I(inode)->root;
4368
	/* Make sure the range is aligned to sectorsize */
4370 len = round_up(start + len, root->fs_info->sectorsize) -
4371 round_down(start, root->fs_info->sectorsize);
4372 start = round_down(start, root->fs_info->sectorsize);
4373
4374 btrfs_free_reserved_data_space_noquota(inode, start, len);
4375 btrfs_qgroup_free_data(inode, reserved, start, len);
4376}
4377
4378static void force_metadata_allocation(struct btrfs_fs_info *info)
4379{
4380 struct list_head *head = &info->space_info;
4381 struct btrfs_space_info *found;
4382
4383 rcu_read_lock();
4384 list_for_each_entry_rcu(found, head, list) {
4385 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
4386 found->force_alloc = CHUNK_ALLOC_FORCE;
4387 }
4388 rcu_read_unlock();
4389}
4390
4391static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4392{
4393 return (global->size << 1);
4394}
4395
4396static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
4397 struct btrfs_space_info *sinfo, int force)
4398{
4399 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4400 u64 bytes_used = btrfs_space_info_used(sinfo, false);
4401 u64 thresh;
4402
4403 if (force == CHUNK_ALLOC_FORCE)
4404 return 1;
4405
	/*
	 * We need to take into account the global rsv because for all intents
	 * and purposes it's used space.  Don't worry about locking the
	 * global_rsv, it doesn't change except when the transaction commits.
	 */
4411 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
4412 bytes_used += calc_global_rsv_need_space(global_rsv);
4413
	/*
	 * in limited mode, we want to have some free space up to
	 * about 1% of the FS size.
	 */
4418 if (force == CHUNK_ALLOC_LIMITED) {
4419 thresh = btrfs_super_total_bytes(fs_info->super_copy);
4420 thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
4421
4422 if (sinfo->total_bytes - bytes_used < thresh)
4423 return 1;
4424 }
4425
4426 if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
4427 return 0;
4428 return 1;
4429}
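
/*
 * A worked example of the default check above: with total_bytes = 10GiB and
 * 7GiB counted as used, used + 2MiB falls below div_factor(10GiB, 8) = 8GiB,
 * so no chunk is allocated; once usage reaches 8GiB the check passes.
 */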
4430
4431static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4432{
4433 u64 num_dev;
4434
4435 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
4436 BTRFS_BLOCK_GROUP_RAID0 |
4437 BTRFS_BLOCK_GROUP_RAID5 |
4438 BTRFS_BLOCK_GROUP_RAID6))
4439 num_dev = fs_info->fs_devices->rw_devices;
4440 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4441 num_dev = 2;
4442 else
4443 num_dev = 1;
4444
4445 return num_dev;
4446}
4447

/*
 * Reserve space in the system space_info for allocating or removing a
 * chunk.  Chunk allocation/removal modifies the chunk tree and the device
 * items, which live in the SYSTEM block group.
 */
4453void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4454{
4455 struct btrfs_fs_info *fs_info = trans->fs_info;
4456 struct btrfs_space_info *info;
4457 u64 left;
4458 u64 thresh;
4459 int ret = 0;
4460 u64 num_devs;
4461
	/*
	 * Needed because we can end up allocating a system chunk and for an
	 * atomic and race free space reservation in the chunk block reserve.
	 */
4466 lockdep_assert_held(&fs_info->chunk_mutex);
4467
4468 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4469 spin_lock(&info->lock);
4470 left = info->total_bytes - btrfs_space_info_used(info, true);
4471 spin_unlock(&info->lock);
4472
4473 num_devs = get_profile_num_devs(fs_info, type);
4474
	/* num_devs device items to update and 1 chunk item to add or remove */
4476 thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
4477 btrfs_calc_trans_metadata_size(fs_info, 1);
4478
4479 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4480 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4481 left, thresh, type);
4482 dump_space_info(fs_info, info, 0, 0);
4483 }
4484
4485 if (left < thresh) {
4486 u64 flags = btrfs_system_alloc_profile(fs_info);
4487

		/*
		 * Ignore failure to create system chunk. We might end up not
		 * needing it, as we might not need to COW all nodes/leafs from
		 * the paths we visit (due to their current usage and their
		 * "dirty" status).
		 */
4494 ret = btrfs_alloc_chunk(trans, flags);
4495 }
4496
4497 if (!ret) {
4498 ret = btrfs_block_rsv_add(fs_info->chunk_root,
4499 &fs_info->chunk_block_rsv,
4500 thresh, BTRFS_RESERVE_NO_FLUSH);
4501 if (!ret)
4502 trans->chunk_bytes_reserved += thresh;
4503 }
4504}
4505

/*
 * If force is CHUNK_ALLOC_FORCE:
 *    - return 1 if it successfully allocates a chunk,
 *    - return errors including -ENOSPC otherwise.
 * If force is NOT CHUNK_ALLOC_FORCE:
 *    - return 0 if it doesn't need to allocate a new chunk,
 *    - return 1 if it successfully allocates a chunk,
 *    - return errors including -ENOSPC otherwise.
 */
4515static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4516 int force)
4517{
4518 struct btrfs_fs_info *fs_info = trans->fs_info;
4519 struct btrfs_space_info *space_info;
4520 bool wait_for_alloc = false;
4521 bool should_alloc = false;
4522 int ret = 0;
4523
	/* Don't re-enter if we're already allocating a chunk */
4525 if (trans->allocating_chunk)
4526 return -ENOSPC;
4527
4528 space_info = __find_space_info(fs_info, flags);
4529 ASSERT(space_info);
4530
4531 do {
4532 spin_lock(&space_info->lock);
4533 if (force < space_info->force_alloc)
4534 force = space_info->force_alloc;
4535 should_alloc = should_alloc_chunk(fs_info, space_info, force);
4536 if (space_info->full) {
			/* No more free physical space */
4538 if (should_alloc)
4539 ret = -ENOSPC;
4540 else
4541 ret = 0;
4542 spin_unlock(&space_info->lock);
4543 return ret;
4544 } else if (!should_alloc) {
4545 spin_unlock(&space_info->lock);
4546 return 0;
4547 } else if (space_info->chunk_alloc) {
			/*
			 * Someone is already allocating, so we need to block
			 * until this someone is finished and then loop to
			 * recheck if we should continue with our allocation
			 * attempt.
			 */
4554 wait_for_alloc = true;
4555 spin_unlock(&space_info->lock);
4556 mutex_lock(&fs_info->chunk_mutex);
4557 mutex_unlock(&fs_info->chunk_mutex);
4558 } else {
			/* Proceed with allocation */
4560 space_info->chunk_alloc = 1;
4561 wait_for_alloc = false;
4562 spin_unlock(&space_info->lock);
4563 }
4564
4565 cond_resched();
4566 } while (wait_for_alloc);
4567
4568 mutex_lock(&fs_info->chunk_mutex);
4569 trans->allocating_chunk = true;
4570
	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
4575 if (btrfs_mixed_space_info(space_info))
4576 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
4577
	/*
	 * if we're doing a data chunk, go ahead and make sure that
	 * we keep a reasonable number of metadata chunks allocated in the
	 * FS as well.
	 */
4583 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
4584 fs_info->data_chunk_allocations++;
4585 if (!(fs_info->data_chunk_allocations %
4586 fs_info->metadata_ratio))
4587 force_metadata_allocation(fs_info);
4588 }
4589
	/*
	 * Check if we have enough space in SYSTEM chunk because we may need
	 * to update devices.
	 */
4594 check_system_chunk(trans, flags);
4595
4596 ret = btrfs_alloc_chunk(trans, flags);
4597 trans->allocating_chunk = false;
4598
4599 spin_lock(&space_info->lock);
4600 if (ret < 0) {
4601 if (ret == -ENOSPC)
4602 space_info->full = 1;
4603 else
4604 goto out;
4605 } else {
4606 ret = 1;
4607 space_info->max_extent_size = 0;
4608 }
4609
4610 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
4611out:
4612 space_info->chunk_alloc = 0;
4613 spin_unlock(&space_info->lock);
4614 mutex_unlock(&fs_info->chunk_mutex);
4615
	/*
	 * When we allocate a new chunk we reserve space in the chunk block
	 * reserve to make sure we can COW nodes/leafs in the chunk tree or
	 * add new nodes/leafs to it if we end up needing to do it when
	 * inserting the chunk item and updating device items as part of the
	 * second phase of chunk allocation, performed by
	 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
	 * large number of new block groups to create in our transaction
	 * handle's new_bgs list to avoid exhausting the chunk block reserve
	 * in extreme cases - like having a single transaction create many new
	 * block groups that ends up using all of the chunk block reserve.
	 */
4629 if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
4630 btrfs_create_pending_block_groups(trans);
4631
4632 return ret;
4633}
4634
4635static int can_overcommit(struct btrfs_fs_info *fs_info,
4636 struct btrfs_space_info *space_info, u64 bytes,
4637 enum btrfs_reserve_flush_enum flush,
4638 bool system_chunk)
4639{
4640 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4641 u64 profile;
4642 u64 space_size;
4643 u64 avail;
4644 u64 used;
4645 int factor;
4646
	/* Don't overcommit data: it has to be backed by allocated chunks */
4648 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4649 return 0;
4650
4651 if (system_chunk)
4652 profile = btrfs_system_alloc_profile(fs_info);
4653 else
4654 profile = btrfs_metadata_alloc_profile(fs_info);
4655
4656 used = btrfs_space_info_used(space_info, false);
4657

	/*
	 * We only want to allow over committing if we have lots of actual space
	 * free, but if we don't have enough space to handle the global reserve
	 * space then we could end up having a real enospc problem when trying
	 * to allocate a chunk or some other such important allocation.
	 */
4664 spin_lock(&global_rsv->lock);
4665 space_size = calc_global_rsv_need_space(global_rsv);
4666 spin_unlock(&global_rsv->lock);
4667 if (used + space_size >= space_info->total_bytes)
4668 return 0;
4669
4670 used += space_info->bytes_may_use;
4671
4672 avail = atomic64_read(&fs_info->free_chunk_space);

	/*
	 * If we have dup, raid1 or raid10 then only half of the free
	 * space is actually usable.  For raid56, the space info used
	 * doesn't include the parity drive, so we don't have to
	 * change the math.
	 */
4680 factor = btrfs_bg_type_to_factor(profile);
4681 avail = div_u64(avail, factor);
4682
	/*
	 * If we aren't flushing all things, let us overcommit up to
	 * 1/2th of the space. If we can flush, don't let us overcommit
	 * too much, let it overcommit up to 1/8 of the space.
	 */
4688 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4689 avail >>= 3;
4690 else
4691 avail >>= 1;
4692
4693 if (used + bytes < space_info->total_bytes + avail)
4694 return 1;
4695 return 0;
4696}
4697
4698static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
4699 unsigned long nr_pages, int nr_items)
4700{
4701 struct super_block *sb = fs_info->sb;
4702
4703 if (down_read_trylock(&sb->s_umount)) {
4704 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4705 up_read(&sb->s_umount);
4706 } else {
		/*
		 * We needn't worry the filesystem going from r/w to r/o though
		 * we don't acquire ->s_umount mutex, because the filesystem
		 * should guarantee the delalloc inodes list be empty after
		 * the filesystem is readonly(all dirty pages are written to
		 * the disk).
		 */
4714 btrfs_start_delalloc_roots(fs_info, nr_items);
4715 if (!current->journal_info)
4716 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
4717 }
4718}
4719
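/*
 * The number of metadata items we need to flush to reclaim @to_reclaim bytes:
 * each item is assumed to cost one unit of btrfs_calc_trans_metadata_size().
 * Always returns at least 1 so callers make forward progress.
 */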
4720static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
4721 u64 to_reclaim)
4722{
4723 u64 bytes;
4724 u64 nr;
4725
4726 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
4727 nr = div64_u64(to_reclaim, bytes);
4728 if (!nr)
4729 nr = 1;
4730 return nr;
4731}
4732
4733#define EXTENT_SIZE_PER_ITEM SZ_256K
4734
/*
 * shrink metadata reservation for delalloc
 */
4738static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4739 u64 orig, bool wait_ordered)
4740{
4741 struct btrfs_space_info *space_info;
4742 struct btrfs_trans_handle *trans;
4743 u64 delalloc_bytes;
4744 u64 max_reclaim;
4745 u64 items;
4746 long time_left;
4747 unsigned long nr_pages;
4748 int loops;
4749
	/* Calc the number of the pages we need flush for space reservation */
4751 items = calc_reclaim_items_nr(fs_info, to_reclaim);
4752 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4753
4754 trans = (struct btrfs_trans_handle *)current->journal_info;
4755 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4756
4757 delalloc_bytes = percpu_counter_sum_positive(
4758 &fs_info->delalloc_bytes);
4759 if (delalloc_bytes == 0) {
4760 if (trans)
4761 return;
4762 if (wait_ordered)
4763 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4764 return;
4765 }
4766
4767 loops = 0;
4768 while (delalloc_bytes && loops < 3) {
4769 max_reclaim = min(delalloc_bytes, to_reclaim);
4770 nr_pages = max_reclaim >> PAGE_SHIFT;
4771 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);

		/*
		 * We need to wait for the async pages to actually start before
		 * we do anything.
		 */
4776 max_reclaim = atomic_read(&fs_info->async_delalloc_pages);
4777 if (!max_reclaim)
4778 goto skip_async;
4779
4780 if (max_reclaim <= nr_pages)
4781 max_reclaim = 0;
4782 else
4783 max_reclaim -= nr_pages;
4784
4785 wait_event(fs_info->async_submit_wait,
4786 atomic_read(&fs_info->async_delalloc_pages) <=
4787 (int)max_reclaim);
4788skip_async:
4789 spin_lock(&space_info->lock);
4790 if (list_empty(&space_info->tickets) &&
4791 list_empty(&space_info->priority_tickets)) {
4792 spin_unlock(&space_info->lock);
4793 break;
4794 }
4795 spin_unlock(&space_info->lock);
4796
4797 loops++;
4798 if (wait_ordered && !trans) {
4799 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4800 } else {
4801 time_left = schedule_timeout_killable(1);
4802 if (time_left)
4803 break;
4804 }
4805 delalloc_bytes = percpu_counter_sum_positive(
4806 &fs_info->delalloc_bytes);
4807 }
4808}
4809
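/*
 * A reserve_ticket represents one pending metadata reservation.  Tasks that
 * cannot reserve immediately queue a ticket on their space_info and either
 * sleep on ticket->wait (normal flushing) or flush on their own (priority).
 * ticket->bytes is decremented as space becomes available; once it reaches
 * zero the reservation has been satisfied and the waiter is woken.
 */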
4810struct reserve_ticket {
4811 u64 bytes;
4812 int error;
4813 struct list_head list;
4814 wait_queue_head_t wait;
4815};
4816
/*
 * Decide whether committing the transaction is likely to free enough space to
 * satisfy the first queued ticket on @space_info, and commit it if so.  Only
 * commit if the pinned bytes, plus what the delayed rsvs would release on
 * commit, cover the ticket; otherwise return -ENOSPC so we don't pay for a
 * useless commit.  Returns -EAGAIN if we are already inside a transaction.
 */
4827static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4828 struct btrfs_space_info *space_info)
4829{
4830 struct reserve_ticket *ticket = NULL;
4831 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
4832 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
4833 struct btrfs_trans_handle *trans;
4834 u64 bytes_needed;
4835 u64 reclaim_bytes = 0;
4836
4837 trans = (struct btrfs_trans_handle *)current->journal_info;
4838 if (trans)
4839 return -EAGAIN;
4840
4841 spin_lock(&space_info->lock);
4842 if (!list_empty(&space_info->priority_tickets))
4843 ticket = list_first_entry(&space_info->priority_tickets,
4844 struct reserve_ticket, list);
4845 else if (!list_empty(&space_info->tickets))
4846 ticket = list_first_entry(&space_info->tickets,
4847 struct reserve_ticket, list);
4848 bytes_needed = (ticket) ? ticket->bytes : 0;
4849 spin_unlock(&space_info->lock);
4850
4851 if (!bytes_needed)
4852 return 0;
4853
	/* See if there is enough pinned space to make this reservation */
4855 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4856 bytes_needed,
4857 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
4858 goto commit;
4859
	/*
	 * See if there is some space in the delayed insertion reservation for
	 * this reservation.
	 */
4864 if (space_info != delayed_rsv->space_info)
4865 return -ENOSPC;
4866
4867 spin_lock(&delayed_rsv->lock);
4868 reclaim_bytes += delayed_rsv->reserved;
4869 spin_unlock(&delayed_rsv->lock);
4870
4871 spin_lock(&delayed_refs_rsv->lock);
4872 reclaim_bytes += delayed_refs_rsv->reserved;
4873 spin_unlock(&delayed_refs_rsv->lock);
4874 if (reclaim_bytes >= bytes_needed)
4875 goto commit;
4876 bytes_needed -= reclaim_bytes;
4877
4878 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4879 bytes_needed,
4880 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) {
4881 return -ENOSPC;
4882 }
4883
4884commit:
4885 trans = btrfs_join_transaction(fs_info->extent_root);
4886 if (IS_ERR(trans))
4887 return -ENOSPC;
4888
4889 return btrfs_commit_transaction(trans);
4890}
4891
/*
 * Try to flush some data based on policy set by @state. This is only advisory
 * and may fail for various reasons. The caller is supposed to examine the
 * state of @space_info to detect the outcome.
 */
4897static void flush_space(struct btrfs_fs_info *fs_info,
4898 struct btrfs_space_info *space_info, u64 num_bytes,
4899 int state)
4900{
4901 struct btrfs_root *root = fs_info->extent_root;
4902 struct btrfs_trans_handle *trans;
4903 int nr;
4904 int ret = 0;
4905
4906 switch (state) {
4907 case FLUSH_DELAYED_ITEMS_NR:
4908 case FLUSH_DELAYED_ITEMS:
4909 if (state == FLUSH_DELAYED_ITEMS_NR)
4910 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
4911 else
4912 nr = -1;
4913
4914 trans = btrfs_join_transaction(root);
4915 if (IS_ERR(trans)) {
4916 ret = PTR_ERR(trans);
4917 break;
4918 }
4919 ret = btrfs_run_delayed_items_nr(trans, nr);
4920 btrfs_end_transaction(trans);
4921 break;
4922 case FLUSH_DELALLOC:
4923 case FLUSH_DELALLOC_WAIT:
4924 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
4925 state == FLUSH_DELALLOC_WAIT);
4926 break;
4927 case FLUSH_DELAYED_REFS_NR:
4928 case FLUSH_DELAYED_REFS:
4929 trans = btrfs_join_transaction(root);
4930 if (IS_ERR(trans)) {
4931 ret = PTR_ERR(trans);
4932 break;
4933 }
4934 if (state == FLUSH_DELAYED_REFS_NR)
4935 nr = calc_reclaim_items_nr(fs_info, num_bytes);
4936 else
4937 nr = 0;
4938 btrfs_run_delayed_refs(trans, nr);
4939 btrfs_end_transaction(trans);
4940 break;
4941 case ALLOC_CHUNK:
4942 trans = btrfs_join_transaction(root);
4943 if (IS_ERR(trans)) {
4944 ret = PTR_ERR(trans);
4945 break;
4946 }
4947 ret = do_chunk_alloc(trans,
4948 btrfs_metadata_alloc_profile(fs_info),
4949 CHUNK_ALLOC_NO_FORCE);
4950 btrfs_end_transaction(trans);
4951 if (ret > 0 || ret == -ENOSPC)
4952 ret = 0;
4953 break;
4954 case COMMIT_TRANS:
		/*
		 * If we have pending delayed iputs then we could free up a
		 * bunch of pinned space, so make sure we run the iputs before
		 * we do our pinned bytes check below.
		 */
4960 mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
4961 btrfs_run_delayed_iputs(fs_info);
4962 mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
4963
4964 ret = may_commit_transaction(fs_info, space_info);
4965 break;
4966 default:
4967 ret = -ENOSPC;
4968 break;
4969 }
4970
4971 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
4972 ret);
4973 return;
4974}
4975
4976static inline u64
4977btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
4978 struct btrfs_space_info *space_info,
4979 bool system_chunk)
4980{
4981 struct reserve_ticket *ticket;
4982 u64 used;
4983 u64 expected;
4984 u64 to_reclaim = 0;
4985
4986 list_for_each_entry(ticket, &space_info->tickets, list)
4987 to_reclaim += ticket->bytes;
4988 list_for_each_entry(ticket, &space_info->priority_tickets, list)
4989 to_reclaim += ticket->bytes;
4990 if (to_reclaim)
4991 return to_reclaim;
4992
4993 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4994 if (can_overcommit(fs_info, space_info, to_reclaim,
4995 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4996 return 0;
4997
4998 used = btrfs_space_info_used(space_info, true);
4999
5000 if (can_overcommit(fs_info, space_info, SZ_1M,
5001 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
5002 expected = div_factor_fine(space_info->total_bytes, 95);
5003 else
5004 expected = div_factor_fine(space_info->total_bytes, 90);
5005
5006 if (used > expected)
5007 to_reclaim = used - expected;
5008 else
5009 to_reclaim = 0;
5010 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
5011 space_info->bytes_reserved);
5012 return to_reclaim;
5013}
5014
5015static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
5016 struct btrfs_space_info *space_info,
5017 u64 used, bool system_chunk)
5018{
5019 u64 thresh = div_factor_fine(space_info->total_bytes, 98);

	/* If we're just plain full then async reclaim just slows us down. */
5022 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
5023 return 0;
5024
5025 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5026 system_chunk))
5027 return 0;
5028
5029 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
5030 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
5031}
5032
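/*
 * Fail every ticket still queued on @head with -ENOSPC and wake its waiter.
 * Called by the async reclaim worker when repeated full flush cycles have
 * made no progress.
 */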
5033static void wake_all_tickets(struct list_head *head)
5034{
5035 struct reserve_ticket *ticket;
5036
5037 while (!list_empty(head)) {
5038 ticket = list_first_entry(head, struct reserve_ticket, list);
5039 list_del_init(&ticket->list);
5040 ticket->error = -ENOSPC;
5041 wake_up(&ticket->wait);
5042 }
5043}
5044
/*
 * This is for normal flushers, we can wait all goddamned day if we want to.  We
 * will loop and continuously try to flush as long as we are making progress.
 * We count progress as clearing off tickets each time we have to loop.
 */
5050static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
5051{
5052 struct btrfs_fs_info *fs_info;
5053 struct btrfs_space_info *space_info;
5054 u64 to_reclaim;
5055 int flush_state;
5056 int commit_cycles = 0;
5057 u64 last_tickets_id;
5058
5059 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
5060 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5061
5062 spin_lock(&space_info->lock);
5063 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5064 false);
5065 if (!to_reclaim) {
5066 space_info->flush = 0;
5067 spin_unlock(&space_info->lock);
5068 return;
5069 }
5070 last_tickets_id = space_info->tickets_id;
5071 spin_unlock(&space_info->lock);
5072
5073 flush_state = FLUSH_DELAYED_ITEMS_NR;
5074 do {
5075 flush_space(fs_info, space_info, to_reclaim, flush_state);
5076 spin_lock(&space_info->lock);
5077 if (list_empty(&space_info->tickets)) {
5078 space_info->flush = 0;
5079 spin_unlock(&space_info->lock);
5080 return;
5081 }
5082 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
5083 space_info,
5084 false);
5085 if (last_tickets_id == space_info->tickets_id) {
5086 flush_state++;
5087 } else {
5088 last_tickets_id = space_info->tickets_id;
5089 flush_state = FLUSH_DELAYED_ITEMS_NR;
5090 if (commit_cycles)
5091 commit_cycles--;
5092 }
5093
5094 if (flush_state > COMMIT_TRANS) {
5095 commit_cycles++;
5096 if (commit_cycles > 2) {
5097 wake_all_tickets(&space_info->tickets);
5098 space_info->flush = 0;
5099 } else {
5100 flush_state = FLUSH_DELAYED_ITEMS_NR;
5101 }
5102 }
5103 spin_unlock(&space_info->lock);
5104 } while (flush_state <= COMMIT_TRANS);
5105}
5106
5107void btrfs_init_async_reclaim_work(struct work_struct *work)
5108{
5109 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5110}
5111
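/*
 * Flush on behalf of a priority ticket: walk the flush states synchronously,
 * skipping the delalloc states (see the comment in the loop), until the
 * ticket is satisfied or we run out of states short of committing the
 * transaction.
 */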
5112static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5113 struct btrfs_space_info *space_info,
5114 struct reserve_ticket *ticket)
5115{
5116 u64 to_reclaim;
5117 int flush_state = FLUSH_DELAYED_ITEMS_NR;
5118
5119 spin_lock(&space_info->lock);
5120 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5121 false);
5122 if (!to_reclaim) {
5123 spin_unlock(&space_info->lock);
5124 return;
5125 }
5126 spin_unlock(&space_info->lock);
5127
5128 do {
5129 flush_space(fs_info, space_info, to_reclaim, flush_state);
5130 flush_state++;
5131 spin_lock(&space_info->lock);
5132 if (ticket->bytes == 0) {
5133 spin_unlock(&space_info->lock);
5134 return;
5135 }
5136 spin_unlock(&space_info->lock);
5137
		/*
		 * Priority flushers can't wait on delalloc without
		 * deadlocking.
		 */
5142 if (flush_state == FLUSH_DELALLOC ||
5143 flush_state == FLUSH_DELALLOC_WAIT)
5144 flush_state = ALLOC_CHUNK;
5145 } while (flush_state < COMMIT_TRANS);
5146}
5147
5148static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5149 struct btrfs_space_info *space_info,
5150 struct reserve_ticket *ticket, u64 orig_bytes)
5152{
5153 DEFINE_WAIT(wait);
5154 int ret = 0;
5155
5156 spin_lock(&space_info->lock);
5157 while (ticket->bytes > 0 && ticket->error == 0) {
5158 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5159 if (ret) {
5160 ret = -EINTR;
5161 break;
5162 }
5163 spin_unlock(&space_info->lock);
5164
5165 schedule();
5166
5167 finish_wait(&ticket->wait, &wait);
5168 spin_lock(&space_info->lock);
5169 }
5170 if (!ret)
5171 ret = ticket->error;
5172 if (!list_empty(&ticket->list))
5173 list_del_init(&ticket->list);
5174 if (ticket->bytes && ticket->bytes < orig_bytes) {
5175 u64 num_bytes = orig_bytes - ticket->bytes;
5176 update_bytes_may_use(space_info, -num_bytes);
5177 trace_btrfs_space_reservation(fs_info, "space_info",
5178 space_info->flags, num_bytes, 0);
5179 }
5180 spin_unlock(&space_info->lock);
5181
5182 return ret;
5183}
5184
/**
 * __reserve_metadata_bytes - try to reserve bytes from the space_info
 * @fs_info - the fs we're allocating for
 * @space_info - the space info we want to allocate from
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 * @system_chunk - whether we are allocating for the system chunk
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt to
 * flush some space based on the flush method, or wait for space if we can
 * immediately flush.  It will also make an attempt to flush some space based
 * on the flags passed in to determine if we should retry, and then we will
 * return -ENOSPC if it can't make a reservation.
 */
5199static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
5200 struct btrfs_space_info *space_info,
5201 u64 orig_bytes,
5202 enum btrfs_reserve_flush_enum flush,
5203 bool system_chunk)
5204{
5205 struct reserve_ticket ticket;
5206 u64 used;
5207 int ret = 0;
5208
5209 ASSERT(orig_bytes);
5210 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5211
5212 spin_lock(&space_info->lock);
5213 ret = -ENOSPC;
5214 used = btrfs_space_info_used(space_info, true);
5215
	/*
	 * If we have enough space then hooray, make our reservation and carry
	 * on.  If not see if we can overcommit, and if we can, hooray carry on.
	 * If not things get more complicated.
	 */
5221 if (used + orig_bytes <= space_info->total_bytes) {
5222 update_bytes_may_use(space_info, orig_bytes);
5223 trace_btrfs_space_reservation(fs_info, "space_info",
5224 space_info->flags, orig_bytes, 1);
5225 ret = 0;
5226 } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
5227 system_chunk)) {
5228 update_bytes_may_use(space_info, orig_bytes);
5229 trace_btrfs_space_reservation(fs_info, "space_info",
5230 space_info->flags, orig_bytes, 1);
5231 ret = 0;
5232 }
5233
	/*
	 * If we couldn't make a reservation then setup our reservation ticket
	 * and kick the async worker if it's not already running.
	 *
	 * If we are a priority flusher then we just need to add our ticket to
	 * the list and we will do our own flushing further down.
	 */
5241 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5242 ticket.bytes = orig_bytes;
5243 ticket.error = 0;
5244 init_waitqueue_head(&ticket.wait);
5245 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5246 list_add_tail(&ticket.list, &space_info->tickets);
5247 if (!space_info->flush) {
5248 space_info->flush = 1;
5249 trace_btrfs_trigger_flush(fs_info,
5250 space_info->flags,
5251 orig_bytes, flush,
5252 "enospc");
5253 queue_work(system_unbound_wq,
5254 &fs_info->async_reclaim_work);
5255 }
5256 } else {
5257 list_add_tail(&ticket.list,
5258 &space_info->priority_tickets);
5259 }
5260 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5261 used += orig_bytes;
		/*
		 * We will do the space reservation dance during log replay,
		 * which means we won't have fs_info->fs_root set, so don't do
		 * the async reclaim as we will panic.
		 */
5267 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
5268 need_do_async_reclaim(fs_info, space_info,
5269 used, system_chunk) &&
5270 !work_busy(&fs_info->async_reclaim_work)) {
5271 trace_btrfs_trigger_flush(fs_info, space_info->flags,
5272 orig_bytes, flush, "preempt");
5273 queue_work(system_unbound_wq,
5274 &fs_info->async_reclaim_work);
5275 }
5276 }
5277 spin_unlock(&space_info->lock);
5278 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5279 return ret;
5280
5281 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5282 return wait_reserve_ticket(fs_info, space_info, &ticket,
5283 orig_bytes);
5284
5285 ret = 0;
5286 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
5287 spin_lock(&space_info->lock);
5288 if (ticket.bytes) {
5289 if (ticket.bytes < orig_bytes) {
5290 u64 num_bytes = orig_bytes - ticket.bytes;
5291 update_bytes_may_use(space_info, -num_bytes);
5292 trace_btrfs_space_reservation(fs_info, "space_info",
5293 space_info->flags,
5294 num_bytes, 0);
5295
5296 }
5297 list_del_init(&ticket.list);
5298 ret = -ENOSPC;
5299 }
5300 spin_unlock(&space_info->lock);
5301 ASSERT(list_empty(&ticket.list));
5302 return ret;
5303}
5304
/**
 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
 * @root - the root we're allocating for
 * @block_rsv - the block_rsv we're allocating for
 * @orig_bytes - the number of bytes we want
 * @flush - whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt to
 * flush some space based on the flush method, or wait for space if we can
 * immediately flush.  It will also make an attempt to flush some space based
 * on the flags passed in to determine if we should retry, and then we will
 * return -ENOSPC if it can't make a reservation.
 */
5319static int reserve_metadata_bytes(struct btrfs_root *root,
5320 struct btrfs_block_rsv *block_rsv,
5321 u64 orig_bytes,
5322 enum btrfs_reserve_flush_enum flush)
5323{
5324 struct btrfs_fs_info *fs_info = root->fs_info;
5325 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5326 int ret;
5327 bool system_chunk = (root == fs_info->chunk_root);
5328
5329 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
5330 orig_bytes, flush, system_chunk);
5331 if (ret == -ENOSPC &&
5332 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5333 if (block_rsv != global_rsv &&
5334 !block_rsv_use_bytes(global_rsv, orig_bytes))
5335 ret = 0;
5336 }
5337 if (ret == -ENOSPC) {
5338 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
5339 block_rsv->space_info->flags,
5340 orig_bytes, 1);
5341
5342 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
5343 dump_space_info(fs_info, block_rsv->space_info,
5344 orig_bytes, 0);
5345 }
5346 return ret;
5347}
5348
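/*
 * Pick the block reserve to charge for a COW of the given root: the
 * transaction's rsv for reference-counted (snapshottable) roots, the csum
 * root while adding csums, and the uuid root; otherwise the root's own rsv,
 * falling back to the empty rsv so callers always get a valid reserve.
 */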
5349static struct btrfs_block_rsv *get_block_rsv(
5350 const struct btrfs_trans_handle *trans,
5351 const struct btrfs_root *root)
5352{
5353 struct btrfs_fs_info *fs_info = root->fs_info;
5354 struct btrfs_block_rsv *block_rsv = NULL;
5355
5356 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5357 (root == fs_info->csum_root && trans->adding_csums) ||
5358 (root == fs_info->uuid_root))
5359 block_rsv = trans->block_rsv;
5360
5361 if (!block_rsv)
5362 block_rsv = root->block_rsv;
5363
5364 if (!block_rsv)
5365 block_rsv = &fs_info->empty_block_rsv;
5366
5367 return block_rsv;
5368}
5369
5370static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5371 u64 num_bytes)
5372{
5373 int ret = -ENOSPC;
5374 spin_lock(&block_rsv->lock);
5375 if (block_rsv->reserved >= num_bytes) {
5376 block_rsv->reserved -= num_bytes;
5377 if (block_rsv->reserved < block_rsv->size)
5378 block_rsv->full = 0;
5379 ret = 0;
5380 }
5381 spin_unlock(&block_rsv->lock);
5382 return ret;
5383}
5384
5385static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5386 u64 num_bytes, bool update_size)
5387{
5388 spin_lock(&block_rsv->lock);
5389 block_rsv->reserved += num_bytes;
5390 if (update_size)
5391 block_rsv->size += num_bytes;
5392 else if (block_rsv->reserved >= block_rsv->size)
5393 block_rsv->full = 1;
5394 spin_unlock(&block_rsv->lock);
5395}
5396
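/*
 * Move @num_bytes from the global reserve into @dest, but only if doing so
 * leaves the global reserve filled to at least @min_factor tenths of its
 * size; otherwise return -ENOSPC and leave both reserves untouched.
 */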
5397int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5398 struct btrfs_block_rsv *dest, u64 num_bytes,
5399 int min_factor)
5400{
5401 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5402 u64 min_bytes;
5403
5404 if (global_rsv->space_info != dest->space_info)
5405 return -ENOSPC;
5406
5407 spin_lock(&global_rsv->lock);
5408 min_bytes = div_factor(global_rsv->size, min_factor);
5409 if (global_rsv->reserved < min_bytes + num_bytes) {
5410 spin_unlock(&global_rsv->lock);
5411 return -ENOSPC;
5412 }
5413 global_rsv->reserved -= num_bytes;
5414 if (global_rsv->reserved < global_rsv->size)
5415 global_rsv->full = 0;
5416 spin_unlock(&global_rsv->lock);
5417
5418 block_rsv_add_bytes(dest, num_bytes, true);
5419 return 0;
5420}
5421
/**
 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
 * @fs_info - the fs info for our fs.
 * @src - the source block rsv to transfer from.
 * @num_bytes - the number of bytes to transfer.
 *
 * This transfers up to the num_bytes amount from the src rsv to the
 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
 */
5431void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
5432 struct btrfs_block_rsv *src,
5433 u64 num_bytes)
5434{
5435 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
5436 u64 to_free = 0;
5437
5438 spin_lock(&src->lock);
5439 src->reserved -= num_bytes;
5440 src->size -= num_bytes;
5441 spin_unlock(&src->lock);
5442
5443 spin_lock(&delayed_refs_rsv->lock);
5444 if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
5445 u64 delta = delayed_refs_rsv->size -
5446 delayed_refs_rsv->reserved;
5447 if (num_bytes > delta) {
5448 to_free = num_bytes - delta;
5449 num_bytes = delta;
5450 }
5451 } else {
5452 to_free = num_bytes;
5453 num_bytes = 0;
5454 }
5455
5456 if (num_bytes)
5457 delayed_refs_rsv->reserved += num_bytes;
5458 if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
5459 delayed_refs_rsv->full = 1;
5460 spin_unlock(&delayed_refs_rsv->lock);
5461
5462 if (num_bytes)
5463 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5464 0, num_bytes, 1);
5465 if (to_free)
5466 space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
5467 to_free);
5468}
5469
/**
 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
 * @fs_info - the fs_info for our fs.
 * @flush - control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to 1 items size worth of space and
 * will return -ENOSPC if we can't make the reservation.
 */
5478int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
5479 enum btrfs_reserve_flush_enum flush)
5480{
5481 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5482 u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
5483 u64 num_bytes = 0;
5484 int ret = -ENOSPC;
5485
5486 spin_lock(&block_rsv->lock);
5487 if (block_rsv->reserved < block_rsv->size) {
5488 num_bytes = block_rsv->size - block_rsv->reserved;
5489 num_bytes = min(num_bytes, limit);
5490 }
5491 spin_unlock(&block_rsv->lock);
5492
5493 if (!num_bytes)
5494 return 0;
5495
5496 ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
5497 num_bytes, flush);
5498 if (ret)
5499 return ret;
	block_rsv_add_bytes(block_rsv, num_bytes, false);
5501 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5502 0, num_bytes, 1);
5503 return 0;
5504}
5505
/*
 * This is for space we already have accounted in space_info->bytes_may_use, so
 * basically when we're returning space from block_rsv's.
 */
5510static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5511 struct btrfs_space_info *space_info,
5512 u64 num_bytes)
5513{
5514 struct reserve_ticket *ticket;
5515 struct list_head *head;
5516 u64 used;
5517 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5518 bool check_overcommit = false;
5519
5520 spin_lock(&space_info->lock);
5521 head = &space_info->priority_tickets;

	/*
	 * If we are over our limit then we need to check and see if we can
	 * overcommit, and if we can't then we just need to free up our space
	 * and not satisfy any requests.
	 */
5528 used = btrfs_space_info_used(space_info, true);
5529 if (used - num_bytes >= space_info->total_bytes)
5530 check_overcommit = true;
5531again:
5532 while (!list_empty(head) && num_bytes) {
5533 ticket = list_first_entry(head, struct reserve_ticket,
5534 list);
		/*
		 * We use 0 bytes because this space is already reserved, so
		 * adding the ticket space would be a double count.
		 */
5539 if (check_overcommit &&
5540 !can_overcommit(fs_info, space_info, 0, flush, false))
5541 break;
5542 if (num_bytes >= ticket->bytes) {
5543 list_del_init(&ticket->list);
5544 num_bytes -= ticket->bytes;
5545 ticket->bytes = 0;
5546 space_info->tickets_id++;
5547 wake_up(&ticket->wait);
5548 } else {
5549 ticket->bytes -= num_bytes;
5550 num_bytes = 0;
5551 }
5552 }
5553
5554 if (num_bytes && head == &space_info->priority_tickets) {
5555 head = &space_info->tickets;
5556 flush = BTRFS_RESERVE_FLUSH_ALL;
5557 goto again;
5558 }
5559 update_bytes_may_use(space_info, -num_bytes);
5560 trace_btrfs_space_reservation(fs_info, "space_info",
5561 space_info->flags, num_bytes, 0);
5562 spin_unlock(&space_info->lock);
5563}
5564
/*
 * This is for newly allocated space that isn't accounted in
 * space_info->bytes_may_use yet.  So if we allocate a chunk or unpin an extent
 * we use this helper.
 */
5570static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5571 struct btrfs_space_info *space_info,
5572 u64 num_bytes)
5573{
5574 struct reserve_ticket *ticket;
5575 struct list_head *head = &space_info->priority_tickets;
5576
5577again:
5578 while (!list_empty(head) && num_bytes) {
5579 ticket = list_first_entry(head, struct reserve_ticket,
5580 list);
5581 if (num_bytes >= ticket->bytes) {
5582 trace_btrfs_space_reservation(fs_info, "space_info",
5583 space_info->flags,
5584 ticket->bytes, 1);
5585 list_del_init(&ticket->list);
5586 num_bytes -= ticket->bytes;
5587 update_bytes_may_use(space_info, ticket->bytes);
5588 ticket->bytes = 0;
5589 space_info->tickets_id++;
5590 wake_up(&ticket->wait);
5591 } else {
5592 trace_btrfs_space_reservation(fs_info, "space_info",
5593 space_info->flags,
5594 num_bytes, 1);
5595 update_bytes_may_use(space_info, num_bytes);
5596 ticket->bytes -= num_bytes;
5597 num_bytes = 0;
5598 }
5599 }
5600
5601 if (num_bytes && head == &space_info->priority_tickets) {
5602 head = &space_info->tickets;
5603 goto again;
5604 }
5605}
5606
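/*
 * Shrink @block_rsv by @num_bytes ((u64)-1 means empty it), refilling @dest
 * with any now-excess reserved bytes and returning the rest to the space_info
 * via space_info_add_old_bytes().  Returns the number of bytes released from
 * @block_rsv.  Excess qgroup reservation is reported via
 * @qgroup_to_release_ret rather than freed here.
 */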
5607static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5608 struct btrfs_block_rsv *block_rsv,
5609 struct btrfs_block_rsv *dest, u64 num_bytes,
5610 u64 *qgroup_to_release_ret)
5611{
5612 struct btrfs_space_info *space_info = block_rsv->space_info;
5613 u64 qgroup_to_release = 0;
5614 u64 ret;
5615
5616 spin_lock(&block_rsv->lock);
5617 if (num_bytes == (u64)-1) {
5618 num_bytes = block_rsv->size;
5619 qgroup_to_release = block_rsv->qgroup_rsv_size;
5620 }
5621 block_rsv->size -= num_bytes;
5622 if (block_rsv->reserved >= block_rsv->size) {
5623 num_bytes = block_rsv->reserved - block_rsv->size;
5624 block_rsv->reserved = block_rsv->size;
5625 block_rsv->full = 1;
5626 } else {
5627 num_bytes = 0;
5628 }
5629 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5630 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5631 block_rsv->qgroup_rsv_size;
5632 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5633 } else {
5634 qgroup_to_release = 0;
5635 }
5636 spin_unlock(&block_rsv->lock);
5637
5638 ret = num_bytes;
5639 if (num_bytes > 0) {
5640 if (dest) {
5641 spin_lock(&dest->lock);
5642 if (!dest->full) {
5643 u64 bytes_to_add;
5644
5645 bytes_to_add = dest->size - dest->reserved;
5646 bytes_to_add = min(num_bytes, bytes_to_add);
5647 dest->reserved += bytes_to_add;
5648 if (dest->reserved >= dest->size)
5649 dest->full = 1;
5650 num_bytes -= bytes_to_add;
5651 }
5652 spin_unlock(&dest->lock);
5653 }
5654 if (num_bytes)
5655 space_info_add_old_bytes(fs_info, space_info,
5656 num_bytes);
5657 }
5658 if (qgroup_to_release_ret)
5659 *qgroup_to_release_ret = qgroup_to_release;
5660 return ret;
5661}
5662
5663int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5664 struct btrfs_block_rsv *dst, u64 num_bytes,
5665 bool update_size)
5666{
5667 int ret;
5668
5669 ret = block_rsv_use_bytes(src, num_bytes);
5670 if (ret)
5671 return ret;
5672
5673 block_rsv_add_bytes(dst, num_bytes, update_size);
5674 return 0;
5675}
5676
5677void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5678{
5679 memset(rsv, 0, sizeof(*rsv));
5680 spin_lock_init(&rsv->lock);
5681 rsv->type = type;
5682}
5683
5684void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
5685 struct btrfs_block_rsv *rsv,
5686 unsigned short type)
5687{
5688 btrfs_init_block_rsv(rsv, type);
5689 rsv->space_info = __find_space_info(fs_info,
5690 BTRFS_BLOCK_GROUP_METADATA);
5691}
5692
5693struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
5694 unsigned short type)
5695{
5696 struct btrfs_block_rsv *block_rsv;
5697
5698 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5699 if (!block_rsv)
5700 return NULL;
5701
5702 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
5703 return block_rsv;
5704}
5705
5706void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
5707 struct btrfs_block_rsv *rsv)
5708{
5709 if (!rsv)
5710 return;
5711 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5712 kfree(rsv);
5713}
5714
5715int btrfs_block_rsv_add(struct btrfs_root *root,
5716 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5717 enum btrfs_reserve_flush_enum flush)
5718{
5719 int ret;
5720
5721 if (num_bytes == 0)
5722 return 0;
5723
5724 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5725 if (!ret)
5726 block_rsv_add_bytes(block_rsv, num_bytes, true);
5727
5728 return ret;
5729}
5730
5731int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
5732{
5733 u64 num_bytes = 0;
5734 int ret = -ENOSPC;
5735
5736 if (!block_rsv)
5737 return 0;
5738
5739 spin_lock(&block_rsv->lock);
5740 num_bytes = div_factor(block_rsv->size, min_factor);
5741 if (block_rsv->reserved >= num_bytes)
5742 ret = 0;
5743 spin_unlock(&block_rsv->lock);
5744
5745 return ret;
5746}
5747
5748int btrfs_block_rsv_refill(struct btrfs_root *root,
5749 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5750 enum btrfs_reserve_flush_enum flush)
5751{
5752 u64 num_bytes = 0;
5753 int ret = -ENOSPC;
5754
5755 if (!block_rsv)
5756 return 0;
5757
5758 spin_lock(&block_rsv->lock);
5759 num_bytes = min_reserved;
5760 if (block_rsv->reserved >= num_bytes)
5761 ret = 0;
5762 else
5763 num_bytes -= block_rsv->reserved;
5764 spin_unlock(&block_rsv->lock);
5765
5766 if (!ret)
5767 return 0;
5768
5769 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5770 if (!ret) {
5771 block_rsv_add_bytes(block_rsv, num_bytes, false);
5772 return 0;
5773 }
5774
5775 return ret;
5776}
5777
/**
 * btrfs_inode_rsv_refill - refill the inode block rsv.
 * @inode - the inode we are refilling.
 * @flush - the flushing restriction.
 *
 * Essentially the same as btrfs_block_rsv_refill, except it uses the
 * block_rsv->size as the minimum size.  We'll call this if we know we have to
 * or return if we already have enough space.  This will also handle the
 * reserve tracepoint for the bytes in the inode.
 */
5788static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
5789 enum btrfs_reserve_flush_enum flush)
5790{
5791 struct btrfs_root *root = inode->root;
5792 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5793 u64 num_bytes = 0;
5794 u64 qgroup_num_bytes = 0;
5795 int ret = -ENOSPC;
5796
5797 spin_lock(&block_rsv->lock);
5798 if (block_rsv->reserved < block_rsv->size)
5799 num_bytes = block_rsv->size - block_rsv->reserved;
5800 if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
5801 qgroup_num_bytes = block_rsv->qgroup_rsv_size -
5802 block_rsv->qgroup_rsv_reserved;
5803 spin_unlock(&block_rsv->lock);
5804
5805 if (num_bytes == 0)
5806 return 0;
5807
5808 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
5809 if (ret)
5810 return ret;
5811 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5812 if (!ret) {
5813 block_rsv_add_bytes(block_rsv, num_bytes, false);
5814 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5815 btrfs_ino(inode), num_bytes, 1);

		/* Don't forget to increase qgroup_rsv_reserved */
5818 spin_lock(&block_rsv->lock);
5819 block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
5820 spin_unlock(&block_rsv->lock);
5821 } else
5822 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5823 return ret;
5824}
5825
5826static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5827 struct btrfs_block_rsv *block_rsv,
5828 u64 num_bytes, u64 *qgroup_to_release)
5829{
5830 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5831 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5832 struct btrfs_block_rsv *target = delayed_rsv;
5833
5834 if (target->full || target == block_rsv)
5835 target = global_rsv;
5836
5837 if (block_rsv->space_info != target->space_info)
5838 target = NULL;
5839
5840 return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
5841 qgroup_to_release);
5842}
5843
5844void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5845 struct btrfs_block_rsv *block_rsv,
5846 u64 num_bytes)
5847{
5848 __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
5849}
5850
/**
 * btrfs_inode_rsv_release - release any excessive reservation.
 * @inode - the inode we need to release from.
 * @qgroup_free - free or convert qgroup meta.
 *   Unlike normal operation, qgroup meta reservation needs to know if we are
 *   freeing qgroup reservation or just converting it into per-trans.  Normally
 *   @qgroup_free is true for error handling, and false for normal release.
 *
 * This is the same as btrfs_block_rsv_release, except that it handles the
 * tracepoint for the reservation.
 */
5862static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
5863{
5864 struct btrfs_fs_info *fs_info = inode->root->fs_info;
5865 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5866 u64 released = 0;
5867 u64 qgroup_to_release = 0;
5868
	/*
	 * Since we statically set the block_rsv->size we just want to say we
	 * are releasing 0 bytes, and then we'll just get the reservation over
	 * the size free'd.
	 */
5874 released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
5875 &qgroup_to_release);
5876 if (released > 0)
5877 trace_btrfs_space_reservation(fs_info, "delalloc",
5878 btrfs_ino(inode), released, 0);
5879 if (qgroup_free)
5880 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
5881 else
5882 btrfs_qgroup_convert_reserved_meta(inode->root,
5883 qgroup_to_release);
5884}
5885
/**
 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
 * @fs_info - the fs_info for our fs.
 * @nr - the number of items to drop.
 *
 * This drops the delayed ref head's count from the delayed refs rsv and frees
 * any excess reservation we had.
 */
5894void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
5895{
5896 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5897 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5898 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
5899 u64 released = 0;
5900
5901 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
5902 num_bytes, NULL);
5903 if (released)
5904 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5905 0, released, 0);
5906}
5907
5908static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5909{
5910 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5911 struct btrfs_space_info *sinfo = block_rsv->space_info;
5912 u64 num_bytes;
5913
	/*
	 * The global block rsv is based on the size of the extent tree, the
	 * checksum tree and the tree of tree roots.  Ideally the global
	 * rsv is bigger, to be safe.
	 */
5919 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5920 btrfs_root_used(&fs_info->csum_root->root_item) +
5921 btrfs_root_used(&fs_info->tree_root->root_item);
5922 num_bytes = max_t(u64, num_bytes, SZ_16M);
5923
5924 spin_lock(&sinfo->lock);
5925 spin_lock(&block_rsv->lock);
5926
5927 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5928
5929 if (block_rsv->reserved < block_rsv->size) {
5930 num_bytes = btrfs_space_info_used(sinfo, true);
5931 if (sinfo->total_bytes > num_bytes) {
5932 num_bytes = sinfo->total_bytes - num_bytes;
5933 num_bytes = min(num_bytes,
5934 block_rsv->size - block_rsv->reserved);
5935 block_rsv->reserved += num_bytes;
5936 update_bytes_may_use(sinfo, num_bytes);
5937 trace_btrfs_space_reservation(fs_info, "space_info",
5938 sinfo->flags, num_bytes,
5939 1);
5940 }
5941 } else if (block_rsv->reserved > block_rsv->size) {
5942 num_bytes = block_rsv->reserved - block_rsv->size;
5943 update_bytes_may_use(sinfo, -num_bytes);
5944 trace_btrfs_space_reservation(fs_info, "space_info",
5945 sinfo->flags, num_bytes, 0);
5946 block_rsv->reserved = block_rsv->size;
5947 }
5948
5949 if (block_rsv->reserved == block_rsv->size)
5950 block_rsv->full = 1;
5951 else
5952 block_rsv->full = 0;
5953
5954 spin_unlock(&block_rsv->lock);
5955 spin_unlock(&sinfo->lock);
5956}
5957
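/*
 * Wire up the standard block reserves to their space_infos and point each
 * persistent tree root at the reserve it should draw from by default.
 */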
5958static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5959{
5960 struct btrfs_space_info *space_info;
5961
5962 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5963 fs_info->chunk_block_rsv.space_info = space_info;
5964
5965 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5966 fs_info->global_block_rsv.space_info = space_info;
5967 fs_info->trans_block_rsv.space_info = space_info;
5968 fs_info->empty_block_rsv.space_info = space_info;
5969 fs_info->delayed_block_rsv.space_info = space_info;
5970 fs_info->delayed_refs_rsv.space_info = space_info;
5971
5972 fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
5973 fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
5974 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5975 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5976 if (fs_info->quota_root)
5977 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5978 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5979
5980 update_global_block_rsv(fs_info);
5981}
5982
5983static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5984{
5985 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5986 (u64)-1, NULL);
5987 WARN_ON(fs_info->trans_block_rsv.size > 0);
5988 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5989 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5990 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5991 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5992 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5993 WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
5994 WARN_ON(fs_info->delayed_refs_rsv.size > 0);
5995}
5996
/*
 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
 * @trans - the trans that may have generated delayed refs
 *
 * This is to be called anytime we may have adjusted trans->delayed_ref_updates,
 * it'll calculate the additional size and add it to the delayed_refs_rsv.
 */
6004void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
6005{
6006 struct btrfs_fs_info *fs_info = trans->fs_info;
6007 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
6008 u64 num_bytes;
6009
6010 if (!trans->delayed_ref_updates)
6011 return;
6012
6013 num_bytes = btrfs_calc_trans_metadata_size(fs_info,
6014 trans->delayed_ref_updates);
6015 spin_lock(&delayed_rsv->lock);
6016 delayed_rsv->size += num_bytes;
6017 delayed_rsv->full = 0;
6018 spin_unlock(&delayed_rsv->lock);
6019 trans->delayed_ref_updates = 0;
6020}
6021
/*
 * To be called after all the new block groups attached to the transaction
 * handle have been created (btrfs_create_pending_block_groups()).
 */
6026void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
6027{
6028 struct btrfs_fs_info *fs_info = trans->fs_info;
6029
6030 if (!trans->chunk_bytes_reserved)
6031 return;
6032
6033 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
6034
6035 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
6036 trans->chunk_bytes_reserved, NULL);
6037 trans->chunk_bytes_reserved = 0;
6038}
6039
/*
 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
 * root: the root of the parent directory
 * rsv: block reservation
 * items: the number of items that we need do reservation
 * use_global_rsv: allow fallback to the global block reservation
 *
 * This function is used to reserve the space for snapshot/subvolume
 * creation and deletion.  Those operations are different with the
 * common file/directory operations, they could free some space if
 * they are done.  So the space reservation for them is necessary.
 *
 * We use the global block rsv as a fallback if the normal metadata
 * reservation fails and the caller allows it (use_global_rsv).
 */
6054int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
6055 struct btrfs_block_rsv *rsv, int items,
6056 bool use_global_rsv)
6057{
6058 u64 qgroup_num_bytes = 0;
6059 u64 num_bytes;
6060 int ret;
6061 struct btrfs_fs_info *fs_info = root->fs_info;
6062 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6063
6064 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
		/* One for parent inode, two for dir entries */
6066 qgroup_num_bytes = 3 * fs_info->nodesize;
6067 ret = btrfs_qgroup_reserve_meta_prealloc(root,
6068 qgroup_num_bytes, true);
6069 if (ret)
6070 return ret;
6071 }
6072
6073 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
6074 rsv->space_info = __find_space_info(fs_info,
6075 BTRFS_BLOCK_GROUP_METADATA);
6076 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
6077 BTRFS_RESERVE_FLUSH_ALL);
6078
6079 if (ret == -ENOSPC && use_global_rsv)
6080 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
6081
6082 if (ret && qgroup_num_bytes)
6083 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
6084
6085 return ret;
6086}
6087
6088void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
6089 struct btrfs_block_rsv *rsv)
6090{
6091 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
6092}
6093
6094static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
6095 struct btrfs_inode *inode)
6096{
6097 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6098 u64 reserve_size = 0;
6099 u64 qgroup_rsv_size = 0;
6100 u64 csum_leaves;
6101 unsigned outstanding_extents;
6102
6103 lockdep_assert_held(&inode->lock);
6104 outstanding_extents = inode->outstanding_extents;
6105 if (outstanding_extents)
6106 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
6107 outstanding_extents + 1);
6108 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
6109 inode->csum_bytes);
6110 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6111 csum_leaves);

	/*
	 * For qgroup rsv, the calculation is very simple:
	 * account one nodesize for each outstanding extent
	 *
	 * This is overestimating in most cases.
	 */
6118 qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
6119
6120 spin_lock(&block_rsv->lock);
6121 block_rsv->size = reserve_size;
6122 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
6123 spin_unlock(&block_rsv->lock);
6124}
6125
6126int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6127{
6128 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6129 unsigned nr_extents;
6130 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6131 int ret = 0;
6132 bool delalloc_lock = true;
6133
	/*
	 * If we are a free space inode we need to not flush since we will be in
	 * the middle of a transaction commit.  We also don't need the delalloc
	 * mutex since we won't race with anybody.  We need this mostly to make
	 * lockdep shut its filthy mouth.
	 *
	 * If we have a transaction open (can happen if we call truncate_block
	 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
	 */
6142 if (btrfs_is_free_space_inode(inode)) {
6143 flush = BTRFS_RESERVE_NO_FLUSH;
6144 delalloc_lock = false;
6145 } else {
6146 if (current->journal_info)
6147 flush = BTRFS_RESERVE_FLUSH_LIMIT;
6148
6149 if (btrfs_transaction_in_commit(fs_info))
6150 schedule_timeout(1);
6151 }
6152
6153 if (delalloc_lock)
6154 mutex_lock(&inode->delalloc_mutex);
6155
6156 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);

	/* Add our new extents and calculate the new rsv size. */
6159 spin_lock(&inode->lock);
6160 nr_extents = count_max_extents(num_bytes);
6161 btrfs_mod_outstanding_extents(inode, nr_extents);
6162 inode->csum_bytes += num_bytes;
6163 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6164 spin_unlock(&inode->lock);
6165
6166 ret = btrfs_inode_rsv_refill(inode, flush);
6167 if (unlikely(ret))
6168 goto out_fail;
6169
6170 if (delalloc_lock)
6171 mutex_unlock(&inode->delalloc_mutex);
6172 return 0;
6173
6174out_fail:
6175 spin_lock(&inode->lock);
6176 nr_extents = count_max_extents(num_bytes);
6177 btrfs_mod_outstanding_extents(inode, -nr_extents);
6178 inode->csum_bytes -= num_bytes;
6179 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6180 spin_unlock(&inode->lock);
6181
6182 btrfs_inode_rsv_release(inode, true);
6183 if (delalloc_lock)
6184 mutex_unlock(&inode->delalloc_mutex);
6185 return ret;
6186}
6187
/**
 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
 * @inode: the inode to release the reservation for.
 * @num_bytes: the number of bytes we are releasing.
 * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
 *
 * This will release the metadata reservation for an inode.  This can be called
 * once we complete IO for a given set of bytes to release their metadata
 * reservations, or on error for the same reason.
 */
6198void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6199 bool qgroup_free)
6200{
6201 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6202
6203 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6204 spin_lock(&inode->lock);
6205 inode->csum_bytes -= num_bytes;
6206 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6207 spin_unlock(&inode->lock);
6208
6209 if (btrfs_is_testing(fs_info))
6210 return;
6211
6212 btrfs_inode_rsv_release(inode, qgroup_free);
6213}
6214
/**
 * btrfs_delalloc_release_extents - release our outstanding_extents
 * @inode: the inode to balance the reservation for.
 * @num_bytes: the number of bytes we originally reserved with
 * @qgroup_free: do we need to free qgroup meta reservation or convert them.
 *
 * When we reserve space we increase outstanding_extents for the extents we may
 * add.  Once we've set the range as delalloc or created our ordered extents we
 * have outstanding_extents to track the real usage, so we use this to free our
 * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
 * with btrfs_delalloc_reserve_metadata.
 */
6227void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6228 bool qgroup_free)
6229{
6230 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6231 unsigned num_extents;
6232
6233 spin_lock(&inode->lock);
6234 num_extents = count_max_extents(num_bytes);
6235 btrfs_mod_outstanding_extents(inode, -num_extents);
6236 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6237 spin_unlock(&inode->lock);
6238
6239 if (btrfs_is_testing(fs_info))
6240 return;
6241
6242 btrfs_inode_rsv_release(inode, qgroup_free);
6243}
6244
/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for
 * delalloc
 * @inode: inode we're writing to
 * @start: start range we are writing to
 * @len: how long the range we are writing to
 * @reserved: mandatory parameter, record actually reserved qgroup ranges of
 *	      current reservation.
 *
 * Return 0 for success
 * Return <0 for error(-ENOSPC or -EQUOT)
 *
 * This will do the following things
 *
 * o reserve space in data space info for num bytes
 *   and reserve precious corresponding qgroup space
 *   (Done in check_data_free_space)
 *
 * o reserve space for metadata space, based on the number of outstanding
 *   extents and how much csums will be needed
 *   also reserve metadata space in a per root over-reserve method.
 *
 * o add to the inodes->delalloc_bytes
 * o add it to the fs_info's delalloc inodes list.
 *   (Above 3 all done in delalloc_reserve_metadata)
 */
6270int btrfs_delalloc_reserve_space(struct inode *inode,
6271 struct extent_changeset **reserved, u64 start, u64 len)
6272{
6273 int ret;
6274
6275 ret = btrfs_check_data_free_space(inode, reserved, start, len);
6276 if (ret < 0)
6277 return ret;
6278 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
6279 if (ret < 0)
6280 btrfs_free_reserved_data_space(inode, *reserved, start, len);
6281 return ret;
6282}
6283
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @start: start position of the space already reserved
 * @len: the len of the space already reserved
 * @qgroup_free: should we free qgroup reservation or convert it to per-trans
 *
 * This function will release the metadata space that was not used and will
 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
 * list if there are no delalloc bytes left.
 * Also it will handle the qgroup reserved space.
 */
6296void btrfs_delalloc_release_space(struct inode *inode,
6297 struct extent_changeset *reserved,
6298 u64 start, u64 len, bool qgroup_free)
6299{
6300 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
6301 btrfs_free_reserved_data_space(inode, reserved, start, len);
6302}
6303
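/*
 * Update the accounting when @num_bytes at @bytenr is allocated or freed:
 * adjust the superblock's bytes_used, the affected block group items and
 * space_info counters, pin freed space for unpinning at commit, and queue
 * the touched block groups on the transaction's dirty list.
 */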
6304static int update_block_group(struct btrfs_trans_handle *trans,
6305 struct btrfs_fs_info *info, u64 bytenr,
6306 u64 num_bytes, int alloc)
6307{
6308 struct btrfs_block_group_cache *cache = NULL;
6309 u64 total = num_bytes;
6310 u64 old_val;
6311 u64 byte_in_group;
6312 int factor;
6313 int ret = 0;
6314
	/* block accounting for super block */
6316 spin_lock(&info->delalloc_root_lock);
6317 old_val = btrfs_super_bytes_used(info->super_copy);
6318 if (alloc)
6319 old_val += num_bytes;
6320 else
6321 old_val -= num_bytes;
6322 btrfs_set_super_bytes_used(info->super_copy, old_val);
6323 spin_unlock(&info->delalloc_root_lock);
6324
6325 while (total) {
6326 cache = btrfs_lookup_block_group(info, bytenr);
6327 if (!cache) {
6328 ret = -ENOENT;
6329 break;
6330 }
6331 factor = btrfs_bg_type_to_factor(cache->flags);

		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space.  This
		 * is because we need the unpinning stage to actually add the
		 * space back to the block group, otherwise we will leak space.
		 */
6339 if (!alloc && cache->cached == BTRFS_CACHE_NO)
6340 cache_block_group(cache, 1);
6341
6342 byte_in_group = bytenr - cache->key.objectid;
6343 WARN_ON(byte_in_group > cache->key.offset);
6344
6345 spin_lock(&cache->space_info->lock);
6346 spin_lock(&cache->lock);
6347
6348 if (btrfs_test_opt(info, SPACE_CACHE) &&
6349 cache->disk_cache_state < BTRFS_DC_CLEAR)
6350 cache->disk_cache_state = BTRFS_DC_CLEAR;
6351
6352 old_val = btrfs_block_group_used(&cache->item);
6353 num_bytes = min(total, cache->key.offset - byte_in_group);
6354 if (alloc) {
6355 old_val += num_bytes;
6356 btrfs_set_block_group_used(&cache->item, old_val);
6357 cache->reserved -= num_bytes;
6358 cache->space_info->bytes_reserved -= num_bytes;
6359 cache->space_info->bytes_used += num_bytes;
6360 cache->space_info->disk_used += num_bytes * factor;
6361 spin_unlock(&cache->lock);
6362 spin_unlock(&cache->space_info->lock);
6363 } else {
6364 old_val -= num_bytes;
6365 btrfs_set_block_group_used(&cache->item, old_val);
6366 cache->pinned += num_bytes;
6367 update_bytes_pinned(cache->space_info, num_bytes);
6368 cache->space_info->bytes_used -= num_bytes;
6369 cache->space_info->disk_used -= num_bytes * factor;
6370 spin_unlock(&cache->lock);
6371 spin_unlock(&cache->space_info->lock);
6372
6373 trace_btrfs_space_reservation(info, "pinned",
6374 cache->space_info->flags,
6375 num_bytes, 1);
6376 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6377 num_bytes,
6378 BTRFS_TOTAL_BYTES_PINNED_BATCH);
6379 set_extent_dirty(info->pinned_extents,
6380 bytenr, bytenr + num_bytes - 1,
6381 GFP_NOFS | __GFP_NOFAIL);
6382 }
6383
6384 spin_lock(&trans->transaction->dirty_bgs_lock);
6385 if (list_empty(&cache->dirty_list)) {
6386 list_add_tail(&cache->dirty_list,
6387 &trans->transaction->dirty_bgs);
6388 trans->transaction->num_dirty_bgs++;
6389 trans->delayed_ref_updates++;
6390 btrfs_get_block_group(cache);
6391 }
6392 spin_unlock(&trans->transaction->dirty_bgs_lock);
6393
		/*
		 * No longer have used bytes in this block group, queue it for
		 * deletion. We do this after adding the block group to the
		 * dirty list to avoid races between cleaner kthread and space
		 * cache writeout.
		 */
6400 if (!alloc && old_val == 0)
6401 btrfs_mark_bg_unused(cache);
6402
6403 btrfs_put_block_group(cache);
6404 total -= num_bytes;
6405 bytenr += num_bytes;
6406 }
6407
	/* Modified block groups are accounted for in the delayed_refs_rsv. */
6409 btrfs_update_delayed_refs_rsv(trans);
6410 return ret;
6411}
6412
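/*
 * Cheaply find the first logical byte of the filesystem: use the cached
 * fs_info->first_logical_byte when valid, otherwise look up the first block
 * group at or after @search_start.
 */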
6413static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
6414{
6415 struct btrfs_block_group_cache *cache;
6416 u64 bytenr;
6417
6418 spin_lock(&fs_info->block_group_cache_lock);
6419 bytenr = fs_info->first_logical_byte;
6420 spin_unlock(&fs_info->block_group_cache_lock);
6421
6422 if (bytenr < (u64)-1)
6423 return bytenr;
6424
6425 cache = btrfs_lookup_first_block_group(fs_info, search_start);
6426 if (!cache)
6427 return 0;
6428
6429 bytenr = cache->key.objectid;
6430 btrfs_put_block_group(cache);
6431
6432 return bytenr;
6433}
6434
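/*
 * Account @num_bytes at @bytenr as pinned in the block group and space_info
 * (moving it out of ->reserved when @reserved is set) and mark the range
 * EXTENT_DIRTY in the pinned_extents tree so it gets unpinned when the
 * transaction commits.
 */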
6435static int pin_down_extent(struct btrfs_fs_info *fs_info,
6436 struct btrfs_block_group_cache *cache,
6437 u64 bytenr, u64 num_bytes, int reserved)
6438{
6439 spin_lock(&cache->space_info->lock);
6440 spin_lock(&cache->lock);
6441 cache->pinned += num_bytes;
6442 update_bytes_pinned(cache->space_info, num_bytes);
6443 if (reserved) {
6444 cache->reserved -= num_bytes;
6445 cache->space_info->bytes_reserved -= num_bytes;
6446 }
6447 spin_unlock(&cache->lock);
6448 spin_unlock(&cache->space_info->lock);
6449
6450 trace_btrfs_space_reservation(fs_info, "pinned",
6451 cache->space_info->flags, num_bytes, 1);
6452 percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
6453 num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
6454 set_extent_dirty(fs_info->pinned_extents, bytenr,
6455 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
6456 return 0;
6457}
6458
/*
 * this function must be called within transaction
 */
6462int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
6463 u64 bytenr, u64 num_bytes, int reserved)
6464{
6465 struct btrfs_block_group_cache *cache;
6466
6467 cache = btrfs_lookup_block_group(fs_info, bytenr);
	BUG_ON(!cache); /* Logic error */
6469
6470 pin_down_extent(fs_info, cache, bytenr, num_bytes, reserved);
6471
6472 btrfs_put_block_group(cache);
6473 return 0;
6474}
6475
/*
 * this function must be called within transaction
 */
6479int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
6480 u64 bytenr, u64 num_bytes)
6481{
6482 struct btrfs_block_group_cache *cache;
6483 int ret;
6484
6485 cache = btrfs_lookup_block_group(fs_info, bytenr);
6486 if (!cache)
6487 return -EINVAL;
6488
	/*
	 * pull in the free space cache (if any) so that our pin
	 * removes the free space from the cache.  We have load_only set
	 * to one because the slow code to read in the free extents does check
	 * the pinned extents.
	 */
6495 cache_block_group(cache, 1);
6496
6497 pin_down_extent(fs_info, cache, bytenr, num_bytes, 0);
6498
	/* remove us from the free space cache (if we're there at all) */
6500 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
6501 btrfs_put_block_group(cache);
6502 return ret;
6503}
6504
6505static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
6506 u64 start, u64 num_bytes)
6507{
6508 int ret;
6509 struct btrfs_block_group_cache *block_group;
6510 struct btrfs_caching_control *caching_ctl;
6511
6512 block_group = btrfs_lookup_block_group(fs_info, start);
6513 if (!block_group)
6514 return -EINVAL;
6515
6516 cache_block_group(block_group, 0);
6517 caching_ctl = get_caching_control(block_group);
6518
6519 if (!caching_ctl) {
		/* Logic error */
6521 BUG_ON(!block_group_cache_done(block_group));
6522 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6523 } else {
6524 mutex_lock(&caching_ctl->mutex);
6525
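		/*
		 * The caching thread may have processed part of this range
		 * already: free space behind ->progress must be removed from
		 * the cache directly, while the remainder is recorded as
		 * excluded so the caching thread skips it.
		 */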
6526 if (start >= caching_ctl->progress) {
6527 ret = add_excluded_extent(fs_info, start, num_bytes);
6528 } else if (start + num_bytes <= caching_ctl->progress) {
6529 ret = btrfs_remove_free_space(block_group,
6530 start, num_bytes);
6531 } else {
6532 num_bytes = caching_ctl->progress - start;
6533 ret = btrfs_remove_free_space(block_group,
6534 start, num_bytes);
6535 if (ret)
6536 goto out_lock;
6537
6538 num_bytes = (start + num_bytes) -
6539 caching_ctl->progress;
6540 start = caching_ctl->progress;
6541 ret = add_excluded_extent(fs_info, start, num_bytes);
6542 }
6543out_lock:
6544 mutex_unlock(&caching_ctl->mutex);
6545 put_caching_control(caching_ctl);
6546 }
6547 btrfs_put_block_group(block_group);
6548 return ret;
6549}
6550
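/*
 * During log replay, walk a replayed leaf and exclude the disk ranges of all
 * its real (non-inline, non-hole) file extents from the free space accounting,
 * since they are in use even though the extent tree doesn't know it yet.
 * Only needed on filesystems with mixed block groups.
 */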
6551int btrfs_exclude_logged_extents(struct btrfs_fs_info *fs_info,
6552 struct extent_buffer *eb)
6553{
6554 struct btrfs_file_extent_item *item;
6555 struct btrfs_key key;
6556 int found_type;
6557 int i;
6558 int ret = 0;
6559
6560 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
6561 return 0;
6562
6563 for (i = 0; i < btrfs_header_nritems(eb); i++) {
6564 btrfs_item_key_to_cpu(eb, &key, i);
6565 if (key.type != BTRFS_EXTENT_DATA_KEY)
6566 continue;
6567 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
6568 found_type = btrfs_file_extent_type(eb, item);
6569 if (found_type == BTRFS_FILE_EXTENT_INLINE)
6570 continue;
6571 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
6572 continue;
6573 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
6574 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
6575 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
6576 if (ret)
6577 break;
6578 }
6579
6580 return ret;
6581}
6582
6583static void
6584btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
6585{
6586 atomic_inc(&bg->reservations);
6587}
6588
6589void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
6590 const u64 start)
6591{
6592 struct btrfs_block_group_cache *bg;
6593
6594 bg = btrfs_lookup_block_group(fs_info, start);
6595 ASSERT(bg);
6596 if (atomic_dec_and_test(&bg->reservations))
6597 wake_up_var(&bg->reservations);
6598 btrfs_put_block_group(bg);
6599}
6600
6601void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
6602{
6603 struct btrfs_space_info *space_info = bg->space_info;
6604
6605 ASSERT(bg->ro);
6606
6607 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
6608 return;
6609
	/*
	 * Our block group is read only but before we set it to read only,
	 * some task might have had allocated an extent from it already, but it
	 * has not yet created a respective ordered extent (and added it to a
	 * root's list of ordered extents).
	 * Therefore wait for any task currently allocating extents, since the
	 * block group's reservations counter is incremented while a read lock
	 * on the groups' semaphore is held and decremented after releasing
	 * the read access on that semaphore and creating the ordered extent.
	 */
6620 down_write(&space_info->groups_sem);
6621 up_write(&space_info->groups_sem);
6622
6623 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
6624}
6625
/**
 * btrfs_add_reserved_bytes - update the block_group and space info counters
 * @cache:	The cache we are manipulating
 * @ram_bytes:  The number of bytes of file content, and will be same to
 *              @num_bytes except for the compress path.
 * @num_bytes:	The number of bytes in question
 * @delalloc:   The blocks are allocated for the delalloc write
 *
 * This is called by the allocator when it reserves space. If this is a
 * reservation and the block group has become read only we cannot make the
 * reservation and return -EAGAIN, otherwise this function always succeeds.
 */
6638static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6639 u64 ram_bytes, u64 num_bytes, int delalloc)
6640{
6641 struct btrfs_space_info *space_info = cache->space_info;
6642 int ret = 0;
6643
6644 spin_lock(&space_info->lock);
6645 spin_lock(&cache->lock);
6646 if (cache->ro) {
6647 ret = -EAGAIN;
6648 } else {
6649 cache->reserved += num_bytes;
6650 space_info->bytes_reserved += num_bytes;
6651 update_bytes_may_use(space_info, -ram_bytes);
6652 if (delalloc)
6653 cache->delalloc_bytes += num_bytes;
6654 }
6655 spin_unlock(&cache->lock);
6656 spin_unlock(&space_info->lock);
6657 return ret;
6658}
6659
/**
 * btrfs_free_reserved_bytes - update the block_group and space info counters
 * @cache:      The cache we are manipulating
 * @num_bytes:  The number of bytes in question
 * @delalloc:   The blocks are allocated for the delalloc write
 *
 * This is called by somebody who is freeing space that was never actually used
 * on disk.  For example if you reserve some space for a new leaf in transaction
 * A and before transaction A commits you free that leaf, you call this with
 * reserve set to 0 in order to clear the reservation.
 */
6672static void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
6673 u64 num_bytes, int delalloc)
6674{
6675 struct btrfs_space_info *space_info = cache->space_info;
6676
6677 spin_lock(&space_info->lock);
6678 spin_lock(&cache->lock);
6679 if (cache->ro)
6680 space_info->bytes_readonly += num_bytes;
6681 cache->reserved -= num_bytes;
6682 space_info->bytes_reserved -= num_bytes;
6683 space_info->max_extent_size = 0;
6684
6685 if (delalloc)
6686 cache->delalloc_bytes -= num_bytes;
6687 spin_unlock(&cache->lock);
6688 spin_unlock(&space_info->lock);
6689}
6690void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
6691{
6692 struct btrfs_caching_control *next;
6693 struct btrfs_caching_control *caching_ctl;
6694 struct btrfs_block_group_cache *cache;
6695
6696 down_write(&fs_info->commit_root_sem);
6697
6698 list_for_each_entry_safe(caching_ctl, next,
6699 &fs_info->caching_block_groups, list) {
6700 cache = caching_ctl->block_group;
6701 if (block_group_cache_done(cache)) {
6702 cache->last_byte_to_unpin = (u64)-1;
6703 list_del_init(&caching_ctl->list);
6704 put_caching_control(caching_ctl);
6705 } else {
6706 cache->last_byte_to_unpin = caching_ctl->progress;
6707 }
6708 }
6709
6710 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6711 fs_info->pinned_extents = &fs_info->freed_extents[1];
6712 else
6713 fs_info->pinned_extents = &fs_info->freed_extents[0];
6714
6715 up_write(&fs_info->commit_root_sem);
6716
6717 update_global_block_rsv(fs_info);
6718}
6719
/*
 * Returns the free cluster for the given space info and sets empty_cluster to
 * what it should be based on the mount options.
 */
6724static struct btrfs_free_cluster *
6725fetch_cluster_info(struct btrfs_fs_info *fs_info,
6726 struct btrfs_space_info *space_info, u64 *empty_cluster)
6727{
6728 struct btrfs_free_cluster *ret = NULL;
6729
6730 *empty_cluster = 0;
6731 if (btrfs_mixed_space_info(space_info))
6732 return ret;
6733
6734 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
6735 ret = &fs_info->meta_alloc_cluster;
6736 if (btrfs_test_opt(fs_info, SSD))
6737 *empty_cluster = SZ_2M;
6738 else
6739 *empty_cluster = SZ_64K;
6740 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
6741 btrfs_test_opt(fs_info, SSD_SPREAD)) {
6742 *empty_cluster = SZ_2M;
6743 ret = &fs_info->data_alloc_cluster;
6744 }
6745
6746 return ret;
6747}
6748
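/*
 * Unpin all extents in [start, end]: return the space to the owning block
 * groups' free space caches (when @return_free_space is set), keep the pinned
 * counters in sync, and opportunistically top up the global reserve from the
 * freed metadata space, handing any leftover bytes to waiting tickets.
 */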
6749static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6750 u64 start, u64 end,
6751 const bool return_free_space)
6752{
6753 struct btrfs_block_group_cache *cache = NULL;
6754 struct btrfs_space_info *space_info;
6755 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
6756 struct btrfs_free_cluster *cluster = NULL;
6757 u64 len;
6758 u64 total_unpinned = 0;
6759 u64 empty_cluster = 0;
6760 bool readonly;
6761
6762 while (start <= end) {
6763 readonly = false;
6764 if (!cache ||
6765 start >= cache->key.objectid + cache->key.offset) {
6766 if (cache)
6767 btrfs_put_block_group(cache);
6768 total_unpinned = 0;
6769 cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache); /* Logic error */
6771
6772 cluster = fetch_cluster_info(fs_info,
6773 cache->space_info,
6774 &empty_cluster);
6775 empty_cluster <<= 1;
6776 }
6777
6778 len = cache->key.objectid + cache->key.offset - start;
6779 len = min(len, end + 1 - start);
6780
6781 if (start < cache->last_byte_to_unpin) {
6782 len = min(len, cache->last_byte_to_unpin - start);
6783 if (return_free_space)
6784 btrfs_add_free_space(cache, start, len);
6785 }
6786
6787 start += len;
6788 total_unpinned += len;
6789 space_info = cache->space_info;
6790
		/*
		 * If this space cluster has been marked as fragmented and we've
		 * unpinned enough in this block group to potentially allow a
		 * cluster to be created inside of it go ahead and clear the
		 * fragmented check.
		 */
6797 if (cluster && cluster->fragmented &&
6798 total_unpinned > empty_cluster) {
6799 spin_lock(&cluster->lock);
6800 cluster->fragmented = 0;
6801 spin_unlock(&cluster->lock);
6802 }
6803
6804 spin_lock(&space_info->lock);
6805 spin_lock(&cache->lock);
6806 cache->pinned -= len;
6807 update_bytes_pinned(space_info, -len);
6808
6809 trace_btrfs_space_reservation(fs_info, "pinned",
6810 space_info->flags, len, 0);
6811 space_info->max_extent_size = 0;
6812 percpu_counter_add_batch(&space_info->total_bytes_pinned,
6813 -len, BTRFS_TOTAL_BYTES_PINNED_BATCH);
6814 if (cache->ro) {
6815 space_info->bytes_readonly += len;
6816 readonly = true;
6817 }
6818 spin_unlock(&cache->lock);
6819 if (!readonly && return_free_space &&
6820 global_rsv->space_info == space_info) {
6821 u64 to_add = len;
6822
6823 spin_lock(&global_rsv->lock);
6824 if (!global_rsv->full) {
6825 to_add = min(len, global_rsv->size -
6826 global_rsv->reserved);
6827 global_rsv->reserved += to_add;
6828 update_bytes_may_use(space_info, to_add);
6829 if (global_rsv->reserved >= global_rsv->size)
6830 global_rsv->full = 1;
6831 trace_btrfs_space_reservation(fs_info,
6832 "space_info",
6833 space_info->flags,
6834 to_add, 1);
6835 len -= to_add;
6836 }
6837 spin_unlock(&global_rsv->lock);
6838
6839 if (len)
6840 space_info_add_new_bytes(fs_info, space_info,
6841 len);
6842 }
6843 spin_unlock(&space_info->lock);
6844 }
6845
6846 if (cache)
6847 btrfs_put_block_group(cache);
6848 return 0;
6849}
6850
6851int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
6852{
6853 struct btrfs_fs_info *fs_info = trans->fs_info;
6854 struct btrfs_block_group_cache *block_group, *tmp;
6855 struct list_head *deleted_bgs;
6856 struct extent_io_tree *unpin;
6857 u64 start;
6858 u64 end;
6859 int ret;
6860
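	/*
	 * fs_info->pinned_extents always points at one of the two
	 * freed_extents trees; unpin from the other one, which holds the
	 * extents pinned by the transaction that is now committing.
	 */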
6861 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
6862 unpin = &fs_info->freed_extents[1];
6863 else
6864 unpin = &fs_info->freed_extents[0];
6865
6866 while (!trans->aborted) {
6867 struct extent_state *cached_state = NULL;
6868
6869 mutex_lock(&fs_info->unused_bg_unpin_mutex);
6870 ret = find_first_extent_bit(unpin, 0, &start, &end,
6871 EXTENT_DIRTY, &cached_state);
6872 if (ret) {
6873 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6874 break;
6875 }
6876
6877 if (btrfs_test_opt(fs_info, DISCARD))
6878 ret = btrfs_discard_extent(fs_info, start,
6879 end + 1 - start, NULL);
6880
6881 clear_extent_dirty(unpin, start, end, &cached_state);
6882 unpin_extent_range(fs_info, start, end, true);
6883 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
6884 free_extent_state(cached_state);
6885 cond_resched();
6886 }
6887
	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
	 * do need to clean up the block groups in case of a transaction
	 * abort.
	 */
6893 deleted_bgs = &trans->transaction->deleted_bgs;
6894 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
6895 u64 trimmed = 0;
6896
6897 ret = -EROFS;
6898 if (!trans->aborted)
6899 ret = btrfs_discard_extent(fs_info,
6900 block_group->key.objectid,
6901 block_group->key.offset,
6902 &trimmed);
6903
6904 list_del_init(&block_group->bg_list);
6905 btrfs_put_block_group_trimming(block_group);
6906 btrfs_put_block_group(block_group);
6907
6908 if (ret) {
6909 const char *errstr = btrfs_decode_error(ret);
6910 btrfs_warn(fs_info,
6911 "discard failed while removing blockgroup: errno=%d %s",
6912 ret, errstr);
6913 }
6914 }
6915
6916 return 0;
6917}
6918
6919static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
6920 struct btrfs_delayed_ref_node *node, u64 parent,
6921 u64 root_objectid, u64 owner_objectid,
6922 u64 owner_offset, int refs_to_drop,
6923 struct btrfs_delayed_extent_op *extent_op)
6924{
6925 struct btrfs_fs_info *info = trans->fs_info;
6926 struct btrfs_key key;
6927 struct btrfs_path *path;
6928 struct btrfs_root *extent_root = info->extent_root;
6929 struct extent_buffer *leaf;
6930 struct btrfs_extent_item *ei;
6931 struct btrfs_extent_inline_ref *iref;
6932 int ret;
6933 int is_data;
6934 int extent_slot = 0;
6935 int found_extent = 0;
6936 int num_to_del = 1;
6937 u32 item_size;
6938 u64 refs;
6939 u64 bytenr = node->bytenr;
6940 u64 num_bytes = node->num_bytes;
6941 int last_ref = 0;
6942 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
6943
6944 path = btrfs_alloc_path();
6945 if (!path)
6946 return -ENOMEM;
6947
6948 path->reada = READA_FORWARD;
6949 path->leave_spinning = 1;
6950
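	/*
	 * Data extents carry an inode number as their owner; anything below
	 * BTRFS_FIRST_FREE_OBJECTID is a tree block, whose owner is the level.
	 */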
6951 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
6952 BUG_ON(!is_data && refs_to_drop != 1);
6953
6954 if (is_data)
6955 skinny_metadata = false;
6956
6957 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
6958 parent, root_objectid, owner_objectid,
6959 owner_offset);
6960 if (ret == 0) {
6961 extent_slot = path->slots[0];
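		/*
		 * The backref lookup left us on the backref item; the extent
		 * item itself sits in front of its backrefs in the same leaf,
		 * so scan back a few slots looking for it.
		 */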
6962 while (extent_slot >= 0) {
6963 btrfs_item_key_to_cpu(path->nodes[0], &key,
6964 extent_slot);
6965 if (key.objectid != bytenr)
6966 break;
6967 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
6968 key.offset == num_bytes) {
6969 found_extent = 1;
6970 break;
6971 }
6972 if (key.type == BTRFS_METADATA_ITEM_KEY &&
6973 key.offset == owner_objectid) {
6974 found_extent = 1;
6975 break;
6976 }
6977 if (path->slots[0] - extent_slot > 5)
6978 break;
6979 extent_slot--;
6980 }
6981
6982 if (!found_extent) {
6983 BUG_ON(iref);
6984 ret = remove_extent_backref(trans, path, NULL,
6985 refs_to_drop,
6986 is_data, &last_ref);
6987 if (ret) {
6988 btrfs_abort_transaction(trans, ret);
6989 goto out;
6990 }
6991 btrfs_release_path(path);
6992 path->leave_spinning = 1;
6993
6994 key.objectid = bytenr;
6995 key.type = BTRFS_EXTENT_ITEM_KEY;
6996 key.offset = num_bytes;
6997
6998 if (!is_data && skinny_metadata) {
6999 key.type = BTRFS_METADATA_ITEM_KEY;
7000 key.offset = owner_objectid;
7001 }
7002
7003 ret = btrfs_search_slot(trans, extent_root,
7004 &key, path, -1, 1);
7005 if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
7010 path->slots[0]--;
7011 btrfs_item_key_to_cpu(path->nodes[0], &key,
7012 path->slots[0]);
7013 if (key.objectid == bytenr &&
7014 key.type == BTRFS_EXTENT_ITEM_KEY &&
7015 key.offset == num_bytes)
7016 ret = 0;
7017 }
7018
7019 if (ret > 0 && skinny_metadata) {
7020 skinny_metadata = false;
7021 key.objectid = bytenr;
7022 key.type = BTRFS_EXTENT_ITEM_KEY;
7023 key.offset = num_bytes;
7024 btrfs_release_path(path);
7025 ret = btrfs_search_slot(trans, extent_root,
7026 &key, path, -1, 1);
7027 }
7028
7029 if (ret) {
7030 btrfs_err(info,
7031 "umm, got %d back from search, was looking for %llu",
7032 ret, bytenr);
7033 if (ret > 0)
7034 btrfs_print_leaf(path->nodes[0]);
7035 }
7036 if (ret < 0) {
7037 btrfs_abort_transaction(trans, ret);
7038 goto out;
7039 }
7040 extent_slot = path->slots[0];
7041 }
7042 } else if (WARN_ON(ret == -ENOENT)) {
7043 btrfs_print_leaf(path->nodes[0]);
7044 btrfs_err(info,
7045 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
7046 bytenr, parent, root_objectid, owner_objectid,
7047 owner_offset);
7048 btrfs_abort_transaction(trans, ret);
7049 goto out;
7050 } else {
7051 btrfs_abort_transaction(trans, ret);
7052 goto out;
7053 }
7054
7055 leaf = path->nodes[0];
7056 item_size = btrfs_item_size_nr(leaf, extent_slot);
7057 if (unlikely(item_size < sizeof(*ei))) {
7058 ret = -EINVAL;
7059 btrfs_print_v0_err(info);
7060 btrfs_abort_transaction(trans, ret);
7061 goto out;
7062 }
7063 ei = btrfs_item_ptr(leaf, extent_slot,
7064 struct btrfs_extent_item);
7065 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
7066 key.type == BTRFS_EXTENT_ITEM_KEY) {
7067 struct btrfs_tree_block_info *bi;
7068 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
7069 bi = (struct btrfs_tree_block_info *)(ei + 1);
7070 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
7071 }
7072
7073 refs = btrfs_extent_refs(leaf, ei);
7074 if (refs < refs_to_drop) {
7075 btrfs_err(info,
7076 "trying to drop %d refs but we only have %Lu for bytenr %Lu",
7077 refs_to_drop, refs, bytenr);
7078 ret = -EINVAL;
7079 btrfs_abort_transaction(trans, ret);
7080 goto out;
7081 }
7082 refs -= refs_to_drop;
7083
7084 if (refs > 0) {
7085 if (extent_op)
7086 __run_delayed_extent_op(extent_op, leaf, ei);
7087
		/*
		 * In the case of an inline back ref, the reference count will
		 * be updated by remove_extent_backref
		 */
7091 if (iref) {
7092 BUG_ON(!found_extent);
7093 } else {
7094 btrfs_set_extent_refs(leaf, ei, refs);
7095 btrfs_mark_buffer_dirty(leaf);
7096 }
7097 if (found_extent) {
7098 ret = remove_extent_backref(trans, path, iref,
7099 refs_to_drop, is_data,
7100 &last_ref);
7101 if (ret) {
7102 btrfs_abort_transaction(trans, ret);
7103 goto out;
7104 }
7105 }
7106 } else {
7107 if (found_extent) {
7108 BUG_ON(is_data && refs_to_drop !=
7109 extent_data_ref_count(path, iref));
7110 if (iref) {
7111 BUG_ON(path->slots[0] != extent_slot);
7112 } else {
7113 BUG_ON(path->slots[0] != extent_slot + 1);
7114 path->slots[0] = extent_slot;
7115 num_to_del = 2;
7116 }
7117 }
7118
7119 last_ref = 1;
7120 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
7121 num_to_del);
7122 if (ret) {
7123 btrfs_abort_transaction(trans, ret);
7124 goto out;
7125 }
7126 btrfs_release_path(path);
7127
7128 if (is_data) {
7129 ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
7130 if (ret) {
7131 btrfs_abort_transaction(trans, ret);
7132 goto out;
7133 }
7134 }
7135
7136 ret = add_to_free_space_tree(trans, bytenr, num_bytes);
7137 if (ret) {
7138 btrfs_abort_transaction(trans, ret);
7139 goto out;
7140 }
7141
7142 ret = update_block_group(trans, info, bytenr, num_bytes, 0);
7143 if (ret) {
7144 btrfs_abort_transaction(trans, ret);
7145 goto out;
7146 }
7147 }
7148 btrfs_release_path(path);
7149
7150out:
7151 btrfs_free_path(path);
7152 return ret;
7153}
7154
/*
 * when we free a block, it is possible (and likely) that we free the last
 * delayed ref for that extent as well.  This searches the delayed ref tree for
 * a given extent, and if there are no other delayed refs to be processed, it
 * removes it from the tree.
 */
7161static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
7162 u64 bytenr)
7163{
7164 struct btrfs_delayed_ref_head *head;
7165 struct btrfs_delayed_ref_root *delayed_refs;
7166 int ret = 0;
7167
7168 delayed_refs = &trans->transaction->delayed_refs;
7169 spin_lock(&delayed_refs->lock);
7170 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
7171 if (!head)
7172 goto out_delayed_unlock;
7173
7174 spin_lock(&head->lock);
7175 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
7176 goto out;
7177
7178 if (cleanup_extent_op(head) != NULL)
7179 goto out;
7180
	/*
	 * Waiting for the lock here would deadlock.  If someone else has it
	 * locked they are already in the process of dropping it anyway.
	 */
7185 if (!mutex_trylock(&head->mutex))
7186 goto out;
7187
7188 btrfs_delete_ref_head(delayed_refs, head);
7189 head->processing = 0;
7190
7191 spin_unlock(&head->lock);
7192 spin_unlock(&delayed_refs->lock);
7193
7194 BUG_ON(head->extent_op);
7195 if (head->must_insert_reserved)
7196 ret = 1;
7197
7198 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
7199 mutex_unlock(&head->mutex);
7200 btrfs_put_delayed_ref_head(head);
7201 return ret;
7202out:
7203 spin_unlock(&head->lock);
7204
7205out_delayed_unlock:
7206 spin_unlock(&delayed_refs->lock);
7207 return 0;
7208}
7209
7210void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7211 struct btrfs_root *root,
7212 struct extent_buffer *buf,
7213 u64 parent, int last_ref)
7214{
7215 struct btrfs_fs_info *fs_info = root->fs_info;
7216 int pin = 1;
7217 int ret;
7218
7219 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7220 int old_ref_mod, new_ref_mod;
7221
7222 btrfs_ref_tree_mod(root, buf->start, buf->len, parent,
7223 root->root_key.objectid,
7224 btrfs_header_level(buf), 0,
7225 BTRFS_DROP_DELAYED_REF);
7226 ret = btrfs_add_delayed_tree_ref(trans, buf->start,
7227 buf->len, parent,
7228 root->root_key.objectid,
7229 btrfs_header_level(buf),
7230 BTRFS_DROP_DELAYED_REF, NULL,
7231 &old_ref_mod, &new_ref_mod);
7232 BUG_ON(ret);
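		/*
		 * Only pin the block if this drop made the head's total ref
		 * mod go negative, i.e. the extent really is freed by this
		 * transaction rather than still referenced elsewhere.
		 */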
7233 pin = old_ref_mod >= 0 && new_ref_mod < 0;
7234 }
7235
7236 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
7237 struct btrfs_block_group_cache *cache;
7238
7239 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7240 ret = check_ref_cleanup(trans, buf->start);
7241 if (!ret)
7242 goto out;
7243 }
7244
7245 pin = 0;
7246 cache = btrfs_lookup_block_group(fs_info, buf->start);
7247
7248 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
7249 pin_down_extent(fs_info, cache, buf->start,
7250 buf->len, 1);
7251 btrfs_put_block_group(cache);
7252 goto out;
7253 }
7254
7255 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
7256
7257 btrfs_add_free_space(cache, buf->start, buf->len);
7258 btrfs_free_reserved_bytes(cache, buf->len, 0);
7259 btrfs_put_block_group(cache);
7260 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
7261 }
7262out:
7263 if (pin)
7264 add_pinned_bytes(fs_info, buf->len, true,
7265 root->root_key.objectid);
7266
7267 if (last_ref) {
		/*
		 * Deleting the buffer, clear the corrupt flag since it doesn't
		 * matter anymore.
		 */
7272 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
7273 }
7274}
7275
/* Can return -ENOMEM */
7277int btrfs_free_extent(struct btrfs_trans_handle *trans,
7278 struct btrfs_root *root,
7279 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
7280 u64 owner, u64 offset)
7281{
7282 struct btrfs_fs_info *fs_info = root->fs_info;
7283 int old_ref_mod, new_ref_mod;
7284 int ret;
7285
7286 if (btrfs_is_testing(fs_info))
7287 return 0;
7288
7289 if (root_objectid != BTRFS_TREE_LOG_OBJECTID)
7290 btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
7291 root_objectid, owner, offset,
7292 BTRFS_DROP_DELAYED_REF);
7293
	/*
	 * tree log blocks never actually go into the extent allocation
	 * tree, just tracking dirty log pages works.
	 */
7298 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
7299 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
		/* unlocks the pinned mutex */
7301 btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
7302 old_ref_mod = new_ref_mod = 0;
7303 ret = 0;
7304 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
7305 ret = btrfs_add_delayed_tree_ref(trans, bytenr,
7306 num_bytes, parent,
7307 root_objectid, (int)owner,
7308 BTRFS_DROP_DELAYED_REF, NULL,
7309 &old_ref_mod, &new_ref_mod);
7310 } else {
7311 ret = btrfs_add_delayed_data_ref(trans, bytenr,
7312 num_bytes, parent,
7313 root_objectid, owner, offset,
7314 0, BTRFS_DROP_DELAYED_REF,
7315 &old_ref_mod, &new_ref_mod);
7316 }
7317
7318 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
7319 bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
7320
7321 add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
7322 }
7323
7324 return ret;
7325}
7326
/*
 * when we wait for progress in the block group caching, its because
 * our allocation attempt failed at least once.  So, we must sleep
 * and let some progress happen before we try again.
 *
 * This function will sleep at least once waiting for new free space to
 * show up, and then it will check the block group free space numbers
 * for our min num_bytes.  Another option is to have it go ahead
 * and look in the rbtree for a free extent of a given size, but this
 * is a good start.
 *
 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
 * any of the information in this block group.
 */
7341static noinline void
7342wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
7343 u64 num_bytes)
7344{
7345 struct btrfs_caching_control *caching_ctl;
7346
7347 caching_ctl = get_caching_control(cache);
7348 if (!caching_ctl)
7349 return;
7350
7351 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
7352 (cache->free_space_ctl->free_space >= num_bytes));
7353
7354 put_caching_control(caching_ctl);
7355}
7356
7357static noinline int
7358wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7359{
7360 struct btrfs_caching_control *caching_ctl;
7361 int ret = 0;
7362
7363 caching_ctl = get_caching_control(cache);
7364 if (!caching_ctl)
7365 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
7366
7367 wait_event(caching_ctl->wait, block_group_cache_done(cache));
7368 if (cache->cached == BTRFS_CACHE_ERROR)
7369 ret = -EIO;
7370 put_caching_control(caching_ctl);
7371 return ret;
7372}
7373
7374enum btrfs_loop_type {
7375 LOOP_CACHING_NOWAIT = 0,
7376 LOOP_CACHING_WAIT = 1,
7377 LOOP_ALLOC_CHUNK = 2,
7378 LOOP_NO_EMPTY_SIZE = 3,
7379};
7380
7381static inline void
7382btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
7383 int delalloc)
7384{
7385 if (delalloc)
7386 down_read(&cache->data_rwsem);
7387}
7388
7389static inline void
7390btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
7391 int delalloc)
7392{
7393 btrfs_get_block_group(cache);
7394 if (delalloc)
7395 down_read(&cache->data_rwsem);
7396}
7397
7398static struct btrfs_block_group_cache *
7399btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
7400 struct btrfs_free_cluster *cluster,
7401 int delalloc)
7402{
7403 struct btrfs_block_group_cache *used_bg = NULL;
7404
7405 spin_lock(&cluster->refill_lock);
7406 while (1) {
7407 used_bg = cluster->block_group;
7408 if (!used_bg)
7409 return NULL;
7410
7411 if (used_bg == block_group)
7412 return used_bg;
7413
7414 btrfs_get_block_group(used_bg);
7415
7416 if (!delalloc)
7417 return used_bg;
7418
7419 if (down_read_trylock(&used_bg->data_rwsem))
7420 return used_bg;
7421
7422 spin_unlock(&cluster->refill_lock);
7423
		/* We should only have one-level nested. */
7425 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
7426
7427 spin_lock(&cluster->refill_lock);
7428 if (used_bg == cluster->block_group)
7429 return used_bg;
7430
7431 up_read(&used_bg->data_rwsem);
7432 btrfs_put_block_group(used_bg);
7433 }
7434}
7435
7436static inline void
7437btrfs_release_block_group(struct btrfs_block_group_cache *cache,
7438 int delalloc)
7439{
7440 if (delalloc)
7441 up_read(&cache->data_rwsem);
7442 btrfs_put_block_group(cache);
7443}
7444

/*
 * Structure used internally for find_free_extent() function.  Wraps needed
 * parameters.
 */
7449struct find_free_extent_ctl {
	/* Basic allocation info */
7451 u64 ram_bytes;
7452 u64 num_bytes;
7453 u64 empty_size;
7454 u64 flags;
7455 int delalloc;
7456
	/* Where to start the search inside the bg */
7458 u64 search_start;
7459
	/* For clustered allocation */
7461 u64 empty_cluster;
7462
7463 bool have_caching_bg;
7464 bool orig_have_caching_bg;
7465
	/* RAID index, converted from flags */
7467 int index;
7468
	/*
	 * Current loop number, check find_free_extent_update_loop() for details
	 */
7472 int loop;
7473
	/*
	 * Whether we're refilling a cluster, if true we need to re-search
	 * current block group but don't try to refill the cluster again.
	 */
7478 bool retry_clustered;
7479
	/*
	 * Whether we're updating free space cache, if true we need to re-search
	 * current block group but don't try updating free space cache again.
	 */
7484 bool retry_unclustered;
7485
	/* If current block group is cached */
7487 int cached;
7488
	/* Max contiguous hole found */
7490 u64 max_extent_size;
7491
	/* Total free space from free space cache, not always contiguous */
7493 u64 total_free_space;
7494
	/* Found result */
7496 u64 found_offset;
7497};
7498
/*
 * Helper function for find_free_extent().
 *
 * Return -ENOENT to inform caller that we need fallback to unclustered mode.
 * Return >0 to inform caller that we find nothing
 * Return 0 means we have found a location and set ffe_ctl->found_offset.
 */
7508static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
7509 struct btrfs_free_cluster *last_ptr,
7510 struct find_free_extent_ctl *ffe_ctl,
7511 struct btrfs_block_group_cache **cluster_bg_ret)
7512{
7513 struct btrfs_fs_info *fs_info = bg->fs_info;
7514 struct btrfs_block_group_cache *cluster_bg;
7515 u64 aligned_cluster;
7516 u64 offset;
7517 int ret;
7518
7519 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
7520 if (!cluster_bg)
7521 goto refill_cluster;
7522 if (cluster_bg != bg && (cluster_bg->ro ||
7523 !block_group_bits(cluster_bg, ffe_ctl->flags)))
7524 goto release_cluster;
7525
7526 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
7527 ffe_ctl->num_bytes, cluster_bg->key.objectid,
7528 &ffe_ctl->max_extent_size);
7529 if (offset) {
		/* We have a block, we're done */
7531 spin_unlock(&last_ptr->refill_lock);
7532 trace_btrfs_reserve_extent_cluster(cluster_bg,
7533 ffe_ctl->search_start, ffe_ctl->num_bytes);
7534 *cluster_bg_ret = cluster_bg;
7535 ffe_ctl->found_offset = offset;
7536 return 0;
7537 }
7538 WARN_ON(last_ptr->block_group != cluster_bg);
7539
7540release_cluster:
	/*
	 * If we are at LOOP_NO_EMPTY_SIZE we can't set up a new cluster, so
	 * just skip it and let the unclustered allocator find whatever block
	 * it can.  By this point the cluster allocator has been tried plenty
	 * of times without success, so the free space is most likely too
	 * fragmented for clustering to help anyway.
	 */
7552 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
7553 spin_unlock(&last_ptr->refill_lock);
7554 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
7555 return -ENOENT;
7556 }
7557
	/* This cluster didn't work out, free it and start over */
7559 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7560
7561 if (cluster_bg != bg)
7562 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
7563
7564refill_cluster:
7565 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
7566 spin_unlock(&last_ptr->refill_lock);
7567 return -ENOENT;
7568 }
7569
7570 aligned_cluster = max_t(u64,
7571 ffe_ctl->empty_cluster + ffe_ctl->empty_size,
7572 bg->full_stripe_len);
7573 ret = btrfs_find_space_cluster(fs_info, bg, last_ptr,
7574 ffe_ctl->search_start, ffe_ctl->num_bytes,
7575 aligned_cluster);
7576 if (ret == 0) {
		/* Now pull our allocation out of this cluster */
7578 offset = btrfs_alloc_from_cluster(bg, last_ptr,
7579 ffe_ctl->num_bytes, ffe_ctl->search_start,
7580 &ffe_ctl->max_extent_size);
7581 if (offset) {
			/* We found one, proceed */
7583 spin_unlock(&last_ptr->refill_lock);
7584 trace_btrfs_reserve_extent_cluster(bg,
7585 ffe_ctl->search_start,
7586 ffe_ctl->num_bytes);
7587 ffe_ctl->found_offset = offset;
7588 return 0;
7589 }
7590 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
7591 !ffe_ctl->retry_clustered) {
7592 spin_unlock(&last_ptr->refill_lock);
7593
7594 ffe_ctl->retry_clustered = true;
7595 wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
7596 ffe_ctl->empty_cluster + ffe_ctl->empty_size);
7597 return -EAGAIN;
7598 }
7599
	/*
	 * At this point we either didn't find a cluster or we weren't able to
	 * allocate a block from our cluster.  Free the cluster we've been
	 * trying to use, and go to the next block group.
	 */
7604 btrfs_return_cluster_to_free_space(NULL, last_ptr);
7605 spin_unlock(&last_ptr->refill_lock);
7606 return 1;
7607}
7608
/*
 * Return >0 to inform caller that we find nothing
 * Return 0 when we found a free extent and set ffe_ctl->found_offset
 * Return -EAGAIN to inform caller that we need to re-search this block group
 */
7614static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
7615 struct btrfs_free_cluster *last_ptr,
7616 struct find_free_extent_ctl *ffe_ctl)
7617{
7618 u64 offset;
7619
	/*
	 * We are doing an unclustered allocation, set the fragmented flag so
	 * we don't bother trying to set up a cluster again until we get more
	 * space.
	 */
7625 if (unlikely(last_ptr)) {
7626 spin_lock(&last_ptr->lock);
7627 last_ptr->fragmented = 1;
7628 spin_unlock(&last_ptr->lock);
7629 }
7630 if (ffe_ctl->cached) {
7631 struct btrfs_free_space_ctl *free_space_ctl;
7632
7633 free_space_ctl = bg->free_space_ctl;
7634 spin_lock(&free_space_ctl->tree_lock);
7635 if (free_space_ctl->free_space <
7636 ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
7637 ffe_ctl->empty_size) {
7638 ffe_ctl->total_free_space = max_t(u64,
7639 ffe_ctl->total_free_space,
7640 free_space_ctl->free_space);
7641 spin_unlock(&free_space_ctl->tree_lock);
7642 return 1;
7643 }
7644 spin_unlock(&free_space_ctl->tree_lock);
7645 }
7646
7647 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
7648 ffe_ctl->num_bytes, ffe_ctl->empty_size,
7649 &ffe_ctl->max_extent_size);

	/*
	 * If we didn't find a chunk, and we haven't failed on this block group
	 * before, and this block group is in the middle of caching and we are
	 * ok with waiting, then go ahead and wait for progress to be made, and
	 * set @retry_unclustered to true.
	 *
	 * If @retry_unclustered is true then we've already waited on this
	 * block group once and should move on to the next block group.
	 */
7660 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
7661 ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
7662 wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
7663 ffe_ctl->empty_size);
7664 ffe_ctl->retry_unclustered = true;
7665 return -EAGAIN;
7666 } else if (!offset) {
7667 return 1;
7668 }
7669 ffe_ctl->found_offset = offset;
7670 return 0;
7671}
7672
/*
 * Return >0 means caller needs to re-search for free extent
 * Return 0 means we have the needed free extent.
 * Return <0 means we failed to locate any free extent.
 */
7678static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
7679 struct btrfs_free_cluster *last_ptr,
7680 struct btrfs_key *ins,
7681 struct find_free_extent_ctl *ffe_ctl,
7682 int full_search, bool use_cluster)
7683{
7684 struct btrfs_root *root = fs_info->extent_root;
7685 int ret;
7686
7687 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
7688 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
7689 ffe_ctl->orig_have_caching_bg = true;
7690
7691 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
7692 ffe_ctl->have_caching_bg)
7693 return 1;
7694
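	/* Found nothing at this RAID index, try the next one. */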
7695 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
7696 return 1;
7697
7698 if (ins->objectid) {
7699 if (!use_cluster && last_ptr) {
7700 spin_lock(&last_ptr->lock);
7701 last_ptr->window_start = ins->objectid;
7702 spin_unlock(&last_ptr->lock);
7703 }
7704 return 0;
7705 }

	/*
	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
	 *			caching kthreads as we move along
	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
	 *		       again
	 */
7715 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
7716 ffe_ctl->index = 0;
7717 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
			/*
			 * We want to skip the LOOP_CACHING_WAIT step if we
			 * don't have any uncached bgs and we've already done a
			 * full search through.
			 */
7723 if (ffe_ctl->orig_have_caching_bg || !full_search)
7724 ffe_ctl->loop = LOOP_CACHING_WAIT;
7725 else
7726 ffe_ctl->loop = LOOP_ALLOC_CHUNK;
7727 } else {
7728 ffe_ctl->loop++;
7729 }
7730
7731 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
7732 struct btrfs_trans_handle *trans;
7733 int exist = 0;
7734
7735 trans = current->journal_info;
7736 if (trans)
7737 exist = 1;
7738 else
7739 trans = btrfs_join_transaction(root);
7740
7741 if (IS_ERR(trans)) {
7742 ret = PTR_ERR(trans);
7743 return ret;
7744 }
7745
7746 ret = do_chunk_alloc(trans, ffe_ctl->flags,
7747 CHUNK_ALLOC_FORCE);

			/*
			 * If we can't allocate a new chunk we've already looped
			 * through at least once, move on to the NO_EMPTY_SIZE
			 * case.
			 */
7754 if (ret == -ENOSPC)
7755 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
7756
			/* Do not bail out on ENOSPC since we can do more. */
7758 if (ret < 0 && ret != -ENOSPC)
7759 btrfs_abort_transaction(trans, ret);
7760 else
7761 ret = 0;
7762 if (!exist)
7763 btrfs_end_transaction(trans);
7764 if (ret)
7765 return ret;
7766 }
7767
7768 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
			/*
			 * Don't loop again if we already have no empty_size and
			 * no empty_cluster.
			 */
7773 if (ffe_ctl->empty_size == 0 &&
7774 ffe_ctl->empty_cluster == 0)
7775 return -ENOSPC;
7776 ffe_ctl->empty_size = 0;
7777 ffe_ctl->empty_cluster = 0;
7778 }
7779 return 1;
7780 }
7781 return -ENOSPC;
7782}
7783
/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start position
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == the size of the hole.
 * Any available blocks before search_start are skipped.
 *
 * If there is no suitable free space, we will record the max size of
 * the free space extent currently.
 *
 * The overall logic and call chain:
 *
 * find_free_extent()
 * |- Iterate through all block groups
 * |  |- Get a valid block group
 * |  |- Try to do clustered allocation in that block group
 * |  |- Try to do unclustered allocation in that block group
 * |  |- Check if the result is valid
 * |  |  |- If valid, then exit
 * |  |- Jump to next block group
 * |
 * |- Push harder to find free extents
 *    |- If not found, re-iterate all block groups
 */
7809static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
7810 u64 ram_bytes, u64 num_bytes, u64 empty_size,
7811 u64 hint_byte, struct btrfs_key *ins,
7812 u64 flags, int delalloc)
7813{
7814 int ret = 0;
7815 struct btrfs_free_cluster *last_ptr = NULL;
7816 struct btrfs_block_group_cache *block_group = NULL;
7817 struct find_free_extent_ctl ffe_ctl = {0};
7818 struct btrfs_space_info *space_info;
7819 bool use_cluster = true;
7820 bool full_search = false;
7821
7822 WARN_ON(num_bytes < fs_info->sectorsize);
7823
7824 ffe_ctl.ram_bytes = ram_bytes;
7825 ffe_ctl.num_bytes = num_bytes;
7826 ffe_ctl.empty_size = empty_size;
7827 ffe_ctl.flags = flags;
7828 ffe_ctl.search_start = 0;
7829 ffe_ctl.retry_clustered = false;
7830 ffe_ctl.retry_unclustered = false;
7831 ffe_ctl.delalloc = delalloc;
7832 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
7833 ffe_ctl.have_caching_bg = false;
7834 ffe_ctl.orig_have_caching_bg = false;
7835 ffe_ctl.found_offset = 0;
7836
7837 ins->type = BTRFS_EXTENT_ITEM_KEY;
7838 ins->objectid = 0;
7839 ins->offset = 0;
7840
7841 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
7842
7843 space_info = __find_space_info(fs_info, flags);
7844 if (!space_info) {
7845 btrfs_err(fs_info, "No space info for %llu", flags);
7846 return -ENOSPC;
7847 }
7848
	/*
	 * If our free space is heavily fragmented we may not be able to make
	 * big contiguous allocations, so instead of doing the expensive search
	 * for free space, simply return ENOSPC with our max_extent_size so we
	 * can go ahead and search for a smaller hole, which essentially
	 * bypasses the allocation loop and will speed things up.
	 *
	 * However if our max_extent_size is large enough for our allocation
	 * simply disregard it and try again.
	 */
7859 if (unlikely(space_info->max_extent_size)) {
7860 spin_lock(&space_info->lock);
7861 if (space_info->max_extent_size &&
7862 num_bytes > space_info->max_extent_size) {
7863 ins->offset = space_info->max_extent_size;
7864 spin_unlock(&space_info->lock);
7865 return -ENOSPC;
7866 } else if (space_info->max_extent_size) {
7867 use_cluster = false;
7868 }
7869 spin_unlock(&space_info->lock);
7870 }
7871
7872 last_ptr = fetch_cluster_info(fs_info, space_info,
7873 &ffe_ctl.empty_cluster);
7874 if (last_ptr) {
7875 spin_lock(&last_ptr->lock);
7876 if (last_ptr->block_group)
7877 hint_byte = last_ptr->window_start;
7878 if (last_ptr->fragmented) {
			/*
			 * We still set window_start so we can keep track of the
			 * last place we found an allocation to try and save
			 * some cpu.
			 */
7884 hint_byte = last_ptr->window_start;
7885 use_cluster = false;
7886 }
7887 spin_unlock(&last_ptr->lock);
7888 }
7889
7890 ffe_ctl.search_start = max(ffe_ctl.search_start,
7891 first_logical_byte(fs_info, 0));
7892 ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
7893 if (ffe_ctl.search_start == hint_byte) {
7894 block_group = btrfs_lookup_block_group(fs_info,
7895 ffe_ctl.search_start);
		/*
		 * we don't want to use the block group if it doesn't match our
		 * allocation bits, or if its not cached.
		 *
		 * However if we are re-searching with an ideal block group
		 * picked out then we don't care that the block group is cached.
		 */
7903 if (block_group && block_group_bits(block_group, flags) &&
7904 block_group->cached != BTRFS_CACHE_NO) {
7905 down_read(&space_info->groups_sem);
7906 if (list_empty(&block_group->list) ||
7907 block_group->ro) {
				/*
				 * someone is removing this block group,
				 * we can't jump into the have_block_group
				 * target because our list pointers are not
				 * valid
				 */
7914 btrfs_put_block_group(block_group);
7915 up_read(&space_info->groups_sem);
7916 } else {
7917 ffe_ctl.index = btrfs_bg_flags_to_raid_index(
7918 block_group->flags);
7919 btrfs_lock_block_group(block_group, delalloc);
7920 goto have_block_group;
7921 }
7922 } else if (block_group) {
7923 btrfs_put_block_group(block_group);
7924 }
7925 }
7926search:
7927 ffe_ctl.have_caching_bg = false;
7928 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
7929 ffe_ctl.index == 0)
7930 full_search = true;
7931 down_read(&space_info->groups_sem);
7932 list_for_each_entry(block_group,
7933 &space_info->block_groups[ffe_ctl.index], list) {
		/* If the block group is read-only, we can skip it entirely. */
7935 if (unlikely(block_group->ro))
7936 continue;
7937
7938 btrfs_grab_block_group(block_group, delalloc);
7939 ffe_ctl.search_start = block_group->key.objectid;

		/*
		 * this can happen if we end up cycling through all the
		 * raid types, but we want to make sure we only allocate
		 * for the proper type.
		 */
7946 if (!block_group_bits(block_group, flags)) {
7947 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7948 BTRFS_BLOCK_GROUP_RAID1 |
7949 BTRFS_BLOCK_GROUP_RAID5 |
7950 BTRFS_BLOCK_GROUP_RAID6 |
7951 BTRFS_BLOCK_GROUP_RAID10;
7952
			/*
			 * if they asked for extra copies and this block group
			 * doesn't provide them, bail.  This does allow us to
			 * fill raid0 from raid1.
			 */
7958 if ((flags & extra) && !(block_group->flags & extra))
7959 goto loop;
7960 }
7961
7962have_block_group:
7963 ffe_ctl.cached = block_group_cache_done(block_group);
7964 if (unlikely(!ffe_ctl.cached)) {
7965 ffe_ctl.have_caching_bg = true;
7966 ret = cache_block_group(block_group, 0);
7967 BUG_ON(ret < 0);
7968 ret = 0;
7969 }
7970
7971 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
7972 goto loop;
7973
		/*
		 * Ok we want to try and use the cluster allocator, so
		 * lets look there
		 */
7978 if (last_ptr && use_cluster) {
7979 struct btrfs_block_group_cache *cluster_bg = NULL;
7980
7981 ret = find_free_extent_clustered(block_group, last_ptr,
7982 &ffe_ctl, &cluster_bg);
7983
7984 if (ret == 0) {
7985 if (cluster_bg && cluster_bg != block_group) {
7986 btrfs_release_block_group(block_group,
7987 delalloc);
7988 block_group = cluster_bg;
7989 }
7990 goto checks;
7991 } else if (ret == -EAGAIN) {
7992 goto have_block_group;
7993 } else if (ret > 0) {
7994 goto loop;
7995 }
			/* ret == -ENOENT case falls through */
7997 }
7998
7999 ret = find_free_extent_unclustered(block_group, last_ptr,
8000 &ffe_ctl);
8001 if (ret == -EAGAIN)
8002 goto have_block_group;
8003 else if (ret > 0)
8004 goto loop;
8005
8006checks:
8007 ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
8008 fs_info->stripesize);
8009
		/* move on to the next group */
8011 if (ffe_ctl.search_start + num_bytes >
8012 block_group->key.objectid + block_group->key.offset) {
8013 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
8014 num_bytes);
8015 goto loop;
8016 }
8017
8018 if (ffe_ctl.found_offset < ffe_ctl.search_start)
8019 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
8020 ffe_ctl.search_start - ffe_ctl.found_offset);
8021
8022 ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
8023 num_bytes, delalloc);
8024 if (ret == -EAGAIN) {
8025 btrfs_add_free_space(block_group, ffe_ctl.found_offset,
8026 num_bytes);
8027 goto loop;
8028 }
8029 btrfs_inc_block_group_reservations(block_group);
8030
		/* we are all good, lets return */
8032 ins->objectid = ffe_ctl.search_start;
8033 ins->offset = num_bytes;
8034
8035 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
8036 num_bytes);
8037 btrfs_release_block_group(block_group, delalloc);
8038 break;
8039loop:
8040 ffe_ctl.retry_clustered = false;
8041 ffe_ctl.retry_unclustered = false;
8042 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
8043 ffe_ctl.index);
8044 btrfs_release_block_group(block_group, delalloc);
8045 cond_resched();
8046 }
8047 up_read(&space_info->groups_sem);
8048
8049 ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
8050 full_search, use_cluster);
8051 if (ret > 0)
8052 goto search;
8053
8054 if (ret == -ENOSPC) {
		/*
		 * Use ffe_ctl->total_free_space as fallback if we can't find
		 * any contiguous hole.
		 */
8059 if (!ffe_ctl.max_extent_size)
8060 ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
8061 spin_lock(&space_info->lock);
8062 space_info->max_extent_size = ffe_ctl.max_extent_size;
8063 spin_unlock(&space_info->lock);
8064 ins->offset = ffe_ctl.max_extent_size;
8065 }
8066 return ret;
8067}
8068
8069static void dump_space_info(struct btrfs_fs_info *fs_info,
8070 struct btrfs_space_info *info, u64 bytes,
8071 int dump_block_groups)
8072{
8073 struct btrfs_block_group_cache *cache;
8074 int index = 0;
8075
8076 spin_lock(&info->lock);
8077 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
8078 info->flags,
8079 info->total_bytes - btrfs_space_info_used(info, true),
8080 info->full ? "" : "not ");
8081 btrfs_info(fs_info,
8082 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
8083 info->total_bytes, info->bytes_used, info->bytes_pinned,
8084 info->bytes_reserved, info->bytes_may_use,
8085 info->bytes_readonly);
8086 spin_unlock(&info->lock);
8087
8088 if (!dump_block_groups)
8089 return;
8090
8091 down_read(&info->groups_sem);
8092again:
8093 list_for_each_entry(cache, &info->block_groups[index], list) {
8094 spin_lock(&cache->lock);
8095 btrfs_info(fs_info,
8096 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
8097 cache->key.objectid, cache->key.offset,
8098 btrfs_block_group_used(&cache->item), cache->pinned,
8099 cache->reserved, cache->ro ? "[readonly]" : "");
8100 btrfs_dump_free_space(cache, bytes);
8101 spin_unlock(&cache->lock);
8102 }
8103 if (++index < BTRFS_NR_RAID_TYPES)
8104 goto again;
8105 up_read(&info->groups_sem);
8106}
8107
/*
 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
 *			  hole that is at least as big as @num_bytes.
 *
 * @root           -	The root that will contain this extent
 *
 * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
 *			is used for accounting purposes. This value differs
 *			from @num_bytes only in the case of compressed extents.
 *
 * @num_bytes      -	Number of bytes to allocate on-disk.
 *
 * @min_alloc_size -	Indicates the minimum amount of space that the
 *			allocator should try to satisfy. In some cases
 *			@num_bytes may be larger than what is required and if
 *			the filesystem is fragmented then allocation fails.
 *			However, the presence of @min_alloc_size gives a
 *			chance to try and satisfy the smaller allocation.
 *
 * @empty_size     -	A hint that you plan on doing more COW. This is the
 *			size in bytes the allocator should try to find free
 *			next to the block it returns.  This is just a hint and
 *			may be ignored by the allocator.
 *
 * @hint_byte      -	Hint to the allocator to start searching above the byte
 *			address passed. It might be ignored.
 *
 * @ins            -	This key is modified to record the found hole. It will
 *			have the following values:
 *			ins->objectid == start position
 *			ins->flags = BTRFS_EXTENT_ITEM_KEY
 *			ins->offset == the size of the hole.
 *
 * @is_data        -	Boolean flag indicating whether an extent is
 *			allocated for data (true) or metadata (false)
 *
 * @delalloc       -	Boolean flag indicating whether this allocation is for
 *			delalloc or not. If 'true' data_rwsem of block groups
 *			is going to be acquired.
 *
 * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
 * case -ENOSPC is returned then @ins->offset will contain the size of the
 * largest available hole the allocator managed to find.
 */
8153int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
8154 u64 num_bytes, u64 min_alloc_size,
8155 u64 empty_size, u64 hint_byte,
8156 struct btrfs_key *ins, int is_data, int delalloc)
8157{
8158 struct btrfs_fs_info *fs_info = root->fs_info;
8159 bool final_tried = num_bytes == min_alloc_size;
8160 u64 flags;
8161 int ret;
8162
8163 flags = get_alloc_profile_by_root(root, is_data);
8164again:
8165 WARN_ON(num_bytes < fs_info->sectorsize);
8166 ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
8167 hint_byte, ins, flags, delalloc);
8168 if (!ret && !is_data) {
8169 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
8170 } else if (ret == -ENOSPC) {
8171 if (!final_tried && ins->offset) {
8172 num_bytes = min(num_bytes >> 1, ins->offset);
8173 num_bytes = round_down(num_bytes,
8174 fs_info->sectorsize);
8175 num_bytes = max(num_bytes, min_alloc_size);
8176 ram_bytes = num_bytes;
8177 if (num_bytes == min_alloc_size)
8178 final_tried = true;
8179 goto again;
8180 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8181 struct btrfs_space_info *sinfo;
8182
8183 sinfo = __find_space_info(fs_info, flags);
8184 btrfs_err(fs_info,
8185 "allocation failed flags %llu, wanted %llu",
8186 flags, num_bytes);
8187 if (sinfo)
8188 dump_space_info(fs_info, sinfo, num_bytes, 1);
8189 }
8190 }
8191
8192 return ret;
8193}
8194
8195static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8196 u64 start, u64 len,
8197 int pin, int delalloc)
8198{
8199 struct btrfs_block_group_cache *cache;
8200 int ret = 0;
8201
8202 cache = btrfs_lookup_block_group(fs_info, start);
8203 if (!cache) {
8204 btrfs_err(fs_info, "Unable to find block group for %llu",
8205 start);
8206 return -ENOSPC;
8207 }
8208
8209 if (pin)
8210 pin_down_extent(fs_info, cache, start, len, 1);
8211 else {
8212 if (btrfs_test_opt(fs_info, DISCARD))
8213 ret = btrfs_discard_extent(fs_info, start, len, NULL);
8214 btrfs_add_free_space(cache, start, len);
8215 btrfs_free_reserved_bytes(cache, len, delalloc);
8216 trace_btrfs_reserved_extent_free(fs_info, start, len);
8217 }
8218
8219 btrfs_put_block_group(cache);
8220 return ret;
8221}
8222
8223int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
8224 u64 start, u64 len, int delalloc)
8225{
8226 return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
8227}
8228
8229int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
8230 u64 start, u64 len)
8231{
8232 return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
8233}
8234
8235static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8236 u64 parent, u64 root_objectid,
8237 u64 flags, u64 owner, u64 offset,
8238 struct btrfs_key *ins, int ref_mod)
8239{
8240 struct btrfs_fs_info *fs_info = trans->fs_info;
8241 int ret;
8242 struct btrfs_extent_item *extent_item;
8243 struct btrfs_extent_inline_ref *iref;
8244 struct btrfs_path *path;
8245 struct extent_buffer *leaf;
8246 int type;
8247 u32 size;
8248
8249 if (parent > 0)
8250 type = BTRFS_SHARED_DATA_REF_KEY;
8251 else
8252 type = BTRFS_EXTENT_DATA_REF_KEY;
8253
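	/* The extent item is immediately followed by a single inline backref. */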
8254 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
8255
8256 path = btrfs_alloc_path();
8257 if (!path)
8258 return -ENOMEM;
8259
8260 path->leave_spinning = 1;
8261 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8262 ins, size);
8263 if (ret) {
8264 btrfs_free_path(path);
8265 return ret;
8266 }
8267
8268 leaf = path->nodes[0];
8269 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8270 struct btrfs_extent_item);
8271 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
8272 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8273 btrfs_set_extent_flags(leaf, extent_item,
8274 flags | BTRFS_EXTENT_FLAG_DATA);
8275
8276 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8277 btrfs_set_extent_inline_ref_type(leaf, iref, type);
8278 if (parent > 0) {
8279 struct btrfs_shared_data_ref *ref;
8280 ref = (struct btrfs_shared_data_ref *)(iref + 1);
8281 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
8282 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
8283 } else {
8284 struct btrfs_extent_data_ref *ref;
8285 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
8286 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
8287 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
8288 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
8289 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
8290 }
8291
8292 btrfs_mark_buffer_dirty(path->nodes[0]);
8293 btrfs_free_path(path);
8294
8295 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
8296 if (ret)
8297 return ret;
8298
8299 ret = update_block_group(trans, fs_info, ins->objectid, ins->offset, 1);
8300 if (ret) {
8301 btrfs_err(fs_info, "update block group failed for %llu %llu",
8302 ins->objectid, ins->offset);
8303 BUG();
8304 }
8305 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
8306 return ret;
8307}
8308
8309static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
8310 struct btrfs_delayed_ref_node *node,
8311 struct btrfs_delayed_extent_op *extent_op)
8312{
8313 struct btrfs_fs_info *fs_info = trans->fs_info;
8314 int ret;
8315 struct btrfs_extent_item *extent_item;
8316 struct btrfs_key extent_key;
8317 struct btrfs_tree_block_info *block_info;
8318 struct btrfs_extent_inline_ref *iref;
8319 struct btrfs_path *path;
8320 struct extent_buffer *leaf;
8321 struct btrfs_delayed_tree_ref *ref;
8322 u32 size = sizeof(*extent_item) + sizeof(*iref);
8323 u64 num_bytes;
8324 u64 flags = extent_op->flags_to_set;
8325 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
8326
8327 ref = btrfs_delayed_node_to_tree_ref(node);
8328
8329 extent_key.objectid = node->bytenr;
8330 if (skinny_metadata) {
8331 extent_key.offset = ref->level;
8332 extent_key.type = BTRFS_METADATA_ITEM_KEY;
8333 num_bytes = fs_info->nodesize;
8334 } else {
8335 extent_key.offset = node->num_bytes;
8336 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
8337 size += sizeof(*block_info);
8338 num_bytes = node->num_bytes;
8339 }
8340
8341 path = btrfs_alloc_path();
8342 if (!path)
8343 return -ENOMEM;
8344
8345 path->leave_spinning = 1;
8346 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
8347 &extent_key, size);
8348 if (ret) {
8349 btrfs_free_path(path);
8350 return ret;
8351 }
8352
8353 leaf = path->nodes[0];
8354 extent_item = btrfs_item_ptr(leaf, path->slots[0],
8355 struct btrfs_extent_item);
8356 btrfs_set_extent_refs(leaf, extent_item, 1);
8357 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
8358 btrfs_set_extent_flags(leaf, extent_item,
8359 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
8360
8361 if (skinny_metadata) {
8362 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
8363 } else {
8364 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
8365 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
8366 btrfs_set_tree_block_level(leaf, block_info, ref->level);
8367 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
8368 }
8369
8370 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
8371 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
8372 btrfs_set_extent_inline_ref_type(leaf, iref,
8373 BTRFS_SHARED_BLOCK_REF_KEY);
8374 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
8375 } else {
8376 btrfs_set_extent_inline_ref_type(leaf, iref,
8377 BTRFS_TREE_BLOCK_REF_KEY);
8378 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
8379 }
8380
8381 btrfs_mark_buffer_dirty(leaf);
8382 btrfs_free_path(path);
8383
8384 ret = remove_from_free_space_tree(trans, extent_key.objectid,
8385 num_bytes);
8386 if (ret)
8387 return ret;
8388
8389 ret = update_block_group(trans, fs_info, extent_key.objectid,
8390 fs_info->nodesize, 1);
8391 if (ret) {
8392 btrfs_err(fs_info, "update block group failed for %llu %llu",
8393 extent_key.objectid, extent_key.offset);
8394 BUG();
8395 }
8396
8397 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
8398 fs_info->nodesize);
8399 return ret;
8400}
8401
8402int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
8403 struct btrfs_root *root, u64 owner,
8404 u64 offset, u64 ram_bytes,
8405 struct btrfs_key *ins)
8406{
8407 int ret;
8408
8409 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
8410
8411 btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0,
8412 root->root_key.objectid, owner, offset,
8413 BTRFS_ADD_DELAYED_EXTENT);
8414
8415 ret = btrfs_add_delayed_data_ref(trans, ins->objectid,
8416 ins->offset, 0,
8417 root->root_key.objectid, owner,
8418 offset, ram_bytes,
8419 BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
8420 return ret;
8421}
8422
/*
 * this is used by the tree logging recovery code.  It records that
 * an extent has been allocated and makes sure to clear the free
 * space cache bits as well
 */
8428int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
8429 u64 root_objectid, u64 owner, u64 offset,
8430 struct btrfs_key *ins)
8431{
8432 struct btrfs_fs_info *fs_info = trans->fs_info;
8433 int ret;
8434 struct btrfs_block_group_cache *block_group;
8435 struct btrfs_space_info *space_info;
8436
	/*
	 * Mixed block groups will exclude before processing the log so we only
	 * need to do the exclude dance if this fs isn't mixed.
	 */
8441 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
8442 ret = __exclude_logged_extent(fs_info, ins->objectid,
8443 ins->offset);
8444 if (ret)
8445 return ret;
8446 }
8447
8448 block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
8449 if (!block_group)
8450 return -EINVAL;
8451
8452 space_info = block_group->space_info;
8453 spin_lock(&space_info->lock);
8454 spin_lock(&block_group->lock);
8455 space_info->bytes_reserved += ins->offset;
8456 block_group->reserved += ins->offset;
8457 spin_unlock(&block_group->lock);
8458 spin_unlock(&space_info->lock);
8459
8460 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
8461 offset, ins, 1);
8462 btrfs_put_block_group(block_group);
8463 return ret;
8464}
8465
8466static struct extent_buffer *
8467btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8468 u64 bytenr, int level, u64 owner)
8469{
8470 struct btrfs_fs_info *fs_info = root->fs_info;
8471 struct extent_buffer *buf;
8472
8473 buf = btrfs_find_create_tree_block(fs_info, bytenr);
8474 if (IS_ERR(buf))
8475 return buf;
8476
	/*
	 * Extra safety check in case the extent tree is corrupted and extent
	 * allocator chooses to use a tree block which is already used and
	 * locked.
	 */
8482 if (buf->lock_owner == current->pid) {
8483 btrfs_err_rl(fs_info,
8484"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
8485 buf->start, btrfs_header_owner(buf), current->pid);
8486 free_extent_buffer(buf);
8487 return ERR_PTR(-EUCLEAN);
8488 }
8489
8490 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
8491 btrfs_tree_lock(buf);
8492 clean_tree_block(fs_info, buf);
8493 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
8494
8495 btrfs_set_lock_blocking(buf);
8496 set_extent_buffer_uptodate(buf);
8497
8498 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
8499 btrfs_set_header_level(buf, level);
8500 btrfs_set_header_bytenr(buf, buf->start);
8501 btrfs_set_header_generation(buf, trans->transid);
8502 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
8503 btrfs_set_header_owner(buf, owner);
8504 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
8505 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
8506 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
8507 buf->log_index = root->log_transid % 2;
		/*
		 * we allow two log transactions at a time, use different
		 * EXTENT bit to differentiate dirty pages.
		 */
8512 if (buf->log_index == 0)
8513 set_extent_dirty(&root->dirty_log_pages, buf->start,
8514 buf->start + buf->len - 1, GFP_NOFS);
8515 else
8516 set_extent_new(&root->dirty_log_pages, buf->start,
8517 buf->start + buf->len - 1);
8518 } else {
8519 buf->log_index = -1;
8520 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
8521 buf->start + buf->len - 1, GFP_NOFS);
8522 }
8523 trans->dirty = true;
8524
8525 return buf;
8526}
8527
8528static struct btrfs_block_rsv *
8529use_block_rsv(struct btrfs_trans_handle *trans,
8530 struct btrfs_root *root, u32 blocksize)
8531{
8532 struct btrfs_fs_info *fs_info = root->fs_info;
8533 struct btrfs_block_rsv *block_rsv;
8534 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
8535 int ret;
8536 bool global_updated = false;
8537
8538 block_rsv = get_block_rsv(trans, root);
8539
8540 if (unlikely(block_rsv->size == 0))
8541 goto try_reserve;
8542again:
8543 ret = block_rsv_use_bytes(block_rsv, blocksize);
8544 if (!ret)
8545 return block_rsv;
8546
8547 if (block_rsv->failfast)
8548 return ERR_PTR(ret);
8549
8550 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8551 global_updated = true;
8552 update_global_block_rsv(fs_info);
8553 goto again;
8554 }
8555
	/*
	 * The global reserve still exists to save us from ourselves, so don't
	 * warn_on if we are short on our delayed refs reserve.
	 */
8560 if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
8561 btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8562 static DEFINE_RATELIMIT_STATE(_rs,
8563 DEFAULT_RATELIMIT_INTERVAL * 10,
8564 1);
8565 if (__ratelimit(&_rs))
8566 WARN(1, KERN_DEBUG
8567 "BTRFS: block rsv returned %d\n", ret);
8568 }
8569try_reserve:
8570 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8571 BTRFS_RESERVE_NO_FLUSH);
8572 if (!ret)
8573 return block_rsv;
8574
	/*
	 * If we couldn't reserve metadata bytes try and use some from
	 * the global reserve if its space type is the same as the global
	 * reservation.
	 */
8579 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8580 block_rsv->space_info == global_rsv->space_info) {
8581 ret = block_rsv_use_bytes(global_rsv, blocksize);
8582 if (!ret)
8583 return global_rsv;
8584 }
8585 return ERR_PTR(ret);
8586}
8587
8588static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8589 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8590{
8591 block_rsv_add_bytes(block_rsv, blocksize, false);
8592 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
8593}
8594
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the tree buffer or an ERR_PTR on error.
 */
8599struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8600 struct btrfs_root *root,
8601 u64 parent, u64 root_objectid,
8602 const struct btrfs_disk_key *key,
8603 int level, u64 hint,
8604 u64 empty_size)
8605{
8606 struct btrfs_fs_info *fs_info = root->fs_info;
8607 struct btrfs_key ins;
8608 struct btrfs_block_rsv *block_rsv;
8609 struct extent_buffer *buf;
8610 struct btrfs_delayed_extent_op *extent_op;
8611 u64 flags = 0;
8612 int ret;
8613 u32 blocksize = fs_info->nodesize;
8614 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
8615
8616#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
8617 if (btrfs_is_testing(fs_info)) {
8618 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
8619 level, root_objectid);
8620 if (!IS_ERR(buf))
8621 root->alloc_bytenr += blocksize;
8622 return buf;
8623 }
8624#endif
8625
8626 block_rsv = use_block_rsv(trans, root, blocksize);
8627 if (IS_ERR(block_rsv))
8628 return ERR_CAST(block_rsv);
8629
8630 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
8631 empty_size, hint, &ins, 0, 0);
8632 if (ret)
8633 goto out_unuse;
8634
8635 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
8636 root_objectid);
8637 if (IS_ERR(buf)) {
8638 ret = PTR_ERR(buf);
8639 goto out_free_reserved;
8640 }
8641
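	/*
	 * Blocks owned by the relocation tree are always referenced with full
	 * backrefs, keyed on the parent block rather than the owning root.
	 */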
8642 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
8643 if (parent == 0)
8644 parent = ins.objectid;
8645 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8646 } else
8647 BUG_ON(parent > 0);
8648
8649 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
8650 extent_op = btrfs_alloc_delayed_extent_op();
8651 if (!extent_op) {
8652 ret = -ENOMEM;
8653 goto out_free_buf;
8654 }
8655 if (key)
8656 memcpy(&extent_op->key, key, sizeof(extent_op->key));
8657 else
8658 memset(&extent_op->key, 0, sizeof(extent_op->key));
8659 extent_op->flags_to_set = flags;
8660 extent_op->update_key = skinny_metadata ? false : true;
8661 extent_op->update_flags = true;
8662 extent_op->is_data = false;
8663 extent_op->level = level;
8664
8665 btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent,
8666 root_objectid, level, 0,
8667 BTRFS_ADD_DELAYED_EXTENT);
8668 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
8669 ins.offset, parent,
8670 root_objectid, level,
8671 BTRFS_ADD_DELAYED_EXTENT,
8672 extent_op, NULL, NULL);
8673 if (ret)
8674 goto out_free_delayed;
8675 }
8676 return buf;
8677
8678out_free_delayed:
8679 btrfs_free_delayed_extent_op(extent_op);
8680out_free_buf:
8681 free_extent_buffer(buf);
8682out_free_reserved:
8683 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
8684out_unuse:
8685 unuse_block_rsv(fs_info, block_rsv, blocksize);
8686 return ERR_PTR(ret);
8687}
8688
8689struct walk_control {
8690 u64 refs[BTRFS_MAX_LEVEL];
8691 u64 flags[BTRFS_MAX_LEVEL];
8692 struct btrfs_key update_progress;
8693 int stage;
8694 int level;
8695 int shared_level;
8696 int update_ref;
8697 int keep_locks;
8698 int reada_slot;
8699 int reada_count;
8700};
8701
8702#define DROP_REFERENCE 1
8703#define UPDATE_BACKREF 2
8704
8705static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
8706 struct btrfs_root *root,
8707 struct walk_control *wc,
8708 struct btrfs_path *path)
8709{
8710 struct btrfs_fs_info *fs_info = root->fs_info;
8711 u64 bytenr;
8712 u64 generation;
8713 u64 refs;
8714 u64 flags;
8715 u32 nritems;
8716 struct btrfs_key key;
8717 struct extent_buffer *eb;
8718 int ret;
8719 int slot;
8720 int nread = 0;
8721
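	/*
	 * Scale the readahead window: shrink it while the walk is still
	 * behind the slot readahead last started from, grow it once the
	 * walk has caught up.
	 */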
8722 if (path->slots[wc->level] < wc->reada_slot) {
8723 wc->reada_count = wc->reada_count * 2 / 3;
8724 wc->reada_count = max(wc->reada_count, 2);
8725 } else {
8726 wc->reada_count = wc->reada_count * 3 / 2;
8727 wc->reada_count = min_t(int, wc->reada_count,
8728 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
8729 }
8730
8731 eb = path->nodes[wc->level];
8732 nritems = btrfs_header_nritems(eb);
8733
8734 for (slot = path->slots[wc->level]; slot < nritems; slot++) {
8735 if (nread >= wc->reada_count)
8736 break;
8737
8738 cond_resched();
8739 bytenr = btrfs_node_blockptr(eb, slot);
8740 generation = btrfs_node_ptr_generation(eb, slot);
8741
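		/* Always readahead the block at the current walk position. */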
8742 if (slot == path->slots[wc->level])
8743 goto reada;
8744
8745 if (wc->stage == UPDATE_BACKREF &&
8746 generation <= root->root_key.offset)
8747 continue;
8748
		/* We don't lock the tree block, it's OK to be racy here */
8750 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
8751 wc->level - 1, 1, &refs,
8752 &flags);
8753
8754 if (ret < 0)
8755 continue;
8756 BUG_ON(refs == 0);
8757
8758 if (wc->stage == DROP_REFERENCE) {
8759 if (refs == 1)
8760 goto reada;
8761
8762 if (wc->level == 1 &&
8763 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8764 continue;
8765 if (!wc->update_ref ||
8766 generation <= root->root_key.offset)
8767 continue;
8768 btrfs_node_key_to_cpu(eb, &key, slot);
8769 ret = btrfs_comp_cpu_keys(&key,
8770 &wc->update_progress);
8771 if (ret < 0)
8772 continue;
8773 } else {
8774 if (wc->level == 1 &&
8775 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8776 continue;
8777 }
8778reada:
8779 readahead_tree_block(fs_info, bytenr);
8780 nread++;
8781 }
8782 wc->reada_slot = slot;
8783}
8784
/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8793static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
8794 struct btrfs_root *root,
8795 struct btrfs_path *path,
8796 struct walk_control *wc, int lookup_info)
8797{
8798 struct btrfs_fs_info *fs_info = root->fs_info;
8799 int level = wc->level;
8800 struct extent_buffer *eb = path->nodes[level];
8801 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8802 int ret;
8803
8804 if (wc->stage == UPDATE_BACKREF &&
8805 btrfs_header_owner(eb) != root->root_key.objectid)
8806 return 1;
8807
	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
8812 if (lookup_info &&
8813 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
8814 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
8815 BUG_ON(!path->locks[level]);
8816 ret = btrfs_lookup_extent_info(trans, fs_info,
8817 eb->start, level, 1,
8818 &wc->refs[level],
8819 &wc->flags[level]);
8820 BUG_ON(ret == -ENOMEM);
8821 if (ret)
8822 return ret;
8823 BUG_ON(wc->refs[level] == 0);
8824 }
8825
8826 if (wc->stage == DROP_REFERENCE) {
8827 if (wc->refs[level] > 1)
8828 return 1;
8829
8830 if (path->locks[level] && !wc->keep_locks) {
8831 btrfs_tree_unlock_rw(eb, path->locks[level]);
8832 path->locks[level] = 0;
8833 }
8834 return 0;
8835 }
8836
	/* wc->stage == UPDATE_BACKREF */
8838 if (!(wc->flags[level] & flag)) {
8839 BUG_ON(!path->locks[level]);
8840 ret = btrfs_inc_ref(trans, root, eb, 1);
8841 BUG_ON(ret);
8842 ret = btrfs_dec_ref(trans, root, eb, 0);
8843 BUG_ON(ret);
8844 ret = btrfs_set_disk_extent_flags(trans, fs_info, eb->start,
8845 eb->len, flag,
8846 btrfs_header_level(eb), 0);
8847 BUG_ON(ret);
8848 wc->flags[level] |= flag;
8849 }
8850
	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
8855 if (path->locks[level] && level > 0) {
8856 btrfs_tree_unlock_rw(eb, path->locks[level]);
8857 path->locks[level] = 0;
8858 }
8859 return 0;
8860}
8861
/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs for the subtree rooted at the
 * block, this function decreases reference count of the
 * block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
8875static noinline int do_walk_down(struct btrfs_trans_handle *trans,
8876 struct btrfs_root *root,
8877 struct btrfs_path *path,
8878 struct walk_control *wc, int *lookup_info)
8879{
8880 struct btrfs_fs_info *fs_info = root->fs_info;
8881 u64 bytenr;
8882 u64 generation;
8883 u64 parent;
8884 struct btrfs_key key;
8885 struct btrfs_key first_key;
8886 struct extent_buffer *next;
8887 int level = wc->level;
8888 int reada = 0;
8889 int ret = 0;
8890 bool need_account = false;
8891
8892 generation = btrfs_node_ptr_generation(path->nodes[level],
8893 path->slots[level]);
8894
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
8899 if (wc->stage == UPDATE_BACKREF &&
8900 generation <= root->root_key.offset) {
8901 *lookup_info = 1;
8902 return 1;
8903 }
8904
8905 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
8906 btrfs_node_key_to_cpu(path->nodes[level], &first_key,
8907 path->slots[level]);
8908
8909 next = find_extent_buffer(fs_info, bytenr);
8910 if (!next) {
8911 next = btrfs_find_create_tree_block(fs_info, bytenr);
8912 if (IS_ERR(next))
8913 return PTR_ERR(next);
8914
8915 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
8916 level - 1);
8917 reada = 1;
8918 }
8919 btrfs_tree_lock(next);
8920 btrfs_set_lock_blocking(next);
8921
8922 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
8923 &wc->refs[level - 1],
8924 &wc->flags[level - 1]);
8925 if (ret < 0)
8926 goto out_unlock;
8927
8928 if (unlikely(wc->refs[level - 1] == 0)) {
8929 btrfs_err(fs_info, "Missing references.");
8930 ret = -EIO;
8931 goto out_unlock;
8932 }
8933 *lookup_info = 0;
8934
8935 if (wc->stage == DROP_REFERENCE) {
8936 if (wc->refs[level - 1] > 1) {
8937 need_account = true;
8938 if (level == 1 &&
8939 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8940 goto skip;
8941
8942 if (!wc->update_ref ||
8943 generation <= root->root_key.offset)
8944 goto skip;
8945
8946 btrfs_node_key_to_cpu(path->nodes[level], &key,
8947 path->slots[level]);
8948 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
8949 if (ret < 0)
8950 goto skip;
8951
8952 wc->stage = UPDATE_BACKREF;
8953 wc->shared_level = level - 1;
8954 }
8955 } else {
8956 if (level == 1 &&
8957 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
8958 goto skip;
8959 }
8960
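	/*
	 * The buffer may have been released or rewritten since the lookup
	 * above; if it is no longer uptodate, drop it and re-read it from
	 * disk below.
	 */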
8961 if (!btrfs_buffer_uptodate(next, generation, 0)) {
8962 btrfs_tree_unlock(next);
8963 free_extent_buffer(next);
8964 next = NULL;
8965 *lookup_info = 1;
8966 }
8967
8968 if (!next) {
8969 if (reada && level == 1)
8970 reada_walk_down(trans, root, wc, path);
8971 next = read_tree_block(fs_info, bytenr, generation, level - 1,
8972 &first_key);
8973 if (IS_ERR(next)) {
8974 return PTR_ERR(next);
8975 } else if (!extent_buffer_uptodate(next)) {
8976 free_extent_buffer(next);
8977 return -EIO;
8978 }
8979 btrfs_tree_lock(next);
8980 btrfs_set_lock_blocking(next);
8981 }
8982
8983 level--;
8984 ASSERT(level == btrfs_header_level(next));
8985 if (level != btrfs_header_level(next)) {
8986 btrfs_err(root->fs_info, "mismatched level");
8987 ret = -EIO;
8988 goto out_unlock;
8989 }
8990 path->nodes[level] = next;
8991 path->slots[level] = 0;
8992 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8993 wc->level = level;
8994 if (wc->level == 1)
8995 wc->reada_slot = 0;
8996 return 0;
8997skip:
8998 wc->refs[level - 1] = 0;
8999 wc->flags[level - 1] = 0;
9000 if (wc->stage == DROP_REFERENCE) {
9001 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9002 parent = path->nodes[level]->start;
9003 } else {
9004 ASSERT(root->root_key.objectid ==
9005 btrfs_header_owner(path->nodes[level]));
9006 if (root->root_key.objectid !=
9007 btrfs_header_owner(path->nodes[level])) {
9008 btrfs_err(root->fs_info,
9009 "mismatched block owner");
9010 ret = -EIO;
9011 goto out_unlock;
9012 }
9013 parent = 0;
9014 }
9015
		/*
		 * Reloc tree doesn't contribute to qgroup numbers, and we have
		 * already accounted them at merge time (replace_path),
		 * thus we could skip expensive subtree trace here.
		 */
9021 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
9022 need_account) {
9023 ret = btrfs_qgroup_trace_subtree(trans, next,
9024 generation, level - 1);
9025 if (ret) {
9026 btrfs_err_rl(fs_info,
9027 "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
9028 ret);
9029 }
9030 }
9031 ret = btrfs_free_extent(trans, root, bytenr, fs_info->nodesize,
9032 parent, root->root_key.objectid,
9033 level - 1, 0);
9034 if (ret)
9035 goto out_unlock;
9036 }
9037
9038 *lookup_info = 1;
9039 ret = 1;
9040
9041out_unlock:
9042 btrfs_tree_unlock(next);
9043 free_extent_buffer(next);
9044
9045 return ret;
9046}
9047
/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
 *
 * when wc->stage == UPDATE_BACKREF, this function changes
 * wc->stage back to DROP_REFERENCE if we changed wc->stage
 * to UPDATE_BACKREF previously while processing the block.
 *
 * NOTE: return value 1 means we should stop walking up.
 */
9060static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
9061 struct btrfs_root *root,
9062 struct btrfs_path *path,
9063 struct walk_control *wc)
9064{
9065 struct btrfs_fs_info *fs_info = root->fs_info;
9066 int ret;
9067 int level = wc->level;
9068 struct extent_buffer *eb = path->nodes[level];
9069 u64 parent = 0;
9070
9071 if (wc->stage == UPDATE_BACKREF) {
9072 BUG_ON(wc->shared_level < level);
9073 if (level < wc->shared_level)
9074 goto out;
9075
9076 ret = find_next_key(path, level + 1, &wc->update_progress);
9077 if (ret > 0)
9078 wc->update_ref = 0;
9079
9080 wc->stage = DROP_REFERENCE;
9081 wc->shared_level = -1;
9082 path->slots[level] = 0;
9083
		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
9089 if (!path->locks[level]) {
9090 BUG_ON(level == 0);
9091 btrfs_tree_lock(eb);
9092 btrfs_set_lock_blocking(eb);
9093 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9094
9095 ret = btrfs_lookup_extent_info(trans, fs_info,
9096 eb->start, level, 1,
9097 &wc->refs[level],
9098 &wc->flags[level]);
9099 if (ret < 0) {
9100 btrfs_tree_unlock_rw(eb, path->locks[level]);
9101 path->locks[level] = 0;
9102 return ret;
9103 }
9104 BUG_ON(wc->refs[level] == 0);
9105 if (wc->refs[level] == 1) {
9106 btrfs_tree_unlock_rw(eb, path->locks[level]);
9107 path->locks[level] = 0;
9108 return 1;
9109 }
9110 }
9111 }
9112
9113	/* wc->stage == DROP_REFERENCE */
9114 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
9115
9116 if (wc->refs[level] == 1) {
9117 if (level == 0) {
9118 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9119 ret = btrfs_dec_ref(trans, root, eb, 1);
9120 else
9121 ret = btrfs_dec_ref(trans, root, eb, 0);
9122 BUG_ON(ret);
9123 ret = btrfs_qgroup_trace_leaf_items(trans, eb);
9124 if (ret) {
9125 btrfs_err_rl(fs_info,
9126 "error %d accounting leaf items. Quota is out of sync, rescan required.",
9127 ret);
9128 }
9129 }
9130
9131 if (!path->locks[level] &&
9132 btrfs_header_generation(eb) == trans->transid) {
9133 btrfs_tree_lock(eb);
9134 btrfs_set_lock_blocking(eb);
9135 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9136 }
9137 clean_tree_block(fs_info, eb);
9138 }
9139
9140 if (eb == root->node) {
9141 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9142 parent = eb->start;
9143 else if (root->root_key.objectid != btrfs_header_owner(eb))
9144 goto owner_mismatch;
9145 } else {
9146 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
9147 parent = path->nodes[level + 1]->start;
9148 else if (root->root_key.objectid !=
9149 btrfs_header_owner(path->nodes[level + 1]))
9150 goto owner_mismatch;
9151 }
9152
9153 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
9154out:
9155 wc->refs[level] = 0;
9156 wc->flags[level] = 0;
9157 return 0;
9158
9159owner_mismatch:
9160 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
9161 btrfs_header_owner(eb), root->root_key.objectid);
9162 return -EUCLEAN;
9163}
9164
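/*
 * walk_down_tree() descends from wc->level towards the leaves, calling
 * walk_down_proc() to check reference counts on the current block and
 * do_walk_down() to read, lock and step into the next child.  The walk
 * stops at a leaf, at a shared subtree boundary, or when the current
 * node has no more slots left to visit.
 */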
9165static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
9166 struct btrfs_root *root,
9167 struct btrfs_path *path,
9168 struct walk_control *wc)
9169{
9170 int level = wc->level;
9171 int lookup_info = 1;
9172 int ret;
9173
9174 while (level >= 0) {
9175 ret = walk_down_proc(trans, root, path, wc, lookup_info);
9176 if (ret > 0)
9177 break;
9178
9179 if (level == 0)
9180 break;
9181
9182 if (path->slots[level] >=
9183 btrfs_header_nritems(path->nodes[level]))
9184 break;
9185
9186 ret = do_walk_down(trans, root, path, wc, &lookup_info);
9187 if (ret > 0) {
9188 path->slots[level]++;
9189 continue;
9190 } else if (ret < 0)
9191 return ret;
9192 level = wc->level;
9193 }
9194 return 0;
9195}
9196
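/*
 * walk_up_tree() climbs from wc->level towards max_level, dropping our
 * reference on each fully visited block via walk_up_proc().  It returns
 * 0 when another slot to descend into was found, 1 when the whole walk
 * is finished, and a negative errno on error.
 */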
9197static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
9198 struct btrfs_root *root,
9199 struct btrfs_path *path,
9200 struct walk_control *wc, int max_level)
9201{
9202 int level = wc->level;
9203 int ret;
9204
9205 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
9206 while (level < max_level && path->nodes[level]) {
9207 wc->level = level;
9208 if (path->slots[level] + 1 <
9209 btrfs_header_nritems(path->nodes[level])) {
9210 path->slots[level]++;
9211 return 0;
9212 } else {
9213 ret = walk_up_proc(trans, root, path, wc);
9214 if (ret > 0)
9215 return 0;
9216 if (ret < 0)
9217 return ret;
9218
9219 if (path->locks[level]) {
9220 btrfs_tree_unlock_rw(path->nodes[level],
9221 path->locks[level]);
9222 path->locks[level] = 0;
9223 }
9224 free_extent_buffer(path->nodes[level]);
9225 path->nodes[level] = NULL;
9226 level++;
9227 }
9228 }
9229 return 1;
9230}
9231
9232/*
9233 * drop a subvolume tree.
9234 *
9235 * This function traverses the tree freeing any blocks that are only
9236 * referenced by the tree.
9237 *
9238 * When a shared tree block is found, this function decreases its
9239 * reference count by one.  If update_ref is true, it also makes sure
9240 * backrefs for the shared block and all lower level blocks are
9241 * properly updated.
9242 *
9243 * If called with for_reloc == 0, may exit early with -EAGAIN.
9244 */
9245int btrfs_drop_snapshot(struct btrfs_root *root,
9246 struct btrfs_block_rsv *block_rsv, int update_ref,
9247 int for_reloc)
9248{
9249 struct btrfs_fs_info *fs_info = root->fs_info;
9250 struct btrfs_path *path;
9251 struct btrfs_trans_handle *trans;
9252 struct btrfs_root *tree_root = fs_info->tree_root;
9253 struct btrfs_root_item *root_item = &root->root_item;
9254 struct walk_control *wc;
9255 struct btrfs_key key;
9256 int err = 0;
9257 int ret;
9258 int level;
9259 bool root_dropped = false;
9260
9261 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
9262
9263 path = btrfs_alloc_path();
9264 if (!path) {
9265 err = -ENOMEM;
9266 goto out;
9267 }
9268
9269 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9270 if (!wc) {
9271 btrfs_free_path(path);
9272 err = -ENOMEM;
9273 goto out;
9274 }
9275
9276 trans = btrfs_start_transaction(tree_root, 0);
9277 if (IS_ERR(trans)) {
9278 err = PTR_ERR(trans);
9279 goto out_free;
9280 }
9281
9282 err = btrfs_run_delayed_items(trans);
9283 if (err)
9284 goto out_end_trans;
9285
9286 if (block_rsv)
9287 trans->block_rsv = block_rsv;
9288
9289	/*
9290	 * This will help us catch people modifying the fs tree while we're
9291	 * dropping it.  It is unsafe to mess with the fs tree while it's being
9292	 * dropped as we unlock the root node and parent nodes as we walk down
9293	 * the tree, assuming nothing will change.  If something does change
9294	 * then we'll have stale information and drop references to blocks
9295	 * we've already dropped.
9296	 */
9297 set_bit(BTRFS_ROOT_DELETING, &root->state);
9298 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
9299 level = btrfs_header_level(root->node);
9300 path->nodes[level] = btrfs_lock_root_node(root);
9301 btrfs_set_lock_blocking(path->nodes[level]);
9302 path->slots[level] = 0;
9303 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9304 memset(&wc->update_progress, 0,
9305 sizeof(wc->update_progress));
9306 } else {
9307 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
9308 memcpy(&wc->update_progress, &key,
9309 sizeof(wc->update_progress));
9310
9311 level = root_item->drop_level;
9312 BUG_ON(level == 0);
9313 path->lowest_level = level;
9314 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9315 path->lowest_level = 0;
9316 if (ret < 0) {
9317 err = ret;
9318 goto out_end_trans;
9319 }
9320 WARN_ON(ret > 0);
9321
9322		/*
9323		 * unlock our path, this is safe because only this
9324		 * function is allowed to delete this snapshot
9325		 */
9326 btrfs_unlock_up_safe(path, 0);
9327
9328 level = btrfs_header_level(root->node);
9329 while (1) {
9330 btrfs_tree_lock(path->nodes[level]);
9331 btrfs_set_lock_blocking(path->nodes[level]);
9332 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9333
9334 ret = btrfs_lookup_extent_info(trans, fs_info,
9335 path->nodes[level]->start,
9336 level, 1, &wc->refs[level],
9337 &wc->flags[level]);
9338 if (ret < 0) {
9339 err = ret;
9340 goto out_end_trans;
9341 }
9342 BUG_ON(wc->refs[level] == 0);
9343
9344 if (level == root_item->drop_level)
9345 break;
9346
9347 btrfs_tree_unlock(path->nodes[level]);
9348 path->locks[level] = 0;
9349 WARN_ON(wc->refs[level] != 1);
9350 level--;
9351 }
9352 }
9353
9354 wc->level = level;
9355 wc->shared_level = -1;
9356 wc->stage = DROP_REFERENCE;
9357 wc->update_ref = update_ref;
9358 wc->keep_locks = 0;
9359 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9360
9361 while (1) {
9362
9363 ret = walk_down_tree(trans, root, path, wc);
9364 if (ret < 0) {
9365 err = ret;
9366 break;
9367 }
9368
9369 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
9370 if (ret < 0) {
9371 err = ret;
9372 break;
9373 }
9374
9375 if (ret > 0) {
9376 BUG_ON(wc->stage != DROP_REFERENCE);
9377 break;
9378 }
9379
9380 if (wc->stage == DROP_REFERENCE) {
9381 level = wc->level;
9382 btrfs_node_key(path->nodes[level],
9383 &root_item->drop_progress,
9384 path->slots[level]);
9385 root_item->drop_level = level;
9386 }
9387
9388 BUG_ON(wc->level == 0);
9389 if (btrfs_should_end_transaction(trans) ||
9390 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
9391 ret = btrfs_update_root(trans, tree_root,
9392 &root->root_key,
9393 root_item);
9394 if (ret) {
9395 btrfs_abort_transaction(trans, ret);
9396 err = ret;
9397 goto out_end_trans;
9398 }
9399
9400 btrfs_end_transaction_throttle(trans);
9401 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
9402 btrfs_debug(fs_info,
9403 "drop snapshot early exit");
9404 err = -EAGAIN;
9405 goto out_free;
9406 }
9407
9408 trans = btrfs_start_transaction(tree_root, 0);
9409 if (IS_ERR(trans)) {
9410 err = PTR_ERR(trans);
9411 goto out_free;
9412 }
9413 if (block_rsv)
9414 trans->block_rsv = block_rsv;
9415 }
9416 }
9417 btrfs_release_path(path);
9418 if (err)
9419 goto out_end_trans;
9420
9421 ret = btrfs_del_root(trans, &root->root_key);
9422 if (ret) {
9423 btrfs_abort_transaction(trans, ret);
9424 err = ret;
9425 goto out_end_trans;
9426 }
9427
9428 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9429 ret = btrfs_find_root(tree_root, &root->root_key, path,
9430 NULL, NULL);
9431 if (ret < 0) {
9432 btrfs_abort_transaction(trans, ret);
9433 err = ret;
9434 goto out_end_trans;
9435 } else if (ret > 0) {
9436			/*
9437			 * If we fail to delete the orphan item this time
9438			 * around, it'll get picked up the next time around.
9439			 * The most common failure here is just -ENOENT.
9440			 */
9441 btrfs_del_orphan_item(trans, tree_root,
9442 root->root_key.objectid);
9443 }
9444 }
9445
9446 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
9447 btrfs_add_dropped_root(trans, root);
9448 } else {
9449 free_extent_buffer(root->node);
9450 free_extent_buffer(root->commit_root);
9451 btrfs_put_fs_root(root);
9452 }
9453 root_dropped = true;
9454out_end_trans:
9455 btrfs_end_transaction_throttle(trans);
9456out_free:
9457 kfree(wc);
9458 btrfs_free_path(path);
9459out:
9460	/*
9461	 * So if we need to stop dropping the snapshot for whatever reason we
9462	 * need to make sure to add it back to the dead root list so that we
9463	 * keep trying to do the work later.  This also cleans up roots if we
9464	 * don't have it in the radix (like when we recover after a power fail
9465	 * or unmount) so we don't leak memory.
9466	 */
9467 if (!for_reloc && !root_dropped)
9468 btrfs_add_dead_root(root);
9469 if (err && err != -EAGAIN)
9470 btrfs_handle_fs_error(fs_info, err, NULL);
9471 return err;
9472}
9473
9474/*
9475 * drop the subtree rooted at tree block 'node'.
9476 *
9477 * NOTE: this function will unlock and release tree block 'node'.
9478 * Only used by the relocation code.
9479 */
9480int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
9481 struct btrfs_root *root,
9482 struct extent_buffer *node,
9483 struct extent_buffer *parent)
9484{
9485 struct btrfs_fs_info *fs_info = root->fs_info;
9486 struct btrfs_path *path;
9487 struct walk_control *wc;
9488 int level;
9489 int parent_level;
9490 int ret = 0;
9491 int wret;
9492
9493 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
9494
9495 path = btrfs_alloc_path();
9496 if (!path)
9497 return -ENOMEM;
9498
9499 wc = kzalloc(sizeof(*wc), GFP_NOFS);
9500 if (!wc) {
9501 btrfs_free_path(path);
9502 return -ENOMEM;
9503 }
9504
9505 btrfs_assert_tree_locked(parent);
9506 parent_level = btrfs_header_level(parent);
9507 extent_buffer_get(parent);
9508 path->nodes[parent_level] = parent;
9509 path->slots[parent_level] = btrfs_header_nritems(parent);
9510
9511 btrfs_assert_tree_locked(node);
9512 level = btrfs_header_level(node);
9513 path->nodes[level] = node;
9514 path->slots[level] = 0;
9515 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
9516
9517 wc->refs[parent_level] = 1;
9518 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
9519 wc->level = level;
9520 wc->shared_level = -1;
9521 wc->stage = DROP_REFERENCE;
9522 wc->update_ref = 0;
9523 wc->keep_locks = 1;
9524 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
9525
9526 while (1) {
9527 wret = walk_down_tree(trans, root, path, wc);
9528 if (wret < 0) {
9529 ret = wret;
9530 break;
9531 }
9532
9533 wret = walk_up_tree(trans, root, path, wc, parent_level);
9534 if (wret < 0)
9535 ret = wret;
9536 if (wret != 0)
9537 break;
9538 }
9539
9540 kfree(wc);
9541 btrfs_free_path(path);
9542 return ret;
9543}
9544
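/*
 * Figure out which profile a block group should be converted to: prefer
 * the restripe target if a balance is running, otherwise convert between
 * DUP and the mirrored profiles based on the number of rw devices.
 */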
9545static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9546{
9547 u64 num_devices;
9548 u64 stripped;
9549
9550	/*
9551	 * if restripe for this chunk_type is on, pick the target profile and
9552	 * return, otherwise do the usual balance
9553	 */
9554 stripped = get_restripe_target(fs_info, flags);
9555 if (stripped)
9556 return extended_to_chunk(stripped);
9557
9558 num_devices = fs_info->fs_devices->rw_devices;
9559
9560 stripped = BTRFS_BLOCK_GROUP_RAID0 |
9561 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
9562 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9563
9564 if (num_devices == 1) {
9565 stripped |= BTRFS_BLOCK_GROUP_DUP;
9566 stripped = flags & ~stripped;
9567
9568		/* turn raid0 into single device chunks */
9569 if (flags & BTRFS_BLOCK_GROUP_RAID0)
9570 return stripped;
9571
9572		/* turn mirroring into duplication */
9573 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
9574 BTRFS_BLOCK_GROUP_RAID10))
9575 return stripped | BTRFS_BLOCK_GROUP_DUP;
9576 } else {
9577		/* they already had raid on here, just return */
9578 if (flags & stripped)
9579 return flags;
9580
9581 stripped |= BTRFS_BLOCK_GROUP_DUP;
9582 stripped = flags & ~stripped;
9583
9584		/* switch duplicated blocks with raid1 */
9585 if (flags & BTRFS_BLOCK_GROUP_DUP)
9586 return stripped | BTRFS_BLOCK_GROUP_RAID1;
9587
9588		/* this is drive concat, leave it alone */
9589 }
9590
9591 return flags;
9592}
9593
9594static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
9595{
9596 struct btrfs_space_info *sinfo = cache->space_info;
9597 u64 num_bytes;
9598 u64 min_allocable_bytes;
9599 int ret = -ENOSPC;
9600
9601	/*
9602	 * We need some metadata space and system metadata space for
9603	 * allocating chunks in some corner cases until we force the
9604	 * block group to be read-only.
9605	 */
9606 if ((sinfo->flags &
9607 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
9608 !force)
9609 min_allocable_bytes = SZ_1M;
9610 else
9611 min_allocable_bytes = 0;
9612
9613 spin_lock(&sinfo->lock);
9614 spin_lock(&cache->lock);
9615
9616 if (cache->ro) {
9617 cache->ro++;
9618 ret = 0;
9619 goto out;
9620 }
9621
9622 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
9623 cache->bytes_super - btrfs_block_group_used(&cache->item);
9624
9625 if (btrfs_space_info_used(sinfo, true) + num_bytes +
9626 min_allocable_bytes <= sinfo->total_bytes) {
9627 sinfo->bytes_readonly += num_bytes;
9628 cache->ro++;
9629 list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
9630 ret = 0;
9631 }
9632out:
9633 spin_unlock(&cache->lock);
9634 spin_unlock(&sinfo->lock);
9635 return ret;
9636}
9637
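/*
 * Mark a block group read-only, first allocating a replacement chunk for
 * its profile if necessary.  If the dirty block group cache has already
 * started writing for the running transaction, wait for that transaction
 * to commit and retry, since flipping the ro state would race with the
 * cache writeout.
 */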
9638int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
9640{
9641 struct btrfs_fs_info *fs_info = cache->fs_info;
9642 struct btrfs_trans_handle *trans;
9643 u64 alloc_flags;
9644 int ret;
9645
9646again:
9647 trans = btrfs_join_transaction(fs_info->extent_root);
9648 if (IS_ERR(trans))
9649 return PTR_ERR(trans);
9650
9651	/*
9652	 * we're not allowed to set block groups readonly after the dirty
9653	 * block groups cache has started writing.  If it already started,
9654	 * back off and let this transaction commit.
9655	 */
9656 mutex_lock(&fs_info->ro_block_group_mutex);
9657 if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
9658 u64 transid = trans->transid;
9659
9660 mutex_unlock(&fs_info->ro_block_group_mutex);
9661 btrfs_end_transaction(trans);
9662
9663 ret = btrfs_wait_for_commit(fs_info, transid);
9664 if (ret)
9665 return ret;
9666 goto again;
9667 }
9668
9669	/*
9670	 * If we are changing raid levels, try to allocate a corresponding
9671	 * block group with the new raid level.
9672	 */
9673 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9674 if (alloc_flags != cache->flags) {
9675 ret = do_chunk_alloc(trans, alloc_flags,
9676 CHUNK_ALLOC_FORCE);
9677		/*
9678		 * ENOSPC is allowed here, we may have enough space already
9679		 * allocated at the new raid level to carry on, so only treat
9680		 * other errors as fatal.
9681		 */
9682 if (ret == -ENOSPC)
9683 ret = 0;
9684 if (ret < 0)
9685 goto out;
9686 }
9687
9688 ret = inc_block_group_ro(cache, 0);
9689 if (!ret)
9690 goto out;
9691 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9692 ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9693 if (ret < 0)
9694 goto out;
9695 ret = inc_block_group_ro(cache, 0);
9696out:
9697 if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
9698 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9699 mutex_lock(&fs_info->chunk_mutex);
9700 check_system_chunk(trans, alloc_flags);
9701 mutex_unlock(&fs_info->chunk_mutex);
9702 }
9703 mutex_unlock(&fs_info->ro_block_group_mutex);
9704
9705 btrfs_end_transaction(trans);
9706 return ret;
9707}
9708
9709int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
9710{
9711 u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
9712
9713 return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9714}
9715
9716/*
9717 * Helper to account the unused space of all read-only block groups in
9718 * the given space_info; takes mirrors into account.
9719 */
9720u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
9721{
9722 struct btrfs_block_group_cache *block_group;
9723 u64 free_bytes = 0;
9724 int factor;
9725
9726	/* It's df, we don't care if it's racy */
9727 if (list_empty(&sinfo->ro_bgs))
9728 return 0;
9729
9730 spin_lock(&sinfo->lock);
9731 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
9732 spin_lock(&block_group->lock);
9733
9734 if (!block_group->ro) {
9735 spin_unlock(&block_group->lock);
9736 continue;
9737 }
9738
9739 factor = btrfs_bg_type_to_factor(block_group->flags);
9740 free_bytes += (block_group->key.offset -
9741 btrfs_block_group_used(&block_group->item)) *
9742 factor;
9743
9744 spin_unlock(&block_group->lock);
9745 }
9746 spin_unlock(&sinfo->lock);
9747
9748 return free_bytes;
9749}
9750
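/* Undo btrfs_inc_block_group_ro(), dropping one read-only reference. */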
9751void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
9752{
9753 struct btrfs_space_info *sinfo = cache->space_info;
9754 u64 num_bytes;
9755
9756 BUG_ON(!cache->ro);
9757
9758 spin_lock(&sinfo->lock);
9759 spin_lock(&cache->lock);
9760 if (!--cache->ro) {
9761 num_bytes = cache->key.offset - cache->reserved -
9762 cache->pinned - cache->bytes_super -
9763 btrfs_block_group_used(&cache->item);
9764 sinfo->bytes_readonly -= num_bytes;
9765 list_del_init(&cache->ro_list);
9766 }
9767 spin_unlock(&cache->lock);
9768 spin_unlock(&sinfo->lock);
9769}
9770
9771/*
9772 * Checks to see if it's even possible to relocate this block group.
9773 *
9774 * @return -1 if it's not a good idea to relocate this block group, 0 if
9775 * it's ok to go ahead and try.
9776 */
9777int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
9778{
9779 struct btrfs_root *root = fs_info->extent_root;
9780 struct btrfs_block_group_cache *block_group;
9781 struct btrfs_space_info *space_info;
9782 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
9783 struct btrfs_device *device;
9784 struct btrfs_trans_handle *trans;
9785 u64 min_free;
9786 u64 dev_min = 1;
9787 u64 dev_nr = 0;
9788 u64 target;
9789 int debug;
9790 int index;
9791 int full = 0;
9792 int ret = 0;
9793
9794 debug = btrfs_test_opt(fs_info, ENOSPC_DEBUG);
9795
9796 block_group = btrfs_lookup_block_group(fs_info, bytenr);
9797
9798	/* odd, couldn't find the block group, leave it alone */
9799 if (!block_group) {
9800 if (debug)
9801 btrfs_warn(fs_info,
9802 "can't find block group for bytenr %llu",
9803 bytenr);
9804 return -1;
9805 }
9806
9807 min_free = btrfs_block_group_used(&block_group->item);
9808
9809	/* no bytes used, we're good */
9810 if (!min_free)
9811 goto out;
9812
9813 space_info = block_group->space_info;
9814 spin_lock(&space_info->lock);
9815
9816 full = space_info->full;
9817
9818	/*
9819	 * If this block group isn't the only one in its space_info, and the
9820	 * space already allocated to the other block groups has enough free
9821	 * room to absorb this group's used bytes, we can relocate without
9822	 * allocating new chunks, so we're done.  Otherwise fall through and
9823	 * check whether a new chunk could be allocated on some device.
9824	 */
9825 if ((space_info->total_bytes != block_group->key.offset) &&
9826 (btrfs_space_info_used(space_info, false) + min_free <
9827 space_info->total_bytes)) {
9828 spin_unlock(&space_info->lock);
9829 goto out;
9830 }
9831 spin_unlock(&space_info->lock);
9832
9833	/*
9834	 * ok we don't have enough space, but maybe we have free space on our
9835	 * devices to allocate new chunks for relocation, so loop through our
9836	 * alloc devices and guess if we have enough space.  if this block
9837	 * group is going to be restriped, run checks against the target
9838	 * profile instead of the current one.
9839	 */
9840 ret = -1;
9841
9842	/*
9843	 * index:
9844	 *	0: raid10
9845	 *	1: raid1
9846	 *	2: dup
9847	 *	3: raid0
9848	 *	4: single
9849	 */
9850 target = get_restripe_target(fs_info, block_group->flags);
9851 if (target) {
9852 index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
9853 } else {
9854		/*
9855		 * this is just a balance, so if we were marked as full
9856		 * we know there is no space for a new chunk
9857		 */
9858 if (full) {
9859 if (debug)
9860 btrfs_warn(fs_info,
9861 "no space to alloc new chunk for block group %llu",
9862 block_group->key.objectid);
9863 goto out;
9864 }
9865
9866 index = btrfs_bg_flags_to_raid_index(block_group->flags);
9867 }
9868
9869 if (index == BTRFS_RAID_RAID10) {
9870 dev_min = 4;
9871
9872		/* Divide by 2 */
9873 } else if (index == BTRFS_RAID_RAID1) {
9874 dev_min = 2;
9875 } else if (index == BTRFS_RAID_DUP) {
9876		/* Multiply by 2 */
9877 min_free <<= 1;
9878 } else if (index == BTRFS_RAID_RAID0) {
9879 dev_min = fs_devices->rw_devices;
9880 min_free = div64_u64(min_free, dev_min);
9881 }
9882
9883	/* We need to do this so that we can look at pending chunks */
9884 trans = btrfs_join_transaction(root);
9885 if (IS_ERR(trans)) {
9886 ret = PTR_ERR(trans);
9887 goto out;
9888 }
9889
9890 mutex_lock(&fs_info->chunk_mutex);
9891 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
9892 u64 dev_offset;
9893
9894		/*
9895		 * check to make sure we can actually find a chunk with
9896		 * enough space to fit our block group in.
9897		 */
9898 if (device->total_bytes > device->bytes_used + min_free &&
9899 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
9900 ret = find_free_dev_extent(trans, device, min_free,
9901 &dev_offset, NULL);
9902 if (!ret)
9903 dev_nr++;
9904
9905 if (dev_nr >= dev_min)
9906 break;
9907
9908 ret = -1;
9909 }
9910 }
9911 if (debug && ret == -1)
9912 btrfs_warn(fs_info,
9913 "no space to allocate a new chunk for block group %llu",
9914 block_group->key.objectid);
9915 mutex_unlock(&fs_info->chunk_mutex);
9916 btrfs_end_transaction(trans);
9917out:
9918 btrfs_put_block_group(block_group);
9919 return ret;
9920}
9921
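/*
 * Find the first block group item at or after @key and sanity check it
 * against the chunk mapping: the item must be backed by a chunk with the
 * same start, length and type flags, otherwise the fs is inconsistent.
 */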
9922static int find_first_block_group(struct btrfs_fs_info *fs_info,
9923 struct btrfs_path *path,
9924 struct btrfs_key *key)
9925{
9926 struct btrfs_root *root = fs_info->extent_root;
9927 int ret = 0;
9928 struct btrfs_key found_key;
9929 struct extent_buffer *leaf;
9930 struct btrfs_block_group_item bg;
9931 u64 flags;
9932 int slot;
9933
9934 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
9935 if (ret < 0)
9936 goto out;
9937
9938 while (1) {
9939 slot = path->slots[0];
9940 leaf = path->nodes[0];
9941 if (slot >= btrfs_header_nritems(leaf)) {
9942 ret = btrfs_next_leaf(root, path);
9943 if (ret == 0)
9944 continue;
9945 if (ret < 0)
9946 goto out;
9947 break;
9948 }
9949 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9950
9951 if (found_key.objectid >= key->objectid &&
9952 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9953 struct extent_map_tree *em_tree;
9954 struct extent_map *em;
9955
9956 em_tree = &root->fs_info->mapping_tree.map_tree;
9957 read_lock(&em_tree->lock);
9958 em = lookup_extent_mapping(em_tree, found_key.objectid,
9959 found_key.offset);
9960 read_unlock(&em_tree->lock);
9961 if (!em) {
9962 btrfs_err(fs_info,
9963 "logical %llu len %llu found bg but no related chunk",
9964 found_key.objectid, found_key.offset);
9965 ret = -ENOENT;
9966 } else if (em->start != found_key.objectid ||
9967 em->len != found_key.offset) {
9968 btrfs_err(fs_info,
9969 "block group %llu len %llu mismatch with chunk %llu len %llu",
9970 found_key.objectid, found_key.offset,
9971 em->start, em->len);
9972 ret = -EUCLEAN;
9973 } else {
9974 read_extent_buffer(leaf, &bg,
9975 btrfs_item_ptr_offset(leaf, slot),
9976 sizeof(bg));
9977 flags = btrfs_block_group_flags(&bg) &
9978 BTRFS_BLOCK_GROUP_TYPE_MASK;
9979
9980 if (flags != (em->map_lookup->type &
9981 BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9982 btrfs_err(fs_info,
9983"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
9984 found_key.objectid,
9985 found_key.offset, flags,
9986 (BTRFS_BLOCK_GROUP_TYPE_MASK &
9987 em->map_lookup->type));
9988 ret = -EUCLEAN;
9989 } else {
9990 ret = 0;
9991 }
9992 }
9993 free_extent_map(em);
9994 goto out;
9995 }
9996 path->slots[0]++;
9997 }
9998out:
9999 return ret;
10000}
10001
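/*
 * Release the free space cache inode reference held by every block
 * group; typically called on unmount so the final iput doesn't happen
 * from the block group release path.
 */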
10002void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
10003{
10004 struct btrfs_block_group_cache *block_group;
10005 u64 last = 0;
10006
10007 while (1) {
10008 struct inode *inode;
10009
10010 block_group = btrfs_lookup_first_block_group(info, last);
10011 while (block_group) {
10012 wait_block_group_cache_done(block_group);
10013 spin_lock(&block_group->lock);
10014 if (block_group->iref)
10015 break;
10016 spin_unlock(&block_group->lock);
10017 block_group = next_block_group(info, block_group);
10018 }
10019 if (!block_group) {
10020 if (last == 0)
10021 break;
10022 last = 0;
10023 continue;
10024 }
10025
10026 inode = block_group->inode;
10027 block_group->iref = 0;
10028 block_group->inode = NULL;
10029 spin_unlock(&block_group->lock);
10030 ASSERT(block_group->io_ctl.inode == NULL);
10031 iput(inode);
10032 last = block_group->key.objectid + block_group->key.offset;
10033 btrfs_put_block_group(block_group);
10034 }
10035}
10036
10037/*
10038 * Must be called only after stopping all workers, since we could have block
10039 * group caching kthreads running, and therefore they could race with us if we
10040 * freed the block groups before stopping them.
10041 */
10042int btrfs_free_block_groups(struct btrfs_fs_info *info)
10043{
10044 struct btrfs_block_group_cache *block_group;
10045 struct btrfs_space_info *space_info;
10046 struct btrfs_caching_control *caching_ctl;
10047 struct rb_node *n;
10048
10049 down_write(&info->commit_root_sem);
10050 while (!list_empty(&info->caching_block_groups)) {
10051 caching_ctl = list_entry(info->caching_block_groups.next,
10052 struct btrfs_caching_control, list);
10053 list_del(&caching_ctl->list);
10054 put_caching_control(caching_ctl);
10055 }
10056 up_write(&info->commit_root_sem);
10057
10058 spin_lock(&info->unused_bgs_lock);
10059 while (!list_empty(&info->unused_bgs)) {
10060 block_group = list_first_entry(&info->unused_bgs,
10061 struct btrfs_block_group_cache,
10062 bg_list);
10063 list_del_init(&block_group->bg_list);
10064 btrfs_put_block_group(block_group);
10065 }
10066 spin_unlock(&info->unused_bgs_lock);
10067
10068 spin_lock(&info->block_group_cache_lock);
10069 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
10070 block_group = rb_entry(n, struct btrfs_block_group_cache,
10071 cache_node);
10072 rb_erase(&block_group->cache_node,
10073 &info->block_group_cache_tree);
10074 RB_CLEAR_NODE(&block_group->cache_node);
10075 spin_unlock(&info->block_group_cache_lock);
10076
10077 down_write(&block_group->space_info->groups_sem);
10078 list_del(&block_group->list);
10079 up_write(&block_group->space_info->groups_sem);
10080
10081		/*
10082		 * We haven't cached this block group, which means we could
10083		 * possibly have excluded extents on this block group.
10084		 */
10085 if (block_group->cached == BTRFS_CACHE_NO ||
10086 block_group->cached == BTRFS_CACHE_ERROR)
10087 free_excluded_extents(block_group);
10088
10089 btrfs_remove_free_space_cache(block_group);
10090 ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
10091 ASSERT(list_empty(&block_group->dirty_list));
10092 ASSERT(list_empty(&block_group->io_list));
10093 ASSERT(list_empty(&block_group->bg_list));
10094 ASSERT(atomic_read(&block_group->count) == 1);
10095 btrfs_put_block_group(block_group);
10096
10097 spin_lock(&info->block_group_cache_lock);
10098 }
10099 spin_unlock(&info->block_group_cache_lock);
10100
10101	/*
10102	 * Now that all the block groups are freed, go through and free the
10103	 * space_info structs.  This is only called during the final stages
10104	 * of unmount, so nobody else should be using them.  synchronize_rcu()
10105	 * waits out any RCU readers of the structures we are about to free.
10106	 */
10107 synchronize_rcu();
10108
10109 release_global_block_rsv(info);
10110
10111 while (!list_empty(&info->space_info)) {
10112 int i;
10113
10114 space_info = list_entry(info->space_info.next,
10115 struct btrfs_space_info,
10116 list);
10117
10118		/*
10119		 * Do not hide this behind enospc_debug, this is actually
10120		 * important and indicates a real bug if this happens.
10121		 */
10122 if (WARN_ON(space_info->bytes_pinned > 0 ||
10123 space_info->bytes_reserved > 0 ||
10124 space_info->bytes_may_use > 0))
10125 dump_space_info(info, space_info, 0, 0);
10126 list_del(&space_info->list);
10127 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
10128 struct kobject *kobj;
10129 kobj = space_info->block_group_kobjs[i];
10130 space_info->block_group_kobjs[i] = NULL;
10131 if (kobj) {
10132 kobject_del(kobj);
10133 kobject_put(kobj);
10134 }
10135 }
10136 kobject_del(&space_info->kobj);
10137 kobject_put(&space_info->kobj);
10138 }
10139 return 0;
10140}
10141
10142/* link_block_group() queues up kobjects to add when we're reclaim-safe */
10143void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10144{
10145 struct btrfs_space_info *space_info;
10146 struct raid_kobject *rkobj;
10147 LIST_HEAD(list);
10148 int index;
10149 int ret = 0;
10150
10151 spin_lock(&fs_info->pending_raid_kobjs_lock);
10152 list_splice_init(&fs_info->pending_raid_kobjs, &list);
10153 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10154
10155 list_for_each_entry(rkobj, &list, list) {
10156 space_info = __find_space_info(fs_info, rkobj->flags);
10157 index = btrfs_bg_flags_to_raid_index(rkobj->flags);
10158
10159 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
10160 "%s", get_raid_name(index));
10161 if (ret) {
10162 kobject_put(&rkobj->kobj);
10163 break;
10164 }
10165 }
10166 if (ret)
10167 btrfs_warn(fs_info,
10168 "failed to add kobject for block cache, ignoring");
10169}
10170
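/*
 * Add a block group to its space_info's per-raid-index list.  The first
 * group of a given raid index also queues a sysfs kobject, which is
 * added later by btrfs_add_raid_kobjects() when it's reclaim-safe.
 */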
10171static void link_block_group(struct btrfs_block_group_cache *cache)
10172{
10173 struct btrfs_space_info *space_info = cache->space_info;
10174 struct btrfs_fs_info *fs_info = cache->fs_info;
10175 int index = btrfs_bg_flags_to_raid_index(cache->flags);
10176 bool first = false;
10177
10178 down_write(&space_info->groups_sem);
10179 if (list_empty(&space_info->block_groups[index]))
10180 first = true;
10181 list_add_tail(&cache->list, &space_info->block_groups[index]);
10182 up_write(&space_info->groups_sem);
10183
10184 if (first) {
10185 struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
10186 if (!rkobj) {
10187 btrfs_warn(cache->fs_info,
10188 "couldn't alloc memory for raid level kobject");
10189 return;
10190 }
10191 rkobj->flags = cache->flags;
10192 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
10193
10194 spin_lock(&fs_info->pending_raid_kobjs_lock);
10195 list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
10196 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10197 space_info->block_group_kobjs[index] = &rkobj->kobj;
10198 }
10199}
10200
10201static struct btrfs_block_group_cache *
10202btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10203 u64 start, u64 size)
10204{
10205 struct btrfs_block_group_cache *cache;
10206
10207 cache = kzalloc(sizeof(*cache), GFP_NOFS);
10208 if (!cache)
10209 return NULL;
10210
10211 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
10212 GFP_NOFS);
10213 if (!cache->free_space_ctl) {
10214 kfree(cache);
10215 return NULL;
10216 }
10217
10218 cache->key.objectid = start;
10219 cache->key.offset = size;
10220 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10221
10222 cache->fs_info = fs_info;
10223 cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
10224 set_free_space_tree_thresholds(cache);
10225
10226 atomic_set(&cache->count, 1);
10227 spin_lock_init(&cache->lock);
10228 init_rwsem(&cache->data_rwsem);
10229 INIT_LIST_HEAD(&cache->list);
10230 INIT_LIST_HEAD(&cache->cluster_list);
10231 INIT_LIST_HEAD(&cache->bg_list);
10232 INIT_LIST_HEAD(&cache->ro_list);
10233 INIT_LIST_HEAD(&cache->dirty_list);
10234 INIT_LIST_HEAD(&cache->io_list);
10235 btrfs_init_free_space_ctl(cache);
10236 atomic_set(&cache->trimming, 0);
10237 mutex_init(&cache->free_space_lock);
10238 btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
10239
10240 return cache;
10241}
10242
10243/*
10244 * Iterate over all chunks in the mapping tree and verify that each of
10245 * them has a corresponding block group with matching start, length and
10246 * type flags.
10247 */
10248static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
10249{
10250 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
10251 struct extent_map *em;
10252 struct btrfs_block_group_cache *bg;
10253 u64 start = 0;
10254 int ret = 0;
10255
10256 while (1) {
10257 read_lock(&map_tree->map_tree.lock);
10258		/*
10259		 * lookup_extent_mapping() will return the first extent map
10260		 * intersecting the range, so setting @len to 1 is enough to
10261		 * get the first chunk.
10262		 */
10263 em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
10264 read_unlock(&map_tree->map_tree.lock);
10265 if (!em)
10266 break;
10267
10268 bg = btrfs_lookup_block_group(fs_info, em->start);
10269 if (!bg) {
10270 btrfs_err(fs_info,
10271 "chunk start=%llu len=%llu doesn't have corresponding block group",
10272 em->start, em->len);
10273 ret = -EUCLEAN;
10274 free_extent_map(em);
10275 break;
10276 }
10277 if (bg->key.objectid != em->start ||
10278 bg->key.offset != em->len ||
10279 (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
10280 (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10281 btrfs_err(fs_info,
10282"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
10283 em->start, em->len,
10284 em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
10285 bg->key.objectid, bg->key.offset,
10286 bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
10287 ret = -EUCLEAN;
10288 free_extent_map(em);
10289 btrfs_put_block_group(bg);
10290 break;
10291 }
10292 start = em->start + em->len;
10293 free_extent_map(em);
10294 btrfs_put_block_group(bg);
10295 }
10296 return ret;
10297}
10298
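/*
 * Read all block group items from the extent tree at mount time, create
 * the in-memory caches for them, hook them up to their space_infos and
 * mark read-only or unused block groups accordingly.
 */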
10299int btrfs_read_block_groups(struct btrfs_fs_info *info)
10300{
10301 struct btrfs_path *path;
10302 int ret;
10303 struct btrfs_block_group_cache *cache;
10304 struct btrfs_space_info *space_info;
10305 struct btrfs_key key;
10306 struct btrfs_key found_key;
10307 struct extent_buffer *leaf;
10308 int need_clear = 0;
10309 u64 cache_gen;
10310 u64 feature;
10311 int mixed;
10312
10313 feature = btrfs_super_incompat_flags(info->super_copy);
10314 mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
10315
10316 key.objectid = 0;
10317 key.offset = 0;
10318 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10319 path = btrfs_alloc_path();
10320 if (!path)
10321 return -ENOMEM;
10322 path->reada = READA_FORWARD;
10323
10324 cache_gen = btrfs_super_cache_generation(info->super_copy);
10325 if (btrfs_test_opt(info, SPACE_CACHE) &&
10326 btrfs_super_generation(info->super_copy) != cache_gen)
10327 need_clear = 1;
10328 if (btrfs_test_opt(info, CLEAR_CACHE))
10329 need_clear = 1;
10330
10331 while (1) {
10332 ret = find_first_block_group(info, path, &key);
10333 if (ret > 0)
10334 break;
10335 if (ret != 0)
10336 goto error;
10337
10338 leaf = path->nodes[0];
10339 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10340
10341 cache = btrfs_create_block_group_cache(info, found_key.objectid,
10342 found_key.offset);
10343 if (!cache) {
10344 ret = -ENOMEM;
10345 goto error;
10346 }
10347
10348 if (need_clear) {
10349			/*
10350			 * When we mount with an old space cache, we need to
10351			 * set BTRFS_DC_CLEAR and the dirty flag so that the
10352			 * next cache write will:
10353			 *
10354			 * a) truncate the old free space cache inode and set
10355			 *    up a new one, and
10356			 * b) flush the new space cache info onto disk, so we
10357			 *    don't reuse stale cache contents.
10358			 */
10359 if (btrfs_test_opt(info, SPACE_CACHE))
10360 cache->disk_cache_state = BTRFS_DC_CLEAR;
10361 }
10362
10363 read_extent_buffer(leaf, &cache->item,
10364 btrfs_item_ptr_offset(leaf, path->slots[0]),
10365 sizeof(cache->item));
10366 cache->flags = btrfs_block_group_flags(&cache->item);
10367 if (!mixed &&
10368 ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
10369 (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
10370 btrfs_err(info,
10371"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
10372 cache->key.objectid);
10373 ret = -EINVAL;
10374 goto error;
10375 }
10376
10377 key.objectid = found_key.objectid + found_key.offset;
10378 btrfs_release_path(path);
10379
10380		/*
10381		 * We need to exclude the super stripes now so that the space
10382		 * info has super bytes accounted for, otherwise our free
10383		 * space reservations would be wrong.
10384		 */
10385 ret = exclude_super_stripes(cache);
10386 if (ret) {
10387			/*
10388			 * We may have excluded something, so call this just
10389			 * in case.
10390			 */
10391 free_excluded_extents(cache);
10392 btrfs_put_block_group(cache);
10393 goto error;
10394 }
10395
10396		/*
10397		 * check for two cases, either we are full, and therefore
10398		 * don't need to bother with the caching work since we won't
10399		 * find any space, or we are empty, and we can just add all
10400		 * the space in and be done with it.  This saves us _a_lot_ of
10401		 * time, particularly in the full case.
10402		 */
10403 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
10404 cache->last_byte_to_unpin = (u64)-1;
10405 cache->cached = BTRFS_CACHE_FINISHED;
10406 free_excluded_extents(cache);
10407 } else if (btrfs_block_group_used(&cache->item) == 0) {
10408 cache->last_byte_to_unpin = (u64)-1;
10409 cache->cached = BTRFS_CACHE_FINISHED;
10410 add_new_free_space(cache, found_key.objectid,
10411 found_key.objectid +
10412 found_key.offset);
10413 free_excluded_extents(cache);
10414 }
10415
10416 ret = btrfs_add_block_group_cache(info, cache);
10417 if (ret) {
10418 btrfs_remove_free_space_cache(cache);
10419 btrfs_put_block_group(cache);
10420 goto error;
10421 }
10422
10423 trace_btrfs_add_block_group(info, cache, 0);
10424 update_space_info(info, cache->flags, found_key.offset,
10425 btrfs_block_group_used(&cache->item),
10426 cache->bytes_super, &space_info);
10427
10428 cache->space_info = space_info;
10429
10430 link_block_group(cache);
10431
10432 set_avail_alloc_bits(info, cache->flags);
10433 if (btrfs_chunk_readonly(info, cache->key.objectid)) {
10434 inc_block_group_ro(cache, 1);
10435 } else if (btrfs_block_group_used(&cache->item) == 0) {
10436 ASSERT(list_empty(&cache->bg_list));
10437 btrfs_mark_bg_unused(cache);
10438 }
10439 }
10440
10441 list_for_each_entry_rcu(space_info, &info->space_info, list) {
10442 if (!(get_alloc_profile(info, space_info->flags) &
10443 (BTRFS_BLOCK_GROUP_RAID10 |
10444 BTRFS_BLOCK_GROUP_RAID1 |
10445 BTRFS_BLOCK_GROUP_RAID5 |
10446 BTRFS_BLOCK_GROUP_RAID6 |
10447 BTRFS_BLOCK_GROUP_DUP)))
10448 continue;
10449
10450		/* avoid allocating from un-mirrored block groups if there
10451		 * are mirrored block groups.
10452		 */
10453 list_for_each_entry(cache,
10454 &space_info->block_groups[BTRFS_RAID_RAID0],
10455 list)
10456 inc_block_group_ro(cache, 1);
10457 list_for_each_entry(cache,
10458 &space_info->block_groups[BTRFS_RAID_SINGLE],
10459 list)
10460 inc_block_group_ro(cache, 1);
10461 }
10462
10463 btrfs_add_raid_kobjects(info);
10464 init_global_block_rsv(info);
10465 ret = check_chunk_block_group_mappings(info);
10466error:
10467 btrfs_free_path(path);
10468 return ret;
10469}
10470
10471void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
10472{
10473 struct btrfs_fs_info *fs_info = trans->fs_info;
10474 struct btrfs_block_group_cache *block_group;
10475 struct btrfs_root *extent_root = fs_info->extent_root;
10476 struct btrfs_block_group_item item;
10477 struct btrfs_key key;
10478 int ret = 0;
10479
10480 if (!trans->can_flush_pending_bgs)
10481 return;
10482
10483 while (!list_empty(&trans->new_bgs)) {
10484 block_group = list_first_entry(&trans->new_bgs,
10485 struct btrfs_block_group_cache,
10486 bg_list);
10487 if (ret)
10488 goto next;
10489
10490 spin_lock(&block_group->lock);
10491 memcpy(&item, &block_group->item, sizeof(item));
10492 memcpy(&key, &block_group->key, sizeof(key));
10493 spin_unlock(&block_group->lock);
10494
10495 ret = btrfs_insert_item(trans, extent_root, &key, &item,
10496 sizeof(item));
10497 if (ret)
10498 btrfs_abort_transaction(trans, ret);
10499 ret = btrfs_finish_chunk_alloc(trans, key.objectid, key.offset);
10500 if (ret)
10501 btrfs_abort_transaction(trans, ret);
10502 add_block_group_free_space(trans, block_group);
10503		/* already aborted the transaction if it failed. */
10504next:
10505 btrfs_delayed_refs_rsv_release(fs_info, 1);
10506 list_del_init(&block_group->bg_list);
10507 }
10508 btrfs_trans_release_chunk_metadata(trans);
10509}
10510
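/*
 * Create a new block group for a freshly allocated chunk.  The block
 * group item itself is inserted into the extent tree later, from
 * btrfs_create_pending_block_groups(), once chunk allocation completed.
 */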
10511int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10512 u64 type, u64 chunk_offset, u64 size)
10513{
10514 struct btrfs_fs_info *fs_info = trans->fs_info;
10515 struct btrfs_block_group_cache *cache;
10516 int ret;
10517
10518 btrfs_set_log_full_commit(fs_info, trans);
10519
10520 cache = btrfs_create_block_group_cache(fs_info, chunk_offset, size);
10521 if (!cache)
10522 return -ENOMEM;
10523
10524 btrfs_set_block_group_used(&cache->item, bytes_used);
10525 btrfs_set_block_group_chunk_objectid(&cache->item,
10526 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
10527 btrfs_set_block_group_flags(&cache->item, type);
10528
10529 cache->flags = type;
10530 cache->last_byte_to_unpin = (u64)-1;
10531 cache->cached = BTRFS_CACHE_FINISHED;
10532 cache->needs_free_space = 1;
10533 ret = exclude_super_stripes(cache);
10534 if (ret) {
10535		/*
10536		 * We may have excluded something, so call this just in
10537		 * case.
10538		 */
10539 free_excluded_extents(cache);
10540 btrfs_put_block_group(cache);
10541 return ret;
10542 }
10543
10544 add_new_free_space(cache, chunk_offset, chunk_offset + size);
10545
10546 free_excluded_extents(cache);
10547
10548#ifdef CONFIG_BTRFS_DEBUG
10549 if (btrfs_should_fragment_free_space(cache)) {
10550 u64 new_bytes_used = size - bytes_used;
10551
10552 bytes_used += new_bytes_used >> 1;
10553 fragment_free_space(cache);
10554 }
10555#endif
10556
10557	/*
10558	 * Ensure the corresponding space_info object is created and
10559	 * assigned to our block group before adding it to the rbtree.
10560	 */
10561 cache->space_info = __find_space_info(fs_info, cache->flags);
10562 ASSERT(cache->space_info);
10563
10564 ret = btrfs_add_block_group_cache(fs_info, cache);
10565 if (ret) {
10566 btrfs_remove_free_space_cache(cache);
10567 btrfs_put_block_group(cache);
10568 return ret;
10569 }
10570
10571	/*
10572	 * Now that our block group has its ->space_info set and is inserted
10573	 * in the rbtree, update the space info's counters.
10574	 */
10575 trace_btrfs_add_block_group(fs_info, cache, 1);
10576 update_space_info(fs_info, cache->flags, size, bytes_used,
10577 cache->bytes_super, &cache->space_info);
10578 update_global_block_rsv(fs_info);
10579
10580 link_block_group(cache);
10581
10582 list_add_tail(&cache->bg_list, &trans->new_bgs);
10583 trans->delayed_ref_updates++;
10584 btrfs_update_delayed_refs_rsv(trans);
10585
10586 set_avail_alloc_bits(fs_info, type);
10587 return 0;
10588}
10589
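/* Clear the avail alloc bits for a profile that lost its last block group. */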
10590static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10591{
10592 u64 extra_flags = chunk_to_extended(flags) &
10593 BTRFS_EXTENDED_PROFILE_MASK;
10594
10595 write_seqlock(&fs_info->profiles_lock);
10596 if (flags & BTRFS_BLOCK_GROUP_DATA)
10597 fs_info->avail_data_alloc_bits &= ~extra_flags;
10598 if (flags & BTRFS_BLOCK_GROUP_METADATA)
10599 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
10600 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
10601 fs_info->avail_system_alloc_bits &= ~extra_flags;
10602 write_sequnlock(&fs_info->profiles_lock);
10603}
10604
10605int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10606 u64 group_start, struct extent_map *em)
10607{
10608 struct btrfs_fs_info *fs_info = trans->fs_info;
10609 struct btrfs_root *root = fs_info->extent_root;
10610 struct btrfs_path *path;
10611 struct btrfs_block_group_cache *block_group;
10612 struct btrfs_free_cluster *cluster;
10613 struct btrfs_root *tree_root = fs_info->tree_root;
10614 struct btrfs_key key;
10615 struct inode *inode;
10616 struct kobject *kobj = NULL;
10617 int ret;
10618 int index;
10619 int factor;
10620 struct btrfs_caching_control *caching_ctl = NULL;
10621 bool remove_em;
10622 bool remove_rsv = false;
10623
10624 block_group = btrfs_lookup_block_group(fs_info, group_start);
10625 BUG_ON(!block_group);
10626 BUG_ON(!block_group->ro);
10627
10628 trace_btrfs_remove_block_group(block_group);
10629
10630	/*
10631	 * Free the reserved super bytes from this block group before removing it.
10632	 */
10633 free_excluded_extents(block_group);
10634 btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
10635 block_group->key.offset);
10636
10637 memcpy(&key, &block_group->key, sizeof(key));
10638 index = btrfs_bg_flags_to_raid_index(block_group->flags);
10639 factor = btrfs_bg_type_to_factor(block_group->flags);
10640
10641	/* make sure this block group isn't part of an allocation cluster */
10642 cluster = &fs_info->data_alloc_cluster;
10643 spin_lock(&cluster->refill_lock);
10644 btrfs_return_cluster_to_free_space(block_group, cluster);
10645 spin_unlock(&cluster->refill_lock);
10646
10647	/*
10648	 * make sure this block group isn't part of a metadata
10649	 * allocation cluster
10650	 */
10651 cluster = &fs_info->meta_alloc_cluster;
10652 spin_lock(&cluster->refill_lock);
10653 btrfs_return_cluster_to_free_space(block_group, cluster);
10654 spin_unlock(&cluster->refill_lock);
10655
10656 path = btrfs_alloc_path();
10657 if (!path) {
10658 ret = -ENOMEM;
10659 goto out;
10660 }
10661
10662	/*
10663	 * get the inode first so any iput calls done for the io_list
10664	 * aren't the final iput (no unlinks allowed now)
10665	 */
10666 inode = lookup_free_space_inode(fs_info, block_group, path);
10667
10668 mutex_lock(&trans->transaction->cache_write_mutex);
10669	/*
10670	 * Make sure our free space cache IO is done before removing the
10671	 * free space inode.
10672	 */
10673 spin_lock(&trans->transaction->dirty_bgs_lock);
10674 if (!list_empty(&block_group->io_list)) {
10675 list_del_init(&block_group->io_list);
10676
10677 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
10678
10679 spin_unlock(&trans->transaction->dirty_bgs_lock);
10680 btrfs_wait_cache_io(trans, block_group, path);
10681 btrfs_put_block_group(block_group);
10682 spin_lock(&trans->transaction->dirty_bgs_lock);
10683 }
10684
10685 if (!list_empty(&block_group->dirty_list)) {
10686 list_del_init(&block_group->dirty_list);
10687 remove_rsv = true;
10688 btrfs_put_block_group(block_group);
10689 }
10690 spin_unlock(&trans->transaction->dirty_bgs_lock);
10691 mutex_unlock(&trans->transaction->cache_write_mutex);
10692
10693 if (!IS_ERR(inode)) {
10694 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
10695 if (ret) {
10696 btrfs_add_delayed_iput(inode);
10697 goto out;
10698 }
10699 clear_nlink(inode);
10700		/* One for the block groups ref */
10701 spin_lock(&block_group->lock);
10702 if (block_group->iref) {
10703 block_group->iref = 0;
10704 block_group->inode = NULL;
10705 spin_unlock(&block_group->lock);
10706 iput(inode);
10707 } else {
10708 spin_unlock(&block_group->lock);
10709 }
10710		/* One for our lookup ref */
10711 btrfs_add_delayed_iput(inode);
10712 }
10713
10714 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
10715 key.offset = block_group->key.objectid;
10716 key.type = 0;
10717
10718 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
10719 if (ret < 0)
10720 goto out;
10721 if (ret > 0)
10722 btrfs_release_path(path);
10723 if (ret == 0) {
10724 ret = btrfs_del_item(trans, tree_root, path);
10725 if (ret)
10726 goto out;
10727 btrfs_release_path(path);
10728 }
10729
10730 spin_lock(&fs_info->block_group_cache_lock);
10731 rb_erase(&block_group->cache_node,
10732 &fs_info->block_group_cache_tree);
10733 RB_CLEAR_NODE(&block_group->cache_node);
10734
10735 if (fs_info->first_logical_byte == block_group->key.objectid)
10736 fs_info->first_logical_byte = (u64)-1;
10737 spin_unlock(&fs_info->block_group_cache_lock);
10738
10739 down_write(&block_group->space_info->groups_sem);
10740	/*
10741	 * we must use list_del_init so people can check to see if they
10742	 * are still on the list after taking the semaphore
10743	 */
10744 list_del_init(&block_group->list);
10745 if (list_empty(&block_group->space_info->block_groups[index])) {
10746 kobj = block_group->space_info->block_group_kobjs[index];
10747 block_group->space_info->block_group_kobjs[index] = NULL;
10748 clear_avail_alloc_bits(fs_info, block_group->flags);
10749 }
10750 up_write(&block_group->space_info->groups_sem);
10751 if (kobj) {
10752 kobject_del(kobj);
10753 kobject_put(kobj);
10754 }
10755
10756 if (block_group->has_caching_ctl)
10757 caching_ctl = get_caching_control(block_group);
10758 if (block_group->cached == BTRFS_CACHE_STARTED)
10759 wait_block_group_cache_done(block_group);
10760 if (block_group->has_caching_ctl) {
10761 down_write(&fs_info->commit_root_sem);
10762 if (!caching_ctl) {
10763 struct btrfs_caching_control *ctl;
10764
10765 list_for_each_entry(ctl,
10766 &fs_info->caching_block_groups, list)
10767 if (ctl->block_group == block_group) {
10768 caching_ctl = ctl;
10769 refcount_inc(&caching_ctl->count);
10770 break;
10771 }
10772 }
10773 if (caching_ctl)
10774 list_del_init(&caching_ctl->list);
10775 up_write(&fs_info->commit_root_sem);
10776 if (caching_ctl) {
10777			/* Once for the caching bgs list and once for us. */
10778 put_caching_control(caching_ctl);
10779 put_caching_control(caching_ctl);
10780 }
10781 }
10782
10783 spin_lock(&trans->transaction->dirty_bgs_lock);
10784	WARN_ON(!list_empty(&block_group->dirty_list));
10787	WARN_ON(!list_empty(&block_group->io_list));
10790 spin_unlock(&trans->transaction->dirty_bgs_lock);
10791 btrfs_remove_free_space_cache(block_group);
10792
10793 spin_lock(&block_group->space_info->lock);
10794 list_del_init(&block_group->ro_list);
10795
10796 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
10797 WARN_ON(block_group->space_info->total_bytes
10798 < block_group->key.offset);
10799 WARN_ON(block_group->space_info->bytes_readonly
10800 < block_group->key.offset);
10801 WARN_ON(block_group->space_info->disk_total
10802 < block_group->key.offset * factor);
10803 }
10804 block_group->space_info->total_bytes -= block_group->key.offset;
10805 block_group->space_info->bytes_readonly -= block_group->key.offset;
10806 block_group->space_info->disk_total -= block_group->key.offset * factor;
10807
10808 spin_unlock(&block_group->space_info->lock);
10809
10810 memcpy(&key, &block_group->key, sizeof(key));
10811
10812 mutex_lock(&fs_info->chunk_mutex);
10813 if (!list_empty(&em->list)) {
10814		/* We're in the transaction->pending_chunks list. */
10815 free_extent_map(em);
10816 }
10817 spin_lock(&block_group->lock);
10818 block_group->removed = 1;
10819	/*
10820	 * At this point trimming can't start on this block group, because we
10821	 * removed the block group from the fs_info->block_group_cache_tree
10822	 * rbtree, so no one can find it anymore, and even if someone already
10823	 * got this block group before we removed it from the rbtree, they have
10824	 * already incremented block_group->trimming - if they didn't, they
10825	 * won't find any free space entries, because we already removed them
10826	 * all when we called btrfs_remove_free_space_cache().
10827	 *
10828	 * And we must not remove the extent map from the fs_info->mapping_tree
10829	 * to prevent the same logical address range and physical device space
10830	 * ranges from being reused for a new block group.  This is because our
10831	 * fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
10832	 * completely transactionless, so while it is trimming a range the
10833	 * currently running transaction might finish and a new one start,
10834	 * allowing new block groups to be created that could reuse the same
10835	 * physical device locations unless we take this special care.
10836	 *
10837	 * There may also be an implicit trim operation if the file system is
10838	 * mounted with -odiscard.  The same protections must remain in place
10839	 * until the extents have been discarded completely, when the
10840	 * transaction commit has completed.
10841	 */
10842 remove_em = (atomic_read(&block_group->trimming) == 0);
10843	/*
10844	 * Make sure a trimmer task always sees the em in the pinned_chunks
10845	 * list if it sees block_group->removed == 1 (needs to lock
10846	 * block_group->lock before checking block_group->removed).
10847	 */
10848 if (!remove_em) {
10849		/*
10850		 * A trim task is still running for this block group
10851		 * (block_group->trimming != 0), so we can't drop the extent
10852		 * map yet.  Move it to the pinned_chunks list instead, which
10853		 * keeps the chunk's logical and physical ranges from being
10854		 * reused until the last trimming reference is dropped and
10855		 * the map is finally removed.
10856		 */
10860 list_move_tail(&em->list, &fs_info->pinned_chunks);
10861 }
10862 spin_unlock(&block_group->lock);
10863
10864 if (remove_em) {
10865 struct extent_map_tree *em_tree;
10866
10867 em_tree = &fs_info->mapping_tree.map_tree;
10868 write_lock(&em_tree->lock);
10869		/*
10870		 * The em might be in the pending_chunks list, so make sure
10871		 * the chunk mutex is locked, since remove_extent_mapping()
10872		 * will delete us from that list.
10873		 */
10874 remove_extent_mapping(em_tree, em);
10875 write_unlock(&em_tree->lock);
10876		/* once for the tree */
10877 free_extent_map(em);
10878 }
10879
10880 mutex_unlock(&fs_info->chunk_mutex);
10881
10882 ret = remove_block_group_free_space(trans, block_group);
10883 if (ret)
10884 goto out;
10885
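	/* Once for the lookup done at the top, once for the rbtree's original ref. */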
10886 btrfs_put_block_group(block_group);
10887 btrfs_put_block_group(block_group);
10888
10889 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10890 if (ret > 0)
10891 ret = -EIO;
10892 if (ret < 0)
10893 goto out;
10894
10895 ret = btrfs_del_item(trans, root, path);
10896out:
10897 if (remove_rsv)
10898 btrfs_delayed_refs_rsv_release(fs_info, 1);
10899 btrfs_free_path(path);
10900 return ret;
10901}
10902
10903struct btrfs_trans_handle *
10904btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10905 const u64 chunk_offset)
10906{
10907 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
10908 struct extent_map *em;
10909 struct map_lookup *map;
10910 unsigned int num_items;
10911
10912 read_lock(&em_tree->lock);
10913 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
10914 read_unlock(&em_tree->lock);
10915 ASSERT(em && em->start == chunk_offset);
10916
10917	/*
10918	 * We need to reserve 3 + N units from the metadata space info in
10919	 * order to remove a block group (done at btrfs_remove_chunk() and
10920	 * at btrfs_remove_block_group()), which are used for:
10921	 *
10922	 * 1 unit for adding the free space inode's orphan (located in the
10923	 * tree of tree roots).
10924	 * 1 unit for deleting the block group item (located in the extent
10925	 * tree).
10926	 * 1 unit for deleting the free space item (located in the tree of
10927	 * tree roots).
10928	 * N units for deleting N device extent items corresponding to each
10929	 * stripe (located in the device tree).
10930	 *
10931	 * In order to remove a block group we also need to reserve units in
10932	 * the system space info in order to update the chunk tree (update
10933	 * one or more device items and remove one chunk item), but this is
10934	 * done at btrfs_remove_chunk() via check_system_chunk().
10935	 */
10936 map = em->map_lookup;
10937 num_items = 3 + map->num_stripes;
10938 free_extent_map(em);
10939
10940 return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
10941 num_items, 1);
10942}
10943
10944/*
10945 * Process the unused_bgs list and remove any that don't have any allocated
10946 * space inside of them.
10947 */
10948void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
10949{
10950 struct btrfs_block_group_cache *block_group;
10951 struct btrfs_space_info *space_info;
10952 struct btrfs_trans_handle *trans;
10953 int ret = 0;
10954
10955 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
10956 return;
10957
10958 spin_lock(&fs_info->unused_bgs_lock);
10959 while (!list_empty(&fs_info->unused_bgs)) {
10960 u64 start, end;
10961 int trimming;
10962
10963 block_group = list_first_entry(&fs_info->unused_bgs,
10964 struct btrfs_block_group_cache,
10965 bg_list);
10966 list_del_init(&block_group->bg_list);
10967
10968 space_info = block_group->space_info;
10969
10970 if (ret || btrfs_mixed_space_info(space_info)) {
10971 btrfs_put_block_group(block_group);
10972 continue;
10973 }
10974 spin_unlock(&fs_info->unused_bgs_lock);
10975
10976 mutex_lock(&fs_info->delete_unused_bgs_mutex);
10977
10978		/* Don't want to race with allocators so take the groups_sem */
10979 down_write(&space_info->groups_sem);
10980 spin_lock(&block_group->lock);
10981 if (block_group->reserved || block_group->pinned ||
10982 btrfs_block_group_used(&block_group->item) ||
10983 block_group->ro ||
10984 list_is_singular(&block_group->list)) {
10985			/*
10986			 * We want to bail if we made new allocations or have
10987			 * outstanding allocations in this block group.  We do
10988			 * the ro check in case balance is currently acting on
10989			 * this block group.
10990			 */
10991 trace_btrfs_skip_unused_block_group(block_group);
10992 spin_unlock(&block_group->lock);
10993 up_write(&space_info->groups_sem);
10994 goto next;
10995 }
10996 spin_unlock(&block_group->lock);
10997
10998		/* We don't want to force the issue, only flip if it's ok. */
10999 ret = inc_block_group_ro(block_group, 0);
11000 up_write(&space_info->groups_sem);
11001 if (ret < 0) {
11002 ret = 0;
11003 goto next;
11004 }
11005
11006		/*
11007		 * Want to do this before we do anything else so we can
11008		 * recover properly if we fail to join the transaction.
11009		 */
11010 trans = btrfs_start_trans_remove_block_group(fs_info,
11011 block_group->key.objectid);
11012 if (IS_ERR(trans)) {
11013 btrfs_dec_block_group_ro(block_group);
11014 ret = PTR_ERR(trans);
11015 goto next;
11016 }
11017
11018		/*
11019		 * We could have pending pinned extents for this block group,
11020		 * just delete them, we already have our pending delete done.
11021		 */
11022 start = block_group->key.objectid;
11023 end = start + block_group->key.offset - 1;
11024
11025		/*
11026		 * Hold the unused_bg_unpin_mutex lock to avoid racing with
11027		 * btrfs_finish_extent_commit().  If we are at transaction N,
11028		 * another task might be running finish_extent_commit() for
11029		 * the previous transaction N - 1, and we want neither task
11030		 * to see a half-cleared state: either the pinned ranges for
11031		 * this block group are fully unpinned there, or they are
11032		 * fully cleared here, so the space accounting stays
11033		 * consistent.
11034		 */
11035 mutex_lock(&fs_info->unused_bg_unpin_mutex);
11036 ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
11037 EXTENT_DIRTY);
11038 if (ret) {
11039 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
11040 btrfs_dec_block_group_ro(block_group);
11041 goto end_trans;
11042 }
11043 ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
11044 EXTENT_DIRTY);
11045 if (ret) {
11046 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
11047 btrfs_dec_block_group_ro(block_group);
11048 goto end_trans;
11049 }
11050 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
11051
11052		/* Reset pinned so btrfs_put_block_group() doesn't complain */
11053 spin_lock(&space_info->lock);
11054 spin_lock(&block_group->lock);
11055
11056 update_bytes_pinned(space_info, -block_group->pinned);
11057 space_info->bytes_readonly += block_group->pinned;
11058 percpu_counter_add_batch(&space_info->total_bytes_pinned,
11059 -block_group->pinned,
11060 BTRFS_TOTAL_BYTES_PINNED_BATCH);
11061 block_group->pinned = 0;
11062
11063 spin_unlock(&block_group->lock);
11064 spin_unlock(&space_info->lock);
11065
11066		/* DISCARD can flip during remount */
11067 trimming = btrfs_test_opt(fs_info, DISCARD);
11068
11069		/* Implicit trim during transaction commit. */
11070 if (trimming)
11071 btrfs_get_block_group_trimming(block_group);
11072
11073		/*
11074		 * btrfs_remove_chunk() will abort the transaction if things
11075		 * go horribly wrong.
11076		 */
11077 ret = btrfs_remove_chunk(trans, block_group->key.objectid);
11078
11079 if (ret) {
11080 if (trimming)
11081 btrfs_put_block_group_trimming(block_group);
11082 goto end_trans;
11083 }
11084
11085
11086		/*
11087		 * If mounted with -odiscard the actual discard waits for
11088		 * transaction commit; otherwise forget this group now.
11089		 */
11090 if (trimming) {
11091 spin_lock(&fs_info->unused_bgs_lock);
11092			/*
11093			 * A concurrent scrub might have added us to the list
11094			 * fs_info->unused_bgs, so use a list_move operation
11095			 * to add the block group to the deleted_bgs list.
11096			 */
11097 list_move(&block_group->bg_list,
11098 &trans->transaction->deleted_bgs);
11099 spin_unlock(&fs_info->unused_bgs_lock);
11100 btrfs_get_block_group(block_group);
11101 }
11102end_trans:
11103 btrfs_end_transaction(trans);
11104next:
11105 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
11106 btrfs_put_block_group(block_group);
11107 spin_lock(&fs_info->unused_bgs_lock);
11108 }
11109 spin_unlock(&fs_info->unused_bgs_lock);
11110}
11111
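/*
 * Create the initial space_info structures: system, plus either separate
 * metadata and data ones, or a single combined one on filesystems with
 * the mixed block groups feature.
 */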
11112int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
11113{
11114 struct btrfs_super_block *disk_super;
11115 u64 features;
11116 u64 flags;
11117 int mixed = 0;
11118 int ret;
11119
11120 disk_super = fs_info->super_copy;
11121 if (!btrfs_super_root(disk_super))
11122 return -EINVAL;
11123
11124 features = btrfs_super_incompat_flags(disk_super);
11125 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
11126 mixed = 1;
11127
11128 flags = BTRFS_BLOCK_GROUP_SYSTEM;
11129 ret = create_space_info(fs_info, flags);
11130 if (ret)
11131 goto out;
11132
11133 if (mixed) {
11134 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
11135 ret = create_space_info(fs_info, flags);
11136 } else {
11137 flags = BTRFS_BLOCK_GROUP_METADATA;
11138 ret = create_space_info(fs_info, flags);
11139 if (ret)
11140 goto out;
11141
11142 flags = BTRFS_BLOCK_GROUP_DATA;
11143 ret = create_space_info(fs_info, flags);
11144 }
11145out:
11146 return ret;
11147}
11148
11149int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
11150 u64 start, u64 end)
11151{
11152 return unpin_extent_range(fs_info, start, end, false);
11153}
11154
11155/*
11156 * It used to be that old block groups would be left around forever.
11157 * Iterating over them would be enough to trim unused space.  Since we
11158 * now automatically remove them, we also need to iterate over unallocated
11159 * space.
11160 *
11161 * We don't want a transaction for this since the discard may take a
11162 * substantial amount of time.  We don't require that a transaction be
11163 * running, but we do need to take a running transaction into account
11164 * to ensure that we're not discarding chunks that were released or
11165 * allocated in the current transaction.
11166 *
11167 * Holding the chunks lock will prevent other threads from allocating
11168 * or releasing chunks, but it won't prevent a running transaction
11169 * from committing and releasing the memory that the pending chunks
11170 * list head uses.  For that, we need to take a reference to the
11171 * transaction and hold the commit root sem.  We only need to hold
11172 * it while performing the free space search since we have already
11173 * held back allocations.
11174 */
11175static int btrfs_trim_free_extents(struct btrfs_device *device,
11176 u64 minlen, u64 *trimmed)
11177{
11178 u64 start = 0, len = 0;
11179 int ret;
11180
11181 *trimmed = 0;
11182
11183	/* Discard not supported = nothing to do. */
11184 if (!blk_queue_discard(bdev_get_queue(device->bdev)))
11185 return 0;
11186
11187	/* Not writeable = nothing to do. */
11188 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
11189 return 0;
11190
11191	/* No free space = nothing to do. */
11192 if (device->total_bytes <= device->bytes_used)
11193 return 0;
11194
11195 ret = 0;
11196
11197 while (1) {
11198 struct btrfs_fs_info *fs_info = device->fs_info;
11199 struct btrfs_transaction *trans;
11200 u64 bytes;
11201
11202 ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
11203 if (ret)
11204 break;
11205
11206 ret = down_read_killable(&fs_info->commit_root_sem);
11207 if (ret) {
11208 mutex_unlock(&fs_info->chunk_mutex);
11209 break;
11210 }
11211
11212 spin_lock(&fs_info->trans_lock);
11213 trans = fs_info->running_transaction;
11214 if (trans)
11215 refcount_inc(&trans->use_count);
11216 spin_unlock(&fs_info->trans_lock);
11217
11218 if (!trans)
11219 up_read(&fs_info->commit_root_sem);
11220
11221 ret = find_free_dev_extent_start(trans, device, minlen, start,
11222 &start, &len);
11223 if (trans) {
11224 up_read(&fs_info->commit_root_sem);
11225 btrfs_put_transaction(trans);
11226 }
11227
11228 if (ret) {
11229 mutex_unlock(&fs_info->chunk_mutex);
11230 if (ret == -ENOSPC)
11231 ret = 0;
11232 break;
11233 }
11234
11235 ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
11236 mutex_unlock(&fs_info->chunk_mutex);
11237
11238 if (ret)
11239 break;
11240
11241 start += len;
11242 *trimmed += bytes;
11243
11244 if (fatal_signal_pending(current)) {
11245 ret = -ERESTARTSYS;
11246 break;
11247 }
11248
11249 cond_resched();
11250 }
11251
11252 return ret;
11253}
11254
11255/*
11256 * Trim the whole filesystem by:
11257 * 1) trimming the free space in each block group
11258 * 2) trimming the unallocated space on each device
11259 *
11260 * This will also continue trimming even if a block group or device
11261 * encounters an error.  The return value will be the last error, or 0
11262 * if nothing bad happened.
11263 */
11264int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
11265{
11266 struct btrfs_block_group_cache *cache = NULL;
11267 struct btrfs_device *device;
11268 struct list_head *devices;
11269 u64 group_trimmed;
11270 u64 start;
11271 u64 end;
11272 u64 trimmed = 0;
11273 u64 bg_failed = 0;
11274 u64 dev_failed = 0;
11275 int bg_ret = 0;
11276 int dev_ret = 0;
11277 int ret = 0;
11278
11279 cache = btrfs_lookup_first_block_group(fs_info, range->start);
11280 for (; cache; cache = next_block_group(fs_info, cache)) {
11281 if (cache->key.objectid >= (range->start + range->len)) {
11282 btrfs_put_block_group(cache);
11283 break;
11284 }
11285
11286 start = max(range->start, cache->key.objectid);
11287 end = min(range->start + range->len,
11288 cache->key.objectid + cache->key.offset);
11289
11290 if (end - start >= range->minlen) {
11291 if (!block_group_cache_done(cache)) {
11292 ret = cache_block_group(cache, 0);
11293 if (ret) {
11294 bg_failed++;
11295 bg_ret = ret;
11296 continue;
11297 }
11298 ret = wait_block_group_cache_done(cache);
11299 if (ret) {
11300 bg_failed++;
11301 bg_ret = ret;
11302 continue;
11303 }
11304 }
11305 ret = btrfs_trim_block_group(cache,
11306 &group_trimmed,
11307 start,
11308 end,
11309 range->minlen);
11310
11311 trimmed += group_trimmed;
11312 if (ret) {
11313 bg_failed++;
11314 bg_ret = ret;
11315 continue;
11316 }
11317 }
11318 }
11319
11320 if (bg_failed)
11321 btrfs_warn(fs_info,
11322 "failed to trim %llu block group(s), last error %d",
11323 bg_failed, bg_ret);
11324 mutex_lock(&fs_info->fs_devices->device_list_mutex);
11325 devices = &fs_info->fs_devices->devices;
11326 list_for_each_entry(device, devices, dev_list) {
11327 ret = btrfs_trim_free_extents(device, range->minlen,
11328 &group_trimmed);
11329 if (ret) {
11330 dev_failed++;
11331 dev_ret = ret;
11332 break;
11333 }
11334
11335 trimmed += group_trimmed;
11336 }
11337 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
11338
11339 if (dev_failed)
11340 btrfs_warn(fs_info,
11341 "failed to trim %llu device(s), last error %d",
11342 dev_failed, dev_ret);
11343 range->len = trimmed;
11344 if (bg_ret)
11345 return bg_ret;
11346 return dev_ret;
11347}
11348
11349
11350/*
11351 * btrfs_{start,end}_write_no_snapshotting() are similar to
11352 * mnt_{want,drop}_write(); they are used to prevent some tasks from
11353 * writing data into the page cache via nocow before the subvolume is
11354 * snapshotted (flushing it to disk after the snapshot creation), or to
11355 * prevent nocow writes while the subvolume is being snapshotted.
11356 */
11357void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
11358{
11359 percpu_counter_dec(&root->subv_writers->counter);
11360 cond_wake_up(&root->subv_writers->wait);
11361}
11362
11363int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
11364{
11365 if (atomic_read(&root->will_be_snapshotted))
11366 return 0;
11367
11368 percpu_counter_inc(&root->subv_writers->counter);
11369	/*
11370	 * Make sure counter is updated before we check for snapshot creation.
11371	 */
11372 smp_mb();
11373 if (atomic_read(&root->will_be_snapshotted)) {
11374 btrfs_end_write_no_snapshotting(root);
11375 return 0;
11376 }
11377 return 1;
11378}
11379
11380void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
11381{
11382 while (true) {
11383 int ret;
11384
11385 ret = btrfs_start_write_no_snapshotting(root);
11386 if (ret)
11387 break;
11388 wait_var_event(&root->will_be_snapshotted,
11389 !atomic_read(&root->will_be_snapshotted));
11390 }
11391}
11392
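/*
 * Queue a now-empty block group on fs_info->unused_bgs so the cleaner
 * thread can remove it via btrfs_delete_unused_bgs().  Takes an extra
 * reference that is dropped once the block group is processed.
 */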
11393void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
11394{
11395 struct btrfs_fs_info *fs_info = bg->fs_info;
11396
11397 spin_lock(&fs_info->unused_bgs_lock);
11398 if (list_empty(&bg->bg_list)) {
11399 btrfs_get_block_group(bg);
11400 trace_btrfs_add_unused_block_group(bg);
11401 list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
11402 }
11403 spin_unlock(&fs_info->unused_bgs_lock);
11404}
11405