1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/sched.h>
19#include <linux/pagemap.h>
20#include <linux/writeback.h>
21#include <linux/blkdev.h>
22#include <linux/sort.h>
23#include <linux/rcupdate.h>
24#include <linux/kthread.h>
25#include <linux/slab.h>
26#include <linux/ratelimit.h>
27#include <linux/percpu_counter.h>
28#include "compat.h"
29#include "hash.h"
30#include "ctree.h"
31#include "disk-io.h"
32#include "print-tree.h"
33#include "transaction.h"
34#include "volumes.h"
35#include "raid56.h"
36#include "locking.h"
37#include "free-space-cache.h"
38#include "math.h"
39
40#undef SCRAMBLE_DELAYED_REFS
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/*
 * How hard do_chunk_alloc() should try to allocate a new chunk.
 * NOTE(review): do_chunk_alloc()'s body is not visible in this chunk;
 * presumably aggressiveness increases NO_FORCE -> LIMITED -> FORCE —
 * confirm against its implementation.
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_LIMITED = 1,
	CHUNK_ALLOC_FORCE = 2,
};
61
62
63
64
65
66
67
68
69
70
/*
 * Values passed as the 'reserve' argument of
 * btrfs_update_reserved_bytes() (declared above).
 * NOTE(review): exact accounting semantics of each value live in that
 * function's body, which is outside this chunk — verify there.
 */
enum {
	RESERVE_FREE = 0,
	RESERVE_ALLOC = 1,
	RESERVE_ALLOC_NO_ACCOUNT = 2,
};
76
77static int update_block_group(struct btrfs_root *root,
78 u64 bytenr, u64 num_bytes, int alloc);
79static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
80 struct btrfs_root *root,
81 u64 bytenr, u64 num_bytes, u64 parent,
82 u64 root_objectid, u64 owner_objectid,
83 u64 owner_offset, int refs_to_drop,
84 struct btrfs_delayed_extent_op *extra_op);
85static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
86 struct extent_buffer *leaf,
87 struct btrfs_extent_item *ei);
88static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
89 struct btrfs_root *root,
90 u64 parent, u64 root_objectid,
91 u64 flags, u64 owner, u64 offset,
92 struct btrfs_key *ins, int ref_mod);
93static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
94 struct btrfs_root *root,
95 u64 parent, u64 root_objectid,
96 u64 flags, struct btrfs_disk_key *key,
97 int level, struct btrfs_key *ins);
98static int do_chunk_alloc(struct btrfs_trans_handle *trans,
99 struct btrfs_root *extent_root, u64 flags,
100 int force);
101static int find_next_key(struct btrfs_path *path, int level,
102 struct btrfs_key *key);
103static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
104 int dump_block_groups);
105static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
106 u64 num_bytes, int reserve);
107static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
108 u64 num_bytes);
109int btrfs_pin_extent(struct btrfs_root *root,
110 u64 bytenr, u64 num_bytes, int reserved);
111
/*
 * Return non-zero once free-space caching of this block group has
 * finished (cache->cached == BTRFS_CACHE_FINISHED).  The full barrier
 * keeps the ->cached load from being reordered with the caller's
 * earlier accesses.
 */
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED;
}
118
/* Return non-zero iff every flag bit in @bits is set in cache->flags. */
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}
123
/* Take a reference on a block group; drop with btrfs_put_block_group(). */
static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}
128
/*
 * Drop a reference on a block group, freeing it (and its free space
 * ctl) when the last reference goes away.  At that point nothing may
 * still be pinned or reserved in the group, hence the WARN_ONs.
 */
void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}
138
139
140
141
142
/*
 * Insert a block group into the fs_info rbtree, which is keyed by the
 * group's starting byte offset (key.objectid).  Returns -EEXIST if a
 * group with the same start is already present.  Also keeps
 * info->first_logical_byte tracking the lowest group start seen.
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			/* duplicate start offset: refuse the insert */
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);

	if (info->first_logical_byte > block_group->key.objectid)
		info->first_logical_byte = block_group->key.objectid;

	spin_unlock(&info->block_group_cache_lock);

	return 0;
}
178
179
180
181
182
/*
 * Look up the block group relevant to @bytenr in the rbtree.  When
 * @contains is set, only a group whose [start, end] range includes
 * @bytenr is returned; otherwise the group containing @bytenr or,
 * failing that, the lowest-starting group above @bytenr is returned.
 * A reference is taken on the result; the caller must drop it with
 * btrfs_put_block_group().  NULL means no suitable group.
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			/* remember the lowest group starting above bytenr */
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			/* exact match on the group's start offset */
			ret = cache;
			break;
		}
	}
	if (ret) {
		btrfs_get_block_group(ret);
		/* a lookup at 0 can reveal a new lowest group start */
		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
			info->first_logical_byte = ret->key.objectid;
	}
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}
224
/*
 * Mark [start, start + num_bytes) EXTENT_UPTODATE in both
 * freed_extents trees so the range is treated as excluded.
 * Always returns 0.
 */
static int add_excluded_extent(struct btrfs_root *root,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&root->fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	set_extent_bits(&root->fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	return 0;
}
235
/*
 * Clear the EXTENT_UPTODATE "excluded" markers (set by
 * add_excluded_extent()) over the whole range of @cache in both
 * freed_extents trees.
 */
static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
}
249
/*
 * Account the portions of this block group occupied by superblock
 * mirror stripes: add their length to cache->bytes_super and mark the
 * ranges excluded via add_excluded_extent().  Groups that begin below
 * BTRFS_SUPER_INFO_OFFSET additionally exclude everything up to that
 * offset.  Returns 0 or a negative error from btrfs_rmap_block()/
 * add_excluded_extent().
 */
static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			/*
			 * Skip stripes entirely outside the group.
			 * NOTE(review): a stripe starting exactly at
			 * group end passes this '>' test; the min_t()
			 * below then yields len == 0 — looks harmless
			 * but confirm add_excluded_extent(start, 0) is
			 * benign.
			 */
			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			/* clamp the stripe to the group's range */
			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}
307
/*
 * Return the caching control of @cache with a reference held, or NULL
 * when no async caching is in progress (state is not CACHE_STARTED,
 * or no ctl is attached).  Release with put_caching_control().
 */
static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_STARTED) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	/* caching may have been kicked off without a ctl attached yet */
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	atomic_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}
330
/* Drop a reference on a caching control, freeing it on the last put. */
static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (atomic_dec_and_test(&ctl->count))
		kfree(ctl);
}
336
337
338
339
340
341
/*
 * Add the parts of [start, end) that are not marked in the
 * pinned_extents tree (EXTENT_DIRTY | EXTENT_UPTODATE) to the block
 * group's free space.  Returns the total number of bytes added.
 * btrfs_add_free_space() failure is fatal here (BUG_ON).
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE,
					    NULL);
		if (ret)
			break;

		if (extent_start <= start) {
			/* start sits inside a pinned extent; skip past it */
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			/* free gap before the next pinned extent */
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	/* tail of the range past the last pinned extent */
	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}
379
/*
 * Worker that populates a block group's free space cache by walking
 * the extent tree (commit root) and feeding the gaps between extents
 * to add_new_free_space().  On completion the group is marked
 * BTRFS_CACHE_FINISHED and waiters on caching_ctl->wait are woken.
 */
static noinline void caching_thread(struct btrfs_work *work)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_root *extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret = 0;

	caching_ctl = container_of(work, struct btrfs_caching_control, work);
	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	extent_root = fs_info->extent_root;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	/* never scan below the superblock area */
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	/*
	 * Search the commit root without tree locks: we hold
	 * extent_commit_sem for read while walking, so the commit root
	 * cannot change underneath us.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 1;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	mutex_lock(&caching_ctl->mutex);

	down_read(&fs_info->extent_commit_sem);

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		/* bail out early if the fs is going down */
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			/*
			 * Drop everything and restart the search at
			 * 'last' if we need to reschedule, so we don't
			 * hog the locks/semaphore.
			 */
			if (need_resched()) {
				caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->extent_commit_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				goto again;
			}

			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto err;
			if (ret)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		/* skip items that start before this block group */
		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		/* done once we walk past the end of the group */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			/* the gap [last, key.objectid) is free space */
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				/* skinny metadata items carry no length */
				last = key.objectid +
					fs_info->tree_root->leafsize;
			else
				last = key.objectid + key.offset;

			/* wake allocators waiting on partial progress */
			if (total_found > (1024 * 1024 * 2)) {
				total_found = 0;
				wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	/* free space between the last extent and the end of the group */
	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->extent_commit_sem);

	free_excluded_extents(extent_root, block_group);

	mutex_unlock(&caching_ctl->mutex);
out:
	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	btrfs_put_block_group(block_group);
}
514
/*
 * Begin caching a block group's free space.  First try to load the
 * on-disk free space cache (the "fast" path); if that fails and
 * @load_cache_only is clear, queue caching_thread() to build the
 * cache by scanning the extent tree.  Returns 0 or -ENOMEM, or the
 * return of btrfs_queue_worker()'s path via 'ret'.
 */
static int cache_block_group(struct btrfs_block_group_cache *cache,
			     int load_cache_only)
{
	DEFINE_WAIT(wait);
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	int ret = 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	if (!caching_ctl)
		return -ENOMEM;

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	atomic_set(&caching_ctl->count, 1);
	caching_ctl->work.func = caching_thread;

	spin_lock(&cache->lock);
	/*
	 * Another task is currently in the CACHE_FAST (load-from-disk)
	 * stage.  Wait for it to settle into a terminal state before
	 * deciding what to do; we take a ref on its ctl so it can't be
	 * freed while we sleep, and re-take cache->lock afterwards to
	 * re-check the state.
	 */
	while (cache->cached == BTRFS_CACHE_FAST) {
		struct btrfs_caching_control *ctl;

		ctl = cache->caching_ctl;
		atomic_inc(&ctl->count);
		prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&cache->lock);

		schedule();

		finish_wait(&ctl->wait, &wait);
		put_caching_control(ctl);
		spin_lock(&cache->lock);
	}

	/* someone else already cached (or is caching) this group */
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	WARN_ON(cache->caching_ctl);
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_FAST;
	spin_unlock(&cache->lock);

	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			/* on-disk cache loaded successfully — done */
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
		} else {
			if (load_cache_only) {
				/* give up for now; leave it uncached */
				cache->caching_ctl = NULL;
				cache->cached = BTRFS_CACHE_NO;
			} else {
				/* fall back to the scanning thread */
				cache->cached = BTRFS_CACHE_STARTED;
			}
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
		if (ret == 1) {
			put_caching_control(caching_ctl);
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	} else {
		/*
		 * Space cache disabled by mount options: go straight to
		 * the scanning path (or back to CACHE_NO when only a
		 * cache load was requested).
		 */
		spin_lock(&cache->lock);
		if (load_cache_only) {
			cache->caching_ctl = NULL;
			cache->cached = BTRFS_CACHE_NO;
		} else {
			cache->cached = BTRFS_CACHE_STARTED;
		}
		spin_unlock(&cache->lock);
		wake_up(&caching_ctl->wait);
	}

	if (load_cache_only) {
		put_caching_control(caching_ctl);
		return 0;
	}

	/* publish the ctl so commit code can find in-flight caching */
	down_write(&fs_info->extent_commit_sem);
	atomic_inc(&caching_ctl->count);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->extent_commit_sem);

	/* the worker drops this block group ref when it finishes */
	btrfs_get_block_group(cache);

	btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);

	return ret;
}
628
629
630
631
632static struct btrfs_block_group_cache *
633btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
634{
635 struct btrfs_block_group_cache *cache;
636
637 cache = block_group_cache_tree_search(info, bytenr, 0);
638
639 return cache;
640}
641
642
643
644
645struct btrfs_block_group_cache *btrfs_lookup_block_group(
646 struct btrfs_fs_info *info,
647 u64 bytenr)
648{
649 struct btrfs_block_group_cache *cache;
650
651 cache = block_group_cache_tree_search(info, bytenr, 1);
652
653 return cache;
654}
655
/*
 * Find the space_info whose flags share any block-group-type bit with
 * @flags (after masking to BTRFS_BLOCK_GROUP_TYPE_MASK).  Walks the
 * RCU-protected space_info list; returns NULL when nothing matches.
 */
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}
674
675
676
677
678
/*
 * Clear the 'full' flag on every space_info so allocation attempts
 * will be retried (e.g. after more space becomes available).
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}
689
690
/*
 * Check whether an extent item for [start, start + len) exists in the
 * extent tree.  Returns 0 if found (either as an EXTENT_ITEM, or as a
 * skinny METADATA_ITEM with the same objectid), > 0 if not found, or
 * a negative error.
 */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	if (ret > 0) {
		/*
		 * NOTE(review): when ret > 0, path->slots[0] can in
		 * principle equal the leaf's item count; confirm
		 * btrfs_search_slot leaves slots[0] on a valid item
		 * before this key read.
		 */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == start &&
		    key.type == BTRFS_METADATA_ITEM_KEY)
			ret = 0;
	}
	btrfs_free_path(path);
	return ret;
}
715
716
717
718
719
720
721
722
723
724
/*
 * Look up the reference count and flags of an extent at @bytenr.  The
 * on-disk values from the extent tree are combined with any pending
 * delayed refs in the current transaction (when @trans is non-NULL)
 * so the caller sees the effective state.  Results are stored through
 * @refs and @flags when non-NULL.  Returns 0 or a negative error.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	/*
	 * Without the skinny-metadata incompat feature, metadata
	 * extents are stored as regular EXTENT_ITEMs keyed by
	 * leafsize, so downgrade the lookup accordingly.
	 */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
		offset = root->leafsize;
		metadata = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (metadata) {
		key.objectid = bytenr;
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = offset;
	} else {
		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = offset;
	}

	/* without a transaction, read from the commit root locklessly */
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}
again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	/* skinny item missing: retry as a regular extent item */
	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = root->leafsize;
		btrfs_release_path(path);
		goto again;
	}

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			/* pre-v1 extent item: only a refcount on disk */
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);

			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	/* merge in any delayed ref modifications still in memory */
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * The head is being processed; block until its
			 * mutex is released, then retry the lookup
			 * from scratch.
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto again;
		}
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert an old-format (v0) extent item in place to the current
 * format.  When @owner is unknown ((u64)-1) it is recovered from the
 * first EXTENT_REF_V0 item following the extent item.  Tree blocks
 * (owner < BTRFS_FIRST_FREE_OBJECTID) additionally gain a
 * btrfs_tree_block_info and are flagged FULL_BACKREF; data extents
 * are flagged BTRFS_EXTENT_FLAG_DATA.  @extra_size reserves room for
 * the caller's subsequent insertion.  Returns 0 or a negative error.
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		/* scan forward for a v0 ref item to learn the owner */
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	/* tree blocks carry an extra btrfs_tree_block_info */
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	btrfs_extend_item(root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* v0 items carried no generation; start from 0 */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* zero the block info; the owner encodes the level */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1036#endif
1037
/*
 * Hash (root, owner, offset) into the key offset used for
 * EXTENT_DATA_REF items: crc32c of the root in the high half, crc32c
 * of owner+offset in the low half.  The high half is shifted by 31
 * (not 32) bits; this must not be "fixed" — changing it would change
 * the hash of every existing on-disk ref.
 */
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
1053
1054static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
1055 struct btrfs_extent_data_ref *ref)
1056{
1057 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
1058 btrfs_extent_data_ref_objectid(leaf, ref),
1059 btrfs_extent_data_ref_offset(leaf, ref));
1060}
1061
1062static int match_extent_data_ref(struct extent_buffer *leaf,
1063 struct btrfs_extent_data_ref *ref,
1064 u64 root_objectid, u64 owner, u64 offset)
1065{
1066 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1067 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1068 btrfs_extent_data_ref_offset(leaf, ref) != offset)
1069 return 0;
1070 return 1;
1071}
1072
/*
 * Find the data ref item for an extent.  With @parent set, look for a
 * SHARED_DATA_REF keyed by the parent block (falling back to the v0
 * format under BTRFS_COMPAT_EXTENT_TREE_V0); otherwise scan the
 * EXTENT_DATA_REF items at the hashed offset for one matching (root,
 * owner, offset) — hash collisions mean several items may share the
 * key, hence the forward scan.  On success the path points at the
 * item and 0 is returned; otherwise -ENOENT or a search error.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		/* retry with the old v0 ref key */
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			/* crossed a leaf: path may be stale, redo search */
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}
1159
/*
 * Insert (or bump) a data ref for an extent.  With @parent set, a
 * SHARED_DATA_REF item keyed by the parent block is used; otherwise
 * an EXTENT_DATA_REF keyed by the (root, owner, offset) hash.  When
 * the item already exists the ref count is incremented by
 * @refs_to_add; hash collisions on EXTENT_DATA_REF keys are resolved
 * by probing successive key offsets.  The path is released before
 * returning.  Returns 0 or a negative error.
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			/* fresh item: set the initial count */
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* existing item (-EEXIST): bump the count */
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		/*
		 * -EEXIST may mean either "our ref is already here" or
		 * a hash collision with a different ref.  Probe the
		 * next key offsets until we find our ref or an empty
		 * slot.
		 */
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}
1238
/*
 * Drop @refs_to_drop references from the data ref item the path
 * currently points at (EXTENT_DATA_REF, SHARED_DATA_REF, or — under
 * the v0 compat option — EXTENT_REF_V0).  The item is deleted when
 * its count reaches zero, otherwise the decremented count is written
 * back.  Dropping below zero is a bug (BUG_ON).
 */
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}
1295
/*
 * Return the reference count stored in a data ref, whether it is an
 * inline ref (@iref non-NULL) or a standalone item at the current
 * path slot.  Unrecognized item types yield 0 with a WARN_ON.
 */
static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		/* inline ref: the count lives inside the extent item */
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}
1337
/*
 * Find the back ref item for a tree block: SHARED_BLOCK_REF keyed by
 * @parent when non-zero, else TREE_BLOCK_REF keyed by @root_objectid.
 * With the v0 compat option, a shared-ref miss is retried with the
 * old EXTENT_REF_V0 key.  Returns 0 with the path positioned on the
 * item, -ENOENT, or a search error.
 */
static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}
1370
1371static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1372 struct btrfs_root *root,
1373 struct btrfs_path *path,
1374 u64 bytenr, u64 parent,
1375 u64 root_objectid)
1376{
1377 struct btrfs_key key;
1378 int ret;
1379
1380 key.objectid = bytenr;
1381 if (parent) {
1382 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1383 key.offset = parent;
1384 } else {
1385 key.type = BTRFS_TREE_BLOCK_REF_KEY;
1386 key.offset = root_objectid;
1387 }
1388
1389 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1390 btrfs_release_path(path);
1391 return ret;
1392}
1393
1394static inline int extent_ref_type(u64 parent, u64 owner)
1395{
1396 int type;
1397 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1398 if (parent > 0)
1399 type = BTRFS_SHARED_BLOCK_REF_KEY;
1400 else
1401 type = BTRFS_TREE_BLOCK_REF_KEY;
1402 } else {
1403 if (parent > 0)
1404 type = BTRFS_SHARED_DATA_REF_KEY;
1405 else
1406 type = BTRFS_EXTENT_DATA_REF_KEY;
1407 }
1408 return type;
1409}
1410
1411static int find_next_key(struct btrfs_path *path, int level,
1412 struct btrfs_key *key)
1413
1414{
1415 for (; level < BTRFS_MAX_LEVEL; level++) {
1416 if (!path->nodes[level])
1417 break;
1418 if (path->slots[level] + 1 >=
1419 btrfs_header_nritems(path->nodes[level]))
1420 continue;
1421 if (level == 0)
1422 btrfs_item_key_to_cpu(path->nodes[level], key,
1423 path->slots[level] + 1);
1424 else
1425 btrfs_node_key_to_cpu(path->nodes[level], key,
1426 path->slots[level] + 1);
1427 return 0;
1428 }
1429 return 1;
1430}
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
/*
 * Look up an inline backref inside the extent item for @bytenr.
 *
 * Returns 0 and sets *ref_ret to the matching inline ref on success.
 * Returns -ENOENT when no matching inline ref exists; if @insert is
 * set, the path is then positioned and *ref_ret points at the place a
 * new inline ref of the wanted type should be created.  Returns
 * -EAGAIN (only with @insert) when the ref must go into a separate
 * keyed item instead: either the extent item would outgrow
 * BTRFS_MAX_EXTENT_ITEM_SIZE or keyed backrefs for this extent
 * already follow in the tree.  When @insert is set, keep_locks is
 * cleared and upper path levels unlocked before returning.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;

	/*
	 * Owners below BTRFS_FIRST_FREE_OBJECTID are tree blocks; with
	 * the skinny-metadata feature those live under METADATA_ITEM
	 * keys whose offset is the level, not the byte size.
	 */
	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = owner;
	}

again:
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/*
	 * The skinny item may not exist for an old extent.  The search
	 * may have landed one slot past a classic EXTENT_ITEM for this
	 * bytenr, so check the previous slot before falling back to a
	 * full search under the classic key.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = false;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes)
				ret = 0;
		}
		if (ret) {
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;
			btrfs_release_path(path);
			goto again;
		}
	}

	if (ret && !insert) {
		err = -ENOENT;
		goto out;
	} else if (ret) {
		/* extent item missing although we were asked to add a ref */
		err = -EIO;
		WARN_ON(1);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		/* upgrade old-format items before poking at inline refs */
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	/* non-skinny tree blocks carry a tree_block_info before the refs */
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	}

	/* scan the inline refs; they are sorted by type, then by hash/offset */
	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * Before adding an inline back ref, make sure there is
		 * no keyed back ref for this extent in the following
		 * items: if the next key still belongs to this extent's
		 * backrefs, fall back to a keyed item (-EAGAIN).
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
1634
1635
1636
1637
/*
 * Create a new inline backref at the position found by
 * lookup_inline_extent_backref() (passed in as @iref).  The extent
 * item is extended by the ref size, refs past the insertion point are
 * shifted up to make room, the extent item's total ref count is bumped
 * by @refs_to_add, and any pending extent op is applied.
 */
static noinline_for_stack
void setup_inline_extent_backref(struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/*
	 * remember where @iref sits relative to the item start: the
	 * item data may move when the item is extended below
	 */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	btrfs_extend_item(root, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	/* shift refs after the insertion point into the new space */
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		/* BTRFS_TREE_BLOCK_REF_KEY: offset holds the owning root */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
}
1698
1699static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1700 struct btrfs_root *root,
1701 struct btrfs_path *path,
1702 struct btrfs_extent_inline_ref **ref_ret,
1703 u64 bytenr, u64 num_bytes, u64 parent,
1704 u64 root_objectid, u64 owner, u64 offset)
1705{
1706 int ret;
1707
1708 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1709 bytenr, num_bytes, parent,
1710 root_objectid, owner, offset, 0);
1711 if (ret != -ENOENT)
1712 return ret;
1713
1714 btrfs_release_path(path);
1715 *ref_ret = NULL;
1716
1717 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1718 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1719 root_objectid);
1720 } else {
1721 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1722 root_objectid, owner, offset);
1723 }
1724 return ret;
1725}
1726
1727
1728
1729
/*
 * Adjust an existing inline backref's count by @refs_to_mod (negative
 * to drop refs).  The extent item's total ref count is adjusted by the
 * same amount and any pending extent op is applied.  When the inline
 * ref's own count reaches zero, the ref is removed by closing the gap
 * and truncating the extent item.
 */
static noinline_for_stack
void update_inline_extent_backref(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_extent_inline_ref *iref,
				  int refs_to_mod,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		/*
		 * block refs carry an implicit count of 1 and can only
		 * be dropped outright
		 */
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* this inline ref is gone: close the gap, shrink the item */
		size =  btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		btrfs_truncate_item(root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
}
1791
1792static noinline_for_stack
1793int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1794 struct btrfs_root *root,
1795 struct btrfs_path *path,
1796 u64 bytenr, u64 num_bytes, u64 parent,
1797 u64 root_objectid, u64 owner,
1798 u64 offset, int refs_to_add,
1799 struct btrfs_delayed_extent_op *extent_op)
1800{
1801 struct btrfs_extent_inline_ref *iref;
1802 int ret;
1803
1804 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1805 bytenr, num_bytes, parent,
1806 root_objectid, owner, offset, 1);
1807 if (ret == 0) {
1808 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1809 update_inline_extent_backref(root, path, iref,
1810 refs_to_add, extent_op);
1811 } else if (ret == -ENOENT) {
1812 setup_inline_extent_backref(root, path, iref, parent,
1813 root_objectid, owner, offset,
1814 refs_to_add, extent_op);
1815 ret = 0;
1816 }
1817 return ret;
1818}
1819
1820static int insert_extent_backref(struct btrfs_trans_handle *trans,
1821 struct btrfs_root *root,
1822 struct btrfs_path *path,
1823 u64 bytenr, u64 parent, u64 root_objectid,
1824 u64 owner, u64 offset, int refs_to_add)
1825{
1826 int ret;
1827 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1828 BUG_ON(refs_to_add != 1);
1829 ret = insert_tree_block_ref(trans, root, path, bytenr,
1830 parent, root_objectid);
1831 } else {
1832 ret = insert_extent_data_ref(trans, root, path, bytenr,
1833 parent, root_objectid,
1834 owner, offset, refs_to_add);
1835 }
1836 return ret;
1837}
1838
1839static int remove_extent_backref(struct btrfs_trans_handle *trans,
1840 struct btrfs_root *root,
1841 struct btrfs_path *path,
1842 struct btrfs_extent_inline_ref *iref,
1843 int refs_to_drop, int is_data)
1844{
1845 int ret = 0;
1846
1847 BUG_ON(!is_data && refs_to_drop != 1);
1848 if (iref) {
1849 update_inline_extent_backref(root, path, iref,
1850 -refs_to_drop, NULL);
1851 } else if (is_data) {
1852 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1853 } else {
1854 ret = btrfs_del_item(trans, root, path);
1855 }
1856 return ret;
1857}
1858
1859static int btrfs_issue_discard(struct block_device *bdev,
1860 u64 start, u64 len)
1861{
1862 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
1863}
1864
/*
 * Discard a logical byte range.  The range is mapped to its physical
 * stripes and each stripe on a discard-capable device gets its own
 * discard request.  The number of bytes actually discarded is returned
 * through @actual_bytes when non-NULL; -EOPNOTSUPP is swallowed.
 */
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_bio *bbio = NULL;


	/* map the logical range to the physical stripes covering it */
	ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
			      bytenr, &num_bytes, &bbio, 0);

	if (!ret) {
		struct btrfs_bio_stripe *stripe = bbio->stripes;
		int i;


		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			if (!stripe->dev->can_discard)
				continue;

			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length);
			if (!ret)
				discarded_bytes += stripe->length;
			else if (ret != -EOPNOTSUPP)
				break;

			/*
			 * -EOPNOTSUPP from a single stripe is deliberately
			 * ignored (ret reset to 0) so one device without
			 * discard support doesn't fail the whole operation.
			 */
			ret = 0;
		}
		kfree(bbio);
	}

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	/* the block mapping itself may report the range unsupported */
	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}
1912
1913
1914int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1915 struct btrfs_root *root,
1916 u64 bytenr, u64 num_bytes, u64 parent,
1917 u64 root_objectid, u64 owner, u64 offset, int for_cow)
1918{
1919 int ret;
1920 struct btrfs_fs_info *fs_info = root->fs_info;
1921
1922 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1923 root_objectid == BTRFS_TREE_LOG_OBJECTID);
1924
1925 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1926 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
1927 num_bytes,
1928 parent, root_objectid, (int)owner,
1929 BTRFS_ADD_DELAYED_REF, NULL, for_cow);
1930 } else {
1931 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
1932 num_bytes,
1933 parent, root_objectid, owner, offset,
1934 BTRFS_ADD_DELAYED_REF, NULL, for_cow);
1935 }
1936 return ret;
1937}
1938
/*
 * Add @refs_to_add references to an extent in the extent tree.  An
 * inline backref is tried first; on -EAGAIN (the ref does not fit
 * inline) the extent item's count is bumped in place and a separate
 * keyed backref item is inserted.
 *
 * Returns 0 or a negative errno.  Note that a failure from the final
 * keyed insert aborts the transaction but is not reflected in the
 * return value (err stays 0 on that path).
 */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;

	/* on -EAGAIN the path is left positioned on the extent item */
	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	/* -EAGAIN: update the extent item count here ... */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = 1;
	path->leave_spinning = 1;

	/* ... and insert the actual keyed backref item */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return err;
}
1995
/*
 * Apply one delayed data ref.  ADD with insert_reserved creates the
 * extent item for a freshly reserved extent; plain ADD increments an
 * existing extent's refs; DROP decrements/frees.  Shared refs carry
 * the parent block, full refs the owning root.
 */
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	/* key of the extent item to create on the insert_reserved path */
	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}
2043
/*
 * Apply a delayed extent op to an extent item in place: OR in any new
 * flags and, for tree blocks, rewrite the key stored in the
 * tree_block_info that follows the extent item.
 */
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		/* key updates only make sense for tree block extents */
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}
2061
/*
 * Apply a delayed extent op (flag and/or key update) to the on-disk
 * extent item for @node.  With the skinny-metadata feature the
 * METADATA_ITEM key (offset = level) is tried first, falling back to
 * the classic EXTENT_ITEM key (offset = size) when not found.
 */
static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;
	int metadata = !extent_op->is_data;

	if (trans->aborted)
		return 0;

	/* skinny keys exist only when the incompat feature is enabled */
	if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
		metadata = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;

	if (metadata) {
		key.type = BTRFS_METADATA_ITEM_KEY;
		key.offset = extent_op->level;
	} else {
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = node->num_bytes;
	}

again:
	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		if (metadata) {
			/* no skinny item; retry with the classic key */
			btrfs_release_path(path);
			metadata = 0;

			key.offset = node->num_bytes;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			goto again;
		}
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* upgrade old-format items before touching them */
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}
2141
/*
 * Apply one delayed tree block ref: create the extent item for a
 * freshly allocated block (ADD with insert_reserved), or inc/dec the
 * refs of an existing one.  With skinny metadata the insert key
 * carries the level instead of the byte size.  Tree refs always have
 * ref_mod == 1.
 */
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	ref = btrfs_delayed_node_to_tree_ref(node);
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	ins.objectid = node->bytenr;
	if (skinny_metadata) {
		ins.offset = ref->level;
		ins.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		ins.offset = node->num_bytes;
		ins.type = BTRFS_EXTENT_ITEM_KEY;
	}

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		/* a new block always comes with flags (and maybe a key) */
		BUG_ON(!extent_op || !extent_op->update_flags);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent, ref_root,
					     ref->level, 0, 1, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}
2192
2193
/*
 * Process a single delayed ref node.  Head nodes mean all refs below
 * them have already been run; non-head nodes dispatch to the tree or
 * data handlers based on their type.
 */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret = 0;

	if (trans->aborted)
		return 0;

	if (btrfs_delayed_ref_is_head(node)) {
		struct btrfs_delayed_ref_head *head;

		/*
		 * we've hit the end of the chain.  If insert_reserved is
		 * still set, the extent was allocated but every ref to it
		 * was dropped before it was ever inserted, so all that is
		 * left is to undo the accounting: pin the extent and, for
		 * data, drop its checksum items.
		 */
		BUG_ON(extent_op);
		head = btrfs_delayed_node_to_head(node);
		if (insert_reserved) {
			btrfs_pin_extent(root, node->bytenr,
					 node->num_bytes, 1);
			if (head->is_data) {
				ret = btrfs_del_csums(trans, root,
						      node->bytenr,
						      node->num_bytes);
			}
		}
		return ret;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, root, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, root, node, extent_op,
					   insert_reserved);
	else
		BUG();
	return ret;
}
2239
2240static noinline struct btrfs_delayed_ref_node *
2241select_delayed_ref(struct btrfs_delayed_ref_head *head)
2242{
2243 struct rb_node *node;
2244 struct btrfs_delayed_ref_node *ref;
2245 int action = BTRFS_ADD_DELAYED_REF;
2246again:
2247
2248
2249
2250
2251
2252 node = rb_prev(&head->node.rb_node);
2253 while (1) {
2254 if (!node)
2255 break;
2256 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2257 rb_node);
2258 if (ref->bytenr != head->node.bytenr)
2259 break;
2260 if (ref->action == action)
2261 return ref;
2262 node = rb_prev(node);
2263 }
2264 if (action == BTRFS_ADD_DELAYED_REF) {
2265 action = BTRFS_DROP_DELAYED_REF;
2266 goto again;
2267 }
2268 return NULL;
2269}
2270
2271
2272
2273
2274
/*
 * Run the delayed ref heads collected in @cluster.  Called with
 * delayed_refs->lock held; the lock is dropped around the actual ref
 * processing and re-taken before the next iteration.  Returns the
 * number of refs processed, or a negative errno (with the failing
 * head unlocked and the spinlock re-held).
 */
static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct list_head *cluster)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	struct btrfs_delayed_extent_op *extent_op;
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	int count = 0;
	int must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	while (1) {
		if (!locked_ref) {
			/* pick the next head in the cluster */
			if (list_empty(cluster))
				break;

			locked_ref = list_entry(cluster->next,
				     struct btrfs_delayed_ref_head, cluster);

			/* take the mutex that claims this head's refs */
			ret = btrfs_delayed_ref_lock(trans, locked_ref);

			/*
			 * the spinlock may have been dropped while taking
			 * the head mutex, giving someone else time to
			 * process and free the head; -EAGAIN means it is
			 * gone from our list and we can move on.
			 */
			if (ret == -EAGAIN) {
				locked_ref = NULL;
				count++;
				continue;
			}
		}

		/*
		 * merge matching add/drop refs under this head before
		 * running them (done while holding delayed_refs->lock);
		 * needed to avoid issues with relocated extents.
		 */
		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
					 locked_ref);

		/*
		 * locked_ref is the head node; its refs sort just before
		 * it in the tree, so select one of them to run.
		 */
		ref = select_delayed_ref(locked_ref);

		if (ref && ref->seq &&
		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
			/*
			 * this ref is blocked by a pending tree mod seq;
			 * put the head back as ready and let others make
			 * progress before retrying.
			 */
			list_del_init(&locked_ref->cluster);
			btrfs_delayed_ref_unlock(locked_ref);
			locked_ref = NULL;
			delayed_refs->num_heads_ready++;
			spin_unlock(&delayed_refs->lock);
			cond_resched();
			spin_lock(&delayed_refs->lock);
			continue;
		}

		/*
		 * record the must_insert_reserved flag before we drop
		 * the spin lock
		 */
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;

		if (!ref) {
			/*
			 * no refs left under this head: process the head
			 * node itself (final accounting pass).
			 */
			ref = &locked_ref->node;

			/* a never-inserted extent needs no extent op */
			if (extent_op && must_insert_reserved) {
				btrfs_free_delayed_extent_op(extent_op);
				extent_op = NULL;
			}

			if (extent_op) {
				spin_unlock(&delayed_refs->lock);

				ret = run_delayed_extent_op(trans, root,
							    ref, extent_op);
				btrfs_free_delayed_extent_op(extent_op);

				if (ret) {
					btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
					spin_lock(&delayed_refs->lock);
					btrfs_delayed_ref_unlock(locked_ref);
					return ret;
				}

				goto next;
			}
		}

		ref->in_tree = 0;
		rb_erase(&ref->rb_node, &delayed_refs->root);
		delayed_refs->num_entries--;
		if (!btrfs_delayed_ref_is_head(ref)) {
			/*
			 * when a ref is run, undo its contribution to the
			 * head's aggregate ref_mod
			 */
			switch (ref->action) {
			case BTRFS_ADD_DELAYED_REF:
			case BTRFS_ADD_DELAYED_EXTENT:
				locked_ref->node.ref_mod -= ref->ref_mod;
				break;
			case BTRFS_DROP_DELAYED_REF:
				locked_ref->node.ref_mod += ref->ref_mod;
				break;
			default:
				WARN_ON(1);
			}
		}
		spin_unlock(&delayed_refs->lock);

		ret = run_one_delayed_ref(trans, root, ref, extent_op,
					  must_insert_reserved);

		btrfs_free_delayed_extent_op(extent_op);
		if (ret) {
			btrfs_delayed_ref_unlock(locked_ref);
			btrfs_put_delayed_ref(ref);
			btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
			spin_lock(&delayed_refs->lock);
			return ret;
		}

		/*
		 * if this node was the head, every ref under it has been
		 * dealt with: unlock it and drop it from the cluster list
		 * so the next iteration picks a fresh head.
		 */
		if (btrfs_delayed_ref_is_head(ref)) {
			list_del_init(&locked_ref->cluster);
			btrfs_delayed_ref_unlock(locked_ref);
			locked_ref = NULL;
		}
		btrfs_put_delayed_ref(ref);
		count++;
next:
		cond_resched();
		spin_lock(&delayed_refs->lock);
	}
	return count;
}
2440
2441#ifdef SCRAMBLE_DELAYED_REFS
2442
2443
2444
2445
2446
2447static u64 find_middle(struct rb_root *root)
2448{
2449 struct rb_node *n = root->rb_node;
2450 struct btrfs_delayed_ref_node *entry;
2451 int alt = 1;
2452 u64 middle;
2453 u64 first = 0, last = 0;
2454
2455 n = rb_first(root);
2456 if (n) {
2457 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2458 first = entry->bytenr;
2459 }
2460 n = rb_last(root);
2461 if (n) {
2462 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2463 last = entry->bytenr;
2464 }
2465 n = root->rb_node;
2466
2467 while (n) {
2468 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
2469 WARN_ON(!entry->in_tree);
2470
2471 middle = entry->bytenr;
2472
2473 if (alt)
2474 n = n->rb_left;
2475 else
2476 n = n->rb_right;
2477
2478 alt = 1 - alt;
2479 }
2480 return middle;
2481}
2482#endif
2483
/*
 * Drain the per-transaction qgroup ref list built while delayed refs
 * were queued, feeding each entry to btrfs_qgroup_account_ref(), then
 * release the tree-mod-seq element that pinned the sequence numbers.
 * The list and delayed_ref_elem.seq must agree on being empty/zero
 * (BUG otherwise).  Returns the first accounting error but still
 * drains and frees the whole list.
 */
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info)
{
	struct qgroup_update *qgroup_update;
	int ret = 0;

	if (list_empty(&trans->qgroup_ref_list) !=
	    !trans->delayed_ref_elem.seq) {

		/* the list must be empty exactly when no seq is held */
		btrfs_err(fs_info,
			"qgroup accounting update error, list is%s empty, seq is %#x.%x",
			list_empty(&trans->qgroup_ref_list) ? "" : " not",
			(u32)(trans->delayed_ref_elem.seq >> 32),
			(u32)trans->delayed_ref_elem.seq);
		BUG();
	}

	if (!trans->delayed_ref_elem.seq)
		return 0;

	while (!list_empty(&trans->qgroup_ref_list)) {
		qgroup_update = list_first_entry(&trans->qgroup_ref_list,
						 struct qgroup_update, list);
		list_del(&qgroup_update->list);
		if (!ret)
			ret = btrfs_qgroup_account_ref(
					trans, fs_info, qgroup_update->node,
					qgroup_update->extent_op);
		kfree(qgroup_update);
	}

	btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);

	return ret;
}
2519
2520static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
2521 int count)
2522{
2523 int val = atomic_read(&delayed_refs->ref_seq);
2524
2525 if (val < seq || val >= seq + count)
2526 return 1;
2527 return 0;
2528}
2529
2530static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2531{
2532 u64 num_bytes;
2533
2534 num_bytes = heads * (sizeof(struct btrfs_extent_item) +
2535 sizeof(struct btrfs_extent_inline_ref));
2536 if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2537 num_bytes += heads * sizeof(struct btrfs_tree_block_info);
2538
2539
2540
2541
2542
2543 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2544}
2545
/*
 * Decide whether the caller should stop and flush delayed refs.
 * Returns 1 when the metadata estimated to run every ready delayed
 * ref head (doubled for slack, doubled again when no new chunks can
 * be allocated) would exhaust the global block reserve.
 */
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_block_rsv *global_rsv;
	u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
	u64 num_bytes;
	int ret = 0;

	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	num_heads = heads_to_leaves(root, num_heads);
	if (num_heads > 1)
		num_bytes += (num_heads - 1) * root->leafsize;
	num_bytes <<= 1;	/* double for safety margin */
	global_rsv = &root->fs_info->global_block_rsv;

	/*
	 * If we can't allocate any more chunks, make sure we have lots
	 * of wiggle room by doubling the estimate again.
	 */
	if (global_rsv->space_info->full)
		num_bytes <<= 1;

	spin_lock(&global_rsv->lock);
	if (global_rsv->reserved <= num_bytes)
		ret = 1;
	spin_unlock(&global_rsv->lock);
	return ret;
}
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2586 struct btrfs_root *root, unsigned long count)
2587{
2588 struct rb_node *node;
2589 struct btrfs_delayed_ref_root *delayed_refs;
2590 struct btrfs_delayed_ref_node *ref;
2591 struct list_head cluster;
2592 int ret;
2593 u64 delayed_start;
2594 int run_all = count == (unsigned long)-1;
2595 int run_most = 0;
2596 int loops;
2597
2598
2599 if (trans->aborted)
2600 return 0;
2601
2602 if (root == root->fs_info->extent_root)
2603 root = root->fs_info->tree_root;
2604
2605 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2606
2607 delayed_refs = &trans->transaction->delayed_refs;
2608 INIT_LIST_HEAD(&cluster);
2609 if (count == 0) {
2610 count = delayed_refs->num_entries * 2;
2611 run_most = 1;
2612 }
2613
2614 if (!run_all && !run_most) {
2615 int old;
2616 int seq = atomic_read(&delayed_refs->ref_seq);
2617
2618progress:
2619 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2620 if (old) {
2621 DEFINE_WAIT(__wait);
2622 if (delayed_refs->flushing ||
2623 !btrfs_should_throttle_delayed_refs(trans, root))
2624 return 0;
2625
2626 prepare_to_wait(&delayed_refs->wait, &__wait,
2627 TASK_UNINTERRUPTIBLE);
2628
2629 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2630 if (old) {
2631 schedule();
2632 finish_wait(&delayed_refs->wait, &__wait);
2633
2634 if (!refs_newer(delayed_refs, seq, 256))
2635 goto progress;
2636 else
2637 return 0;
2638 } else {
2639 finish_wait(&delayed_refs->wait, &__wait);
2640 goto again;
2641 }
2642 }
2643
2644 } else {
2645 atomic_inc(&delayed_refs->procs_running_refs);
2646 }
2647
2648again:
2649 loops = 0;
2650 spin_lock(&delayed_refs->lock);
2651
2652#ifdef SCRAMBLE_DELAYED_REFS
2653 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2654#endif
2655
2656 while (1) {
2657 if (!(run_all || run_most) &&
2658 !btrfs_should_throttle_delayed_refs(trans, root))
2659 break;
2660
2661
2662
2663
2664
2665
2666
2667 delayed_start = delayed_refs->run_delayed_start;
2668 ret = btrfs_find_ref_cluster(trans, &cluster,
2669 delayed_refs->run_delayed_start);
2670 if (ret)
2671 break;
2672
2673 ret = run_clustered_refs(trans, root, &cluster);
2674 if (ret < 0) {
2675 btrfs_release_ref_cluster(&cluster);
2676 spin_unlock(&delayed_refs->lock);
2677 btrfs_abort_transaction(trans, root, ret);
2678 atomic_dec(&delayed_refs->procs_running_refs);
2679 wake_up(&delayed_refs->wait);
2680 return ret;
2681 }
2682
2683 atomic_add(ret, &delayed_refs->ref_seq);
2684
2685 count -= min_t(unsigned long, ret, count);
2686
2687 if (count == 0)
2688 break;
2689
2690 if (delayed_start >= delayed_refs->run_delayed_start) {
2691 if (loops == 0) {
2692
2693
2694
2695
2696
2697
2698 loops = 1;
2699 } else {
2700
2701
2702
2703 BUG_ON(run_all);
2704 break;
2705 }
2706 }
2707 if (ret) {
2708
2709 loops = 0;
2710 }
2711 }
2712
2713 if (run_all) {
2714 if (!list_empty(&trans->new_bgs)) {
2715 spin_unlock(&delayed_refs->lock);
2716 btrfs_create_pending_block_groups(trans, root);
2717 spin_lock(&delayed_refs->lock);
2718 }
2719
2720 node = rb_first(&delayed_refs->root);
2721 if (!node)
2722 goto out;
2723 count = (unsigned long)-1;
2724
2725 while (node) {
2726 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2727 rb_node);
2728 if (btrfs_delayed_ref_is_head(ref)) {
2729 struct btrfs_delayed_ref_head *head;
2730
2731 head = btrfs_delayed_node_to_head(ref);
2732 atomic_inc(&ref->refs);
2733
2734 spin_unlock(&delayed_refs->lock);
2735
2736
2737
2738
2739 mutex_lock(&head->mutex);
2740 mutex_unlock(&head->mutex);
2741
2742 btrfs_put_delayed_ref(ref);
2743 cond_resched();
2744 goto again;
2745 }
2746 node = rb_next(node);
2747 }
2748 spin_unlock(&delayed_refs->lock);
2749 schedule_timeout(1);
2750 goto again;
2751 }
2752out:
2753 atomic_dec(&delayed_refs->procs_running_refs);
2754 smp_mb();
2755 if (waitqueue_active(&delayed_refs->wait))
2756 wake_up(&delayed_refs->wait);
2757
2758 spin_unlock(&delayed_refs->lock);
2759 assert_qgroups_uptodate(trans);
2760 return 0;
2761}
2762
2763int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2764 struct btrfs_root *root,
2765 u64 bytenr, u64 num_bytes, u64 flags,
2766 int level, int is_data)
2767{
2768 struct btrfs_delayed_extent_op *extent_op;
2769 int ret;
2770
2771 extent_op = btrfs_alloc_delayed_extent_op();
2772 if (!extent_op)
2773 return -ENOMEM;
2774
2775 extent_op->flags_to_set = flags;
2776 extent_op->update_flags = 1;
2777 extent_op->update_key = 0;
2778 extent_op->is_data = is_data ? 1 : 0;
2779 extent_op->level = level;
2780
2781 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2782 num_bytes, extent_op);
2783 if (ret)
2784 btrfs_free_delayed_extent_op(extent_op);
2785 return ret;
2786}
2787
/*
 * Check the pending delayed refs on @bytenr.  Returns 0 when the only
 * pending ref is a single data ref matching (root, objectid, offset),
 * 1 when other/extra refs are pending, -ENOENT when there is no delayed
 * ref head for this bytenr at all, and -EAGAIN when the head's mutex was
 * contended and the caller must retry (the path is released here).
 */
static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_data_ref *data_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *node;
	int ret = 0;

	ret = -ENOENT;
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head)
		goto out;

	if (!mutex_trylock(&head->mutex)) {
		atomic_inc(&head->node.refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/*
		 * Mutex was contended, block until it's released and let
		 * the caller try again
		 */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}

	/* refs for this head sort immediately before it in the rbtree */
	node = rb_prev(&head->node.rb_node);
	if (!node)
		goto out_unlock;

	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);

	if (ref->bytenr != bytenr)
		goto out_unlock;

	ret = 1;
	if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
		goto out_unlock;

	data_ref = btrfs_delayed_node_to_data_ref(ref);

	/* more than one ref pending on this extent means it is shared */
	node = rb_prev(node);
	if (node) {
		int seq = ref->seq;

		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		if (ref->bytenr == bytenr && ref->seq == seq)
			goto out_unlock;
	}

	if (data_ref->root != root->root_key.objectid ||
	    data_ref->objectid != objectid || data_ref->offset != offset)
		goto out_unlock;

	ret = 0;
out_unlock:
	mutex_unlock(&head->mutex);
out:
	spin_unlock(&delayed_refs->lock);
	return ret;
}
2858
/*
 * Check the committed extent tree for cross references on @bytenr.
 * Returns 0 when the extent item holds exactly one inline data ref that
 * matches (root, objectid, offset) and postdates the last snapshot,
 * 1 when the extent is (or may be) shared, -ENOENT when no extent item
 * is found, and < 0 on search error.
 */
static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					u64 objectid, u64 offset, u64 bytenr)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 item_size;
	int ret;

	key.objectid = bytenr;
	key.offset = (u64)-1;
	key.type = BTRFS_EXTENT_ITEM_KEY;

	/* offset (u64)-1 can never match exactly, so we land just past it */
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0); /* Corruption */

	ret = -ENOENT;
	if (path->slots[0] == 0)
		goto out;

	path->slots[0]--;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
		goto out;

	ret = 1;
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* v0 items carry no inline refs; treat as shared */
		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		goto out;
	}
#endif
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);

	/* room for exactly one inline data ref, otherwise it's shared */
	if (item_size != sizeof(*ei) +
	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
		goto out;

	/* extents from before the last snapshot may be shared with it */
	if (btrfs_extent_generation(leaf, ei) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	if (btrfs_extent_inline_ref_type(leaf, iref) !=
	    BTRFS_EXTENT_DATA_REF_KEY)
		goto out;

	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
	if (btrfs_extent_refs(leaf, ei) !=
	    btrfs_extent_data_ref_count(leaf, ref) ||
	    btrfs_extent_data_ref_root(leaf, ref) !=
	    root->root_key.objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		goto out;

	ret = 0;
out:
	return ret;
}
2929
2930int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2931 struct btrfs_root *root,
2932 u64 objectid, u64 offset, u64 bytenr)
2933{
2934 struct btrfs_path *path;
2935 int ret;
2936 int ret2;
2937
2938 path = btrfs_alloc_path();
2939 if (!path)
2940 return -ENOENT;
2941
2942 do {
2943 ret = check_committed_ref(trans, root, path, objectid,
2944 offset, bytenr);
2945 if (ret && ret != -ENOENT)
2946 goto out;
2947
2948 ret2 = check_delayed_ref(trans, root, path, objectid,
2949 offset, bytenr);
2950 } while (ret2 == -EAGAIN);
2951
2952 if (ret2 && ret2 != -ENOENT) {
2953 ret = ret2;
2954 goto out;
2955 }
2956
2957 if (ret != -ENOENT || ret2 != -ENOENT)
2958 ret = 0;
2959out:
2960 btrfs_free_path(path);
2961 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2962 WARN_ON(ret > 0);
2963 return ret;
2964}
2965
2966static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct extent_buffer *buf,
2969 int full_backref, int inc, int for_cow)
2970{
2971 u64 bytenr;
2972 u64 num_bytes;
2973 u64 parent;
2974 u64 ref_root;
2975 u32 nritems;
2976 struct btrfs_key key;
2977 struct btrfs_file_extent_item *fi;
2978 int i;
2979 int level;
2980 int ret = 0;
2981 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
2982 u64, u64, u64, u64, u64, u64, int);
2983
2984 ref_root = btrfs_header_owner(buf);
2985 nritems = btrfs_header_nritems(buf);
2986 level = btrfs_header_level(buf);
2987
2988 if (!root->ref_cows && level == 0)
2989 return 0;
2990
2991 if (inc)
2992 process_func = btrfs_inc_extent_ref;
2993 else
2994 process_func = btrfs_free_extent;
2995
2996 if (full_backref)
2997 parent = buf->start;
2998 else
2999 parent = 0;
3000
3001 for (i = 0; i < nritems; i++) {
3002 if (level == 0) {
3003 btrfs_item_key_to_cpu(buf, &key, i);
3004 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3005 continue;
3006 fi = btrfs_item_ptr(buf, i,
3007 struct btrfs_file_extent_item);
3008 if (btrfs_file_extent_type(buf, fi) ==
3009 BTRFS_FILE_EXTENT_INLINE)
3010 continue;
3011 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
3012 if (bytenr == 0)
3013 continue;
3014
3015 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
3016 key.offset -= btrfs_file_extent_offset(buf, fi);
3017 ret = process_func(trans, root, bytenr, num_bytes,
3018 parent, ref_root, key.objectid,
3019 key.offset, for_cow);
3020 if (ret)
3021 goto fail;
3022 } else {
3023 bytenr = btrfs_node_blockptr(buf, i);
3024 num_bytes = btrfs_level_size(root, level - 1);
3025 ret = process_func(trans, root, bytenr, num_bytes,
3026 parent, ref_root, level - 1, 0,
3027 for_cow);
3028 if (ret)
3029 goto fail;
3030 }
3031 }
3032 return 0;
3033fail:
3034 return ret;
3035}
3036
/* Add one reference for every extent pointed to by @buf. */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref, int for_cow)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
}
3042
/* Drop one reference for every extent pointed to by @buf. */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref, int for_cow)
{
	return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
}
3048
3049static int write_one_cache_group(struct btrfs_trans_handle *trans,
3050 struct btrfs_root *root,
3051 struct btrfs_path *path,
3052 struct btrfs_block_group_cache *cache)
3053{
3054 int ret;
3055 struct btrfs_root *extent_root = root->fs_info->extent_root;
3056 unsigned long bi;
3057 struct extent_buffer *leaf;
3058
3059 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
3060 if (ret < 0)
3061 goto fail;
3062 BUG_ON(ret);
3063
3064 leaf = path->nodes[0];
3065 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
3066 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
3067 btrfs_mark_buffer_dirty(leaf);
3068 btrfs_release_path(path);
3069fail:
3070 if (ret) {
3071 btrfs_abort_transaction(trans, root, ret);
3072 return ret;
3073 }
3074 return 0;
3075
3076}
3077
3078static struct btrfs_block_group_cache *
3079next_block_group(struct btrfs_root *root,
3080 struct btrfs_block_group_cache *cache)
3081{
3082 struct rb_node *node;
3083 spin_lock(&root->fs_info->block_group_cache_lock);
3084 node = rb_next(&cache->cache_node);
3085 btrfs_put_block_group(cache);
3086 if (node) {
3087 cache = rb_entry(node, struct btrfs_block_group_cache,
3088 cache_node);
3089 btrfs_get_block_group(cache);
3090 } else
3091 cache = NULL;
3092 spin_unlock(&root->fs_info->block_group_cache_lock);
3093 return cache;
3094}
3095
/*
 * Prepare the free-space-cache inode for @block_group so its cache can
 * be written out at commit time: create/truncate the inode and
 * preallocate room for the cache file.  Records the outcome in
 * block_group->disk_cache_state (SETUP, WRITTEN or ERROR).
 */
static int cache_save_setup(struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path)
{
	struct btrfs_root *root = block_group->fs_info->tree_root;
	struct inode *inode = NULL;
	u64 alloc_hint = 0;
	int dcs = BTRFS_DC_ERROR;
	int num_pages = 0;
	int retries = 0;
	int ret = 0;

	/*
	 * If this block group is smaller than 100 megs don't bother caching
	 * the block group.
	 */
	if (block_group->key.offset < (100 * 1024 * 1024)) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		return 0;
	}

again:
	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
		ret = PTR_ERR(inode);
		btrfs_release_path(path);
		goto out;
	}

	if (IS_ERR(inode)) {
		/* -ENOENT: the cache inode doesn't exist yet, create it once */
		BUG_ON(retries);
		retries++;

		if (block_group->ro)
			goto out_free;

		ret = create_free_space_inode(root, trans, block_group, path);
		if (ret)
			goto out_free;
		goto again;
	}

	/* We've already setup this transaction, go ahead and exit */
	if (block_group->cache_generation == trans->transid &&
	    i_size_read(inode)) {
		dcs = BTRFS_DC_SETUP;
		goto out_put;
	}

	/*
	 * We want to set the generation to 0, that way if anything goes
	 * wrong from here on out we know not to trust this cache when we
	 * load up next time.
	 */
	BTRFS_I(inode)->generation = 0;
	ret = btrfs_update_inode(trans, root, inode);
	WARN_ON(ret);

	if (i_size_read(inode) > 0) {
		ret = btrfs_check_trunc_cache_free_space(root,
					&root->fs_info->global_block_rsv);
		if (ret)
			goto out_put;

		ret = btrfs_truncate_free_space_cache(root, trans, path,
						      inode);
		if (ret)
			goto out_put;
	}

	spin_lock(&block_group->lock);
	if (block_group->cached != BTRFS_CACHE_FINISHED ||
	    !btrfs_test_opt(root, SPACE_CACHE)) {
		/*
		 * We're not fully cached (or caching is disabled), so we
		 * can't write the cache out; mark it written so we don't
		 * keep retrying.
		 */
		dcs = BTRFS_DC_WRITTEN;
		spin_unlock(&block_group->lock);
		goto out_put;
	}
	spin_unlock(&block_group->lock);

	/*
	 * Try to preallocate enough space based on how big the block group
	 * is, so we can handle bitmaps and other fragmentation: 16 pages
	 * per 256M of block group.
	 */
	num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
	if (!num_pages)
		num_pages = 1;

	num_pages *= 16;
	num_pages *= PAGE_CACHE_SIZE;

	ret = btrfs_check_data_free_space(inode, num_pages);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
					      num_pages, num_pages,
					      &alloc_hint);
	if (!ret)
		dcs = BTRFS_DC_SETUP;
	btrfs_free_reserved_data_space(inode, num_pages);

out_put:
	iput(inode);
out_free:
	btrfs_release_path(path);
out:
	spin_lock(&block_group->lock);
	if (!ret && dcs == BTRFS_DC_SETUP)
		block_group->cache_generation = trans->transid;
	block_group->disk_cache_state = dcs;
	spin_unlock(&block_group->lock);

	return ret;
}
3219
/*
 * Write all dirty block group items and free-space caches back to disk.
 * Three passes: set up cache inodes for CLEAR groups, write dirty block
 * group items, then write out the caches marked NEED_WRITE.  A pass
 * restarts from scratch (goto again) whenever an earlier-state group
 * reappears.  Returns 0 or the first error.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct btrfs_block_group_cache *cache;
	int err = 0;
	struct btrfs_path *path;
	u64 last = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	/* pass 1: set up cache inodes for any group still in DC_CLEAR */
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		while (cache) {
			if (cache->disk_cache_state == BTRFS_DC_CLEAR)
				break;
			cache = next_block_group(root, cache);
		}
		if (!cache) {
			if (last == 0)
				break;
			last = 0;
			continue;
		}
		err = cache_save_setup(cache, trans, path);
		last = cache->key.objectid + cache->key.offset;
		btrfs_put_block_group(cache);
	}

	/* pass 2: write the dirty block group items */
	while (1) {
		if (last == 0) {
			err = btrfs_run_delayed_refs(trans, root,
						     (unsigned long)-1);
			if (err) /* File system offline */
				goto out;
		}

		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		while (cache) {
			/* a new DC_CLEAR group appeared: redo pass 1 */
			if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
				btrfs_put_block_group(cache);
				goto again;
			}

			if (cache->dirty)
				break;
			cache = next_block_group(root, cache);
		}
		if (!cache) {
			if (last == 0)
				break;
			last = 0;
			continue;
		}

		if (cache->disk_cache_state == BTRFS_DC_SETUP)
			cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
		cache->dirty = 0;
		last = cache->key.objectid + cache->key.offset;

		err = write_one_cache_group(trans, root, path, cache);
		if (err) /* File system offline */
			goto out;

		btrfs_put_block_group(cache);
	}

	/* pass 3: write the free space caches marked NEED_WRITE */
	while (1) {
		/*
		 * I don't think this is needed since we're just marking our
		 * preallocated extent as written, but just in case it can't
		 * hurt.
		 */
		if (last == 0) {
			err = btrfs_run_delayed_refs(trans, root,
						     (unsigned long)-1);
			if (err) /* File system offline */
				goto out;
		}

		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		while (cache) {
			/*
			 * Really this shouldn't happen, but it could if we
			 * couldn't write the entire preallocated extent and
			 * splitting the extent didn't work out.
			 */
			if (cache->dirty) {
				btrfs_put_block_group(cache);
				goto again;
			}
			if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
				break;
			cache = next_block_group(root, cache);
		}
		if (!cache) {
			if (last == 0)
				break;
			last = 0;
			continue;
		}

		err = btrfs_write_out_cache(root, trans, cache, path);

		/*
		 * If we didn't have an error then the cache state is still
		 * NEED_WRITE, so we can set it to WRITTEN.
		 */
		if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
			cache->disk_cache_state = BTRFS_DC_WRITTEN;
		last = cache->key.objectid + cache->key.offset;
		btrfs_put_block_group(cache);
	}
out:

	btrfs_free_path(path);
	return err;
}
3340
3341int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3342{
3343 struct btrfs_block_group_cache *block_group;
3344 int readonly = 0;
3345
3346 block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
3347 if (!block_group || block_group->ro)
3348 readonly = 1;
3349 if (block_group)
3350 btrfs_put_block_group(block_group);
3351 return readonly;
3352}
3353
3354static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3355 u64 total_bytes, u64 bytes_used,
3356 struct btrfs_space_info **space_info)
3357{
3358 struct btrfs_space_info *found;
3359 int i;
3360 int factor;
3361 int ret;
3362
3363 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3364 BTRFS_BLOCK_GROUP_RAID10))
3365 factor = 2;
3366 else
3367 factor = 1;
3368
3369 found = __find_space_info(info, flags);
3370 if (found) {
3371 spin_lock(&found->lock);
3372 found->total_bytes += total_bytes;
3373 found->disk_total += total_bytes * factor;
3374 found->bytes_used += bytes_used;
3375 found->disk_used += bytes_used * factor;
3376 found->full = 0;
3377 spin_unlock(&found->lock);
3378 *space_info = found;
3379 return 0;
3380 }
3381 found = kzalloc(sizeof(*found), GFP_NOFS);
3382 if (!found)
3383 return -ENOMEM;
3384
3385 ret = percpu_counter_init(&found->total_bytes_pinned, 0);
3386 if (ret) {
3387 kfree(found);
3388 return ret;
3389 }
3390
3391 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3392 INIT_LIST_HEAD(&found->block_groups[i]);
3393 init_rwsem(&found->groups_sem);
3394 spin_lock_init(&found->lock);
3395 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3396 found->total_bytes = total_bytes;
3397 found->disk_total = total_bytes * factor;
3398 found->bytes_used = bytes_used;
3399 found->disk_used = bytes_used * factor;
3400 found->bytes_pinned = 0;
3401 found->bytes_reserved = 0;
3402 found->bytes_readonly = 0;
3403 found->bytes_may_use = 0;
3404 found->full = 0;
3405 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3406 found->chunk_alloc = 0;
3407 found->flush = 0;
3408 init_waitqueue_head(&found->wait);
3409 *space_info = found;
3410 list_add_rcu(&found->list, &info->space_info);
3411 if (flags & BTRFS_BLOCK_GROUP_DATA)
3412 info->data_sinfo = found;
3413 return 0;
3414}
3415
3416static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3417{
3418 u64 extra_flags = chunk_to_extended(flags) &
3419 BTRFS_EXTENDED_PROFILE_MASK;
3420
3421 write_seqlock(&fs_info->profiles_lock);
3422 if (flags & BTRFS_BLOCK_GROUP_DATA)
3423 fs_info->avail_data_alloc_bits |= extra_flags;
3424 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3425 fs_info->avail_metadata_alloc_bits |= extra_flags;
3426 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3427 fs_info->avail_system_alloc_bits |= extra_flags;
3428 write_sequnlock(&fs_info->profiles_lock);
3429}
3430
3431
3432
3433
3434
3435
3436
3437static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3438{
3439 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3440 u64 target = 0;
3441
3442 if (!bctl)
3443 return 0;
3444
3445 if (flags & BTRFS_BLOCK_GROUP_DATA &&
3446 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3447 target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3448 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
3449 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3450 target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3451 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
3452 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3453 target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3454 }
3455
3456 return target;
3457}
3458
3459
3460
3461
3462
3463
3464
3465
3466static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3467{
3468
3469
3470
3471
3472
3473 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3474 root->fs_info->fs_devices->missing_devices;
3475 u64 target;
3476 u64 tmp;
3477
3478
3479
3480
3481
3482 spin_lock(&root->fs_info->balance_lock);
3483 target = get_restripe_target(root->fs_info, flags);
3484 if (target) {
3485
3486 if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
3487 spin_unlock(&root->fs_info->balance_lock);
3488 return extended_to_chunk(target);
3489 }
3490 }
3491 spin_unlock(&root->fs_info->balance_lock);
3492
3493
3494 if (num_devices == 1)
3495 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
3496 BTRFS_BLOCK_GROUP_RAID5);
3497 if (num_devices < 3)
3498 flags &= ~BTRFS_BLOCK_GROUP_RAID6;
3499 if (num_devices < 4)
3500 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
3501
3502 tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
3503 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
3504 BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
3505 flags &= ~tmp;
3506
3507 if (tmp & BTRFS_BLOCK_GROUP_RAID6)
3508 tmp = BTRFS_BLOCK_GROUP_RAID6;
3509 else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
3510 tmp = BTRFS_BLOCK_GROUP_RAID5;
3511 else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
3512 tmp = BTRFS_BLOCK_GROUP_RAID10;
3513 else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
3514 tmp = BTRFS_BLOCK_GROUP_RAID1;
3515 else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
3516 tmp = BTRFS_BLOCK_GROUP_RAID0;
3517
3518 return extended_to_chunk(flags | tmp);
3519}
3520
3521static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
3522{
3523 unsigned seq;
3524
3525 do {
3526 seq = read_seqbegin(&root->fs_info->profiles_lock);
3527
3528 if (flags & BTRFS_BLOCK_GROUP_DATA)
3529 flags |= root->fs_info->avail_data_alloc_bits;
3530 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3531 flags |= root->fs_info->avail_system_alloc_bits;
3532 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3533 flags |= root->fs_info->avail_metadata_alloc_bits;
3534 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
3535
3536 return btrfs_reduce_alloc_profile(root, flags);
3537}
3538
3539u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3540{
3541 u64 flags;
3542 u64 ret;
3543
3544 if (data)
3545 flags = BTRFS_BLOCK_GROUP_DATA;
3546 else if (root == root->fs_info->chunk_root)
3547 flags = BTRFS_BLOCK_GROUP_SYSTEM;
3548 else
3549 flags = BTRFS_BLOCK_GROUP_METADATA;
3550
3551 ret = get_alloc_profile(root, flags);
3552 return ret;
3553}
3554
3555
3556
3557
3558
3559int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3560{
3561 struct btrfs_space_info *data_sinfo;
3562 struct btrfs_root *root = BTRFS_I(inode)->root;
3563 struct btrfs_fs_info *fs_info = root->fs_info;
3564 u64 used;
3565 int ret = 0, committed = 0, alloc_chunk = 1;
3566
3567
3568 bytes = ALIGN(bytes, root->sectorsize);
3569
3570 if (root == root->fs_info->tree_root ||
3571 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
3572 alloc_chunk = 0;
3573 committed = 1;
3574 }
3575
3576 data_sinfo = fs_info->data_sinfo;
3577 if (!data_sinfo)
3578 goto alloc;
3579
3580again:
3581
3582 spin_lock(&data_sinfo->lock);
3583 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3584 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3585 data_sinfo->bytes_may_use;
3586
3587 if (used + bytes > data_sinfo->total_bytes) {
3588 struct btrfs_trans_handle *trans;
3589
3590
3591
3592
3593
3594 if (!data_sinfo->full && alloc_chunk) {
3595 u64 alloc_target;
3596
3597 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3598 spin_unlock(&data_sinfo->lock);
3599alloc:
3600 alloc_target = btrfs_get_alloc_profile(root, 1);
3601 trans = btrfs_join_transaction(root);
3602 if (IS_ERR(trans))
3603 return PTR_ERR(trans);
3604
3605 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3606 alloc_target,
3607 CHUNK_ALLOC_NO_FORCE);
3608 btrfs_end_transaction(trans, root);
3609 if (ret < 0) {
3610 if (ret != -ENOSPC)
3611 return ret;
3612 else
3613 goto commit_trans;
3614 }
3615
3616 if (!data_sinfo)
3617 data_sinfo = fs_info->data_sinfo;
3618
3619 goto again;
3620 }
3621
3622
3623
3624
3625
3626 if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
3627 bytes) < 0)
3628 committed = 1;
3629 spin_unlock(&data_sinfo->lock);
3630
3631
3632commit_trans:
3633 if (!committed &&
3634 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3635 committed = 1;
3636
3637 trans = btrfs_join_transaction(root);
3638 if (IS_ERR(trans))
3639 return PTR_ERR(trans);
3640 ret = btrfs_commit_transaction(trans, root);
3641 if (ret)
3642 return ret;
3643 goto again;
3644 }
3645
3646 return -ENOSPC;
3647 }
3648 data_sinfo->bytes_may_use += bytes;
3649 trace_btrfs_space_reservation(root->fs_info, "space_info",
3650 data_sinfo->flags, bytes, 1);
3651 spin_unlock(&data_sinfo->lock);
3652
3653 return 0;
3654}
3655
3656
3657
3658
3659void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3660{
3661 struct btrfs_root *root = BTRFS_I(inode)->root;
3662 struct btrfs_space_info *data_sinfo;
3663
3664
3665 bytes = ALIGN(bytes, root->sectorsize);
3666
3667 data_sinfo = root->fs_info->data_sinfo;
3668 spin_lock(&data_sinfo->lock);
3669 WARN_ON(data_sinfo->bytes_may_use < bytes);
3670 data_sinfo->bytes_may_use -= bytes;
3671 trace_btrfs_space_reservation(root->fs_info, "space_info",
3672 data_sinfo->flags, bytes, 0);
3673 spin_unlock(&data_sinfo->lock);
3674}
3675
3676static void force_metadata_allocation(struct btrfs_fs_info *info)
3677{
3678 struct list_head *head = &info->space_info;
3679 struct btrfs_space_info *found;
3680
3681 rcu_read_lock();
3682 list_for_each_entry_rcu(found, head, list) {
3683 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3684 found->force_alloc = CHUNK_ALLOC_FORCE;
3685 }
3686 rcu_read_unlock();
3687}
3688
3689static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
3690{
3691 return (global->size << 1);
3692}
3693
3694static int should_alloc_chunk(struct btrfs_root *root,
3695 struct btrfs_space_info *sinfo, int force)
3696{
3697 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3698 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3699 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3700 u64 thresh;
3701
3702 if (force == CHUNK_ALLOC_FORCE)
3703 return 1;
3704
3705
3706
3707
3708
3709
3710 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
3711 num_allocated += calc_global_rsv_need_space(global_rsv);
3712
3713
3714
3715
3716
3717 if (force == CHUNK_ALLOC_LIMITED) {
3718 thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
3719 thresh = max_t(u64, 64 * 1024 * 1024,
3720 div_factor_fine(thresh, 1));
3721
3722 if (num_bytes - num_allocated < thresh)
3723 return 1;
3724 }
3725
3726 if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8))
3727 return 0;
3728 return 1;
3729}
3730
3731static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
3732{
3733 u64 num_dev;
3734
3735 if (type & (BTRFS_BLOCK_GROUP_RAID10 |
3736 BTRFS_BLOCK_GROUP_RAID0 |
3737 BTRFS_BLOCK_GROUP_RAID5 |
3738 BTRFS_BLOCK_GROUP_RAID6))
3739 num_dev = root->fs_info->fs_devices->rw_devices;
3740 else if (type & BTRFS_BLOCK_GROUP_RAID1)
3741 num_dev = 2;
3742 else
3743 num_dev = 1;
3744
3745
3746 return btrfs_calc_trans_metadata_size(root, num_dev + 1);
3747}
3748
3749static void check_system_chunk(struct btrfs_trans_handle *trans,
3750 struct btrfs_root *root, u64 type)
3751{
3752 struct btrfs_space_info *info;
3753 u64 left;
3754 u64 thresh;
3755
3756 info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3757 spin_lock(&info->lock);
3758 left = info->total_bytes - info->bytes_used - info->bytes_pinned -
3759 info->bytes_reserved - info->bytes_readonly;
3760 spin_unlock(&info->lock);
3761
3762 thresh = get_system_chunk_thresh(root, type);
3763 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
3764 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
3765 left, thresh, type);
3766 dump_space_info(info, 0, 0);
3767 }
3768
3769 if (left < thresh) {
3770 u64 flags;
3771
3772 flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
3773 btrfs_alloc_chunk(trans, root, flags);
3774 }
3775}
3776
/*
 * Allocate a chunk of profile @flags if the @force policy says we should.
 * Serializes against concurrent allocators via space_info->chunk_alloc
 * and the fs-wide chunk_mutex.  Returns 1 if a chunk was allocated, 0 if
 * allocation was skipped or the space info became full (-ENOSPC from the
 * allocator), and < 0 on other errors.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	int wait_for_alloc = 0;
	int ret = 0;

	/* Don't re-enter if we're already allocating a chunk */
	if (trans->allocating_chunk)
		return -ENOSPC;

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret); /* -ENOMEM */
	}
	BUG_ON(!space_info); /* Logic error */

again:
	spin_lock(&space_info->lock);
	if (force < space_info->force_alloc)
		force = space_info->force_alloc;
	if (space_info->full) {
		spin_unlock(&space_info->lock);
		return 0;
	}

	if (!should_alloc_chunk(extent_root, space_info, force)) {
		spin_unlock(&space_info->lock);
		return 0;
	} else if (space_info->chunk_alloc) {
		/* someone else is allocating for this space info right now */
		wait_for_alloc = 1;
	} else {
		space_info->chunk_alloc = 1;
	}

	spin_unlock(&space_info->lock);

	mutex_lock(&fs_info->chunk_mutex);

	/*
	 * The chunk_mutex is held throughout the entirety of a chunk
	 * allocation, so once we've acquired the chunk_mutex we know that the
	 * other guy is done and we need to recheck and see if we should
	 * allocate.
	 */
	if (wait_for_alloc) {
		mutex_unlock(&fs_info->chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	trans->allocating_chunk = true;

	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
	if (btrfs_mixed_space_info(space_info))
		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);

	/*
	 * if we're doing a data chunk, go ahead and make sure that
	 * we keep a reasonable number of metadata chunks allocated in the
	 * FS as well.
	 */
	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
		fs_info->data_chunk_allocations++;
		if (!(fs_info->data_chunk_allocations %
		      fs_info->metadata_ratio))
			force_metadata_allocation(fs_info);
	}

	/*
	 * Check if we have enough space in SYSTEM chunk because we may need
	 * to update devices.
	 */
	check_system_chunk(trans, extent_root, flags);

	ret = btrfs_alloc_chunk(trans, extent_root, flags);
	trans->allocating_chunk = false;

	spin_lock(&space_info->lock);
	if (ret < 0 && ret != -ENOSPC)
		goto out;
	if (ret)
		space_info->full = 1;
	else
		ret = 1;

	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
	space_info->chunk_alloc = 0;
	spin_unlock(&space_info->lock);
	mutex_unlock(&fs_info->chunk_mutex);
	return ret;
}
3876
/*
 * Decide whether @space_info may be overcommitted by @bytes of metadata
 * reservations under flushing strategy @flush.
 *
 * Returns 1 if the reservation may exceed total_bytes (backed by still
 * unallocated chunk space), 0 otherwise.
 */
static int can_overcommit(struct btrfs_root *root,
			  struct btrfs_space_info *space_info, u64 bytes,
			  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	u64 profile = btrfs_get_alloc_profile(root, 0);
	u64 space_size;
	u64 avail;
	u64 used;
	u64 to_add;

	used = space_info->bytes_used + space_info->bytes_reserved +
		space_info->bytes_pinned + space_info->bytes_readonly;

	/*
	 * Only allow overcommitting if there is still room for what the global
	 * reserve needs; otherwise we risk a real ENOSPC on a critical
	 * allocation later.
	 */
	spin_lock(&global_rsv->lock);
	space_size = calc_global_rsv_need_space(global_rsv);
	spin_unlock(&global_rsv->lock);
	if (used + space_size >= space_info->total_bytes)
		return 0;

	used += space_info->bytes_may_use;

	spin_lock(&root->fs_info->free_chunk_lock);
	avail = root->fs_info->free_chunk_space;
	spin_unlock(&root->fs_info->free_chunk_lock);

	/*
	 * With dup, raid1 or raid10 only half of the free device space is
	 * actually usable for new chunks of this profile.
	 * NOTE(review): raid5/6 is not halved here — presumably because the
	 * space info accounting already excludes parity; confirm.
	 */
	if (profile & (BTRFS_BLOCK_GROUP_DUP |
		       BTRFS_BLOCK_GROUP_RAID1 |
		       BTRFS_BLOCK_GROUP_RAID10))
		avail >>= 1;

	to_add = space_info->total_bytes;

	/*
	 * If we can flush everything, be conservative and only overcommit by
	 * 1/8 of the space; otherwise allow up to 1/2.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		to_add >>= 3;
	else
		to_add >>= 1;

	/*
	 * Never overcommit by more than the unallocated chunk space we could
	 * actually turn into new chunks.
	 */
	to_add = min(avail, to_add);

	if (used + bytes < space_info->total_bytes + to_add)
		return 1;
	return 0;
}
3942
/*
 * Start writeback of up to @nr_pages dirty delalloc pages so their metadata
 * reservations can be reclaimed.
 */
static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
					 unsigned long nr_pages)
{
	struct super_block *sb = root->fs_info->sb;

	if (down_read_trylock(&sb->s_umount)) {
		writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
		up_read(&sb->s_umount);
	} else {
		/*
		 * s_umount is contended (e.g. remount in progress), so fall
		 * back to btrfs' own delalloc flushing.  We don't need to
		 * worry about the fs going read-only under us: a read-only fs
		 * has no dirty delalloc inodes left.  Only wait for ordered
		 * extents when we don't hold a transaction (journal_info),
		 * to avoid deadlocking against ourselves.
		 */
		btrfs_start_all_delalloc_inodes(root->fs_info, 0);
		if (!current->journal_info)
			btrfs_wait_all_ordered_extents(root->fs_info, 0);
	}
}
3964
3965
3966
3967
/*
 * shrink_delalloc - try to reclaim @to_reclaim bytes of metadata reservation
 * by writing back and completing delalloc, until overcommitting by @orig
 * bytes becomes possible or we give up (3 loops).
 */
static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
			    bool wait_ordered)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_space_info *space_info;
	struct btrfs_trans_handle *trans;
	u64 delalloc_bytes;
	u64 max_reclaim;
	long time_left;
	unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
	int loops = 0;
	enum btrfs_reserve_flush_enum flush;

	trans = (struct btrfs_trans_handle *)current->journal_info;
	block_rsv = &root->fs_info->delalloc_block_rsv;
	space_info = block_rsv->space_info;

	smp_mb();
	delalloc_bytes = percpu_counter_sum_positive(
						&root->fs_info->delalloc_bytes);
	if (delalloc_bytes == 0) {
		/* nothing dirty; waiting on ordered extents would deadlock
		 * inside a transaction, so only do it without one */
		if (trans)
			return;
		btrfs_wait_all_ordered_extents(root->fs_info, 0);
		return;
	}

	while (delalloc_bytes && loops < 3) {
		max_reclaim = min(delalloc_bytes, to_reclaim);
		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
		btrfs_writeback_inodes_sb_nr(root, nr_pages);
		/*
		 * Wait for the async delalloc pages to at least be submitted
		 * before we check whether the reservation can now succeed.
		 */
		wait_event(root->fs_info->async_submit_wait,
			   !atomic_read(&root->fs_info->async_delalloc_pages));

		/* inside a transaction we must not flush further */
		if (!trans)
			flush = BTRFS_RESERVE_FLUSH_ALL;
		else
			flush = BTRFS_RESERVE_NO_FLUSH;
		spin_lock(&space_info->lock);
		if (can_overcommit(root, space_info, orig, flush)) {
			spin_unlock(&space_info->lock);
			break;
		}
		spin_unlock(&space_info->lock);

		loops++;
		if (wait_ordered && !trans) {
			btrfs_wait_all_ordered_extents(root->fs_info, 0);
		} else {
			time_left = schedule_timeout_killable(1);
			/* interrupted by a fatal signal: stop trying */
			if (time_left)
				break;
		}
		smp_mb();
		delalloc_bytes = percpu_counter_sum_positive(
						&root->fs_info->delalloc_bytes);
	}
}
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041static int may_commit_transaction(struct btrfs_root *root,
4042 struct btrfs_space_info *space_info,
4043 u64 bytes, int force)
4044{
4045 struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
4046 struct btrfs_trans_handle *trans;
4047
4048 trans = (struct btrfs_trans_handle *)current->journal_info;
4049 if (trans)
4050 return -EAGAIN;
4051
4052 if (force)
4053 goto commit;
4054
4055
4056 spin_lock(&space_info->lock);
4057 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4058 bytes) >= 0) {
4059 spin_unlock(&space_info->lock);
4060 goto commit;
4061 }
4062 spin_unlock(&space_info->lock);
4063
4064
4065
4066
4067
4068 if (space_info != delayed_rsv->space_info)
4069 return -ENOSPC;
4070
4071 spin_lock(&space_info->lock);
4072 spin_lock(&delayed_rsv->lock);
4073 if (percpu_counter_compare(&space_info->total_bytes_pinned,
4074 bytes - delayed_rsv->size) >= 0) {
4075 spin_unlock(&delayed_rsv->lock);
4076 spin_unlock(&space_info->lock);
4077 return -ENOSPC;
4078 }
4079 spin_unlock(&delayed_rsv->lock);
4080 spin_unlock(&space_info->lock);
4081
4082commit:
4083 trans = btrfs_join_transaction(root);
4084 if (IS_ERR(trans))
4085 return -ENOSPC;
4086
4087 return btrfs_commit_transaction(trans, root);
4088}
4089
/*
 * States for flush_space(), tried in ascending order by
 * reserve_metadata_bytes() until a reservation succeeds.
 */
enum flush_state {
	FLUSH_DELAYED_ITEMS_NR	=	1,	/* run a bounded number of delayed items */
	FLUSH_DELAYED_ITEMS	=	2,	/* run all delayed items */
	FLUSH_DELALLOC		=	3,	/* write back delalloc */
	FLUSH_DELALLOC_WAIT	=	4,	/* write back delalloc and wait on ordered */
	ALLOC_CHUNK		=	5,	/* try allocating a new chunk */
	COMMIT_TRANS		=	6,	/* last resort: commit the transaction */
};
4098
/*
 * Perform one flushing step (@state, see enum flush_state) trying to free
 * @num_bytes of metadata space in @space_info for a reservation of
 * @orig_bytes.  Returns 0 on success (or harmless -ENOSPC from chunk
 * allocation), negative errno otherwise.
 */
static int flush_space(struct btrfs_root *root,
		       struct btrfs_space_info *space_info, u64 num_bytes,
		       u64 orig_bytes, int state)
{
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		if (state == FLUSH_DELAYED_ITEMS_NR) {
			/* roughly two delayed items per metadata unit needed */
			u64 bytes = btrfs_calc_trans_metadata_size(root, 1);

			nr = (int)div64_u64(num_bytes, bytes);
			if (!nr)
				nr = 1;
			nr *= 2;
		} else {
			nr = -1;	/* run them all */
		}
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, root, nr);
		btrfs_end_transaction(trans, root);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
		shrink_delalloc(root, num_bytes, orig_bytes,
				state == FLUSH_DELALLOC_WAIT);
		break;
	case ALLOC_CHUNK:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     btrfs_get_alloc_profile(root, 0),
				     CHUNK_ALLOC_NO_FORCE);
		btrfs_end_transaction(trans, root);
		/* full space info is not an error for the caller */
		if (ret == -ENOSPC)
			ret = 0;
		break;
	case COMMIT_TRANS:
		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	return ret;
}
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
/*
 * reserve_metadata_bytes - try to reserve bytes from a block_rsv's space info
 * @root: the root we're allocating for
 * @block_rsv: the block_rsv we're allocating for
 * @orig_bytes: the number of bytes we want
 * @flush: whether/how we may flush to make room for the reservation
 *
 * Reserves @orig_bytes from the space info associated with @block_rsv.  If
 * there is not enough space, progressively stronger flushing steps (see
 * enum flush_state) are attempted, unless @flush forbids it.  Returns 0 on
 * success, -ENOSPC/-EAGAIN/-EINTR on failure.
 */
static int reserve_metadata_bytes(struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;
	u64 used;
	u64 num_bytes = orig_bytes;
	int flush_state = FLUSH_DELAYED_ITEMS_NR;
	int ret = 0;
	bool flushing = false;

again:
	ret = 0;
	spin_lock(&space_info->lock);
	/*
	 * Only wait if somebody else is flushing and we are allowed to flush
	 * all things ourselves.
	 */
	while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing &&
	       space_info->flush) {
		spin_unlock(&space_info->lock);
		/*
		 * If we hold a transaction we can't wait: the flusher may need
		 * to commit the transaction, which would deadlock against the
		 * transaction we're keeping open.
		 */
		if (current->journal_info)
			return -EAGAIN;
		ret = wait_event_killable(space_info->wait, !space_info->flush);
		/* we were killed while waiting */
		if (ret)
			return -EINTR;

		spin_lock(&space_info->lock);
	}

	ret = -ENOSPC;
	used = space_info->bytes_used + space_info->bytes_reserved +
		space_info->bytes_pinned + space_info->bytes_readonly +
		space_info->bytes_may_use;

	/*
	 * If the space info is not yet over-reserved we can take our
	 * reservation immediately (or just reclaim what we need); if it is
	 * already overcommitted, reclaim the overcommit plus twice our
	 * request before retrying.
	 */
	if (used <= space_info->total_bytes) {
		if (used + orig_bytes <= space_info->total_bytes) {
			space_info->bytes_may_use += orig_bytes;
			trace_btrfs_space_reservation(root->fs_info,
				"space_info", space_info->flags, orig_bytes, 1);
			ret = 0;
		} else {
			/*
			 * Not overcommitted: only try to reclaim what we
			 * actually need.
			 */
			num_bytes = orig_bytes;
		}
	} else {
		/*
		 * Overcommitted: reclaim the overcommitted amount plus twice
		 * the bytes needed for this reservation.
		 */
		num_bytes = used - space_info->total_bytes +
			(orig_bytes * 2);
	}

	/* second chance: overcommit if the space info allows it */
	if (ret && can_overcommit(root, space_info, orig_bytes, flush)) {
		space_info->bytes_may_use += orig_bytes;
		trace_btrfs_space_reservation(root->fs_info, "space_info",
					      space_info->flags, orig_bytes,
					      1);
		ret = 0;
	}

	/*
	 * Couldn't make the reservation: mark ourselves as the flusher so the
	 * space we reclaim isn't stolen by somebody else.  Other tasks are
	 * only made to wait when we may flush all things.
	 */
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
		flushing = true;
		space_info->flush = 1;
	}

	spin_unlock(&space_info->lock);

	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
		goto out;

	ret = flush_space(root, space_info, num_bytes, orig_bytes,
			  flush_state);
	flush_state++;

	/*
	 * FLUSH_LIMIT callers must not flush delalloc (deadlock risk), so
	 * skip straight to chunk allocation.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
	    (flush_state == FLUSH_DELALLOC ||
	     flush_state == FLUSH_DELALLOC_WAIT))
		flush_state = ALLOC_CHUNK;

	if (!ret)
		goto again;
	else if (flush == BTRFS_RESERVE_FLUSH_LIMIT &&
		 flush_state < COMMIT_TRANS)
		goto again;
	else if (flush == BTRFS_RESERVE_FLUSH_ALL &&
		 flush_state <= COMMIT_TRANS)
		goto again;

out:
	/*
	 * During orphan cleanup we may steal from the global reserve as a
	 * last resort so cleanup can make progress.
	 */
	if (ret == -ENOSPC &&
	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
		struct btrfs_block_rsv *global_rsv =
			&root->fs_info->global_block_rsv;

		if (block_rsv != global_rsv &&
		    !block_rsv_use_bytes(global_rsv, orig_bytes))
			ret = 0;
	}
	if (flushing) {
		spin_lock(&space_info->lock);
		space_info->flush = 0;
		wake_up_all(&space_info->wait);
		spin_unlock(&space_info->lock);
	}
	return ret;
}
4310
4311static struct btrfs_block_rsv *get_block_rsv(
4312 const struct btrfs_trans_handle *trans,
4313 const struct btrfs_root *root)
4314{
4315 struct btrfs_block_rsv *block_rsv = NULL;
4316
4317 if (root->ref_cows)
4318 block_rsv = trans->block_rsv;
4319
4320 if (root == root->fs_info->csum_root && trans->adding_csums)
4321 block_rsv = trans->block_rsv;
4322
4323 if (!block_rsv)
4324 block_rsv = root->block_rsv;
4325
4326 if (!block_rsv)
4327 block_rsv = &root->fs_info->empty_block_rsv;
4328
4329 return block_rsv;
4330}
4331
4332static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
4333 u64 num_bytes)
4334{
4335 int ret = -ENOSPC;
4336 spin_lock(&block_rsv->lock);
4337 if (block_rsv->reserved >= num_bytes) {
4338 block_rsv->reserved -= num_bytes;
4339 if (block_rsv->reserved < block_rsv->size)
4340 block_rsv->full = 0;
4341 ret = 0;
4342 }
4343 spin_unlock(&block_rsv->lock);
4344 return ret;
4345}
4346
4347static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
4348 u64 num_bytes, int update_size)
4349{
4350 spin_lock(&block_rsv->lock);
4351 block_rsv->reserved += num_bytes;
4352 if (update_size)
4353 block_rsv->size += num_bytes;
4354 else if (block_rsv->reserved >= block_rsv->size)
4355 block_rsv->full = 1;
4356 spin_unlock(&block_rsv->lock);
4357}
4358
/*
 * Conditionally move @num_bytes from the global reserve to @dest: only if
 * both reserves share a space info and the global reserve would still keep
 * at least @min_factor (in tenths) of its size afterwards.
 *
 * Returns 0 on success, -ENOSPC otherwise.
 */
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
			     struct btrfs_block_rsv *dest, u64 num_bytes,
			     int min_factor)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != dest->space_info)
		return -ENOSPC;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, min_factor);
	if (global_rsv->reserved < min_bytes + num_bytes) {
		spin_unlock(&global_rsv->lock);
		return -ENOSPC;
	}
	global_rsv->reserved -= num_bytes;
	/* taking bytes out may leave the global reserve below its size */
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	block_rsv_add_bytes(dest, num_bytes, 1);
	return 0;
}
4383
/*
 * Shrink @block_rsv's size by @num_bytes ((u64)-1 means "everything") and
 * hand any now-excess reserved space to @dest (up to what @dest still
 * needs), returning the remainder to the space info's bytes_may_use.
 */
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_rsv *block_rsv,
				    struct btrfs_block_rsv *dest, u64 num_bytes)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;

	spin_lock(&block_rsv->lock);
	if (num_bytes == (u64)-1)
		num_bytes = block_rsv->size;
	block_rsv->size -= num_bytes;
	if (block_rsv->reserved >= block_rsv->size) {
		/* num_bytes becomes the excess we actually give back */
		num_bytes = block_rsv->reserved - block_rsv->size;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	} else {
		num_bytes = 0;
	}
	spin_unlock(&block_rsv->lock);

	if (num_bytes > 0) {
		if (dest) {
			/* top up the destination reserve first */
			spin_lock(&dest->lock);
			if (!dest->full) {
				u64 bytes_to_add;

				bytes_to_add = dest->size - dest->reserved;
				bytes_to_add = min(num_bytes, bytes_to_add);
				dest->reserved += bytes_to_add;
				if (dest->reserved >= dest->size)
					dest->full = 1;
				num_bytes -= bytes_to_add;
			}
			spin_unlock(&dest->lock);
		}
		if (num_bytes) {
			/* whatever is left goes back to the space info */
			spin_lock(&space_info->lock);
			space_info->bytes_may_use -= num_bytes;
			trace_btrfs_space_reservation(fs_info, "space_info",
					space_info->flags, num_bytes, 0);
			space_info->reservation_progress++;
			spin_unlock(&space_info->lock);
		}
	}
}
4428
4429static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
4430 struct btrfs_block_rsv *dst, u64 num_bytes)
4431{
4432 int ret;
4433
4434 ret = block_rsv_use_bytes(src, num_bytes);
4435 if (ret)
4436 return ret;
4437
4438 block_rsv_add_bytes(dst, num_bytes, 1);
4439 return 0;
4440}
4441
/*
 * Initialize a block reserve to empty: zero all counters, set up the
 * spinlock and record its type.
 */
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
{
	memset(rsv, 0, sizeof(*rsv));
	spin_lock_init(&rsv->lock);
	rsv->type = type;
}
4448
4449struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
4450 unsigned short type)
4451{
4452 struct btrfs_block_rsv *block_rsv;
4453 struct btrfs_fs_info *fs_info = root->fs_info;
4454
4455 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
4456 if (!block_rsv)
4457 return NULL;
4458
4459 btrfs_init_block_rsv(block_rsv, type);
4460 block_rsv->space_info = __find_space_info(fs_info,
4461 BTRFS_BLOCK_GROUP_METADATA);
4462 return block_rsv;
4463}
4464
4465void btrfs_free_block_rsv(struct btrfs_root *root,
4466 struct btrfs_block_rsv *rsv)
4467{
4468 if (!rsv)
4469 return;
4470 btrfs_block_rsv_release(root, rsv, (u64)-1);
4471 kfree(rsv);
4472}
4473
4474int btrfs_block_rsv_add(struct btrfs_root *root,
4475 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
4476 enum btrfs_reserve_flush_enum flush)
4477{
4478 int ret;
4479
4480 if (num_bytes == 0)
4481 return 0;
4482
4483 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
4484 if (!ret) {
4485 block_rsv_add_bytes(block_rsv, num_bytes, 1);
4486 return 0;
4487 }
4488
4489 return ret;
4490}
4491
4492int btrfs_block_rsv_check(struct btrfs_root *root,
4493 struct btrfs_block_rsv *block_rsv, int min_factor)
4494{
4495 u64 num_bytes = 0;
4496 int ret = -ENOSPC;
4497
4498 if (!block_rsv)
4499 return 0;
4500
4501 spin_lock(&block_rsv->lock);
4502 num_bytes = div_factor(block_rsv->size, min_factor);
4503 if (block_rsv->reserved >= num_bytes)
4504 ret = 0;
4505 spin_unlock(&block_rsv->lock);
4506
4507 return ret;
4508}
4509
4510int btrfs_block_rsv_refill(struct btrfs_root *root,
4511 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
4512 enum btrfs_reserve_flush_enum flush)
4513{
4514 u64 num_bytes = 0;
4515 int ret = -ENOSPC;
4516
4517 if (!block_rsv)
4518 return 0;
4519
4520 spin_lock(&block_rsv->lock);
4521 num_bytes = min_reserved;
4522 if (block_rsv->reserved >= num_bytes)
4523 ret = 0;
4524 else
4525 num_bytes -= block_rsv->reserved;
4526 spin_unlock(&block_rsv->lock);
4527
4528 if (!ret)
4529 return 0;
4530
4531 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
4532 if (!ret) {
4533 block_rsv_add_bytes(block_rsv, num_bytes, 0);
4534 return 0;
4535 }
4536
4537 return ret;
4538}
4539
/*
 * Public wrapper: move @num_bytes of reserved space from @src_rsv to
 * @dst_rsv.  Returns 0 on success, -ENOSPC if @src_rsv is short.
 */
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes)
{
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
4546
4547void btrfs_block_rsv_release(struct btrfs_root *root,
4548 struct btrfs_block_rsv *block_rsv,
4549 u64 num_bytes)
4550{
4551 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4552 if (global_rsv->full || global_rsv == block_rsv ||
4553 block_rsv->space_info != global_rsv->space_info)
4554 global_rsv = NULL;
4555 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
4556 num_bytes);
4557}
4558
4559
4560
4561
4562
4563
/*
 * Estimate how much metadata space the global reserve should hold: space
 * for two copies of the csum items covering all data, plus 2% of all used
 * data+metadata, clamped to one third of the used metadata, aligned up to
 * leafsize << 10.
 */
static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *sinfo;
	u64 num_bytes;
	u64 meta_used;
	u64 data_used;
	int csum_size = btrfs_super_csum_size(fs_info->super_copy);

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
	spin_lock(&sinfo->lock);
	data_used = sinfo->bytes_used;
	spin_unlock(&sinfo->lock);

	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	spin_lock(&sinfo->lock);
	/* mixed block groups: data already counted in the metadata sinfo */
	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
		data_used = 0;
	meta_used = sinfo->bytes_used;
	spin_unlock(&sinfo->lock);

	/* csums for every data block, times two copies */
	num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
		    csum_size * 2;
	/* plus 2% of everything used */
	num_bytes += div64_u64(data_used + meta_used, 50);

	/* never more than a third of the used metadata */
	if (num_bytes * 3 > meta_used)
		num_bytes = div64_u64(meta_used, 3);

	return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
}
4593
/*
 * Recompute the global block reserve's target size (capped at 512M) and
 * rebalance its reserved bytes against the metadata space info's
 * bytes_may_use accounting.
 */
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
	struct btrfs_space_info *sinfo = block_rsv->space_info;
	u64 num_bytes;

	num_bytes = calc_global_metadata_size(fs_info);

	spin_lock(&sinfo->lock);
	spin_lock(&block_rsv->lock);

	/* hard cap the global reserve at 512M */
	block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);

	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
		    sinfo->bytes_reserved + sinfo->bytes_readonly +
		    sinfo->bytes_may_use;

	if (sinfo->total_bytes > num_bytes) {
		/* claim all remaining free metadata space for the reserve */
		num_bytes = sinfo->total_bytes - num_bytes;
		block_rsv->reserved += num_bytes;
		sinfo->bytes_may_use += num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 1);
	}

	if (block_rsv->reserved >= block_rsv->size) {
		/* return anything above the target size to the space info */
		num_bytes = block_rsv->reserved - block_rsv->size;
		sinfo->bytes_may_use -= num_bytes;
		trace_btrfs_space_reservation(fs_info, "space_info",
				      sinfo->flags, num_bytes, 0);
		sinfo->reservation_progress++;
		block_rsv->reserved = block_rsv->size;
		block_rsv->full = 1;
	}

	spin_unlock(&block_rsv->lock);
	spin_unlock(&sinfo->lock);
}
4632
/*
 * Wire up the fs-wide block reserves to their space infos and point the
 * core trees at the global reserve, then size it.
 */
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	/* the chunk reserve draws from SYSTEM space */
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.space_info = space_info;

	/* everything else draws from METADATA space */
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->global_block_rsv.space_info = space_info;
	fs_info->delalloc_block_rsv.space_info = space_info;
	fs_info->trans_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.space_info = space_info;
	fs_info->delayed_block_rsv.space_info = space_info;

	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
	if (fs_info->quota_root)
		fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;

	update_global_block_rsv(fs_info);
}
4657
/*
 * Tear down the global block reserve at unmount time.  All other fs-wide
 * reserves should already be empty by now; warn if they are not.
 */
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
				(u64)-1);
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
	WARN_ON(fs_info->delayed_block_rsv.size > 0);
	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
}
4671
4672void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
4673 struct btrfs_root *root)
4674{
4675 if (!trans->block_rsv)
4676 return;
4677
4678 if (!trans->bytes_reserved)
4679 return;
4680
4681 trace_btrfs_space_reservation(root->fs_info, "transaction",
4682 trans->transid, trans->bytes_reserved, 0);
4683 btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
4684 trans->bytes_reserved = 0;
4685}
4686
4687
/*
 * Reserve metadata space for adding (and later deleting) an orphan item for
 * @inode by migrating one item's worth of space from the transaction's
 * reserve into the root's orphan reserve.
 *
 * Returns 0 on success, -ENOSPC if the source reserve is short.
 */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;

	/*
	 * One item is enough for both inserting the orphan item and deleting
	 * it later; the space is held in the orphan reserve until
	 * btrfs_orphan_release_metadata() is called.
	 */
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 1);
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
4705
/*
 * Release the one-item reservation taken by btrfs_orphan_reserve_metadata()
 * once we are done with @inode's orphan item.
 */
void btrfs_orphan_release_metadata(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	trace_btrfs_space_reservation(root->fs_info, "orphan",
				      btrfs_ino(inode), num_bytes, 0);
	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
/*
 * btrfs_subvolume_reserve_metadata() - reserve space for a subvolume
 * operation
 * @root: the root of the parent directory
 * @rsv: block reserve to be used for the operation
 * @items: number of items the operation needs reserved
 * @qgroup_reserved: returns the size reserved against the qgroup
 *
 * Used for snapshot/subvolume creation and deletion.  These touch two
 * fs/file trees and the root tree, so the qgroup reservation (3 tree roots'
 * worth) differs from the free space reservation and the normal
 * start_transaction() mechanism can't be used.
 */
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int items,
				     u64 *qgroup_reserved)
{
	u64 num_bytes;
	int ret;

	if (root->fs_info->quota_enabled) {
		/* one root item for each of the trees that are modified */
		num_bytes = 3 * root->leafsize;
		ret = btrfs_qgroup_reserve(root, num_bytes);
		if (ret)
			return ret;
	} else {
		num_bytes = 0;
	}

	*qgroup_reserved = num_bytes;

	num_bytes = btrfs_calc_trans_metadata_size(root, items);
	rsv->space_info = __find_space_info(root->fs_info,
					    BTRFS_BLOCK_GROUP_METADATA);
	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);
	if (ret) {
		/* undo the qgroup reservation on failure */
		if (*qgroup_reserved)
			btrfs_qgroup_free(root, *qgroup_reserved);
	}

	return ret;
}
4761
/*
 * Undo btrfs_subvolume_reserve_metadata(): release everything left in @rsv
 * and return the qgroup reservation, if any.
 */
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
				      struct btrfs_block_rsv *rsv,
				      u64 qgroup_reserved)
{
	btrfs_block_rsv_release(root, rsv, (u64)-1);
	if (qgroup_reserved)
		btrfs_qgroup_free(root, qgroup_reserved);
}
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
/*
 * Drop one outstanding extent from @inode's accounting.  Must be called
 * with BTRFS_I(inode)->lock held.
 *
 * Returns how many reserved-extent "slots" of metadata may now be freed:
 * the reserved extents beyond what is still outstanding, plus one extra if
 * the inode-update reservation (BTRFS_INODE_DELALLOC_META_RESERVED) can
 * also be dropped because no extents remain outstanding.
 */
static unsigned drop_outstanding_extent(struct inode *inode)
{
	unsigned drop_inode_space = 0;
	unsigned dropped_extents = 0;

	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
	BTRFS_I(inode)->outstanding_extents--;

	if (BTRFS_I(inode)->outstanding_extents == 0 &&
	    test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			       &BTRFS_I(inode)->runtime_flags))
		drop_inode_space = 1;

	/*
	 * If we still have at least as many outstanding extents as reserved
	 * extents, leave the reserved count alone.
	 */
	if (BTRFS_I(inode)->outstanding_extents >=
	    BTRFS_I(inode)->reserved_extents)
		return drop_inode_space;

	dropped_extents = BTRFS_I(inode)->reserved_extents -
		BTRFS_I(inode)->outstanding_extents;
	BTRFS_I(inode)->reserved_extents -= dropped_extents;
	return dropped_extents + drop_inode_space;
}
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
/*
 * calc_csum_metadata_size - metadata space needed/freed for csum changes
 * @inode: the inode being manipulated
 * @num_bytes: the number of data bytes added (@reserve=1) or removed
 *	(@reserve=0) from the inode's checksummed range
 * @reserve: 1 when reserving space, 0 when freeing it
 *
 * Adjusts the inode's csum_bytes and returns the metadata bytes that must
 * be reserved (or may be freed) for the change in the number of csum leaves
 * needed.  We compute how many csum items fit in one leaf and round the
 * before/after csum counts up to whole leaves; only a change in leaf count
 * costs (or returns) metadata.
 *
 * Must be called with BTRFS_I(inode)->lock held.
 */
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
				   int reserve)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 csum_size;
	int num_csums_per_leaf;
	int num_csums;
	int old_csums;

	/* nodatasum inodes with no existing csums need nothing */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
	    BTRFS_I(inode)->csum_bytes == 0)
		return 0;

	old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
	if (reserve)
		BTRFS_I(inode)->csum_bytes += num_bytes;
	else
		BTRFS_I(inode)->csum_bytes -= num_bytes;
	csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
	num_csums_per_leaf = (int)div64_u64(csum_size,
					    sizeof(struct btrfs_csum_item) +
					    sizeof(struct btrfs_disk_key));
	num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
	/* round both counts up to whole leaves */
	num_csums = num_csums + num_csums_per_leaf - 1;
	num_csums = num_csums / num_csums_per_leaf;

	old_csums = old_csums + num_csums_per_leaf - 1;
	old_csums = old_csums / num_csums_per_leaf;

	/* no change in the number of csum leaves, no cost */
	if (old_csums == num_csums)
		return 0;

	if (reserve)
		return btrfs_calc_trans_metadata_size(root,
						      num_csums - old_csums);

	return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
}
4864
/*
 * Reserve metadata space for @num_bytes of delalloc on @inode: space for
 * the new outstanding extents, the csums that will cover the data, and (the
 * first time) an extra item for updating the inode.  Returns 0 on success
 * or a negative errno, in which case all accounting is unwound.
 */
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve = 0;
	u64 csum_bytes;
	unsigned nr_extents = 0;
	int extra_reserve = 0;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
	int ret = 0;
	bool delalloc_lock = true;
	u64 to_free = 0;
	unsigned dropped;

	/*
	 * The free space cache inode is written back as part of committing,
	 * so it must not flush (deadlock risk) and does not need the
	 * delalloc mutex.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		flush = BTRFS_RESERVE_NO_FLUSH;
		delalloc_lock = false;
	}

	/* back off while a commit is running, if we're allowed to block */
	if (flush != BTRFS_RESERVE_NO_FLUSH &&
	    btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	if (delalloc_lock)
		mutex_lock(&BTRFS_I(inode)->delalloc_mutex);

	num_bytes = ALIGN(num_bytes, root->sectorsize);

	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->outstanding_extents++;

	if (BTRFS_I(inode)->outstanding_extents >
	    BTRFS_I(inode)->reserved_extents)
		nr_extents = BTRFS_I(inode)->outstanding_extents -
			BTRFS_I(inode)->reserved_extents;

	/*
	 * The first delalloc reservation also adds one item's worth of space
	 * for updating the inode when the IO completes; the flag records
	 * that this extra item is held.
	 */
	if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
		      &BTRFS_I(inode)->runtime_flags)) {
		nr_extents++;
		extra_reserve = 1;
	}

	to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
	to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
	/* remember csum_bytes so we can detect races in the failure path */
	csum_bytes = BTRFS_I(inode)->csum_bytes;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, num_bytes +
					   nr_extents * root->leafsize);
		if (ret)
			goto out_fail;
	}

	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
	if (unlikely(ret)) {
		if (root->fs_info->quota_enabled)
			btrfs_qgroup_free(root, num_bytes +
						nr_extents * root->leafsize);
		goto out_fail;
	}

	spin_lock(&BTRFS_I(inode)->lock);
	if (extra_reserve) {
		set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
			&BTRFS_I(inode)->runtime_flags);
		nr_extents--;
	}
	BTRFS_I(inode)->reserved_extents += nr_extents;
	spin_unlock(&BTRFS_I(inode)->lock);

	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);

	if (to_reserve)
		trace_btrfs_space_reservation(root->fs_info,"delalloc",
					      btrfs_ino(inode), to_reserve, 1);
	block_rsv_add_bytes(block_rsv, to_reserve, 1);

	return 0;

out_fail:
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode);
	/*
	 * If csum_bytes is unchanged nobody freed csums while we dropped the
	 * lock, and we can simply subtract our bytes again.  Otherwise we
	 * raced with free-ers and have to reconstruct how much metadata the
	 * csum accounting really owes back.
	 */
	if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
		calc_csum_metadata_size(inode, num_bytes, 0);
	} else {
		u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
		u64 bytes;

		/*
		 * First figure out how much was freed by the racers: reset
		 * csum_bytes to the value we saw before dropping the lock and
		 * free the bytes that disappeared in the meantime.
		 */
		bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
		BTRFS_I(inode)->csum_bytes = csum_bytes;
		to_free = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Now see how much would have been freed had our own bytes
		 * not been inflating csum_bytes while the racers freed.
		 */
		BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
		bytes = csum_bytes - orig_csum_bytes;
		bytes = calc_csum_metadata_size(inode, bytes, 0);

		/*
		 * Finally set csum_bytes to what it should be without our
		 * failed reservation, and free only the difference between
		 * the two computations (never going negative).
		 */
		BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
		if (bytes > to_free)
			to_free = bytes - to_free;
		else
			to_free = 0;
	}
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	if (to_free) {
		btrfs_block_rsv_release(root, block_rsv, to_free);
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), to_free, 0);
	}
	if (delalloc_lock)
		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
	return ret;
}
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
/*
 * Release the metadata reserved for @num_bytes of delalloc on @inode: drop
 * one outstanding extent, free the csum metadata for the completed range
 * and return everything to the delalloc block reserve (and qgroup).
 */
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free = 0;
	unsigned dropped;

	num_bytes = ALIGN(num_bytes, root->sectorsize);
	spin_lock(&BTRFS_I(inode)->lock);
	dropped = drop_outstanding_extent(inode);

	if (num_bytes)
		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
	spin_unlock(&BTRFS_I(inode)->lock);
	if (dropped > 0)
		to_free += btrfs_calc_trans_metadata_size(root, dropped);

	trace_btrfs_space_reservation(root->fs_info, "delalloc",
				      btrfs_ino(inode), to_free, 0);
	if (root->fs_info->quota_enabled) {
		btrfs_qgroup_free(root, num_bytes +
					dropped * root->leafsize);
	}

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
5071{
5072 int ret;
5073
5074 ret = btrfs_check_data_free_space(inode, num_bytes);
5075 if (ret)
5076 return ret;
5077
5078 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
5079 if (ret) {
5080 btrfs_free_reserved_data_space(inode, num_bytes);
5081 return ret;
5082 }
5083
5084 return 0;
5085}
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
/*
 * Release both the metadata and data space reserved by
 * btrfs_delalloc_reserve_space() for @num_bytes on @inode.
 */
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
	btrfs_delalloc_release_metadata(inode, num_bytes);
	btrfs_free_reserved_data_space(inode, num_bytes);
}
5105
/*
 * Account @num_bytes starting at @bytenr as allocated (@alloc != 0) or
 * freed (@alloc == 0) in the affected block group(s), updating the
 * superblock's bytes_used and the per-space-info counters.  Freed ranges
 * are pinned (marked dirty in pinned_extents) until the transaction
 * commits.  Returns 0 on success, -ENOENT if no block group covers @bytenr.
 */
static int update_block_group(struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc)
{
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	int factor;

	/* update the on-disk accounting in the superblock copy */
	spin_lock(&info->delalloc_root_lock);
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);
	spin_unlock(&info->delalloc_root_lock);

	/* the range may span multiple block groups */
	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -ENOENT;
		/* mirrored profiles consume twice the raw disk space */
		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
				    BTRFS_BLOCK_GROUP_RAID1 |
				    BTRFS_BLOCK_GROUP_RAID10))
			factor = 2;
		else
			factor = 1;
		/*
		 * If this block group has free space cache written out, we
		 * need to make sure to load it if we are removing space, so
		 * the free space accounting for this range is correct.
		 */
		if (!alloc && cache->cached == BTRFS_CACHE_NO)
			cache_block_group(cache, 1);

		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);

		/* changing usage invalidates the on-disk space cache */
		if (btrfs_test_opt(root, SPACE_CACHE) &&
		    cache->disk_cache_state < BTRFS_DC_CLEAR)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		cache->dirty = 1;
		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			/* reserved space becomes used space */
			old_val += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->reserved -= num_bytes;
			cache->space_info->bytes_reserved -= num_bytes;
			cache->space_info->bytes_used += num_bytes;
			cache->space_info->disk_used += num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			/* used space becomes pinned until commit */
			old_val -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			cache->pinned += num_bytes;
			cache->space_info->bytes_pinned += num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			cache->space_info->disk_used -= num_bytes * factor;
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			set_extent_dirty(info->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
		}
		btrfs_put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
5187
5188static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
5189{
5190 struct btrfs_block_group_cache *cache;
5191 u64 bytenr;
5192
5193 spin_lock(&root->fs_info->block_group_cache_lock);
5194 bytenr = root->fs_info->first_logical_byte;
5195 spin_unlock(&root->fs_info->block_group_cache_lock);
5196
5197 if (bytenr < (u64)-1)
5198 return bytenr;
5199
5200 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
5201 if (!cache)
5202 return 0;
5203
5204 bytenr = cache->key.objectid;
5205 btrfs_put_block_group(cache);
5206
5207 return bytenr;
5208}
5209
/*
 * Pin @num_bytes at @bytenr inside @cache so the space cannot be reused
 * before the current transaction commits.  If @reserved, the bytes are
 * also moved out of the reserved counters (they were reserved by an
 * allocation that is now being undone).  Always returns 0.
 */
static int pin_down_extent(struct btrfs_root *root,
			   struct btrfs_block_group_cache *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	/* lock order: space_info->lock before cache->lock */
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	cache->pinned += num_bytes;
	cache->space_info->bytes_pinned += num_bytes;
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	/* record the range so transaction commit can unpin it */
	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
	return 0;
}
5229
5230
5231
5232
/*
 * Pin an extent for the duration of the current transaction.
 * Wrapper around pin_down_extent() that looks up the owning block group.
 * A missing block group is treated as a fatal logic error (BUG_ON).
 * Always returns 0.
 */
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group_cache *cache;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	BUG_ON(!cache);

	pin_down_extent(root, cache, bytenr, num_bytes, reserved);

	btrfs_put_block_group(cache);
	return 0;
}
5246
5247
5248
5249
/*
 * Pin an extent referenced by the tree log during log replay.
 *
 * Unlike btrfs_pin_extent(), a missing block group returns -EINVAL instead
 * of BUG, and the range is also removed from the free space cache so the
 * allocator cannot hand it out while replay is in progress.
 */
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
				    u64 bytenr, u64 num_bytes)
{
	struct btrfs_block_group_cache *cache;
	int ret;

	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	if (!cache)
		return -EINVAL;

	/*
	 * Make sure the free space cache for this group is loaded so the
	 * subsequent btrfs_remove_free_space() operates on real data.
	 */
	cache_block_group(cache, 1);

	pin_down_extent(root, cache, bytenr, num_bytes, 0);

	/* take the range out of the free space cache as well */
	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
	btrfs_put_block_group(cache);
	return ret;
}
5275
5276static int __exclude_logged_extent(struct btrfs_root *root, u64 start, u64 num_bytes)
5277{
5278 int ret;
5279 struct btrfs_block_group_cache *block_group;
5280 struct btrfs_caching_control *caching_ctl;
5281
5282 block_group = btrfs_lookup_block_group(root->fs_info, start);
5283 if (!block_group)
5284 return -EINVAL;
5285
5286 cache_block_group(block_group, 0);
5287 caching_ctl = get_caching_control(block_group);
5288
5289 if (!caching_ctl) {
5290
5291 BUG_ON(!block_group_cache_done(block_group));
5292 ret = btrfs_remove_free_space(block_group, start, num_bytes);
5293 } else {
5294 mutex_lock(&caching_ctl->mutex);
5295
5296 if (start >= caching_ctl->progress) {
5297 ret = add_excluded_extent(root, start, num_bytes);
5298 } else if (start + num_bytes <= caching_ctl->progress) {
5299 ret = btrfs_remove_free_space(block_group,
5300 start, num_bytes);
5301 } else {
5302 num_bytes = caching_ctl->progress - start;
5303 ret = btrfs_remove_free_space(block_group,
5304 start, num_bytes);
5305 if (ret)
5306 goto out_lock;
5307
5308 num_bytes = (start + num_bytes) -
5309 caching_ctl->progress;
5310 start = caching_ctl->progress;
5311 ret = add_excluded_extent(root, start, num_bytes);
5312 }
5313out_lock:
5314 mutex_unlock(&caching_ctl->mutex);
5315 put_caching_control(caching_ctl);
5316 }
5317 btrfs_put_block_group(block_group);
5318 return ret;
5319}
5320
5321int btrfs_exclude_logged_extents(struct btrfs_root *log,
5322 struct extent_buffer *eb)
5323{
5324 struct btrfs_file_extent_item *item;
5325 struct btrfs_key key;
5326 int found_type;
5327 int i;
5328
5329 if (!btrfs_fs_incompat(log->fs_info, MIXED_GROUPS))
5330 return 0;
5331
5332 for (i = 0; i < btrfs_header_nritems(eb); i++) {
5333 btrfs_item_key_to_cpu(eb, &key, i);
5334 if (key.type != BTRFS_EXTENT_DATA_KEY)
5335 continue;
5336 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
5337 found_type = btrfs_file_extent_type(eb, item);
5338 if (found_type == BTRFS_FILE_EXTENT_INLINE)
5339 continue;
5340 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
5341 continue;
5342 key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
5343 key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
5344 __exclude_logged_extent(log, key.objectid, key.offset);
5345 }
5346
5347 return 0;
5348}
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
/*
 * Adjust the reserved-byte accounting of @cache by @num_bytes.
 *
 * @reserve selects the operation:
 *   RESERVE_ALLOC            - reserve bytes for an allocation and move them
 *                              out of bytes_may_use (with tracepoint)
 *   RESERVE_ALLOC_NO_ACCOUNT - reserve bytes without touching bytes_may_use
 *                              (used for data, accounted elsewhere)
 *   RESERVE_FREE             - release a previous reservation
 *
 * Returns -EAGAIN if the group went read-only while reserving (caller must
 * pick another group), otherwise 0.
 */
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
				       u64 num_bytes, int reserve)
{
	struct btrfs_space_info *space_info = cache->space_info;
	int ret = 0;

	/* lock order: space_info->lock before cache->lock */
	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	if (reserve != RESERVE_FREE) {
		if (cache->ro) {
			/* group became read-only; caller retries elsewhere */
			ret = -EAGAIN;
		} else {
			cache->reserved += num_bytes;
			space_info->bytes_reserved += num_bytes;
			if (reserve == RESERVE_ALLOC) {
				trace_btrfs_space_reservation(cache->fs_info,
						"space_info", space_info->flags,
						num_bytes, 0);
				space_info->bytes_may_use -= num_bytes;
			}
		}
	} else {
		/* freeing a reservation; ro groups count it as readonly space */
		if (cache->ro)
			space_info->bytes_readonly += num_bytes;
		cache->reserved -= num_bytes;
		space_info->bytes_reserved -= num_bytes;
		space_info->reservation_progress++;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&space_info->lock);
	return ret;
}
5404
/*
 * Prepare pinned-extent state for a transaction commit.
 *
 * Records, per block group, how far unpinning may proceed
 * (last_byte_to_unpin), drops caching controls for groups whose caching has
 * finished, and flips fs_info->pinned_extents to the other freed_extents
 * tree so new pins during the commit land in the inactive tree.  Also
 * resets the per-space-info total_bytes_pinned counters and the global
 * block reserve.
 */
void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_caching_control *next;
	struct btrfs_caching_control *caching_ctl;
	struct btrfs_block_group_cache *cache;
	struct btrfs_space_info *space_info;

	/* exclude concurrent caching threads while we flip state */
	down_write(&fs_info->extent_commit_sem);

	list_for_each_entry_safe(caching_ctl, next,
				 &fs_info->caching_block_groups, list) {
		cache = caching_ctl->block_group;
		if (block_group_cache_done(cache)) {
			/* fully cached: everything may be unpinned */
			cache->last_byte_to_unpin = (u64)-1;
			list_del_init(&caching_ctl->list);
			put_caching_control(caching_ctl);
		} else {
			/* only unpin what the caching thread has scanned */
			cache->last_byte_to_unpin = caching_ctl->progress;
		}
	}

	/* swap the active pinned-extents tree for the commit */
	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		fs_info->pinned_extents = &fs_info->freed_extents[1];
	else
		fs_info->pinned_extents = &fs_info->freed_extents[0];

	up_write(&fs_info->extent_commit_sem);

	list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
		percpu_counter_set(&space_info->total_bytes_pinned, 0);

	update_global_block_rsv(fs_info);
}
5440
/*
 * Unpin the byte range [@start, @end] after a transaction commit.
 *
 * Walks the block groups covering the range, returns the unpinnable part
 * (up to each group's last_byte_to_unpin) to the free space cache, and
 * reverses the pinned accounting.  Freed metadata space is preferentially
 * refilled into the global block reserve.  Always returns 0; a missing
 * block group is a fatal logic error (BUG_ON).
 */
static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 len;
	bool readonly;

	while (start <= end) {
		readonly = false;
		/* move to the next block group when we walk off this one */
		if (!cache ||
		    start >= cache->key.objectid + cache->key.offset) {
			if (cache)
				btrfs_put_block_group(cache);
			cache = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!cache);
		}

		/* clamp to this group and to the requested range */
		len = cache->key.objectid + cache->key.offset - start;
		len = min(len, end + 1 - start);

		if (start < cache->last_byte_to_unpin) {
			len = min(len, cache->last_byte_to_unpin - start);
			btrfs_add_free_space(cache, start, len);
		}

		start += len;
		space_info = cache->space_info;

		spin_lock(&space_info->lock);
		spin_lock(&cache->lock);
		cache->pinned -= len;
		space_info->bytes_pinned -= len;
		if (cache->ro) {
			space_info->bytes_readonly += len;
			readonly = true;
		}
		spin_unlock(&cache->lock);
		if (!readonly && global_rsv->space_info == space_info) {
			/* top up the global reserve with freed space */
			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				len = min(len, global_rsv->size -
					  global_rsv->reserved);
				global_rsv->reserved += len;
				space_info->bytes_may_use += len;
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
			}
			spin_unlock(&global_rsv->lock);
		}
		spin_unlock(&space_info->lock);
	}

	if (cache)
		btrfs_put_block_group(cache);
	return 0;
}
5499
/*
 * Finish a transaction commit by unpinning everything recorded in the
 * now-inactive freed_extents tree (the one not currently pointed to by
 * fs_info->pinned_extents — it was swapped in btrfs_prepare_extent_commit).
 *
 * Optionally issues discards for the freed ranges when the DISCARD mount
 * option is set.  Discard failures are intentionally not propagated.
 * Returns 0 (immediately if the transaction aborted).
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *unpin;
	u64 start;
	u64 end;
	int ret;

	if (trans->aborted)
		return 0;

	/* pick the tree that was active before prepare_extent_commit swapped */
	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
		unpin = &fs_info->freed_extents[1];
	else
		unpin = &fs_info->freed_extents[0];

	while (1) {
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			break;

		/* best-effort discard; errors deliberately ignored */
		if (btrfs_test_opt(root, DISCARD))
			ret = btrfs_discard_extent(root, start,
						   end + 1 - start, NULL);

		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		unpin_extent_range(root, start, end);
		cond_resched();
	}

	return 0;
}
5534
5535static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
5536 u64 owner, u64 root_objectid)
5537{
5538 struct btrfs_space_info *space_info;
5539 u64 flags;
5540
5541 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
5542 if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
5543 flags = BTRFS_BLOCK_GROUP_SYSTEM;
5544 else
5545 flags = BTRFS_BLOCK_GROUP_METADATA;
5546 } else {
5547 flags = BTRFS_BLOCK_GROUP_DATA;
5548 }
5549
5550 space_info = __find_space_info(fs_info, flags);
5551 BUG_ON(!space_info);
5552 percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
5553}
5554
5555
/*
 * Drop @refs_to_drop references from the extent at [@bytenr, @num_bytes).
 *
 * Finds the matching backref (inline or keyed) and the extent item, then
 * either decrements the reference count or, when the count reaches zero,
 * deletes the extent item (and its backref item), removes any data
 * checksums, and returns the space to the block group accounting.
 *
 * @parent/@root_objectid/@owner_objectid/@owner_offset identify which
 * backref to remove; @extent_op is an optional pending flags/key update to
 * apply when refs remain.  Any failure aborts the transaction.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	/* tree blocks are only ever dropped one ref at a time */
	BUG_ON(!is_data && refs_to_drop != 1);

	/* skinny metadata keys never describe data extents */
	if (is_data)
		skinny_metadata = 0;

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * The backref was found.  Scan a few slots backwards in the
		 * same leaf hoping the extent item precedes the backref, to
		 * save a second search.
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			/* give up after a handful of slots */
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		/* v0 extent items are too small to carry inline refs */
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			/* keyed backref: remove it, then search the extent item */
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
			btrfs_release_path(path);
			path->leave_spinning = 1;

			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * A skinny key may sort right after the old
				 * style key for the same extent; check the
				 * previous slot.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* fall back to the old-style extent key */
				skinny_metadata = false;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				btrfs_err(info, "umm, got %d back from search, was looking for %llu",
					ret, (unsigned long long)bytenr);
				if (ret > 0)
					btrfs_print_leaf(extent_root,
							 path->nodes[0]);
			}
			if (ret < 0) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (ret == -ENOENT) {
		/* corruption: the backref we were asked to drop is missing */
		btrfs_print_leaf(extent_root, path->nodes[0]);
		WARN_ON(1);
		btrfs_err(info,
			"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
			(unsigned long long)bytenr,
			(unsigned long long)parent,
			(unsigned long long)root_objectid,
			(unsigned long long)owner_objectid,
			(unsigned long long)owner_offset);
	} else {
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* convert a v0 extent item in place, then re-find it */
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		if (ret < 0) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}

		btrfs_release_path(path);
		path->leave_spinning = 1;

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			btrfs_err(info, "umm, got %d back from search, was looking for %llu",
				ret, (unsigned long long)bytenr);
			btrfs_print_leaf(extent_root, path->nodes[0]);
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}

		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* non-skinny tree block: sanity-check the recorded level */
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		btrfs_err(info, "trying to drop %d refs but we only have %Lu "
			  "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, extent_root, ret);
		goto out;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		/* extent survives: update count and drop just our backref */
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * An inline ref implies the ref count lives in the same item
		 * and is updated by remove_extent_backref() below.
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
		}
		/* negative delta: these bytes are no longer about to be pinned */
		add_pinned_bytes(root->fs_info, -num_bytes, owner_objectid,
				 root_objectid);
	} else {
		/* last reference: delete the extent item (+ backref item) */
		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(root, path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				/* backref item directly follows the extent item */
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			if (ret) {
				btrfs_abort_transaction(trans, extent_root, ret);
				goto out;
			}
		}

		ret = update_block_group(root, bytenr, num_bytes, 0);
		if (ret) {
			btrfs_abort_transaction(trans, extent_root, ret);
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}
5817
5818
5819
5820
5821
5822
5823
/*
 * Try to short-circuit the free of a tree block allocated and freed within
 * the same transaction: if its delayed ref head has no pending ref updates,
 * remove the head from the delayed-ref tree so the block never hits disk
 * accounting.
 *
 * Returns 1 when the caller must still free the reserved space
 * (must_insert_reserved was set), 0 when nothing more needs doing or the
 * head could not be removed safely.
 */
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (!head)
		goto out;

	/* the head sorts after its refs; a previous node may be one of them */
	node = rb_prev(&head->node.rb_node);
	if (!node)
		goto out;

	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);

	/* same bytenr means there are still pending refs — bail out */
	if (ref->bytenr == bytenr)
		goto out;

	if (head->extent_op) {
		/* a pending flags update means the item must be processed */
		if (!head->must_insert_reserved)
			goto out;
		btrfs_free_delayed_extent_op(head->extent_op);
		head->extent_op = NULL;
	}

	/* someone processing the head holds its mutex; don't wait for them */
	if (!mutex_trylock(&head->mutex))
		goto out;

	/* detach the head from the delayed-ref tree and bookkeeping */
	head->node.in_tree = 0;
	rb_erase(&head->node.rb_node, &delayed_refs->root);

	delayed_refs->num_entries--;

	delayed_refs->num_heads--;
	if (list_empty(&head->cluster))
		delayed_refs->num_heads_ready--;

	list_del_init(&head->cluster);
	spin_unlock(&delayed_refs->lock);

	BUG_ON(head->extent_op);
	/* tell the caller to release the block's reservation itself */
	if (head->must_insert_reserved)
		ret = 1;

	mutex_unlock(&head->mutex);
	btrfs_put_delayed_ref(&head->node);
	return ret;
out:
	spin_unlock(&delayed_refs->lock);
	return 0;
}
5894
/*
 * Free a tree block @buf.  For non-log trees a DROP delayed ref is queued;
 * when @last_ref says this was the last reference, the space is returned
 * immediately where possible:
 *  - block born and dying in this transaction with a clean delayed-ref
 *    head and never written: straight back to the free space cache;
 *  - otherwise the block is pinned until commit.
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_block_group_cache *cache = NULL;
	int pin = 1;
	int ret;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
						 buf->start, buf->len,
						 parent, root->root_key.objectid,
						 btrfs_header_level(buf),
						 BTRFS_DROP_DELAYED_REF, NULL, 0);
		BUG_ON(ret);
	}

	if (!last_ref)
		return;

	/*
	 * NOTE(review): cache may be NULL if no block group covers
	 * buf->start; the pin/put paths below assume it never is —
	 * presumably guaranteed for a live tree block. Verify.
	 */
	cache = btrfs_lookup_block_group(root->fs_info, buf->start);

	if (btrfs_header_generation(buf) == trans->transid) {
		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			/* ret != 0 means we still own the reservation */
			ret = check_ref_cleanup(trans, root, buf->start);
			if (!ret)
				goto out;
		}

		/* already written in this transaction: must stay pinned */
		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			pin_down_extent(root, cache, buf->start, buf->len, 1);
			goto out;
		}

		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

		/* never hit disk: reuse the space right away */
		btrfs_add_free_space(cache, buf->start, buf->len);
		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
		pin = 0;
	}
out:
	if (pin)
		add_pinned_bytes(root->fs_info, buf->len,
				 btrfs_header_level(buf),
				 root->root_key.objectid);

	/*
	 * Deleting the buffer means it is no longer considered corrupt;
	 * clear the flag so a reallocation of the block starts clean.
	 */
	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
	btrfs_put_block_group(cache);
}
5949
5950
/*
 * Drop one reference to an extent.
 *
 * Tree-log blocks are pinned directly (the log tree has no delayed-ref
 * processing); everything else queues a delayed DROP ref — tree refs for
 * metadata owners (< BTRFS_FIRST_FREE_OBJECTID), data refs otherwise.
 * Returns 0 or the error from queueing the delayed ref.
 */
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
		      u64 owner, u64 offset, int for_cow)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;

	/* count the bytes as pinned up front; undone if refs remain */
	add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);

	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
		/* log trees hold only metadata blocks */
		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);

		btrfs_pin_extent(root, bytenr, num_bytes, 1);
		ret = 0;
	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
					num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_DROP_DELAYED_REF, NULL, for_cow);
	} else {
		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
						num_bytes,
						parent, root_objectid, owner,
						offset, BTRFS_DROP_DELAYED_REF,
						NULL, for_cow);
	}
	return ret;
}
5983
5984static u64 stripe_align(struct btrfs_root *root,
5985 struct btrfs_block_group_cache *cache,
5986 u64 val, u64 num_bytes)
5987{
5988 u64 ret = ALIGN(val, root->stripesize);
5989 return ret;
5990}
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
/*
 * Block until the caching thread for @cache has either finished or has
 * found at least @num_bytes of free space.  Returns immediately (0) when
 * no caching is in progress.  Always returns 0.
 */
static noinline int
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
				u64 num_bytes)
{
	struct btrfs_caching_control *caching_ctl;

	caching_ctl = get_caching_control(cache);
	if (!caching_ctl)
		return 0;

	wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
		   (cache->free_space_ctl->free_space >= num_bytes));

	put_caching_control(caching_ctl);
	return 0;
}
6019
6020static noinline int
6021wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6022{
6023 struct btrfs_caching_control *caching_ctl;
6024
6025 caching_ctl = get_caching_control(cache);
6026 if (!caching_ctl)
6027 return 0;
6028
6029 wait_event(caching_ctl->wait, block_group_cache_done(cache));
6030
6031 put_caching_control(caching_ctl);
6032 return 0;
6033}
6034
6035int __get_raid_index(u64 flags)
6036{
6037 if (flags & BTRFS_BLOCK_GROUP_RAID10)
6038 return BTRFS_RAID_RAID10;
6039 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
6040 return BTRFS_RAID_RAID1;
6041 else if (flags & BTRFS_BLOCK_GROUP_DUP)
6042 return BTRFS_RAID_DUP;
6043 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
6044 return BTRFS_RAID_RAID0;
6045 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
6046 return BTRFS_RAID_RAID5;
6047 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
6048 return BTRFS_RAID_RAID6;
6049
6050 return BTRFS_RAID_SINGLE;
6051}
6052
/* Return the BTRFS_RAID_* index for @cache's allocation profile. */
static int get_block_group_index(struct btrfs_block_group_cache *cache)
{
	return __get_raid_index(cache->flags);
}
6057
/*
 * Escalation stages of the find_free_extent() search loop; each retry moves
 * to the next, progressively more expensive, strategy.
 */
enum btrfs_loop_type {
	LOOP_CACHING_NOWAIT = 0,	/* use only already-cached groups */
	LOOP_CACHING_WAIT = 1,		/* wait for caching to make progress */
	LOOP_ALLOC_CHUNK = 2,		/* force-allocate a new chunk */
	LOOP_NO_EMPTY_SIZE = 3,		/* last resort: drop slack requirements */
};
6064
6065
6066
6067
6068
6069
6070
6071
6072
/*
 * Core allocator: find @num_bytes of free space matching profile @flags.
 *
 * On success fills @ins with objectid = start, offset = length and returns
 * 0; returns -ENOSPC when nothing can be found even after forcing a chunk
 * allocation.  @hint_byte biases the search start; @empty_size is extra
 * slack wanted around the allocation (dropped in the final loop stage).
 *
 * The search walks the block groups of the matching space_info, preferring
 * the cluster (last_ptr) when one is configured, and escalates through the
 * btrfs_loop_type stages on failure.
 */
static noinline int find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 flags)
{
	int ret = 0;
	struct btrfs_root *root = orig_root->fs_info->extent_root;
	struct btrfs_free_cluster *last_ptr = NULL;
	struct btrfs_block_group_cache *block_group = NULL;
	struct btrfs_block_group_cache *used_block_group;
	u64 search_start = 0;
	int empty_cluster = 2 * 1024 * 1024;
	struct btrfs_space_info *space_info;
	int loop = 0;
	int index = __get_raid_index(flags);
	/* data reservations are accounted elsewhere */
	int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
		RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
	bool found_uncached_bg = false;
	bool failed_cluster_refill = false;
	bool failed_alloc = false;
	bool use_cluster = true;
	bool have_caching_bg = false;

	WARN_ON(num_bytes < root->sectorsize);
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
	ins->objectid = 0;
	ins->offset = 0;

	trace_find_free_extent(orig_root, num_bytes, empty_size, flags);

	space_info = __find_space_info(root->fs_info, flags);
	if (!space_info) {
		btrfs_err(root->fs_info, "No space info for %llu", flags);
		return -ENOSPC;
	}

	/* mixed data+metadata groups fragment clusters too badly */
	if (btrfs_mixed_space_info(space_info))
		use_cluster = false;

	if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
		last_ptr = &root->fs_info->meta_alloc_cluster;
		if (!btrfs_test_opt(root, SSD))
			empty_cluster = 64 * 1024;
	}

	if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
	    btrfs_test_opt(root, SSD)) {
		last_ptr = &root->fs_info->data_alloc_cluster;
	}

	if (last_ptr) {
		/* start where the cluster last allocated from */
		spin_lock(&last_ptr->lock);
		if (last_ptr->block_group)
			hint_byte = last_ptr->window_start;
		spin_unlock(&last_ptr->lock);
	}

	search_start = max(search_start, first_logical_byte(root, 0));
	search_start = max(search_start, hint_byte);

	if (!last_ptr)
		empty_cluster = 0;

	if (search_start == hint_byte) {
		/* try the hinted block group first, bypassing the list walk */
		block_group = btrfs_lookup_block_group(root->fs_info,
						       search_start);
		used_block_group = block_group;
		/*
		 * Skip uncached groups here: starting caching kicks off IO
		 * we may not want yet; the main loop handles them.
		 */
		if (block_group && block_group_bits(block_group, flags) &&
		    block_group->cached != BTRFS_CACHE_NO) {
			down_read(&space_info->groups_sem);
			if (list_empty(&block_group->list) ||
			    block_group->ro) {
				/* group being removed or read-only: fall
				 * through to the normal list search */
				btrfs_put_block_group(block_group);
				up_read(&space_info->groups_sem);
			} else {
				index = get_block_group_index(block_group);
				goto have_block_group;
			}
		} else if (block_group) {
			btrfs_put_block_group(block_group);
		}
	}
search:
	have_caching_bg = false;
	down_read(&space_info->groups_sem);
	list_for_each_entry(block_group, &space_info->block_groups[index],
			    list) {
		u64 offset;
		int cached;

		used_block_group = block_group;
		btrfs_get_block_group(block_group);
		search_start = block_group->key.objectid;

		/*
		 * A group whose profile doesn't match may still be usable
		 * when it only misses redundancy bits (degraded mounts).
		 */
		if (!block_group_bits(block_group, flags)) {
			u64 extra = BTRFS_BLOCK_GROUP_DUP |
				BTRFS_BLOCK_GROUP_RAID1 |
				BTRFS_BLOCK_GROUP_RAID5 |
				BTRFS_BLOCK_GROUP_RAID6 |
				BTRFS_BLOCK_GROUP_RAID10;

			/* requested redundancy the group can't provide */
			if ((flags & extra) && !(block_group->flags & extra))
				goto loop;
		}

have_block_group:
		cached = block_group_cache_done(block_group);
		if (unlikely(!cached)) {
			found_uncached_bg = true;
			/* kick off (or continue) caching; don't wait yet */
			ret = cache_block_group(block_group, 0);
			BUG_ON(ret < 0);
			ret = 0;
		}

		if (unlikely(block_group->ro))
			goto loop;

		if (last_ptr) {
			unsigned long aligned_cluster;
			/* serialize cluster refills */
			spin_lock(&last_ptr->refill_lock);
			used_block_group = last_ptr->block_group;
			if (used_block_group != block_group &&
			    (!used_block_group ||
			     used_block_group->ro ||
			     !block_group_bits(used_block_group, flags))) {
				used_block_group = block_group;
				goto refill_cluster;
			}

			if (used_block_group != block_group)
				btrfs_get_block_group(used_block_group);

			/* fast path: carve from the existing cluster */
			offset = btrfs_alloc_from_cluster(used_block_group,
			  last_ptr, num_bytes, used_block_group->key.objectid);
			if (offset) {
				spin_unlock(&last_ptr->refill_lock);
				trace_btrfs_reserve_extent_cluster(root,
					block_group, search_start, num_bytes);
				goto checks;
			}

			WARN_ON(last_ptr->block_group != used_block_group);
			if (used_block_group != block_group) {
				btrfs_put_block_group(used_block_group);
				used_block_group = block_group;
			}
refill_cluster:
			BUG_ON(used_block_group != block_group);
			/*
			 * In the last loop stage avoid stealing the cluster
			 * from another group — that can only shuffle space
			 * around, not create any.
			 */
			if (loop >= LOOP_NO_EMPTY_SIZE &&
			    last_ptr->block_group != block_group) {
				spin_unlock(&last_ptr->refill_lock);
				goto unclustered_alloc;
			}

			/* drop whatever is left in the cluster */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);

			if (loop >= LOOP_NO_EMPTY_SIZE) {
				spin_unlock(&last_ptr->refill_lock);
				goto unclustered_alloc;
			}

			aligned_cluster = max_t(unsigned long,
						empty_cluster + empty_size,
					      block_group->full_stripe_len);

			/* rebuild the cluster inside this block group */
			ret = btrfs_find_space_cluster(trans, root,
					       block_group, last_ptr,
					       search_start, num_bytes,
					       aligned_cluster);
			if (ret == 0) {
				/* cluster rebuilt; allocate from it now */
				offset = btrfs_alloc_from_cluster(block_group,
						  last_ptr, num_bytes,
						  search_start);
				if (offset) {
					spin_unlock(&last_ptr->refill_lock);
					trace_btrfs_reserve_extent_cluster(root,
						block_group, search_start,
						num_bytes);
					goto checks;
				}
			} else if (!cached && loop > LOOP_CACHING_NOWAIT
				   && !failed_cluster_refill) {
				spin_unlock(&last_ptr->refill_lock);

				/* give caching a chance, then retry once */
				failed_cluster_refill = true;
				wait_block_group_cache_progress(block_group,
				       num_bytes + empty_cluster + empty_size);
				goto have_block_group;
			}

			/*
			 * Cluster didn't work out; release it and fall back
			 * to the regular free-space search in the next group.
			 */
			btrfs_return_cluster_to_free_space(NULL, last_ptr);
			spin_unlock(&last_ptr->refill_lock);
			goto loop;
		}

unclustered_alloc:
		spin_lock(&block_group->free_space_ctl->tree_lock);
		if (cached &&
		    block_group->free_space_ctl->free_space <
		    num_bytes + empty_cluster + empty_size) {
			spin_unlock(&block_group->free_space_ctl->tree_lock);
			goto loop;
		}
		spin_unlock(&block_group->free_space_ctl->tree_lock);

		offset = btrfs_find_space_for_alloc(block_group, search_start,
						    num_bytes, empty_size);
		/*
		 * Nothing found in a still-caching group: wait once for the
		 * caching thread to produce enough free space, then retry.
		 */
		if (!offset && !failed_alloc && !cached &&
		    loop > LOOP_CACHING_NOWAIT) {
			wait_block_group_cache_progress(block_group,
						num_bytes + empty_size);
			failed_alloc = true;
			goto have_block_group;
		} else if (!offset) {
			if (!cached)
				have_caching_bg = true;
			goto loop;
		}
checks:
		search_start = stripe_align(root, used_block_group,
					    offset, num_bytes);

		/* alignment may push the allocation past the group's end */
		if (search_start + num_bytes >
		    used_block_group->key.objectid + used_block_group->key.offset) {
			btrfs_add_free_space(used_block_group, offset, num_bytes);
			goto loop;
		}

		/* return the alignment slack to the free space cache */
		if (offset < search_start)
			btrfs_add_free_space(used_block_group, offset,
					     search_start - offset);
		BUG_ON(offset > search_start);

		ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
						  alloc_type);
		if (ret == -EAGAIN) {
			/* group went read-only under us; try elsewhere */
			btrfs_add_free_space(used_block_group, offset, num_bytes);
			goto loop;
		}

		/* success */
		ins->objectid = search_start;
		ins->offset = num_bytes;

		trace_btrfs_reserve_extent(orig_root, block_group,
					   search_start, num_bytes);
		if (used_block_group != block_group)
			btrfs_put_block_group(used_block_group);
		btrfs_put_block_group(block_group);
		break;
loop:
		failed_cluster_refill = false;
		failed_alloc = false;
		BUG_ON(index != get_block_group_index(block_group));
		if (used_block_group != block_group)
			btrfs_put_block_group(used_block_group);
		btrfs_put_block_group(block_group);
	}
	up_read(&space_info->groups_sem);

	/* retry the same raid index while caching groups may still deliver */
	if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
		goto search;

	/* try the next (less preferred) raid profile */
	if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
		goto search;

	/*
	 * Escalate: LOOP_CACHING_NOWAIT -> LOOP_CACHING_WAIT ->
	 * LOOP_ALLOC_CHUNK (force a new chunk) -> LOOP_NO_EMPTY_SIZE
	 * (give up on slack space around the allocation).
	 */
	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
		index = 0;
		loop++;
		if (loop == LOOP_ALLOC_CHUNK) {
			ret = do_chunk_alloc(trans, root, flags,
					     CHUNK_ALLOC_FORCE);
			/* -ENOSPC just means continue with what we have */
			if (ret < 0 && ret != -ENOSPC) {
				btrfs_abort_transaction(trans,
							root, ret);
				goto out;
			}
		}

		if (loop == LOOP_NO_EMPTY_SIZE) {
			empty_size = 0;
			empty_cluster = 0;
		}

		goto search;
	} else if (!ins->objectid) {
		ret = -ENOSPC;
	} else if (ins->objectid) {
		ret = 0;
	}
out:
	return ret;
}
6457
6458static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6459 int dump_block_groups)
6460{
6461 struct btrfs_block_group_cache *cache;
6462 int index = 0;
6463
6464 spin_lock(&info->lock);
6465 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
6466 (unsigned long long)info->flags,
6467 (unsigned long long)(info->total_bytes - info->bytes_used -
6468 info->bytes_pinned - info->bytes_reserved -
6469 info->bytes_readonly),
6470 (info->full) ? "" : "not ");
6471 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
6472 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6473 (unsigned long long)info->total_bytes,
6474 (unsigned long long)info->bytes_used,
6475 (unsigned long long)info->bytes_pinned,
6476 (unsigned long long)info->bytes_reserved,
6477 (unsigned long long)info->bytes_may_use,
6478 (unsigned long long)info->bytes_readonly);
6479 spin_unlock(&info->lock);
6480
6481 if (!dump_block_groups)
6482 return;
6483
6484 down_read(&info->groups_sem);
6485again:
6486 list_for_each_entry(cache, &info->block_groups[index], list) {
6487 spin_lock(&cache->lock);
6488 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
6489 (unsigned long long)cache->key.objectid,
6490 (unsigned long long)cache->key.offset,
6491 (unsigned long long)btrfs_block_group_used(&cache->item),
6492 (unsigned long long)cache->pinned,
6493 (unsigned long long)cache->reserved,
6494 cache->ro ? "[readonly]" : "");
6495 btrfs_dump_free_space(cache, bytes);
6496 spin_unlock(&cache->lock);
6497 }
6498 if (++index < BTRFS_NR_RAID_TYPES)
6499 goto again;
6500 up_read(&info->groups_sem);
6501}
6502
6503int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
6504 struct btrfs_root *root,
6505 u64 num_bytes, u64 min_alloc_size,
6506 u64 empty_size, u64 hint_byte,
6507 struct btrfs_key *ins, int is_data)
6508{
6509 bool final_tried = false;
6510 u64 flags;
6511 int ret;
6512
6513 flags = btrfs_get_alloc_profile(root, is_data);
6514again:
6515 WARN_ON(num_bytes < root->sectorsize);
6516 ret = find_free_extent(trans, root, num_bytes, empty_size,
6517 hint_byte, ins, flags);
6518
6519 if (ret == -ENOSPC) {
6520 if (!final_tried) {
6521 num_bytes = num_bytes >> 1;
6522 num_bytes = round_down(num_bytes, root->sectorsize);
6523 num_bytes = max(num_bytes, min_alloc_size);
6524 if (num_bytes == min_alloc_size)
6525 final_tried = true;
6526 goto again;
6527 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6528 struct btrfs_space_info *sinfo;
6529
6530 sinfo = __find_space_info(root->fs_info, flags);
6531 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
6532 (unsigned long long)flags,
6533 (unsigned long long)num_bytes);
6534 if (sinfo)
6535 dump_space_info(sinfo, num_bytes, 1);
6536 }
6537 }
6538
6539 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
6540
6541 return ret;
6542}
6543
6544static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6545 u64 start, u64 len, int pin)
6546{
6547 struct btrfs_block_group_cache *cache;
6548 int ret = 0;
6549
6550 cache = btrfs_lookup_block_group(root->fs_info, start);
6551 if (!cache) {
6552 btrfs_err(root->fs_info, "Unable to find block group for %llu",
6553 (unsigned long long)start);
6554 return -ENOSPC;
6555 }
6556
6557 if (btrfs_test_opt(root, DISCARD))
6558 ret = btrfs_discard_extent(root, start, len, NULL);
6559
6560 if (pin)
6561 pin_down_extent(root, cache, start, len, 1);
6562 else {
6563 btrfs_add_free_space(cache, start, len);
6564 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
6565 }
6566 btrfs_put_block_group(cache);
6567
6568 trace_btrfs_reserved_extent_free(root, start, len);
6569
6570 return ret;
6571}
6572
/*
 * Release a reserved extent back to the free-space cache without pinning;
 * the space is immediately reusable.
 */
int btrfs_free_reserved_extent(struct btrfs_root *root,
			       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(root, start, len, 0);
}
6578
/*
 * Release a reserved extent and pin it, so the space cannot be reused
 * until the current transaction commits.
 */
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
				       u64 start, u64 len)
{
	return __btrfs_free_reserved_extent(root, start, len, 1);
}
6584
/*
 * Insert the EXTENT_ITEM (plus one inline backref) for a newly allocated
 * data extent into the extent tree, and account it in its block group.
 *
 * @parent > 0 means the ref is shared (full backref): a SHARED_DATA_REF
 * keyed on the parent tree block is written; otherwise a normal
 * EXTENT_DATA_REF keyed on (root, owner inode, file offset) is used.
 * @ins carries the extent's bytenr/length; @ref_mod is the initial
 * reference count.
 */
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	/* item is the extent_item immediately followed by one inline ref */
	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	/* fill in the inline backref right after the extent item */
	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	ret = update_block_group(root, ins->objectid, ins->offset, 1);
	if (ret) {
		/*
		 * The block group for an extent we just inserted must
		 * exist; failure here means the extent tree and block
		 * group accounting are inconsistent, so crash loudly.
		 */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			(unsigned long long)ins->objectid,
			(unsigned long long)ins->offset);
		BUG();
	}
	return ret;
}
6655
/*
 * Insert the EXTENT_ITEM for a newly allocated tree block into the extent
 * tree and account it in its block group.
 *
 * With the SKINNY_METADATA incompat feature the tree_block_info (key +
 * level) is omitted and the inline ref follows the extent item directly;
 * otherwise the tree_block_info sits between them.  @parent > 0 selects a
 * SHARED_BLOCK_REF (full backref, asserted via the flag), else a
 * TREE_BLOCK_REF keyed on @root_objectid.
 */
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins)
{
	int ret;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_extent_item *extent_item;
	struct btrfs_tree_block_info *block_info;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u32 size = sizeof(*extent_item) + sizeof(*iref);
	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
						 SKINNY_METADATA);

	if (!skinny_metadata)
		size += sizeof(*block_info);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
				      ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, 1);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);

	if (skinny_metadata) {
		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	} else {
		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
		btrfs_set_tree_block_key(leaf, block_info, key);
		btrfs_set_tree_block_level(leaf, block_info, level);
		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
	}

	if (parent > 0) {
		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_SHARED_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_type(leaf, iref,
						 BTRFS_TREE_BLOCK_REF_KEY);
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}

	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	/* tree blocks are always one leaf in size, regardless of ins->offset */
	ret = update_block_group(root, ins->objectid, root->leafsize, 1);
	if (ret) {
		/* accounting inconsistency for a block we just inserted */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			(unsigned long long)ins->objectid,
			(unsigned long long)ins->offset);
		BUG();
	}
	return ret;
}
6728
6729int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6730 struct btrfs_root *root,
6731 u64 root_objectid, u64 owner,
6732 u64 offset, struct btrfs_key *ins)
6733{
6734 int ret;
6735
6736 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
6737
6738 ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
6739 ins->offset, 0,
6740 root_objectid, owner, offset,
6741 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
6742 return ret;
6743}
6744
6745
6746
6747
6748
6749
/*
 * Re-insert the extent item for a file extent found while replaying the
 * tree log.  The space was allocated in a previous (logged) transaction,
 * so it is excluded/reserved directly instead of going through the
 * normal allocator, then the extent item is written synchronously.
 */
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins)
{
	int ret;
	struct btrfs_block_group_cache *block_group;

	/*
	 * Mixed block groups cache block groups synchronously, so the
	 * logged extent can never race with the cache loader there;
	 * otherwise mark it excluded so caching won't hand it out.
	 */
	if (!btrfs_fs_incompat(root->fs_info, MIXED_GROUPS)) {
		ret = __exclude_logged_extent(root, ins->objectid, ins->offset);
		if (ret)
			return ret;
	}

	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
	if (!block_group)
		return -EINVAL;

	/* no space_info accounting: the space was accounted when logged */
	ret = btrfs_update_reserved_bytes(block_group, ins->offset,
					  RESERVE_ALLOC_NO_ACCOUNT);
	BUG_ON(ret);	/* logic error: the range was excluded above */
	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
					 0, owner, offset, ins, 1);
	btrfs_put_block_group(block_group);
	return ret;
}
6780
/*
 * Find or create the extent_buffer for a freshly allocated tree block,
 * lock it, clean it, and mark its range dirty in the appropriate extent
 * io tree.  Returns the buffer locked and blocking, or ERR_PTR(-ENOMEM).
 */
static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      u64 bytenr, u32 blocksize, int level)
{
	struct extent_buffer *buf;

	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!buf)
		return ERR_PTR(-ENOMEM);
	btrfs_set_header_generation(buf, trans->transid);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root, buf);
	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);

	btrfs_set_lock_blocking(buf);
	btrfs_set_buffer_uptodate(buf);

	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
		/*
		 * Log blocks are tracked per log transid parity: even
		 * transids use dirty_log_pages' DIRTY bits, odd ones the
		 * NEW bits, so the two in-flight log commits don't mix.
		 */
		if (root->log_transid % 2 == 0)
			set_extent_dirty(&root->dirty_log_pages, buf->start,
					buf->start + buf->len - 1, GFP_NOFS);
		else
			set_extent_new(&root->dirty_log_pages, buf->start,
					buf->start + buf->len - 1, GFP_NOFS);
	} else {
		/* ordinary tree blocks are written out with the transaction */
		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	}
	trans->blocks_used++;
	/* returns the buffer write-locked and blocking */
	return buf;
}
6818
/*
 * Pick a block reservation to back a @blocksize tree-block allocation and
 * consume the bytes from it.  Order of attempts:
 *   1. the transaction's block rsv (retried once after refreshing the
 *      global rsv accounting if the rsv is the global one);
 *   2. a fresh no-flush metadata reservation;
 *   3. stealing from the global rsv when it shares the space_info.
 * Returns the rsv the bytes were taken from, or ERR_PTR on failure.
 */
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
	      struct btrfs_root *root, u32 blocksize)
{
	struct btrfs_block_rsv *block_rsv;
	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
	int ret;
	bool global_updated = false;

	block_rsv = get_block_rsv(trans, root);

	/* empty rsv: skip straight to reserving fresh bytes */
	if (unlikely(block_rsv->size == 0))
		goto try_reserve;
again:
	ret = block_rsv_use_bytes(block_rsv, blocksize);
	if (!ret)
		return block_rsv;

	/* failfast rsvs never fall back; the caller handles ENOSPC */
	if (block_rsv->failfast)
		return ERR_PTR(ret);

	if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
		/* recompute the global rsv target once, then retry */
		global_updated = true;
		update_global_block_rsv(root->fs_info);
		goto again;
	}

	if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
		static DEFINE_RATELIMIT_STATE(_rs,
					      DEFAULT_RATELIMIT_INTERVAL * 10,
					      /*DEFAULT_RATELIMIT_BURST*/ 1);
		if (__ratelimit(&_rs))
			WARN(1, KERN_DEBUG
				"btrfs: block rsv returned %d\n", ret);
	}
try_reserve:
	ret = reserve_metadata_bytes(root, block_rsv, blocksize,
				     BTRFS_RESERVE_NO_FLUSH);
	if (!ret)
		return block_rsv;
	/*
	 * Last resort: dip into the global reserve, but only when it
	 * draws from the same space_info so the accounting stays sane.
	 */
	if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
	    block_rsv->space_info == global_rsv->space_info) {
		ret = block_rsv_use_bytes(global_rsv, blocksize);
		if (!ret)
			return global_rsv;
	}
	return ERR_PTR(ret);
}
6872
/*
 * Undo use_block_rsv(): give @blocksize bytes back to @block_rsv, then
 * release any excess above the rsv's target back to the space_info.
 */
static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
	block_rsv_add_bytes(block_rsv, blocksize, 0);
	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}
6879
6880
6881
6882
6883
6884
6885
6886
6887struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6888 struct btrfs_root *root, u32 blocksize,
6889 u64 parent, u64 root_objectid,
6890 struct btrfs_disk_key *key, int level,
6891 u64 hint, u64 empty_size)
6892{
6893 struct btrfs_key ins;
6894 struct btrfs_block_rsv *block_rsv;
6895 struct extent_buffer *buf;
6896 u64 flags = 0;
6897 int ret;
6898 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6899 SKINNY_METADATA);
6900
6901 block_rsv = use_block_rsv(trans, root, blocksize);
6902 if (IS_ERR(block_rsv))
6903 return ERR_CAST(block_rsv);
6904
6905 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
6906 empty_size, hint, &ins, 0);
6907 if (ret) {
6908 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
6909 return ERR_PTR(ret);
6910 }
6911
6912 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
6913 blocksize, level);
6914 BUG_ON(IS_ERR(buf));
6915
6916 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
6917 if (parent == 0)
6918 parent = ins.objectid;
6919 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6920 } else
6921 BUG_ON(parent > 0);
6922
6923 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
6924 struct btrfs_delayed_extent_op *extent_op;
6925 extent_op = btrfs_alloc_delayed_extent_op();
6926 BUG_ON(!extent_op);
6927 if (key)
6928 memcpy(&extent_op->key, key, sizeof(extent_op->key));
6929 else
6930 memset(&extent_op->key, 0, sizeof(extent_op->key));
6931 extent_op->flags_to_set = flags;
6932 if (skinny_metadata)
6933 extent_op->update_key = 0;
6934 else
6935 extent_op->update_key = 1;
6936 extent_op->update_flags = 1;
6937 extent_op->is_data = 0;
6938 extent_op->level = level;
6939
6940 ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
6941 ins.objectid,
6942 ins.offset, parent, root_objectid,
6943 level, BTRFS_ADD_DELAYED_EXTENT,
6944 extent_op, 0);
6945 BUG_ON(ret);
6946 }
6947 return buf;
6948}
6949
/*
 * State threaded through the tree walkers used by btrfs_drop_snapshot()
 * and btrfs_drop_subtree().
 */
struct walk_control {
	u64 refs[BTRFS_MAX_LEVEL];	/* cached ref count per level */
	u64 flags[BTRFS_MAX_LEVEL];	/* cached extent flags per level */
	struct btrfs_key update_progress; /* resume key for UPDATE_BACKREF */
	int stage;		/* DROP_REFERENCE or UPDATE_BACKREF */
	int level;		/* level currently being processed */
	int shared_level;	/* root level of the shared subtree, or -1 */
	int update_ref;		/* convert backrefs while walking */
	int keep_locks;		/* keep path locks held across levels */
	int reada_slot;		/* slot where readahead last stopped */
	int reada_count;	/* adaptive readahead batch size */
	int for_reloc;		/* walk performed on behalf of relocation */
};
6963
6964#define DROP_REFERENCE 1
6965#define UPDATE_BACKREF 2
6966
/*
 * Read ahead child blocks of the node at wc->level that the walk is
 * likely to visit, applying the same skip rules do_walk_down() uses so
 * we don't prefetch blocks the walk will never read.  The batch size
 * adapts: it shrinks (x2/3) when the walker hasn't caught up to the
 * previous readahead window and grows (x3/2) otherwise.
 */
static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct walk_control *wc,
				     struct btrfs_path *path)
{
	u64 bytenr;
	u64 generation;
	u64 refs;
	u64 flags;
	u32 nritems;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *eb;
	int ret;
	int slot;
	int nread = 0;

	if (path->slots[wc->level] < wc->reada_slot) {
		/* walker is still inside the last window: slow down */
		wc->reada_count = wc->reada_count * 2 / 3;
		wc->reada_count = max(wc->reada_count, 2);
	} else {
		/* walker caught up: widen the window */
		wc->reada_count = wc->reada_count * 3 / 2;
		wc->reada_count = min_t(int, wc->reada_count,
					BTRFS_NODEPTRS_PER_BLOCK(root));
	}

	eb = path->nodes[wc->level];
	nritems = btrfs_header_nritems(eb);
	blocksize = btrfs_level_size(root, wc->level - 1);

	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
		if (nread >= wc->reada_count)
			break;

		cond_resched();
		bytenr = btrfs_node_blockptr(eb, slot);
		generation = btrfs_node_ptr_generation(eb, slot);

		/* the current slot is about to be read anyway */
		if (slot == path->slots[wc->level])
			goto reada;

		if (wc->stage == UPDATE_BACKREF &&
		    generation <= root->root_key.offset)
			continue;

		/* same filtering as do_walk_down(): skip unvisited blocks */
		ret = btrfs_lookup_extent_info(trans, root, bytenr,
					       wc->level - 1, 1, &refs,
					       &flags);
		/* We don't care about errors in readahead. */
		if (ret < 0)
			continue;
		BUG_ON(refs == 0);

		if (wc->stage == DROP_REFERENCE) {
			if (refs == 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		ret = readahead_tree_block(root, bytenr, blocksize,
					   generation);
		if (ret)
			break;
		nread++;
	}
	wc->reada_slot = slot;
}
7050
7051
7052
7053
7054
7055
7056
7057
7058
/*
 * Helper to process the current tree block (path->nodes[wc->level])
 * before descending into it.
 *
 * Caches the block's ref count and flags when needed; in DROP_REFERENCE
 * stage a block with refs > 1 makes us return 1 so the caller switches
 * to backref updating.  In UPDATE_BACKREF stage, blocks we own get their
 * backrefs converted to full backrefs (inc new ref, drop old ref, set
 * the FULL_BACKREF flag on disk).
 *
 * Returns 1 to stop descending here, 0 to continue down, <0 on error.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, level, 1,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret == -ENOMEM);
		if (ret)
			return ret;
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock_rw(eb, path->locks[level]);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
		BUG_ON(ret); /* -ENOMEM */
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag,
						  btrfs_header_level(eb), 0);
		BUG_ON(ret); /* -ENOMEM */
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock_rw(eb, path->locks[level]);
		path->locks[level] = 0;
	}
	return 0;
}
7126
7127
7128
7129
7130
7131
7132
7133
7134
7135
7136
7137
7138
7139
/*
 * Helper to process the pointer at path->slots[wc->level] and decide
 * whether to walk down into the child block.
 *
 * When we descend, the child is locked, put on the path, and wc->level
 * is decremented (returns 0).  When we skip, the child's reference is
 * dropped if we own it (DROP_REFERENCE stage) and the caller should
 * advance to the next slot (returns 1).  <0 on error.
 *
 * NOTE: returning 1 does not mean the block should be kept; the
 * refs/flags caches for level-1 are zeroed on the skip path.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * blocks older than the snapshot were not copied into it, so
	 * UPDATE_BACKREF has nothing to convert below this pointer
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = btrfs_level_size(root, level - 1);

	next = btrfs_find_tree_block(root, bytenr, blocksize);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
		if (!next)
			return -ENOMEM;
		/* block wasn't cached: worth reading ahead around it */
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0) {
		btrfs_tree_unlock(next);
		return ret;
	}

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(root->fs_info, "Missing references.");
		BUG();
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			/* shared child: maybe switch to backref updating */
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, blocksize, generation);
		if (!next || !extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	/* descend: put the child on the path */
	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
				root->root_key.objectid, level - 1, 0, 0);
		BUG_ON(ret); /* -ENOMEM */
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}
7269
7270
7271
7272
7273
7274
7275
7276
7277
7278
7279
7280
7281
/*
 * Helper to process the tree block at path->nodes[wc->level] while
 * walking back up the tree.
 *
 * If we reach the root of the shared subtree in UPDATE_BACKREF stage,
 * switch back to DROP_REFERENCE and rewalk the subtree from its top.
 * In DROP_REFERENCE stage, a block whose last ref belongs to us has its
 * children's refs dropped (leaves) and the block itself freed.
 *
 * Returns 1 when the shared subtree must be rewalked, 0 when the walk
 * can move up a level, <0 on error.
 */
static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc)
{
	int ret;
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 parent = 0;

	if (wc->stage == UPDATE_BACKREF) {
		BUG_ON(wc->shared_level < level);
		if (level < wc->shared_level)
			goto out;

		ret = find_next_key(path, level + 1, &wc->update_progress);
		if (ret > 0)
			wc->update_ref = 0;

		wc->stage = DROP_REFERENCE;
		wc->shared_level = -1;
		path->slots[level] = 0;

		/*
		 * check reference count again if the block isn't locked.
		 * we should start walking down the tree again if reference
		 * count is one.
		 */
		if (!path->locks[level]) {
			BUG_ON(level == 0);
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

			ret = btrfs_lookup_extent_info(trans, root,
						       eb->start, level, 1,
						       &wc->refs[level],
						       &wc->flags[level]);
			if (ret < 0) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return ret;
			}
			BUG_ON(wc->refs[level] == 0);
			if (wc->refs[level] == 1) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;
				return 1;
			}
		}
	}

	/* wc->stage == DROP_REFERENCE */
	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);

	if (wc->refs[level] == 1) {
		if (level == 0) {
			/* drop the refs held by this leaf's file extents */
			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
				ret = btrfs_dec_ref(trans, root, eb, 1,
						    wc->for_reloc);
			else
				ret = btrfs_dec_ref(trans, root, eb, 0,
						    wc->for_reloc);
			BUG_ON(ret); /* -ENOMEM */
		}
		/* make block locked assertion in clean_tree_block happy */
		if (!path->locks[level] &&
		    btrfs_header_generation(eb) == trans->transid) {
			btrfs_tree_lock(eb);
			btrfs_set_lock_blocking(eb);
			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		}
		clean_tree_block(trans, root, eb);
	}

	if (eb == root->node) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = eb->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(eb));
	} else {
		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			parent = path->nodes[level + 1]->start;
		else
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level + 1]));
	}

	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
	wc->refs[level] = 0;
	wc->flags[level] = 0;
	return 0;
}
7377
7378static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
7379 struct btrfs_root *root,
7380 struct btrfs_path *path,
7381 struct walk_control *wc)
7382{
7383 int level = wc->level;
7384 int lookup_info = 1;
7385 int ret;
7386
7387 while (level >= 0) {
7388 ret = walk_down_proc(trans, root, path, wc, lookup_info);
7389 if (ret > 0)
7390 break;
7391
7392 if (level == 0)
7393 break;
7394
7395 if (path->slots[level] >=
7396 btrfs_header_nritems(path->nodes[level]))
7397 break;
7398
7399 ret = do_walk_down(trans, root, path, wc, &lookup_info);
7400 if (ret > 0) {
7401 path->slots[level]++;
7402 continue;
7403 } else if (ret < 0)
7404 return ret;
7405 level = wc->level;
7406 }
7407 return 0;
7408}
7409
7410static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
7411 struct btrfs_root *root,
7412 struct btrfs_path *path,
7413 struct walk_control *wc, int max_level)
7414{
7415 int level = wc->level;
7416 int ret;
7417
7418 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
7419 while (level < max_level && path->nodes[level]) {
7420 wc->level = level;
7421 if (path->slots[level] + 1 <
7422 btrfs_header_nritems(path->nodes[level])) {
7423 path->slots[level]++;
7424 return 0;
7425 } else {
7426 ret = walk_up_proc(trans, root, path, wc);
7427 if (ret > 0)
7428 return 0;
7429
7430 if (path->locks[level]) {
7431 btrfs_tree_unlock_rw(path->nodes[level],
7432 path->locks[level]);
7433 path->locks[level] = 0;
7434 }
7435 free_extent_buffer(path->nodes[level]);
7436 path->nodes[level] = NULL;
7437 level++;
7438 }
7439 }
7440 return 1;
7441}
7442
7443
7444
7445
7446
7447
7448
7449
7450
7451
7452
7453
7454
7455
/*
 * Drop a subvolume/snapshot tree: walk the whole tree dropping the
 * references it holds, delete its root item, and free the root.
 *
 * @block_rsv:   optional reservation to charge the work to.
 * @update_ref:  convert backrefs of shared subtrees while walking
 *               (used when the snapshot being dropped still shares
 *               blocks with a live tree).
 * @for_reloc:   called from relocation; disables cleaner-sleep early
 *               exit and transaction throttling side effects.
 *
 * Progress (drop_progress/drop_level in the root item) is persisted each
 * time the transaction is ended, so an interrupted drop resumes where it
 * left off.  Returns 0 on success or a negative error.
 */
int btrfs_drop_snapshot(struct btrfs_root *root,
			 struct btrfs_block_rsv *block_rsv, int update_ref,
			 int for_reloc)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;
	bool root_dropped = false;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		err = -ENOMEM;
		goto out;
	}

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	if (block_rsv)
		trans->block_rsv = block_rsv;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* fresh drop: start from the tree root */
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		/* resume an interrupted drop from the saved key/level */
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out_end_trans;
		}
		WARN_ON(ret > 0);

		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		btrfs_unlock_up_safe(path, 0);

		/* re-lock top-down and re-cache refs/flags for each level */
		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			btrfs_set_lock_blocking(path->nodes[level]);

			ret = btrfs_lookup_extent_info(trans, root,
						path->nodes[level]->start,
						level, 1, &wc->refs[level],
						&wc->flags[level]);
			if (ret < 0) {
				err = ret;
				goto out_end_trans;
			}
			BUG_ON(wc->refs[level] == 0);

			if (level == root_item->drop_level)
				break;

			btrfs_tree_unlock(path->nodes[level]);
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->for_reloc = for_reloc;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

	while (1) {
		/* walk down dropping refs, then back up freeing blocks */
		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0) {
			err = ret;
			break;
		}

		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret < 0) {
			err = ret;
			break;
		}

		if (ret > 0) {
			/* the whole tree has been walked */
			BUG_ON(wc->stage != DROP_REFERENCE);
			break;
		}

		if (wc->stage == DROP_REFERENCE) {
			/* remember how far we got for a later resume */
			level = wc->level;
			btrfs_node_key(path->nodes[level],
				       &root_item->drop_progress,
				       path->slots[level]);
			root_item->drop_level = level;
		}

		BUG_ON(wc->level == 0);
		if (btrfs_should_end_transaction(trans, tree_root) ||
		    (!for_reloc && btrfs_need_cleaner_sleep(root))) {
			/* persist progress before restarting the transaction */
			ret = btrfs_update_root(trans, tree_root,
						&root->root_key,
						root_item);
			if (ret) {
				btrfs_abort_transaction(trans, tree_root, ret);
				err = ret;
				goto out_end_trans;
			}

			btrfs_end_transaction_throttle(trans, tree_root);
			if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
				pr_debug("btrfs: drop snapshot early exit\n");
				err = -EAGAIN;
				goto out_free;
			}

			trans = btrfs_start_transaction(tree_root, 0);
			if (IS_ERR(trans)) {
				err = PTR_ERR(trans);
				goto out_free;
			}
			if (block_rsv)
				trans->block_rsv = block_rsv;
		}
	}
	btrfs_release_path(path);
	if (err)
		goto out_end_trans;

	ret = btrfs_del_root(trans, tree_root, &root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, tree_root, ret);
		goto out_end_trans;
	}

	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		ret = btrfs_find_root(tree_root, &root->root_key, path,
				      NULL, NULL);
		if (ret < 0) {
			btrfs_abort_transaction(trans, tree_root, ret);
			err = ret;
			goto out_end_trans;
		} else if (ret > 0) {
			/*
			 * The root item is already gone: just remove the
			 * orphan item that guarded the drop; if that fails
			 * the orphan cleanup will deal with it later, so
			 * the error is deliberately ignored.
			 */
			btrfs_del_orphan_item(trans, tree_root,
					      root->root_key.objectid);
		}
	}

	if (root->in_radix) {
		btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
	} else {
		free_extent_buffer(root->node);
		free_extent_buffer(root->commit_root);
		btrfs_put_fs_root(root);
	}
	root_dropped = true;
out_end_trans:
	btrfs_end_transaction_throttle(trans, tree_root);
out_free:
	kfree(wc);
	btrfs_free_path(path);
out:
	/*
	 * So if we need to stop dropping the snapshot for whatever reason we
	 * need to make sure to add it back to the dead root list so that we
	 * keep trying to do the work later.  This also cleans up roots if we
	 * don't have it in the radix (like when we recover after a power fail
	 * or unmount) so we don't leak memory.
	 */
	if (root_dropped == false)
		btrfs_add_dead_root(root);
	if (err)
		btrfs_std_error(root->fs_info, err);
	return err;
}
7667
7668
7669
7670
7671
7672
7673
/*
 * Drop the subtree rooted at @node, which hangs off @parent, from a
 * relocation tree.  Both extent buffers must be write-locked by the
 * caller; this takes its own reference on @parent and releases both
 * buffers through the path.
 *
 * Reuses the walk_control machinery of snapshot deletion: walk down
 * dropping references, then walk back up to the parent level.
 *
 * Returns 0 on success or a negative errno from the tree walks.
 */
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct extent_buffer *node,
			struct extent_buffer *parent)
{
	struct btrfs_path *path;
	struct walk_control *wc;
	int level;
	int parent_level;
	int ret = 0;
	int wret;

	/* only relocation trees may have subtrees dropped this way */
	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	if (!wc) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	btrfs_assert_tree_locked(parent);
	parent_level = btrfs_header_level(parent);
	extent_buffer_get(parent);
	path->nodes[parent_level] = parent;
	/* slot past the last item: the walk ends when it returns here */
	path->slots[parent_level] = btrfs_header_nritems(parent);

	btrfs_assert_tree_locked(node);
	level = btrfs_header_level(node);
	path->nodes[level] = node;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

	wc->refs[parent_level] = 1;
	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = 0;
	wc->keep_locks = 1;
	wc->for_reloc = 1;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

	while (1) {
		wret = walk_down_tree(trans, root, path, wc);
		if (wret < 0) {
			ret = wret;
			break;
		}

		/* wret > 0 here means we climbed back to parent_level: done */
		wret = walk_up_tree(trans, root, path, wc, parent_level);
		if (wret < 0)
			ret = wret;
		if (wret != 0)
			break;
	}

	kfree(wc);
	btrfs_free_path(path);
	return ret;
}
7738
7739static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7740{
7741 u64 num_devices;
7742 u64 stripped;
7743
7744
7745
7746
7747
7748 stripped = get_restripe_target(root->fs_info, flags);
7749 if (stripped)
7750 return extended_to_chunk(stripped);
7751
7752
7753
7754
7755
7756
7757 num_devices = root->fs_info->fs_devices->rw_devices +
7758 root->fs_info->fs_devices->missing_devices;
7759
7760 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7761 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
7762 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7763
7764 if (num_devices == 1) {
7765 stripped |= BTRFS_BLOCK_GROUP_DUP;
7766 stripped = flags & ~stripped;
7767
7768
7769 if (flags & BTRFS_BLOCK_GROUP_RAID0)
7770 return stripped;
7771
7772
7773 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
7774 BTRFS_BLOCK_GROUP_RAID10))
7775 return stripped | BTRFS_BLOCK_GROUP_DUP;
7776 } else {
7777
7778 if (flags & stripped)
7779 return flags;
7780
7781 stripped |= BTRFS_BLOCK_GROUP_DUP;
7782 stripped = flags & ~stripped;
7783
7784
7785 if (flags & BTRFS_BLOCK_GROUP_DUP)
7786 return stripped | BTRFS_BLOCK_GROUP_RAID1;
7787
7788
7789 }
7790
7791 return flags;
7792}
7793
/*
 * Try to mark @cache read-only.  Succeeds only if the space_info can
 * absorb the group's free space into its readonly count without running
 * out of allocatable room (plus a small cushion for system/metadata
 * groups unless @force is set).
 *
 * Returns 0 on success (or if the group is already ro), -ENOSPC otherwise.
 */
static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;
	u64 min_allocable_bytes;
	int ret = -ENOSPC;

	/*
	 * Keep some slack in system and metadata space so marking this
	 * group ro cannot starve critical reservations; data groups and
	 * forced callers get no cushion.
	 */
	if ((sinfo->flags &
	     (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
	    !force)
		min_allocable_bytes = 1 * 1024 * 1024;
	else
		min_allocable_bytes = 0;

	/* lock order: space_info before block group */
	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);

	if (cache->ro) {
		ret = 0;
		goto out;
	}

	/* free bytes in this group that would become unallocatable */
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);

	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
	    sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
	    min_allocable_bytes <= sinfo->total_bytes) {
		sinfo->bytes_readonly += num_bytes;
		cache->ro = 1;
		ret = 0;
	}
out:
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
	return ret;
}
7837
/*
 * Mark @cache read-only for relocation/balance.  If the group's profile
 * would change (restripe target or device count), pre-allocate a chunk
 * of the new profile first; if the first ro attempt fails for lack of
 * space, force one more chunk allocation and retry.
 *
 * Returns 0 on success or a negative errno / -ENOSPC.
 */
int btrfs_set_block_group_ro(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache)

{
	struct btrfs_trans_handle *trans;
	u64 alloc_flags;
	int ret;

	BUG_ON(cache->ro);

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* profile conversion pending: make sure a target chunk exists */
	alloc_flags = update_block_group_flags(root, cache->flags);
	if (alloc_flags != cache->flags) {
		ret = do_chunk_alloc(trans, root, alloc_flags,
				     CHUNK_ALLOC_FORCE);
		if (ret < 0)
			goto out;
	}

	ret = set_block_group_ro(cache, 0);
	if (!ret)
		goto out;
	/* not enough room: force a new chunk for this space and retry */
	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
	ret = do_chunk_alloc(trans, root, alloc_flags,
			     CHUNK_ALLOC_FORCE);
	if (ret < 0)
		goto out;
	ret = set_block_group_ro(cache, 0);
out:
	btrfs_end_transaction(trans, root);
	return ret;
}
7873
7874int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
7875 struct btrfs_root *root, u64 type)
7876{
7877 u64 alloc_flags = get_alloc_profile(root, type);
7878 return do_chunk_alloc(trans, root, alloc_flags,
7879 CHUNK_ALLOC_FORCE);
7880}
7881
7882
7883
7884
7885
7886static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
7887{
7888 struct btrfs_block_group_cache *block_group;
7889 u64 free_bytes = 0;
7890 int factor;
7891
7892 list_for_each_entry(block_group, groups_list, list) {
7893 spin_lock(&block_group->lock);
7894
7895 if (!block_group->ro) {
7896 spin_unlock(&block_group->lock);
7897 continue;
7898 }
7899
7900 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
7901 BTRFS_BLOCK_GROUP_RAID10 |
7902 BTRFS_BLOCK_GROUP_DUP))
7903 factor = 2;
7904 else
7905 factor = 1;
7906
7907 free_bytes += (block_group->key.offset -
7908 btrfs_block_group_used(&block_group->item)) *
7909 factor;
7910
7911 spin_unlock(&block_group->lock);
7912 }
7913
7914 return free_bytes;
7915}
7916
7917
7918
7919
7920
7921u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
7922{
7923 int i;
7924 u64 free_bytes = 0;
7925
7926 spin_lock(&sinfo->lock);
7927
7928 for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
7929 if (!list_empty(&sinfo->block_groups[i]))
7930 free_bytes += __btrfs_get_ro_block_group_free_space(
7931 &sinfo->block_groups[i]);
7932
7933 spin_unlock(&sinfo->lock);
7934
7935 return free_bytes;
7936}
7937
/*
 * Undo set_block_group_ro(): give the group's free space back from the
 * space_info's readonly counter and clear the ro flag.
 */
void btrfs_set_block_group_rw(struct btrfs_root *root,
			      struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;

	BUG_ON(!cache->ro);

	/* lock order: space_info before block group */
	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	/* same free-space computation set_block_group_ro() accounted */
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);
	sinfo->bytes_readonly -= num_bytes;
	cache->ro = 0;
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
}
7955
7956
7957
7958
7959
7960
7961
/*
 * Decide whether the block group containing @bytenr could be emptied by
 * relocation.  The cheap check first: if the rest of the space_info can
 * absorb the group's used bytes, relocation is fine.  Otherwise verify
 * that enough devices have a large enough free extent to allocate a
 * replacement chunk of the group's (or the restripe target's) profile.
 *
 * Returns 0 when relocation looks possible, -1 when it does not (or the
 * group cannot be found), or a negative errno.
 */
int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_device *device;
	struct btrfs_trans_handle *trans;
	u64 min_free;
	u64 dev_min = 1;
	u64 dev_nr = 0;
	u64 target;
	int index;
	int full = 0;
	int ret = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);

	/* odd: no block group at this address, refuse relocation */
	if (!block_group)
		return -1;

	min_free = btrfs_block_group_used(&block_group->item);

	/* nothing used: trivially relocatable (ret stays 0) */
	if (!min_free)
		goto out;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);

	full = space_info->full;

	/*
	 * If the space_info has room for the group's used bytes elsewhere,
	 * no new chunk is needed.  Skip this shortcut when the group IS
	 * the whole space_info - then only a new chunk can take its data.
	 */
	if ((space_info->total_bytes != block_group->key.offset) &&
	    (space_info->bytes_used + space_info->bytes_reserved +
	     space_info->bytes_pinned + space_info->bytes_readonly +
	     min_free < space_info->total_bytes)) {
		spin_unlock(&space_info->lock);
		goto out;
	}
	spin_unlock(&space_info->lock);

	/*
	 * From here on we would need to allocate a new chunk; assume
	 * failure until the device scan below proves otherwise.
	 */
	ret = -1;

	/*
	 * Pick the raid index the replacement chunk would use.  A running
	 * restripe overrides the group's own profile; otherwise, if the
	 * space_info was already full, there is no room for a new chunk.
	 */
	target = get_restripe_target(root->fs_info, block_group->flags);
	if (target) {
		index = __get_raid_index(extended_to_chunk(target));
	} else {
		/*
		 * plain balance: a full space_info means a new chunk
		 * cannot be allocated
		 */
		if (full)
			goto out;

		index = get_block_group_index(block_group);
	}

	/* per-profile device count and per-device size requirements */
	if (index == BTRFS_RAID_RAID10) {
		dev_min = 4;
		/* data is striped over half the mirrors: halve per-device */
		min_free >>= 1;
	} else if (index == BTRFS_RAID_RAID1) {
		dev_min = 2;
	} else if (index == BTRFS_RAID_DUP) {
		/* both copies live on one device: double the need */
		min_free <<= 1;
	} else if (index == BTRFS_RAID_RAID0) {
		dev_min = fs_devices->rw_devices;
		do_div(min_free, dev_min);
	}

	/* join a transaction so pending chunk allocations are visible */
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	mutex_lock(&root->fs_info->chunk_mutex);
	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
		u64 dev_offset;

		/*
		 * count devices with a contiguous free extent big enough
		 * for their share of the replacement chunk; dev-replace
		 * targets don't count
		 */
		if (device->total_bytes > device->bytes_used + min_free &&
		    !device->is_tgtdev_for_dev_replace) {
			ret = find_free_dev_extent(trans, device, min_free,
						   &dev_offset, NULL);
			if (!ret)
				dev_nr++;

			/* enough devices found: ret is 0, success */
			if (dev_nr >= dev_min)
				break;

			ret = -1;
		}
	}
	mutex_unlock(&root->fs_info->chunk_mutex);
	btrfs_end_transaction(trans, root);
out:
	btrfs_put_block_group(block_group);
	return ret;
}
8089
8090static int find_first_block_group(struct btrfs_root *root,
8091 struct btrfs_path *path, struct btrfs_key *key)
8092{
8093 int ret = 0;
8094 struct btrfs_key found_key;
8095 struct extent_buffer *leaf;
8096 int slot;
8097
8098 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
8099 if (ret < 0)
8100 goto out;
8101
8102 while (1) {
8103 slot = path->slots[0];
8104 leaf = path->nodes[0];
8105 if (slot >= btrfs_header_nritems(leaf)) {
8106 ret = btrfs_next_leaf(root, path);
8107 if (ret == 0)
8108 continue;
8109 if (ret < 0)
8110 goto out;
8111 break;
8112 }
8113 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8114
8115 if (found_key.objectid >= key->objectid &&
8116 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8117 ret = 0;
8118 goto out;
8119 }
8120 path->slots[0]++;
8121 }
8122out:
8123 return ret;
8124}
8125
/*
 * Drop the free-space-cache inode references pinned by block groups
 * (block_group->inode with ->iref set), so the final iputs happen here
 * rather than keeping inodes alive past this point (e.g. at unmount).
 */
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	u64 last = 0;

	while (1) {
		struct inode *inode;

		/* scan from 'last' for the next group holding an iref */
		block_group = btrfs_lookup_first_block_group(info, last);
		while (block_group) {
			spin_lock(&block_group->lock);
			if (block_group->iref)
				break;	/* leaves block_group->lock held */
			spin_unlock(&block_group->lock);
			block_group = next_block_group(info->tree_root,
						       block_group);
		}
		if (!block_group) {
			if (last == 0)
				break;
			/* reached the end: rescan once from the start */
			last = 0;
			continue;
		}

		/* steal the inode ref under the lock, then iput it */
		inode = block_group->inode;
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		iput(inode);
		last = block_group->key.objectid + block_group->key.offset;
		btrfs_put_block_group(block_group);
	}
}
8159
/*
 * Tear down all in-memory block group caches and space_info structures.
 * Called in the final stages of unmount (or mount failure): pending
 * caching controls are dropped, running caching threads are waited for,
 * and leaked reservations are reported under ENOSPC_DEBUG.
 *
 * Always returns 0.
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_caching_control *caching_ctl;
	struct rb_node *n;

	down_write(&info->extent_commit_sem);
	while (!list_empty(&info->caching_block_groups)) {
		caching_ctl = list_entry(info->caching_block_groups.next,
					 struct btrfs_caching_control, list);
		list_del(&caching_ctl->list);
		put_caching_control(caching_ctl);
	}
	up_write(&info->extent_commit_sem);

	spin_lock(&info->block_group_cache_lock);
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);
		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		/* drop the spinlock: the waits/frees below may sleep */
		spin_unlock(&info->block_group_cache_lock);

		down_write(&block_group->space_info->groups_sem);
		list_del(&block_group->list);
		up_write(&block_group->space_info->groups_sem);

		if (block_group->cached == BTRFS_CACHE_STARTED)
			wait_block_group_cache_done(block_group);

		/*
		 * We haven't cached this block group, which means we could
		 * possibly have excluded extents on this block group.
		 */
		if (block_group->cached == BTRFS_CACHE_NO)
			free_excluded_extents(info->extent_root, block_group);

		btrfs_remove_free_space_cache(block_group);
		btrfs_put_block_group(block_group);

		spin_lock(&info->block_group_cache_lock);
	}
	spin_unlock(&info->block_group_cache_lock);

	/*
	 * Make sure all RCU callbacks freeing block group structures have
	 * finished before we free the space_infos below; nobody else can
	 * be using them this late in unmount.
	 */
	synchronize_rcu();

	release_global_block_rsv(info);

	while(!list_empty(&info->space_info)) {
		space_info = list_entry(info->space_info.next,
					struct btrfs_space_info,
					list);
		/* under ENOSPC_DEBUG, complain about leaked reservations */
		if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
			if (space_info->bytes_pinned > 0 ||
			    space_info->bytes_reserved > 0 ||
			    space_info->bytes_may_use > 0) {
				WARN_ON(1);
				dump_space_info(space_info, 0, 0);
			}
		}
		percpu_counter_destroy(&space_info->total_bytes_pinned);
		list_del(&space_info->list);
		kfree(space_info);
	}
	return 0;
}
8233
8234static void __link_block_group(struct btrfs_space_info *space_info,
8235 struct btrfs_block_group_cache *cache)
8236{
8237 int index = get_block_group_index(cache);
8238
8239 down_write(&space_info->groups_sem);
8240 list_add_tail(&cache->list, &space_info->block_groups[index]);
8241 up_write(&space_info->groups_sem);
8242}
8243
8244int btrfs_read_block_groups(struct btrfs_root *root)
8245{
8246 struct btrfs_path *path;
8247 int ret;
8248 struct btrfs_block_group_cache *cache;
8249 struct btrfs_fs_info *info = root->fs_info;
8250 struct btrfs_space_info *space_info;
8251 struct btrfs_key key;
8252 struct btrfs_key found_key;
8253 struct extent_buffer *leaf;
8254 int need_clear = 0;
8255 u64 cache_gen;
8256
8257 root = info->extent_root;
8258 key.objectid = 0;
8259 key.offset = 0;
8260 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
8261 path = btrfs_alloc_path();
8262 if (!path)
8263 return -ENOMEM;
8264 path->reada = 1;
8265
8266 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
8267 if (btrfs_test_opt(root, SPACE_CACHE) &&
8268 btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
8269 need_clear = 1;
8270 if (btrfs_test_opt(root, CLEAR_CACHE))
8271 need_clear = 1;
8272
8273 while (1) {
8274 ret = find_first_block_group(root, path, &key);
8275 if (ret > 0)
8276 break;
8277 if (ret != 0)
8278 goto error;
8279 leaf = path->nodes[0];
8280 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8281 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8282 if (!cache) {
8283 ret = -ENOMEM;
8284 goto error;
8285 }
8286 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8287 GFP_NOFS);
8288 if (!cache->free_space_ctl) {
8289 kfree(cache);
8290 ret = -ENOMEM;
8291 goto error;
8292 }
8293
8294 atomic_set(&cache->count, 1);
8295 spin_lock_init(&cache->lock);
8296 cache->fs_info = info;
8297 INIT_LIST_HEAD(&cache->list);
8298 INIT_LIST_HEAD(&cache->cluster_list);
8299
8300 if (need_clear) {
8301
8302
8303
8304
8305
8306
8307
8308
8309
8310
8311 cache->disk_cache_state = BTRFS_DC_CLEAR;
8312 if (btrfs_test_opt(root, SPACE_CACHE))
8313 cache->dirty = 1;
8314 }
8315
8316 read_extent_buffer(leaf, &cache->item,
8317 btrfs_item_ptr_offset(leaf, path->slots[0]),
8318 sizeof(cache->item));
8319 memcpy(&cache->key, &found_key, sizeof(found_key));
8320
8321 key.objectid = found_key.objectid + found_key.offset;
8322 btrfs_release_path(path);
8323 cache->flags = btrfs_block_group_flags(&cache->item);
8324 cache->sectorsize = root->sectorsize;
8325 cache->full_stripe_len = btrfs_full_stripe_len(root,
8326 &root->fs_info->mapping_tree,
8327 found_key.objectid);
8328 btrfs_init_free_space_ctl(cache);
8329
8330
8331
8332
8333
8334
8335 ret = exclude_super_stripes(root, cache);
8336 if (ret) {
8337
8338
8339
8340
8341 free_excluded_extents(root, cache);
8342 kfree(cache->free_space_ctl);
8343 kfree(cache);
8344 goto error;
8345 }
8346
8347
8348
8349
8350
8351
8352
8353
8354 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
8355 cache->last_byte_to_unpin = (u64)-1;
8356 cache->cached = BTRFS_CACHE_FINISHED;
8357 free_excluded_extents(root, cache);
8358 } else if (btrfs_block_group_used(&cache->item) == 0) {
8359 cache->last_byte_to_unpin = (u64)-1;
8360 cache->cached = BTRFS_CACHE_FINISHED;
8361 add_new_free_space(cache, root->fs_info,
8362 found_key.objectid,
8363 found_key.objectid +
8364 found_key.offset);
8365 free_excluded_extents(root, cache);
8366 }
8367
8368 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8369 if (ret) {
8370 btrfs_remove_free_space_cache(cache);
8371 btrfs_put_block_group(cache);
8372 goto error;
8373 }
8374
8375 ret = update_space_info(info, cache->flags, found_key.offset,
8376 btrfs_block_group_used(&cache->item),
8377 &space_info);
8378 if (ret) {
8379 btrfs_remove_free_space_cache(cache);
8380 spin_lock(&info->block_group_cache_lock);
8381 rb_erase(&cache->cache_node,
8382 &info->block_group_cache_tree);
8383 spin_unlock(&info->block_group_cache_lock);
8384 btrfs_put_block_group(cache);
8385 goto error;
8386 }
8387
8388 cache->space_info = space_info;
8389 spin_lock(&cache->space_info->lock);
8390 cache->space_info->bytes_readonly += cache->bytes_super;
8391 spin_unlock(&cache->space_info->lock);
8392
8393 __link_block_group(space_info, cache);
8394
8395 set_avail_alloc_bits(root->fs_info, cache->flags);
8396 if (btrfs_chunk_readonly(root, cache->key.objectid))
8397 set_block_group_ro(cache, 1);
8398 }
8399
8400 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
8401 if (!(get_alloc_profile(root, space_info->flags) &
8402 (BTRFS_BLOCK_GROUP_RAID10 |
8403 BTRFS_BLOCK_GROUP_RAID1 |
8404 BTRFS_BLOCK_GROUP_RAID5 |
8405 BTRFS_BLOCK_GROUP_RAID6 |
8406 BTRFS_BLOCK_GROUP_DUP)))
8407 continue;
8408
8409
8410
8411
8412 list_for_each_entry(cache, &space_info->block_groups[3], list)
8413 set_block_group_ro(cache, 1);
8414 list_for_each_entry(cache, &space_info->block_groups[4], list)
8415 set_block_group_ro(cache, 1);
8416 }
8417
8418 init_global_block_rsv(info);
8419 ret = 0;
8420error:
8421 btrfs_free_path(path);
8422 return ret;
8423}
8424
/*
 * Insert the block group items for every group created during this
 * transaction (queued on trans->new_bgs by btrfs_make_block_group())
 * into the extent tree and finish their chunk allocation.  After the
 * first failure the transaction is aborted and the remaining entries
 * are only unlinked from the list.
 */
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root)
{
	struct btrfs_block_group_cache *block_group, *tmp;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_block_group_item item;
	struct btrfs_key key;
	int ret = 0;

	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs,
				 new_bg_list) {
		list_del_init(&block_group->new_bg_list);

		/* already aborted: just keep draining the list */
		if (ret)
			continue;

		/* snapshot item and key under the group's lock */
		spin_lock(&block_group->lock);
		memcpy(&item, &block_group->item, sizeof(item));
		memcpy(&key, &block_group->key, sizeof(key));
		spin_unlock(&block_group->lock);

		ret = btrfs_insert_item(trans, extent_root, &key, &item,
					sizeof(item));
		if (ret)
			btrfs_abort_transaction(trans, extent_root, ret);
		ret = btrfs_finish_chunk_alloc(trans, extent_root,
					       key.objectid, key.offset);
		if (ret)
			btrfs_abort_transaction(trans, extent_root, ret);
	}
}
8456
/*
 * Create the in-memory block group cache for a freshly allocated chunk
 * at @chunk_offset of @size bytes, link it everywhere and queue it on
 * trans->new_bgs; the on-disk item is inserted later by
 * btrfs_create_pending_block_groups().
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	extent_root = root->fs_info->extent_root;

	/* a new block group forces a full commit of any tree log */
	root->fs_info->last_trans_log_full_commit = trans->transid;

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;
	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
					GFP_NOFS);
	if (!cache->free_space_ctl) {
		kfree(cache);
		return -ENOMEM;
	}

	cache->key.objectid = chunk_offset;
	cache->key.offset = size;
	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	cache->sectorsize = root->sectorsize;
	cache->fs_info = root->fs_info;
	cache->full_stripe_len = btrfs_full_stripe_len(root,
					       &root->fs_info->mapping_tree,
					       chunk_offset);

	atomic_set(&cache->count, 1);
	spin_lock_init(&cache->lock);
	INIT_LIST_HEAD(&cache->list);
	INIT_LIST_HEAD(&cache->cluster_list);
	INIT_LIST_HEAD(&cache->new_bg_list);

	btrfs_init_free_space_ctl(cache);

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	cache->flags = type;
	btrfs_set_block_group_flags(&cache->item, type);

	/* a brand new chunk needs no caching thread */
	cache->last_byte_to_unpin = (u64)-1;
	cache->cached = BTRFS_CACHE_FINISHED;
	ret = exclude_super_stripes(root, cache);
	if (ret) {
		/*
		 * We may have excluded something, so call this just in
		 * case.
		 */
		free_excluded_extents(root, cache);
		kfree(cache->free_space_ctl);
		kfree(cache);
		return ret;
	}

	/* the whole chunk is free except the excluded super stripes */
	add_new_free_space(cache, root->fs_info, chunk_offset,
			   chunk_offset + size);

	free_excluded_extents(root, cache);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		btrfs_put_block_group(cache);
		return ret;
	}

	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	if (ret) {
		btrfs_remove_free_space_cache(cache);
		spin_lock(&root->fs_info->block_group_cache_lock);
		rb_erase(&cache->cache_node,
			 &root->fs_info->block_group_cache_tree);
		spin_unlock(&root->fs_info->block_group_cache_lock);
		btrfs_put_block_group(cache);
		return ret;
	}
	update_global_block_rsv(root->fs_info);

	spin_lock(&cache->space_info->lock);
	cache->space_info->bytes_readonly += cache->bytes_super;
	spin_unlock(&cache->space_info->lock);

	__link_block_group(cache->space_info, cache);

	/* defer on-disk item insertion to the end of the transaction */
	list_add_tail(&cache->new_bg_list, &trans->new_bgs);

	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}
8553
8554static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
8555{
8556 u64 extra_flags = chunk_to_extended(flags) &
8557 BTRFS_EXTENDED_PROFILE_MASK;
8558
8559 write_seqlock(&fs_info->profiles_lock);
8560 if (flags & BTRFS_BLOCK_GROUP_DATA)
8561 fs_info->avail_data_alloc_bits &= ~extra_flags;
8562 if (flags & BTRFS_BLOCK_GROUP_METADATA)
8563 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
8564 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
8565 fs_info->avail_system_alloc_bits &= ~extra_flags;
8566 write_sequnlock(&fs_info->profiles_lock);
8567}
8568
/*
 * Remove the (emptied, read-only) block group starting at @group_start:
 * orphan its free space cache inode, delete the cache item from the
 * tree root, unlink the group from clusters and in-memory structures,
 * fix up space_info accounting and finally delete the block group item
 * from the extent tree.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_free_cluster *cluster;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_key key;
	struct inode *inode;
	int ret;
	int index;
	int factor;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	BUG_ON(!block_group->ro);

	/*
	 * Free the reserved super bytes from this block group before
	 * it is deleted.
	 */
	free_excluded_extents(root, block_group);

	memcpy(&key, &block_group->key, sizeof(key));
	index = get_block_group_index(block_group);
	/* mirrored profiles consume two on-disk bytes per logical byte */
	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
				  BTRFS_BLOCK_GROUP_RAID1 |
				  BTRFS_BLOCK_GROUP_RAID10))
		factor = 2;
	else
		factor = 1;

	/* make sure this group isn't part of the data allocation cluster */
	cluster = &root->fs_info->data_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	/*
	 * make sure this block group isn't part of a metadata
	 * allocation cluster
	 */
	cluster = &root->fs_info->meta_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* orphan the free space cache inode so it gets cleaned up */
	inode = lookup_free_space_inode(tree_root, block_group, path);
	if (!IS_ERR(inode)) {
		ret = btrfs_orphan_add(trans, inode);
		if (ret) {
			btrfs_add_delayed_iput(inode);
			goto out;
		}
		clear_nlink(inode);
		/* drop the block group's own inode reference, if held */
		spin_lock(&block_group->lock);
		if (block_group->iref) {
			block_group->iref = 0;
			block_group->inode = NULL;
			spin_unlock(&block_group->lock);
			iput(inode);
		} else {
			spin_unlock(&block_group->lock);
		}
		/* and the reference taken by our lookup above */
		btrfs_add_delayed_iput(inode);
	}

	/* delete the free space cache item from the tree root */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = block_group->key.objectid;
	key.type = 0;

	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0)
		btrfs_release_path(path);
	if (ret == 0) {
		ret = btrfs_del_item(trans, tree_root, path);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}

	spin_lock(&root->fs_info->block_group_cache_lock);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);

	/* the cached first logical byte may have been in this group */
	if (root->fs_info->first_logical_byte == block_group->key.objectid)
		root->fs_info->first_logical_byte = (u64)-1;
	spin_unlock(&root->fs_info->block_group_cache_lock);

	down_write(&block_group->space_info->groups_sem);
	/*
	 * list_del_init so others can check under the semaphore whether
	 * the group is still linked
	 */
	list_del_init(&block_group->list);
	if (list_empty(&block_group->space_info->block_groups[index]))
		clear_avail_alloc_bits(root->fs_info, block_group->flags);
	up_write(&block_group->space_info->groups_sem);

	if (block_group->cached == BTRFS_CACHE_STARTED)
		wait_block_group_cache_done(block_group);

	btrfs_remove_free_space_cache(block_group);

	/* the group was ro, so its space only counted as readonly */
	spin_lock(&block_group->space_info->lock);
	block_group->space_info->total_bytes -= block_group->key.offset;
	block_group->space_info->bytes_readonly -= block_group->key.offset;
	block_group->space_info->disk_total -= block_group->key.offset * factor;
	spin_unlock(&block_group->space_info->lock);

	memcpy(&key, &block_group->key, sizeof(key));

	btrfs_clear_space_info_full(root->fs_info);

	/* once for the rb-tree's reference, once for our lookup */
	btrfs_put_block_group(block_group);
	btrfs_put_block_group(block_group);

	/* finally remove the block group item from the extent tree */
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -EIO;
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}
8709
8710int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
8711{
8712 struct btrfs_space_info *space_info;
8713 struct btrfs_super_block *disk_super;
8714 u64 features;
8715 u64 flags;
8716 int mixed = 0;
8717 int ret;
8718
8719 disk_super = fs_info->super_copy;
8720 if (!btrfs_super_root(disk_super))
8721 return 1;
8722
8723 features = btrfs_super_incompat_flags(disk_super);
8724 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
8725 mixed = 1;
8726
8727 flags = BTRFS_BLOCK_GROUP_SYSTEM;
8728 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8729 if (ret)
8730 goto out;
8731
8732 if (mixed) {
8733 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
8734 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8735 } else {
8736 flags = BTRFS_BLOCK_GROUP_METADATA;
8737 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8738 if (ret)
8739 goto out;
8740
8741 flags = BTRFS_BLOCK_GROUP_DATA;
8742 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8743 }
8744out:
8745 return ret;
8746}
8747
8748int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
8749{
8750 return unpin_extent_range(root, start, end);
8751}
8752
8753int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
8754 u64 num_bytes, u64 *actual_bytes)
8755{
8756 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
8757}
8758
/*
 * Handle FITRIM: discard free space in every block group overlapping
 * [range->start, range->start + range->len), honouring range->minlen.
 * On return, range->len holds the number of bytes actually trimmed.
 *
 * Returns 0 on success or the first error from caching or trimming.
 */
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_group_cache *cache = NULL;
	u64 group_trimmed;
	u64 start;
	u64 end;
	u64 trimmed = 0;
	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	int ret = 0;

	/*
	 * whole-FS trim: the first block group may start above
	 * range->start, so look up the first group instead of an exact hit
	 */
	if (range->len == total_bytes)
		cache = btrfs_lookup_first_block_group(fs_info, range->start);
	else
		cache = btrfs_lookup_block_group(fs_info, range->start);

	while (cache) {
		/* group starts past the requested range: done */
		if (cache->key.objectid >= (range->start + range->len)) {
			btrfs_put_block_group(cache);
			break;
		}

		/* clamp the trim window to this block group */
		start = max(range->start, cache->key.objectid);
		end = min(range->start + range->len,
			  cache->key.objectid + cache->key.offset);

		if (end - start >= range->minlen) {
			/* free space must be cached before we can trim it */
			if (!block_group_cache_done(cache)) {
				ret = cache_block_group(cache, 0);
				if (ret) {
					btrfs_put_block_group(cache);
					break;
				}
				ret = wait_block_group_cache_done(cache);
				if (ret) {
					btrfs_put_block_group(cache);
					break;
				}
			}
			ret = btrfs_trim_block_group(cache,
						     &group_trimmed,
						     start,
						     end,
						     range->minlen);

			trimmed += group_trimmed;
			if (ret) {
				btrfs_put_block_group(cache);
				break;
			}
		}

		/* next_block_group drops our ref and takes the next one */
		cache = next_block_group(fs_info->tree_root, cache);
	}

	range->len = trimmed;
	return ret;
}
8820