1
2
3#include "misc.h"
4#include "ctree.h"
5#include "space-info.h"
6#include "sysfs.h"
7#include "volumes.h"
8#include "free-space-cache.h"
9#include "ordered-data.h"
10#include "transaction.h"
11#include "block-group.h"
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
161 bool may_use_included)
162{
163 ASSERT(s_info);
164 return s_info->bytes_used + s_info->bytes_reserved +
165 s_info->bytes_pinned + s_info->bytes_readonly +
166 (may_use_included ? s_info->bytes_may_use : 0);
167}
168
169
170
171
172
173void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
174{
175 struct list_head *head = &info->space_info;
176 struct btrfs_space_info *found;
177
178 rcu_read_lock();
179 list_for_each_entry_rcu(found, head, list)
180 found->full = 0;
181 rcu_read_unlock();
182}
183
184static int create_space_info(struct btrfs_fs_info *info, u64 flags)
185{
186
187 struct btrfs_space_info *space_info;
188 int i;
189 int ret;
190
191 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
192 if (!space_info)
193 return -ENOMEM;
194
195 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
196 GFP_KERNEL);
197 if (ret) {
198 kfree(space_info);
199 return ret;
200 }
201
202 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
203 INIT_LIST_HEAD(&space_info->block_groups[i]);
204 init_rwsem(&space_info->groups_sem);
205 spin_lock_init(&space_info->lock);
206 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
207 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
208 INIT_LIST_HEAD(&space_info->ro_bgs);
209 INIT_LIST_HEAD(&space_info->tickets);
210 INIT_LIST_HEAD(&space_info->priority_tickets);
211
212 ret = btrfs_sysfs_add_space_info_type(info, space_info);
213 if (ret)
214 return ret;
215
216 list_add_rcu(&space_info->list, &info->space_info);
217 if (flags & BTRFS_BLOCK_GROUP_DATA)
218 info->data_sinfo = space_info;
219
220 return ret;
221}
222
223int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
224{
225 struct btrfs_super_block *disk_super;
226 u64 features;
227 u64 flags;
228 int mixed = 0;
229 int ret;
230
231 disk_super = fs_info->super_copy;
232 if (!btrfs_super_root(disk_super))
233 return -EINVAL;
234
235 features = btrfs_super_incompat_flags(disk_super);
236 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
237 mixed = 1;
238
239 flags = BTRFS_BLOCK_GROUP_SYSTEM;
240 ret = create_space_info(fs_info, flags);
241 if (ret)
242 goto out;
243
244 if (mixed) {
245 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
246 ret = create_space_info(fs_info, flags);
247 } else {
248 flags = BTRFS_BLOCK_GROUP_METADATA;
249 ret = create_space_info(fs_info, flags);
250 if (ret)
251 goto out;
252
253 flags = BTRFS_BLOCK_GROUP_DATA;
254 ret = create_space_info(fs_info, flags);
255 }
256out:
257 return ret;
258}
259
260void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
261 u64 total_bytes, u64 bytes_used,
262 u64 bytes_readonly,
263 struct btrfs_space_info **space_info)
264{
265 struct btrfs_space_info *found;
266 int factor;
267
268 factor = btrfs_bg_type_to_factor(flags);
269
270 found = btrfs_find_space_info(info, flags);
271 ASSERT(found);
272 spin_lock(&found->lock);
273 found->total_bytes += total_bytes;
274 found->disk_total += total_bytes * factor;
275 found->bytes_used += bytes_used;
276 found->disk_used += bytes_used * factor;
277 found->bytes_readonly += bytes_readonly;
278 if (total_bytes > 0)
279 found->full = 0;
280 btrfs_try_granting_tickets(info, found);
281 spin_unlock(&found->lock);
282 *space_info = found;
283}
284
285struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
286 u64 flags)
287{
288 struct list_head *head = &info->space_info;
289 struct btrfs_space_info *found;
290
291 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
292
293 rcu_read_lock();
294 list_for_each_entry_rcu(found, head, list) {
295 if (found->flags & flags) {
296 rcu_read_unlock();
297 return found;
298 }
299 }
300 rcu_read_unlock();
301 return NULL;
302}
303
304static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
305{
306 return (global->size << 1);
307}
308
309static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
310 struct btrfs_space_info *space_info,
311 enum btrfs_reserve_flush_enum flush)
312{
313 u64 profile;
314 u64 avail;
315 int factor;
316
317 if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
318 profile = btrfs_system_alloc_profile(fs_info);
319 else
320 profile = btrfs_metadata_alloc_profile(fs_info);
321
322 avail = atomic64_read(&fs_info->free_chunk_space);
323
324
325
326
327
328
329
330 factor = btrfs_bg_type_to_factor(profile);
331 avail = div_u64(avail, factor);
332
333
334
335
336
337
338 if (flush == BTRFS_RESERVE_FLUSH_ALL)
339 avail >>= 3;
340 else
341 avail >>= 1;
342 return avail;
343}
344
345int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
346 struct btrfs_space_info *space_info, u64 bytes,
347 enum btrfs_reserve_flush_enum flush)
348{
349 u64 avail;
350 u64 used;
351
352
353 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
354 return 0;
355
356 used = btrfs_space_info_used(space_info, true);
357 avail = calc_available_free_space(fs_info, space_info, flush);
358
359 if (used + bytes < space_info->total_bytes + avail)
360 return 1;
361 return 0;
362}
363
364static void remove_ticket(struct btrfs_space_info *space_info,
365 struct reserve_ticket *ticket)
366{
367 if (!list_empty(&ticket->list)) {
368 list_del_init(&ticket->list);
369 ASSERT(space_info->reclaim_size >= ticket->bytes);
370 space_info->reclaim_size -= ticket->bytes;
371 }
372}
373
374
375
376
377
378void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
379 struct btrfs_space_info *space_info)
380{
381 struct list_head *head;
382 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
383
384 lockdep_assert_held(&space_info->lock);
385
386 head = &space_info->priority_tickets;
387again:
388 while (!list_empty(head)) {
389 struct reserve_ticket *ticket;
390 u64 used = btrfs_space_info_used(space_info, true);
391
392 ticket = list_first_entry(head, struct reserve_ticket, list);
393
394
395 if ((used + ticket->bytes <= space_info->total_bytes) ||
396 btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
397 flush)) {
398 btrfs_space_info_update_bytes_may_use(fs_info,
399 space_info,
400 ticket->bytes);
401 remove_ticket(space_info, ticket);
402 ticket->bytes = 0;
403 space_info->tickets_id++;
404 wake_up(&ticket->wait);
405 } else {
406 break;
407 }
408 }
409
410 if (head == &space_info->priority_tickets) {
411 head = &space_info->tickets;
412 flush = BTRFS_RESERVE_FLUSH_ALL;
413 goto again;
414 }
415}
416
/*
 * Log size and reserved of the block reserve named @rsv_name, a member of
 * @fs_info.  The values are read under the reserve's own lock.
 */
#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__dump_rsv = &(fs_info)->rsv_name;	\
									\
	spin_lock(&__dump_rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __dump_rsv->size, __dump_rsv->reserved);		\
	spin_unlock(&__dump_rsv->lock);					\
} while (0)
425
426static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
427 struct btrfs_space_info *info)
428{
429 lockdep_assert_held(&info->lock);
430
431 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
432 info->flags,
433 info->total_bytes - btrfs_space_info_used(info, true),
434 info->full ? "" : "not ");
435 btrfs_info(fs_info,
436 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
437 info->total_bytes, info->bytes_used, info->bytes_pinned,
438 info->bytes_reserved, info->bytes_may_use,
439 info->bytes_readonly);
440
441 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
442 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
443 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
444 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
445 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
446
447}
448
449void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
450 struct btrfs_space_info *info, u64 bytes,
451 int dump_block_groups)
452{
453 struct btrfs_block_group *cache;
454 int index = 0;
455
456 spin_lock(&info->lock);
457 __btrfs_dump_space_info(fs_info, info);
458 spin_unlock(&info->lock);
459
460 if (!dump_block_groups)
461 return;
462
463 down_read(&info->groups_sem);
464again:
465 list_for_each_entry(cache, &info->block_groups[index], list) {
466 spin_lock(&cache->lock);
467 btrfs_info(fs_info,
468 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
469 cache->start, cache->length, cache->used, cache->pinned,
470 cache->reserved, cache->ro ? "[readonly]" : "");
471 btrfs_dump_free_space(cache, bytes);
472 spin_unlock(&cache->lock);
473 }
474 if (++index < BTRFS_NR_RAID_TYPES)
475 goto again;
476 up_read(&info->groups_sem);
477}
478
479static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
480 unsigned long nr_pages, int nr_items)
481{
482 struct super_block *sb = fs_info->sb;
483
484 if (down_read_trylock(&sb->s_umount)) {
485 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
486 up_read(&sb->s_umount);
487 } else {
488
489
490
491
492
493
494
495 btrfs_start_delalloc_roots(fs_info, nr_items);
496 if (!current->journal_info)
497 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
498 }
499}
500
501static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
502 u64 to_reclaim)
503{
504 u64 bytes;
505 u64 nr;
506
507 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
508 nr = div64_u64(to_reclaim, bytes);
509 if (!nr)
510 nr = 1;
511 return nr;
512}
513
514#define EXTENT_SIZE_PER_ITEM SZ_256K
515
516
517
518
519static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
520 u64 orig, bool wait_ordered)
521{
522 struct btrfs_space_info *space_info;
523 struct btrfs_trans_handle *trans;
524 u64 delalloc_bytes;
525 u64 dio_bytes;
526 u64 async_pages;
527 u64 items;
528 long time_left;
529 unsigned long nr_pages;
530 int loops;
531
532
533 items = calc_reclaim_items_nr(fs_info, to_reclaim);
534 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
535
536 trans = (struct btrfs_trans_handle *)current->journal_info;
537 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
538
539 delalloc_bytes = percpu_counter_sum_positive(
540 &fs_info->delalloc_bytes);
541 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
542 if (delalloc_bytes == 0 && dio_bytes == 0) {
543 if (trans)
544 return;
545 if (wait_ordered)
546 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
547 return;
548 }
549
550
551
552
553
554
555 if (dio_bytes > delalloc_bytes)
556 wait_ordered = true;
557
558 loops = 0;
559 while ((delalloc_bytes || dio_bytes) && loops < 3) {
560 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
561
562
563
564
565
566
567 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
568
569
570
571
572
573 async_pages = atomic_read(&fs_info->async_delalloc_pages);
574 if (!async_pages)
575 goto skip_async;
576
577
578
579
580
581
582 if (async_pages <= nr_pages)
583 async_pages = 0;
584 else
585 async_pages -= nr_pages;
586
587 wait_event(fs_info->async_submit_wait,
588 atomic_read(&fs_info->async_delalloc_pages) <=
589 (int)async_pages);
590skip_async:
591 spin_lock(&space_info->lock);
592 if (list_empty(&space_info->tickets) &&
593 list_empty(&space_info->priority_tickets)) {
594 spin_unlock(&space_info->lock);
595 break;
596 }
597 spin_unlock(&space_info->lock);
598
599 loops++;
600 if (wait_ordered && !trans) {
601 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
602 } else {
603 time_left = schedule_timeout_killable(1);
604 if (time_left)
605 break;
606 }
607 delalloc_bytes = percpu_counter_sum_positive(
608 &fs_info->delalloc_bytes);
609 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
610 }
611}
612
613
614
615
616
617
618
619
620
621
622
623static int may_commit_transaction(struct btrfs_fs_info *fs_info,
624 struct btrfs_space_info *space_info)
625{
626 struct reserve_ticket *ticket = NULL;
627 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
628 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
629 struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
630 struct btrfs_trans_handle *trans;
631 u64 bytes_needed;
632 u64 reclaim_bytes = 0;
633 u64 cur_free_bytes = 0;
634
635 trans = (struct btrfs_trans_handle *)current->journal_info;
636 if (trans)
637 return -EAGAIN;
638
639 spin_lock(&space_info->lock);
640 cur_free_bytes = btrfs_space_info_used(space_info, true);
641 if (cur_free_bytes < space_info->total_bytes)
642 cur_free_bytes = space_info->total_bytes - cur_free_bytes;
643 else
644 cur_free_bytes = 0;
645
646 if (!list_empty(&space_info->priority_tickets))
647 ticket = list_first_entry(&space_info->priority_tickets,
648 struct reserve_ticket, list);
649 else if (!list_empty(&space_info->tickets))
650 ticket = list_first_entry(&space_info->tickets,
651 struct reserve_ticket, list);
652 bytes_needed = (ticket) ? ticket->bytes : 0;
653
654 if (bytes_needed > cur_free_bytes)
655 bytes_needed -= cur_free_bytes;
656 else
657 bytes_needed = 0;
658 spin_unlock(&space_info->lock);
659
660 if (!bytes_needed)
661 return 0;
662
663 trans = btrfs_join_transaction(fs_info->extent_root);
664 if (IS_ERR(trans))
665 return PTR_ERR(trans);
666
667
668
669
670
671
672 if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
673 __percpu_counter_compare(&space_info->total_bytes_pinned,
674 bytes_needed,
675 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
676 goto commit;
677
678
679
680
681
682 if (space_info != delayed_rsv->space_info)
683 goto enospc;
684
685 spin_lock(&delayed_rsv->lock);
686 reclaim_bytes += delayed_rsv->reserved;
687 spin_unlock(&delayed_rsv->lock);
688
689 spin_lock(&delayed_refs_rsv->lock);
690 reclaim_bytes += delayed_refs_rsv->reserved;
691 spin_unlock(&delayed_refs_rsv->lock);
692
693 spin_lock(&trans_rsv->lock);
694 reclaim_bytes += trans_rsv->reserved;
695 spin_unlock(&trans_rsv->lock);
696
697 if (reclaim_bytes >= bytes_needed)
698 goto commit;
699 bytes_needed -= reclaim_bytes;
700
701 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
702 bytes_needed,
703 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
704 goto enospc;
705
706commit:
707 return btrfs_commit_transaction(trans);
708enospc:
709 btrfs_end_transaction(trans);
710 return -ENOSPC;
711}
712
713
714
715
716
717
718static void flush_space(struct btrfs_fs_info *fs_info,
719 struct btrfs_space_info *space_info, u64 num_bytes,
720 int state)
721{
722 struct btrfs_root *root = fs_info->extent_root;
723 struct btrfs_trans_handle *trans;
724 int nr;
725 int ret = 0;
726
727 switch (state) {
728 case FLUSH_DELAYED_ITEMS_NR:
729 case FLUSH_DELAYED_ITEMS:
730 if (state == FLUSH_DELAYED_ITEMS_NR)
731 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
732 else
733 nr = -1;
734
735 trans = btrfs_join_transaction(root);
736 if (IS_ERR(trans)) {
737 ret = PTR_ERR(trans);
738 break;
739 }
740 ret = btrfs_run_delayed_items_nr(trans, nr);
741 btrfs_end_transaction(trans);
742 break;
743 case FLUSH_DELALLOC:
744 case FLUSH_DELALLOC_WAIT:
745 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
746 state == FLUSH_DELALLOC_WAIT);
747 break;
748 case FLUSH_DELAYED_REFS_NR:
749 case FLUSH_DELAYED_REFS:
750 trans = btrfs_join_transaction(root);
751 if (IS_ERR(trans)) {
752 ret = PTR_ERR(trans);
753 break;
754 }
755 if (state == FLUSH_DELAYED_REFS_NR)
756 nr = calc_reclaim_items_nr(fs_info, num_bytes);
757 else
758 nr = 0;
759 btrfs_run_delayed_refs(trans, nr);
760 btrfs_end_transaction(trans);
761 break;
762 case ALLOC_CHUNK:
763 case ALLOC_CHUNK_FORCE:
764 trans = btrfs_join_transaction(root);
765 if (IS_ERR(trans)) {
766 ret = PTR_ERR(trans);
767 break;
768 }
769 ret = btrfs_chunk_alloc(trans,
770 btrfs_metadata_alloc_profile(fs_info),
771 (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
772 CHUNK_ALLOC_FORCE);
773 btrfs_end_transaction(trans);
774 if (ret > 0 || ret == -ENOSPC)
775 ret = 0;
776 break;
777 case RUN_DELAYED_IPUTS:
778
779
780
781
782
783 btrfs_run_delayed_iputs(fs_info);
784 btrfs_wait_on_delayed_iputs(fs_info);
785 break;
786 case COMMIT_TRANS:
787 ret = may_commit_transaction(fs_info, space_info);
788 break;
789 default:
790 ret = -ENOSPC;
791 break;
792 }
793
794 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
795 ret);
796 return;
797}
798
799static inline u64
800btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
801 struct btrfs_space_info *space_info)
802{
803 u64 used;
804 u64 avail;
805 u64 expected;
806 u64 to_reclaim = space_info->reclaim_size;
807
808 lockdep_assert_held(&space_info->lock);
809
810 avail = calc_available_free_space(fs_info, space_info,
811 BTRFS_RESERVE_FLUSH_ALL);
812 used = btrfs_space_info_used(space_info, true);
813
814
815
816
817
818
819
820 if (space_info->total_bytes + avail < used)
821 to_reclaim += used - (space_info->total_bytes + avail);
822
823 if (to_reclaim)
824 return to_reclaim;
825
826 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
827 if (btrfs_can_overcommit(fs_info, space_info, to_reclaim,
828 BTRFS_RESERVE_FLUSH_ALL))
829 return 0;
830
831 used = btrfs_space_info_used(space_info, true);
832
833 if (btrfs_can_overcommit(fs_info, space_info, SZ_1M,
834 BTRFS_RESERVE_FLUSH_ALL))
835 expected = div_factor_fine(space_info->total_bytes, 95);
836 else
837 expected = div_factor_fine(space_info->total_bytes, 90);
838
839 if (used > expected)
840 to_reclaim = used - expected;
841 else
842 to_reclaim = 0;
843 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
844 space_info->bytes_reserved);
845 return to_reclaim;
846}
847
848static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
849 struct btrfs_space_info *space_info,
850 u64 used)
851{
852 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
853
854
855 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
856 return 0;
857
858 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info))
859 return 0;
860
861 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
862 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
863}
864
865static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
866 struct btrfs_space_info *space_info,
867 struct reserve_ticket *ticket)
868{
869 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
870 u64 min_bytes;
871
872 if (global_rsv->space_info != space_info)
873 return false;
874
875 spin_lock(&global_rsv->lock);
876 min_bytes = div_factor(global_rsv->size, 1);
877 if (global_rsv->reserved < min_bytes + ticket->bytes) {
878 spin_unlock(&global_rsv->lock);
879 return false;
880 }
881 global_rsv->reserved -= ticket->bytes;
882 remove_ticket(space_info, ticket);
883 ticket->bytes = 0;
884 wake_up(&ticket->wait);
885 space_info->tickets_id++;
886 if (global_rsv->reserved < global_rsv->size)
887 global_rsv->full = 0;
888 spin_unlock(&global_rsv->lock);
889
890 return true;
891}
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
909 struct btrfs_space_info *space_info)
910{
911 struct reserve_ticket *ticket;
912 u64 tickets_id = space_info->tickets_id;
913 u64 first_ticket_bytes = 0;
914
915 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
916 btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
917 __btrfs_dump_space_info(fs_info, space_info);
918 }
919
920 while (!list_empty(&space_info->tickets) &&
921 tickets_id == space_info->tickets_id) {
922 ticket = list_first_entry(&space_info->tickets,
923 struct reserve_ticket, list);
924
925 if (ticket->steal &&
926 steal_from_global_rsv(fs_info, space_info, ticket))
927 return true;
928
929
930
931
932
933
934
935
936
937
938
939 if (first_ticket_bytes == 0)
940 first_ticket_bytes = ticket->bytes;
941 else if (first_ticket_bytes > ticket->bytes)
942 return true;
943
944 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
945 btrfs_info(fs_info, "failing ticket with %llu bytes",
946 ticket->bytes);
947
948 remove_ticket(space_info, ticket);
949 ticket->error = -ENOSPC;
950 wake_up(&ticket->wait);
951
952
953
954
955
956
957
958 btrfs_try_granting_tickets(fs_info, space_info);
959 }
960 return (tickets_id != space_info->tickets_id);
961}
962
963
964
965
966
967
968static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
969{
970 struct btrfs_fs_info *fs_info;
971 struct btrfs_space_info *space_info;
972 u64 to_reclaim;
973 int flush_state;
974 int commit_cycles = 0;
975 u64 last_tickets_id;
976
977 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
978 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
979
980 spin_lock(&space_info->lock);
981 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
982 if (!to_reclaim) {
983 space_info->flush = 0;
984 spin_unlock(&space_info->lock);
985 return;
986 }
987 last_tickets_id = space_info->tickets_id;
988 spin_unlock(&space_info->lock);
989
990 flush_state = FLUSH_DELAYED_ITEMS_NR;
991 do {
992 flush_space(fs_info, space_info, to_reclaim, flush_state);
993 spin_lock(&space_info->lock);
994 if (list_empty(&space_info->tickets)) {
995 space_info->flush = 0;
996 spin_unlock(&space_info->lock);
997 return;
998 }
999 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
1000 space_info);
1001 if (last_tickets_id == space_info->tickets_id) {
1002 flush_state++;
1003 } else {
1004 last_tickets_id = space_info->tickets_id;
1005 flush_state = FLUSH_DELAYED_ITEMS_NR;
1006 if (commit_cycles)
1007 commit_cycles--;
1008 }
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
1021 flush_state++;
1022
1023 if (flush_state > COMMIT_TRANS) {
1024 commit_cycles++;
1025 if (commit_cycles > 2) {
1026 if (maybe_fail_all_tickets(fs_info, space_info)) {
1027 flush_state = FLUSH_DELAYED_ITEMS_NR;
1028 commit_cycles--;
1029 } else {
1030 space_info->flush = 0;
1031 }
1032 } else {
1033 flush_state = FLUSH_DELAYED_ITEMS_NR;
1034 }
1035 }
1036 spin_unlock(&space_info->lock);
1037 } while (flush_state <= COMMIT_TRANS);
1038}
1039
1040void btrfs_init_async_reclaim_work(struct work_struct *work)
1041{
1042 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
1043}
1044
1045static const enum btrfs_flush_state priority_flush_states[] = {
1046 FLUSH_DELAYED_ITEMS_NR,
1047 FLUSH_DELAYED_ITEMS,
1048 ALLOC_CHUNK,
1049};
1050
1051static const enum btrfs_flush_state evict_flush_states[] = {
1052 FLUSH_DELAYED_ITEMS_NR,
1053 FLUSH_DELAYED_ITEMS,
1054 FLUSH_DELAYED_REFS_NR,
1055 FLUSH_DELAYED_REFS,
1056 FLUSH_DELALLOC,
1057 FLUSH_DELALLOC_WAIT,
1058 ALLOC_CHUNK,
1059 COMMIT_TRANS,
1060};
1061
1062static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
1063 struct btrfs_space_info *space_info,
1064 struct reserve_ticket *ticket,
1065 const enum btrfs_flush_state *states,
1066 int states_nr)
1067{
1068 u64 to_reclaim;
1069 int flush_state;
1070
1071 spin_lock(&space_info->lock);
1072 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
1073 if (!to_reclaim) {
1074 spin_unlock(&space_info->lock);
1075 return;
1076 }
1077 spin_unlock(&space_info->lock);
1078
1079 flush_state = 0;
1080 do {
1081 flush_space(fs_info, space_info, to_reclaim, states[flush_state]);
1082 flush_state++;
1083 spin_lock(&space_info->lock);
1084 if (ticket->bytes == 0) {
1085 spin_unlock(&space_info->lock);
1086 return;
1087 }
1088 spin_unlock(&space_info->lock);
1089 } while (flush_state < states_nr);
1090}
1091
1092static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
1093 struct btrfs_space_info *space_info,
1094 struct reserve_ticket *ticket)
1095
1096{
1097 DEFINE_WAIT(wait);
1098 int ret = 0;
1099
1100 spin_lock(&space_info->lock);
1101 while (ticket->bytes > 0 && ticket->error == 0) {
1102 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
1103 if (ret) {
1104
1105
1106
1107
1108
1109
1110
1111
1112 remove_ticket(space_info, ticket);
1113 ticket->error = -EINTR;
1114 break;
1115 }
1116 spin_unlock(&space_info->lock);
1117
1118 schedule();
1119
1120 finish_wait(&ticket->wait, &wait);
1121 spin_lock(&space_info->lock);
1122 }
1123 spin_unlock(&space_info->lock);
1124}
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
1137 struct btrfs_space_info *space_info,
1138 struct reserve_ticket *ticket,
1139 enum btrfs_reserve_flush_enum flush)
1140{
1141 int ret;
1142
1143 switch (flush) {
1144 case BTRFS_RESERVE_FLUSH_ALL:
1145 case BTRFS_RESERVE_FLUSH_ALL_STEAL:
1146 wait_reserve_ticket(fs_info, space_info, ticket);
1147 break;
1148 case BTRFS_RESERVE_FLUSH_LIMIT:
1149 priority_reclaim_metadata_space(fs_info, space_info, ticket,
1150 priority_flush_states,
1151 ARRAY_SIZE(priority_flush_states));
1152 break;
1153 case BTRFS_RESERVE_FLUSH_EVICT:
1154 priority_reclaim_metadata_space(fs_info, space_info, ticket,
1155 evict_flush_states,
1156 ARRAY_SIZE(evict_flush_states));
1157 break;
1158 default:
1159 ASSERT(0);
1160 break;
1161 }
1162
1163 spin_lock(&space_info->lock);
1164 ret = ticket->error;
1165 if (ticket->bytes || ticket->error) {
1166
1167
1168
1169
1170
1171
1172
1173 if (!list_empty(&ticket->list)) {
1174 remove_ticket(space_info, ticket);
1175 btrfs_try_granting_tickets(fs_info, space_info);
1176 }
1177
1178 if (!ret)
1179 ret = -ENOSPC;
1180 }
1181 spin_unlock(&space_info->lock);
1182 ASSERT(list_empty(&ticket->list));
1183
1184
1185
1186
1187
1188
1189 ASSERT(!(ticket->bytes == 0 && ticket->error));
1190 return ret;
1191}
1192
1193
1194
1195
1196
1197static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
1198{
1199 return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
1200 (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
1201}
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
1218 struct btrfs_space_info *space_info,
1219 u64 orig_bytes,
1220 enum btrfs_reserve_flush_enum flush)
1221{
1222 struct reserve_ticket ticket;
1223 u64 used;
1224 int ret = 0;
1225 bool pending_tickets;
1226
1227 ASSERT(orig_bytes);
1228 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
1229
1230 spin_lock(&space_info->lock);
1231 ret = -ENOSPC;
1232 used = btrfs_space_info_used(space_info, true);
1233
1234
1235
1236
1237
1238
1239 if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
1240 pending_tickets = !list_empty(&space_info->tickets) ||
1241 !list_empty(&space_info->priority_tickets);
1242 else
1243 pending_tickets = !list_empty(&space_info->priority_tickets);
1244
1245
1246
1247
1248
1249 if (!pending_tickets &&
1250 ((used + orig_bytes <= space_info->total_bytes) ||
1251 btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
1252 btrfs_space_info_update_bytes_may_use(fs_info, space_info,
1253 orig_bytes);
1254 ret = 0;
1255 }
1256
1257
1258
1259
1260
1261
1262
1263
1264 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
1265 ticket.bytes = orig_bytes;
1266 ticket.error = 0;
1267 space_info->reclaim_size += ticket.bytes;
1268 init_waitqueue_head(&ticket.wait);
1269 ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
1270 if (flush == BTRFS_RESERVE_FLUSH_ALL ||
1271 flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
1272 list_add_tail(&ticket.list, &space_info->tickets);
1273 if (!space_info->flush) {
1274 space_info->flush = 1;
1275 trace_btrfs_trigger_flush(fs_info,
1276 space_info->flags,
1277 orig_bytes, flush,
1278 "enospc");
1279 queue_work(system_unbound_wq,
1280 &fs_info->async_reclaim_work);
1281 }
1282 } else {
1283 list_add_tail(&ticket.list,
1284 &space_info->priority_tickets);
1285 }
1286 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
1287 used += orig_bytes;
1288
1289
1290
1291
1292
1293 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
1294 need_do_async_reclaim(fs_info, space_info, used) &&
1295 !work_busy(&fs_info->async_reclaim_work)) {
1296 trace_btrfs_trigger_flush(fs_info, space_info->flags,
1297 orig_bytes, flush, "preempt");
1298 queue_work(system_unbound_wq,
1299 &fs_info->async_reclaim_work);
1300 }
1301 }
1302 spin_unlock(&space_info->lock);
1303 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
1304 return ret;
1305
1306 return handle_reserve_ticket(fs_info, space_info, &ticket, flush);
1307}
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
1324 struct btrfs_block_rsv *block_rsv,
1325 u64 orig_bytes,
1326 enum btrfs_reserve_flush_enum flush)
1327{
1328 struct btrfs_fs_info *fs_info = root->fs_info;
1329 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
1330 int ret;
1331
1332 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
1333 orig_bytes, flush);
1334 if (ret == -ENOSPC &&
1335 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
1336 if (block_rsv != global_rsv &&
1337 !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes))
1338 ret = 0;
1339 }
1340 if (ret == -ENOSPC) {
1341 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
1342 block_rsv->space_info->flags,
1343 orig_bytes, 1);
1344
1345 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
1346 btrfs_dump_space_info(fs_info, block_rsv->space_info,
1347 orig_bytes, 0);
1348 }
1349 return ret;
1350}
1351