1
2
3#include "misc.h"
4#include "ctree.h"
5#include "space-info.h"
6#include "sysfs.h"
7#include "volumes.h"
8#include "free-space-cache.h"
9#include "ordered-data.h"
10#include "transaction.h"
11#include "block-group.h"
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
162 bool may_use_included)
163{
164 ASSERT(s_info);
165 return s_info->bytes_used + s_info->bytes_reserved +
166 s_info->bytes_pinned + s_info->bytes_readonly +
167 s_info->bytes_zone_unusable +
168 (may_use_included ? s_info->bytes_may_use : 0);
169}
170
171
172
173
174
175void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
176{
177 struct list_head *head = &info->space_info;
178 struct btrfs_space_info *found;
179
180 list_for_each_entry(found, head, list)
181 found->full = 0;
182}
183
184
185
186
187
188#define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH (75)
189
190static int create_space_info(struct btrfs_fs_info *info, u64 flags)
191{
192
193 struct btrfs_space_info *space_info;
194 int i;
195 int ret;
196
197 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
198 if (!space_info)
199 return -ENOMEM;
200
201 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
202 INIT_LIST_HEAD(&space_info->block_groups[i]);
203 init_rwsem(&space_info->groups_sem);
204 spin_lock_init(&space_info->lock);
205 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
206 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
207 INIT_LIST_HEAD(&space_info->ro_bgs);
208 INIT_LIST_HEAD(&space_info->tickets);
209 INIT_LIST_HEAD(&space_info->priority_tickets);
210 space_info->clamp = 1;
211
212 if (btrfs_is_zoned(info))
213 space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
214
215 ret = btrfs_sysfs_add_space_info_type(info, space_info);
216 if (ret)
217 return ret;
218
219 list_add(&space_info->list, &info->space_info);
220 if (flags & BTRFS_BLOCK_GROUP_DATA)
221 info->data_sinfo = space_info;
222
223 return ret;
224}
225
226int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
227{
228 struct btrfs_super_block *disk_super;
229 u64 features;
230 u64 flags;
231 int mixed = 0;
232 int ret;
233
234 disk_super = fs_info->super_copy;
235 if (!btrfs_super_root(disk_super))
236 return -EINVAL;
237
238 features = btrfs_super_incompat_flags(disk_super);
239 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
240 mixed = 1;
241
242 flags = BTRFS_BLOCK_GROUP_SYSTEM;
243 ret = create_space_info(fs_info, flags);
244 if (ret)
245 goto out;
246
247 if (mixed) {
248 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
249 ret = create_space_info(fs_info, flags);
250 } else {
251 flags = BTRFS_BLOCK_GROUP_METADATA;
252 ret = create_space_info(fs_info, flags);
253 if (ret)
254 goto out;
255
256 flags = BTRFS_BLOCK_GROUP_DATA;
257 ret = create_space_info(fs_info, flags);
258 }
259out:
260 return ret;
261}
262
263void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
264 u64 total_bytes, u64 bytes_used,
265 u64 bytes_readonly, u64 bytes_zone_unusable,
266 struct btrfs_space_info **space_info)
267{
268 struct btrfs_space_info *found;
269 int factor;
270
271 factor = btrfs_bg_type_to_factor(flags);
272
273 found = btrfs_find_space_info(info, flags);
274 ASSERT(found);
275 spin_lock(&found->lock);
276 found->total_bytes += total_bytes;
277 found->disk_total += total_bytes * factor;
278 found->bytes_used += bytes_used;
279 found->disk_used += bytes_used * factor;
280 found->bytes_readonly += bytes_readonly;
281 found->bytes_zone_unusable += bytes_zone_unusable;
282 if (total_bytes > 0)
283 found->full = 0;
284 btrfs_try_granting_tickets(info, found);
285 spin_unlock(&found->lock);
286 *space_info = found;
287}
288
289struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
290 u64 flags)
291{
292 struct list_head *head = &info->space_info;
293 struct btrfs_space_info *found;
294
295 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
296
297 list_for_each_entry(found, head, list) {
298 if (found->flags & flags)
299 return found;
300 }
301 return NULL;
302}
303
304static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
305 struct btrfs_space_info *space_info,
306 enum btrfs_reserve_flush_enum flush)
307{
308 u64 profile;
309 u64 avail;
310 int factor;
311
312 if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
313 profile = btrfs_system_alloc_profile(fs_info);
314 else
315 profile = btrfs_metadata_alloc_profile(fs_info);
316
317 avail = atomic64_read(&fs_info->free_chunk_space);
318
319
320
321
322
323
324
325 factor = btrfs_bg_type_to_factor(profile);
326 avail = div_u64(avail, factor);
327
328
329
330
331
332
333 if (flush == BTRFS_RESERVE_FLUSH_ALL)
334 avail >>= 3;
335 else
336 avail >>= 1;
337 return avail;
338}
339
340int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
341 struct btrfs_space_info *space_info, u64 bytes,
342 enum btrfs_reserve_flush_enum flush)
343{
344 u64 avail;
345 u64 used;
346
347
348 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
349 return 0;
350
351 used = btrfs_space_info_used(space_info, true);
352 avail = calc_available_free_space(fs_info, space_info, flush);
353
354 if (used + bytes < space_info->total_bytes + avail)
355 return 1;
356 return 0;
357}
358
359static void remove_ticket(struct btrfs_space_info *space_info,
360 struct reserve_ticket *ticket)
361{
362 if (!list_empty(&ticket->list)) {
363 list_del_init(&ticket->list);
364 ASSERT(space_info->reclaim_size >= ticket->bytes);
365 space_info->reclaim_size -= ticket->bytes;
366 }
367}
368
369
370
371
372
373void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
374 struct btrfs_space_info *space_info)
375{
376 struct list_head *head;
377 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
378
379 lockdep_assert_held(&space_info->lock);
380
381 head = &space_info->priority_tickets;
382again:
383 while (!list_empty(head)) {
384 struct reserve_ticket *ticket;
385 u64 used = btrfs_space_info_used(space_info, true);
386
387 ticket = list_first_entry(head, struct reserve_ticket, list);
388
389
390 if ((used + ticket->bytes <= space_info->total_bytes) ||
391 btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
392 flush)) {
393 btrfs_space_info_update_bytes_may_use(fs_info,
394 space_info,
395 ticket->bytes);
396 remove_ticket(space_info, ticket);
397 ticket->bytes = 0;
398 space_info->tickets_id++;
399 wake_up(&ticket->wait);
400 } else {
401 break;
402 }
403 }
404
405 if (head == &space_info->priority_tickets) {
406 head = &space_info->tickets;
407 flush = BTRFS_RESERVE_FLUSH_ALL;
408 goto again;
409 }
410}
411
412#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
413do { \
414 struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
415 spin_lock(&__rsv->lock); \
416 btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
417 __rsv->size, __rsv->reserved); \
418 spin_unlock(&__rsv->lock); \
419} while (0)
420
421static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
422 struct btrfs_space_info *info)
423{
424 lockdep_assert_held(&info->lock);
425
426
427 btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull",
428 info->flags,
429 (s64)(info->total_bytes - btrfs_space_info_used(info, true)),
430 info->full ? "" : "not ");
431 btrfs_info(fs_info,
432 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
433 info->total_bytes, info->bytes_used, info->bytes_pinned,
434 info->bytes_reserved, info->bytes_may_use,
435 info->bytes_readonly, info->bytes_zone_unusable);
436
437 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
438 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
439 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
440 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
441 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
442
443}
444
445void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
446 struct btrfs_space_info *info, u64 bytes,
447 int dump_block_groups)
448{
449 struct btrfs_block_group *cache;
450 int index = 0;
451
452 spin_lock(&info->lock);
453 __btrfs_dump_space_info(fs_info, info);
454 spin_unlock(&info->lock);
455
456 if (!dump_block_groups)
457 return;
458
459 down_read(&info->groups_sem);
460again:
461 list_for_each_entry(cache, &info->block_groups[index], list) {
462 spin_lock(&cache->lock);
463 btrfs_info(fs_info,
464 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
465 cache->start, cache->length, cache->used, cache->pinned,
466 cache->reserved, cache->zone_unusable,
467 cache->ro ? "[readonly]" : "");
468 spin_unlock(&cache->lock);
469 btrfs_dump_free_space(cache, bytes);
470 }
471 if (++index < BTRFS_NR_RAID_TYPES)
472 goto again;
473 up_read(&info->groups_sem);
474}
475
476static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
477 u64 to_reclaim)
478{
479 u64 bytes;
480 u64 nr;
481
482 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
483 nr = div64_u64(to_reclaim, bytes);
484 if (!nr)
485 nr = 1;
486 return nr;
487}
488
489#define EXTENT_SIZE_PER_ITEM SZ_256K
490
491
492
493
494static void shrink_delalloc(struct btrfs_fs_info *fs_info,
495 struct btrfs_space_info *space_info,
496 u64 to_reclaim, bool wait_ordered,
497 bool for_preempt)
498{
499 struct btrfs_trans_handle *trans;
500 u64 delalloc_bytes;
501 u64 ordered_bytes;
502 u64 items;
503 long time_left;
504 int loops;
505
506 delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
507 ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
508 if (delalloc_bytes == 0 && ordered_bytes == 0)
509 return;
510
511
512 if (to_reclaim == U64_MAX) {
513 items = U64_MAX;
514 } else {
515
516
517
518
519
520
521
522
523
524
525
526
527 to_reclaim = max(to_reclaim, delalloc_bytes >> 3);
528 items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
529 }
530
531 trans = current->journal_info;
532
533
534
535
536
537
538 if (ordered_bytes > delalloc_bytes && !for_preempt)
539 wait_ordered = true;
540
541 loops = 0;
542 while ((delalloc_bytes || ordered_bytes) && loops < 3) {
543 u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
544 long nr_pages = min_t(u64, temp, LONG_MAX);
545 int async_pages;
546
547 btrfs_start_delalloc_roots(fs_info, nr_pages, true);
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570 async_pages = atomic_read(&fs_info->async_delalloc_pages);
571 if (!async_pages)
572 goto skip_async;
573
574
575
576
577
578
579
580 if (async_pages > nr_pages)
581 async_pages -= nr_pages;
582 else
583 async_pages = 0;
584 wait_event(fs_info->async_submit_wait,
585 atomic_read(&fs_info->async_delalloc_pages) <=
586 async_pages);
587skip_async:
588 loops++;
589 if (wait_ordered && !trans) {
590 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
591 } else {
592 time_left = schedule_timeout_killable(1);
593 if (time_left)
594 break;
595 }
596
597
598
599
600
601
602 if (for_preempt)
603 break;
604
605 spin_lock(&space_info->lock);
606 if (list_empty(&space_info->tickets) &&
607 list_empty(&space_info->priority_tickets)) {
608 spin_unlock(&space_info->lock);
609 break;
610 }
611 spin_unlock(&space_info->lock);
612
613 delalloc_bytes = percpu_counter_sum_positive(
614 &fs_info->delalloc_bytes);
615 ordered_bytes = percpu_counter_sum_positive(
616 &fs_info->ordered_bytes);
617 }
618}
619
620
621
622
623
624
625static void flush_space(struct btrfs_fs_info *fs_info,
626 struct btrfs_space_info *space_info, u64 num_bytes,
627 enum btrfs_flush_state state, bool for_preempt)
628{
629 struct btrfs_root *root = fs_info->tree_root;
630 struct btrfs_trans_handle *trans;
631 int nr;
632 int ret = 0;
633
634 switch (state) {
635 case FLUSH_DELAYED_ITEMS_NR:
636 case FLUSH_DELAYED_ITEMS:
637 if (state == FLUSH_DELAYED_ITEMS_NR)
638 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
639 else
640 nr = -1;
641
642 trans = btrfs_join_transaction(root);
643 if (IS_ERR(trans)) {
644 ret = PTR_ERR(trans);
645 break;
646 }
647 ret = btrfs_run_delayed_items_nr(trans, nr);
648 btrfs_end_transaction(trans);
649 break;
650 case FLUSH_DELALLOC:
651 case FLUSH_DELALLOC_WAIT:
652 case FLUSH_DELALLOC_FULL:
653 if (state == FLUSH_DELALLOC_FULL)
654 num_bytes = U64_MAX;
655 shrink_delalloc(fs_info, space_info, num_bytes,
656 state != FLUSH_DELALLOC, for_preempt);
657 break;
658 case FLUSH_DELAYED_REFS_NR:
659 case FLUSH_DELAYED_REFS:
660 trans = btrfs_join_transaction(root);
661 if (IS_ERR(trans)) {
662 ret = PTR_ERR(trans);
663 break;
664 }
665 if (state == FLUSH_DELAYED_REFS_NR)
666 nr = calc_reclaim_items_nr(fs_info, num_bytes);
667 else
668 nr = 0;
669 btrfs_run_delayed_refs(trans, nr);
670 btrfs_end_transaction(trans);
671 break;
672 case ALLOC_CHUNK:
673 case ALLOC_CHUNK_FORCE:
674 trans = btrfs_join_transaction(root);
675 if (IS_ERR(trans)) {
676 ret = PTR_ERR(trans);
677 break;
678 }
679 ret = btrfs_chunk_alloc(trans,
680 btrfs_get_alloc_profile(fs_info, space_info->flags),
681 (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
682 CHUNK_ALLOC_FORCE);
683 btrfs_end_transaction(trans);
684 if (ret > 0 || ret == -ENOSPC)
685 ret = 0;
686 break;
687 case RUN_DELAYED_IPUTS:
688
689
690
691
692
693 btrfs_run_delayed_iputs(fs_info);
694 btrfs_wait_on_delayed_iputs(fs_info);
695 break;
696 case COMMIT_TRANS:
697 ASSERT(current->journal_info == NULL);
698 trans = btrfs_join_transaction(root);
699 if (IS_ERR(trans)) {
700 ret = PTR_ERR(trans);
701 break;
702 }
703 ret = btrfs_commit_transaction(trans);
704 break;
705 default:
706 ret = -ENOSPC;
707 break;
708 }
709
710 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
711 ret, for_preempt);
712 return;
713}
714
715static inline u64
716btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
717 struct btrfs_space_info *space_info)
718{
719 u64 used;
720 u64 avail;
721 u64 to_reclaim = space_info->reclaim_size;
722
723 lockdep_assert_held(&space_info->lock);
724
725 avail = calc_available_free_space(fs_info, space_info,
726 BTRFS_RESERVE_FLUSH_ALL);
727 used = btrfs_space_info_used(space_info, true);
728
729
730
731
732
733
734
735 if (space_info->total_bytes + avail < used)
736 to_reclaim += used - (space_info->total_bytes + avail);
737
738 return to_reclaim;
739}
740
741static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
742 struct btrfs_space_info *space_info)
743{
744 u64 global_rsv_size = fs_info->global_block_rsv.reserved;
745 u64 ordered, delalloc;
746 u64 thresh = div_factor_fine(space_info->total_bytes, 90);
747 u64 used;
748
749 lockdep_assert_held(&space_info->lock);
750
751
752 if ((space_info->bytes_used + space_info->bytes_reserved +
753 global_rsv_size) >= thresh)
754 return false;
755
756 used = space_info->bytes_may_use + space_info->bytes_pinned;
757
758
759 if (global_rsv_size >= used)
760 return false;
761
762
763
764
765
766
767 if (used - global_rsv_size <= SZ_128M)
768 return false;
769
770
771
772
773
774 if (space_info->reclaim_size)
775 return false;
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806 thresh = calc_available_free_space(fs_info, space_info,
807 BTRFS_RESERVE_FLUSH_ALL);
808 used = space_info->bytes_used + space_info->bytes_reserved +
809 space_info->bytes_readonly + global_rsv_size;
810 if (used < space_info->total_bytes)
811 thresh += space_info->total_bytes - used;
812 thresh >>= space_info->clamp;
813
814 used = space_info->bytes_pinned;
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839 ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
840 delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
841 if (ordered >= delalloc)
842 used += fs_info->delayed_refs_rsv.reserved +
843 fs_info->delayed_block_rsv.reserved;
844 else
845 used += space_info->bytes_may_use - global_rsv_size;
846
847 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
848 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
849}
850
851static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
852 struct btrfs_space_info *space_info,
853 struct reserve_ticket *ticket)
854{
855 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
856 u64 min_bytes;
857
858 if (!ticket->steal)
859 return false;
860
861 if (global_rsv->space_info != space_info)
862 return false;
863
864 spin_lock(&global_rsv->lock);
865 min_bytes = div_factor(global_rsv->size, 1);
866 if (global_rsv->reserved < min_bytes + ticket->bytes) {
867 spin_unlock(&global_rsv->lock);
868 return false;
869 }
870 global_rsv->reserved -= ticket->bytes;
871 remove_ticket(space_info, ticket);
872 ticket->bytes = 0;
873 wake_up(&ticket->wait);
874 space_info->tickets_id++;
875 if (global_rsv->reserved < global_rsv->size)
876 global_rsv->full = 0;
877 spin_unlock(&global_rsv->lock);
878
879 return true;
880}
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
898 struct btrfs_space_info *space_info)
899{
900 struct reserve_ticket *ticket;
901 u64 tickets_id = space_info->tickets_id;
902 const bool aborted = BTRFS_FS_ERROR(fs_info);
903
904 trace_btrfs_fail_all_tickets(fs_info, space_info);
905
906 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
907 btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
908 __btrfs_dump_space_info(fs_info, space_info);
909 }
910
911 while (!list_empty(&space_info->tickets) &&
912 tickets_id == space_info->tickets_id) {
913 ticket = list_first_entry(&space_info->tickets,
914 struct reserve_ticket, list);
915
916 if (!aborted && steal_from_global_rsv(fs_info, space_info, ticket))
917 return true;
918
919 if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
920 btrfs_info(fs_info, "failing ticket with %llu bytes",
921 ticket->bytes);
922
923 remove_ticket(space_info, ticket);
924 if (aborted)
925 ticket->error = -EIO;
926 else
927 ticket->error = -ENOSPC;
928 wake_up(&ticket->wait);
929
930
931
932
933
934
935
936 if (!aborted)
937 btrfs_try_granting_tickets(fs_info, space_info);
938 }
939 return (tickets_id != space_info->tickets_id);
940}
941
942
943
944
945
946
947static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
948{
949 struct btrfs_fs_info *fs_info;
950 struct btrfs_space_info *space_info;
951 u64 to_reclaim;
952 enum btrfs_flush_state flush_state;
953 int commit_cycles = 0;
954 u64 last_tickets_id;
955
956 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
957 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
958
959 spin_lock(&space_info->lock);
960 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
961 if (!to_reclaim) {
962 space_info->flush = 0;
963 spin_unlock(&space_info->lock);
964 return;
965 }
966 last_tickets_id = space_info->tickets_id;
967 spin_unlock(&space_info->lock);
968
969 flush_state = FLUSH_DELAYED_ITEMS_NR;
970 do {
971 flush_space(fs_info, space_info, to_reclaim, flush_state, false);
972 spin_lock(&space_info->lock);
973 if (list_empty(&space_info->tickets)) {
974 space_info->flush = 0;
975 spin_unlock(&space_info->lock);
976 return;
977 }
978 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
979 space_info);
980 if (last_tickets_id == space_info->tickets_id) {
981 flush_state++;
982 } else {
983 last_tickets_id = space_info->tickets_id;
984 flush_state = FLUSH_DELAYED_ITEMS_NR;
985 if (commit_cycles)
986 commit_cycles--;
987 }
988
989
990
991
992
993
994 if (flush_state == FLUSH_DELALLOC_FULL && !commit_cycles)
995 flush_state++;
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
1008 flush_state++;
1009
1010 if (flush_state > COMMIT_TRANS) {
1011 commit_cycles++;
1012 if (commit_cycles > 2) {
1013 if (maybe_fail_all_tickets(fs_info, space_info)) {
1014 flush_state = FLUSH_DELAYED_ITEMS_NR;
1015 commit_cycles--;
1016 } else {
1017 space_info->flush = 0;
1018 }
1019 } else {
1020 flush_state = FLUSH_DELAYED_ITEMS_NR;
1021 }
1022 }
1023 spin_unlock(&space_info->lock);
1024 } while (flush_state <= COMMIT_TRANS);
1025}
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
1036{
1037 struct btrfs_fs_info *fs_info;
1038 struct btrfs_space_info *space_info;
1039 struct btrfs_block_rsv *delayed_block_rsv;
1040 struct btrfs_block_rsv *delayed_refs_rsv;
1041 struct btrfs_block_rsv *global_rsv;
1042 struct btrfs_block_rsv *trans_rsv;
1043 int loops = 0;
1044
1045 fs_info = container_of(work, struct btrfs_fs_info,
1046 preempt_reclaim_work);
1047 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
1048 delayed_block_rsv = &fs_info->delayed_block_rsv;
1049 delayed_refs_rsv = &fs_info->delayed_refs_rsv;
1050 global_rsv = &fs_info->global_block_rsv;
1051 trans_rsv = &fs_info->trans_block_rsv;
1052
1053 spin_lock(&space_info->lock);
1054 while (need_preemptive_reclaim(fs_info, space_info)) {
1055 enum btrfs_flush_state flush;
1056 u64 delalloc_size = 0;
1057 u64 to_reclaim, block_rsv_size;
1058 u64 global_rsv_size = global_rsv->reserved;
1059
1060 loops++;
1061
1062
1063
1064
1065
1066
1067
1068
1069 block_rsv_size = global_rsv_size +
1070 delayed_block_rsv->reserved +
1071 delayed_refs_rsv->reserved +
1072 trans_rsv->reserved;
1073 if (block_rsv_size < space_info->bytes_may_use)
1074 delalloc_size = space_info->bytes_may_use - block_rsv_size;
1075
1076
1077
1078
1079
1080
1081 block_rsv_size -= global_rsv_size;
1082
1083
1084
1085
1086
1087
1088 if (delalloc_size > block_rsv_size) {
1089 to_reclaim = delalloc_size;
1090 flush = FLUSH_DELALLOC;
1091 } else if (space_info->bytes_pinned >
1092 (delayed_block_rsv->reserved +
1093 delayed_refs_rsv->reserved)) {
1094 to_reclaim = space_info->bytes_pinned;
1095 flush = COMMIT_TRANS;
1096 } else if (delayed_block_rsv->reserved >
1097 delayed_refs_rsv->reserved) {
1098 to_reclaim = delayed_block_rsv->reserved;
1099 flush = FLUSH_DELAYED_ITEMS_NR;
1100 } else {
1101 to_reclaim = delayed_refs_rsv->reserved;
1102 flush = FLUSH_DELAYED_REFS_NR;
1103 }
1104
1105 spin_unlock(&space_info->lock);
1106
1107
1108
1109
1110
1111
1112 to_reclaim >>= 2;
1113 if (!to_reclaim)
1114 to_reclaim = btrfs_calc_insert_metadata_size(fs_info, 1);
1115 flush_space(fs_info, space_info, to_reclaim, flush, true);
1116 cond_resched();
1117 spin_lock(&space_info->lock);
1118 }
1119
1120
1121 if (loops == 1 && !space_info->reclaim_size)
1122 space_info->clamp = max(1, space_info->clamp - 1);
1123 trace_btrfs_done_preemptive_reclaim(fs_info, space_info);
1124 spin_unlock(&space_info->lock);
1125}
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160static const enum btrfs_flush_state data_flush_states[] = {
1161 FLUSH_DELALLOC_FULL,
1162 RUN_DELAYED_IPUTS,
1163 COMMIT_TRANS,
1164 ALLOC_CHUNK_FORCE,
1165};
1166
1167static void btrfs_async_reclaim_data_space(struct work_struct *work)
1168{
1169 struct btrfs_fs_info *fs_info;
1170 struct btrfs_space_info *space_info;
1171 u64 last_tickets_id;
1172 enum btrfs_flush_state flush_state = 0;
1173
1174 fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work);
1175 space_info = fs_info->data_sinfo;
1176
1177 spin_lock(&space_info->lock);
1178 if (list_empty(&space_info->tickets)) {
1179 space_info->flush = 0;
1180 spin_unlock(&space_info->lock);
1181 return;
1182 }
1183 last_tickets_id = space_info->tickets_id;
1184 spin_unlock(&space_info->lock);
1185
1186 while (!space_info->full) {
1187 flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
1188 spin_lock(&space_info->lock);
1189 if (list_empty(&space_info->tickets)) {
1190 space_info->flush = 0;
1191 spin_unlock(&space_info->lock);
1192 return;
1193 }
1194
1195
1196 if (BTRFS_FS_ERROR(fs_info))
1197 goto aborted_fs;
1198 last_tickets_id = space_info->tickets_id;
1199 spin_unlock(&space_info->lock);
1200 }
1201
1202 while (flush_state < ARRAY_SIZE(data_flush_states)) {
1203 flush_space(fs_info, space_info, U64_MAX,
1204 data_flush_states[flush_state], false);
1205 spin_lock(&space_info->lock);
1206 if (list_empty(&space_info->tickets)) {
1207 space_info->flush = 0;
1208 spin_unlock(&space_info->lock);
1209 return;
1210 }
1211
1212 if (last_tickets_id == space_info->tickets_id) {
1213 flush_state++;
1214 } else {
1215 last_tickets_id = space_info->tickets_id;
1216 flush_state = 0;
1217 }
1218
1219 if (flush_state >= ARRAY_SIZE(data_flush_states)) {
1220 if (space_info->full) {
1221 if (maybe_fail_all_tickets(fs_info, space_info))
1222 flush_state = 0;
1223 else
1224 space_info->flush = 0;
1225 } else {
1226 flush_state = 0;
1227 }
1228
1229
1230 if (BTRFS_FS_ERROR(fs_info))
1231 goto aborted_fs;
1232
1233 }
1234 spin_unlock(&space_info->lock);
1235 }
1236 return;
1237
1238aborted_fs:
1239 maybe_fail_all_tickets(fs_info, space_info);
1240 space_info->flush = 0;
1241 spin_unlock(&space_info->lock);
1242}
1243
1244void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
1245{
1246 INIT_WORK(&fs_info->async_reclaim_work, btrfs_async_reclaim_metadata_space);
1247 INIT_WORK(&fs_info->async_data_reclaim_work, btrfs_async_reclaim_data_space);
1248 INIT_WORK(&fs_info->preempt_reclaim_work,
1249 btrfs_preempt_reclaim_metadata_space);
1250}
1251
1252static const enum btrfs_flush_state priority_flush_states[] = {
1253 FLUSH_DELAYED_ITEMS_NR,
1254 FLUSH_DELAYED_ITEMS,
1255 ALLOC_CHUNK,
1256};
1257
1258static const enum btrfs_flush_state evict_flush_states[] = {
1259 FLUSH_DELAYED_ITEMS_NR,
1260 FLUSH_DELAYED_ITEMS,
1261 FLUSH_DELAYED_REFS_NR,
1262 FLUSH_DELAYED_REFS,
1263 FLUSH_DELALLOC,
1264 FLUSH_DELALLOC_WAIT,
1265 FLUSH_DELALLOC_FULL,
1266 ALLOC_CHUNK,
1267 COMMIT_TRANS,
1268};
1269
1270static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
1271 struct btrfs_space_info *space_info,
1272 struct reserve_ticket *ticket,
1273 const enum btrfs_flush_state *states,
1274 int states_nr)
1275{
1276 u64 to_reclaim;
1277 int flush_state = 0;
1278
1279 spin_lock(&space_info->lock);
1280 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
1281
1282
1283
1284
1285
1286
1287 if (ticket->bytes == 0) {
1288 spin_unlock(&space_info->lock);
1289 return;
1290 }
1291
1292 while (flush_state < states_nr) {
1293 spin_unlock(&space_info->lock);
1294 flush_space(fs_info, space_info, to_reclaim, states[flush_state],
1295 false);
1296 flush_state++;
1297 spin_lock(&space_info->lock);
1298 if (ticket->bytes == 0) {
1299 spin_unlock(&space_info->lock);
1300 return;
1301 }
1302 }
1303
1304
1305 if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
1306 ticket->error = -ENOSPC;
1307 remove_ticket(space_info, ticket);
1308 }
1309
1310
1311
1312
1313
1314
1315 btrfs_try_granting_tickets(fs_info, space_info);
1316 spin_unlock(&space_info->lock);
1317}
1318
1319static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
1320 struct btrfs_space_info *space_info,
1321 struct reserve_ticket *ticket)
1322{
1323 spin_lock(&space_info->lock);
1324
1325
1326 if (ticket->bytes == 0) {
1327 spin_unlock(&space_info->lock);
1328 return;
1329 }
1330
1331 while (!space_info->full) {
1332 spin_unlock(&space_info->lock);
1333 flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
1334 spin_lock(&space_info->lock);
1335 if (ticket->bytes == 0) {
1336 spin_unlock(&space_info->lock);
1337 return;
1338 }
1339 }
1340
1341 ticket->error = -ENOSPC;
1342 remove_ticket(space_info, ticket);
1343 btrfs_try_granting_tickets(fs_info, space_info);
1344 spin_unlock(&space_info->lock);
1345}
1346
1347static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
1348 struct btrfs_space_info *space_info,
1349 struct reserve_ticket *ticket)
1350
1351{
1352 DEFINE_WAIT(wait);
1353 int ret = 0;
1354
1355 spin_lock(&space_info->lock);
1356 while (ticket->bytes > 0 && ticket->error == 0) {
1357 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
1358 if (ret) {
1359
1360
1361
1362
1363
1364
1365
1366
1367 remove_ticket(space_info, ticket);
1368 ticket->error = -EINTR;
1369 break;
1370 }
1371 spin_unlock(&space_info->lock);
1372
1373 schedule();
1374
1375 finish_wait(&ticket->wait, &wait);
1376 spin_lock(&space_info->lock);
1377 }
1378 spin_unlock(&space_info->lock);
1379}
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
1395 struct btrfs_space_info *space_info,
1396 struct reserve_ticket *ticket,
1397 u64 start_ns, u64 orig_bytes,
1398 enum btrfs_reserve_flush_enum flush)
1399{
1400 int ret;
1401
1402 switch (flush) {
1403 case BTRFS_RESERVE_FLUSH_DATA:
1404 case BTRFS_RESERVE_FLUSH_ALL:
1405 case BTRFS_RESERVE_FLUSH_ALL_STEAL:
1406 wait_reserve_ticket(fs_info, space_info, ticket);
1407 break;
1408 case BTRFS_RESERVE_FLUSH_LIMIT:
1409 priority_reclaim_metadata_space(fs_info, space_info, ticket,
1410 priority_flush_states,
1411 ARRAY_SIZE(priority_flush_states));
1412 break;
1413 case BTRFS_RESERVE_FLUSH_EVICT:
1414 priority_reclaim_metadata_space(fs_info, space_info, ticket,
1415 evict_flush_states,
1416 ARRAY_SIZE(evict_flush_states));
1417 break;
1418 case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
1419 priority_reclaim_data_space(fs_info, space_info, ticket);
1420 break;
1421 default:
1422 ASSERT(0);
1423 break;
1424 }
1425
1426 ret = ticket->error;
1427 ASSERT(list_empty(&ticket->list));
1428
1429
1430
1431
1432
1433
1434 ASSERT(!(ticket->bytes == 0 && ticket->error));
1435 trace_btrfs_reserve_ticket(fs_info, space_info->flags, orig_bytes,
1436 start_ns, flush, ticket->error);
1437 return ret;
1438}
1439
1440
1441
1442
1443
1444static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
1445{
1446 return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
1447 (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
1448}
1449
1450static inline void maybe_clamp_preempt(struct btrfs_fs_info *fs_info,
1451 struct btrfs_space_info *space_info)
1452{
1453 u64 ordered = percpu_counter_sum_positive(&fs_info->ordered_bytes);
1454 u64 delalloc = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464 if (ordered < delalloc)
1465 space_info->clamp = min(space_info->clamp + 1, 8);
1466}
1467
1468static inline bool can_steal(enum btrfs_reserve_flush_enum flush)
1469{
1470 return (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL ||
1471 flush == BTRFS_RESERVE_FLUSH_EVICT);
1472}
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489static int __reserve_bytes(struct btrfs_fs_info *fs_info,
1490 struct btrfs_space_info *space_info, u64 orig_bytes,
1491 enum btrfs_reserve_flush_enum flush)
1492{
1493 struct work_struct *async_work;
1494 struct reserve_ticket ticket;
1495 u64 start_ns = 0;
1496 u64 used;
1497 int ret = 0;
1498 bool pending_tickets;
1499
1500 ASSERT(orig_bytes);
1501 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
1502
1503 if (flush == BTRFS_RESERVE_FLUSH_DATA)
1504 async_work = &fs_info->async_data_reclaim_work;
1505 else
1506 async_work = &fs_info->async_reclaim_work;
1507
1508 spin_lock(&space_info->lock);
1509 ret = -ENOSPC;
1510 used = btrfs_space_info_used(space_info, true);
1511
1512
1513
1514
1515
1516
1517 if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
1518 pending_tickets = !list_empty(&space_info->tickets) ||
1519 !list_empty(&space_info->priority_tickets);
1520 else
1521 pending_tickets = !list_empty(&space_info->priority_tickets);
1522
1523
1524
1525
1526
1527 if (!pending_tickets &&
1528 ((used + orig_bytes <= space_info->total_bytes) ||
1529 btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
1530 btrfs_space_info_update_bytes_may_use(fs_info, space_info,
1531 orig_bytes);
1532 ret = 0;
1533 }
1534
1535
1536
1537
1538
1539
1540
1541
1542 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
1543 ticket.bytes = orig_bytes;
1544 ticket.error = 0;
1545 space_info->reclaim_size += ticket.bytes;
1546 init_waitqueue_head(&ticket.wait);
1547 ticket.steal = can_steal(flush);
1548 if (trace_btrfs_reserve_ticket_enabled())
1549 start_ns = ktime_get_ns();
1550
1551 if (flush == BTRFS_RESERVE_FLUSH_ALL ||
1552 flush == BTRFS_RESERVE_FLUSH_ALL_STEAL ||
1553 flush == BTRFS_RESERVE_FLUSH_DATA) {
1554 list_add_tail(&ticket.list, &space_info->tickets);
1555 if (!space_info->flush) {
1556
1557
1558
1559
1560
1561
1562
1563 maybe_clamp_preempt(fs_info, space_info);
1564
1565 space_info->flush = 1;
1566 trace_btrfs_trigger_flush(fs_info,
1567 space_info->flags,
1568 orig_bytes, flush,
1569 "enospc");
1570 queue_work(system_unbound_wq, async_work);
1571 }
1572 } else {
1573 list_add_tail(&ticket.list,
1574 &space_info->priority_tickets);
1575 }
1576 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
1577 used += orig_bytes;
1578
1579
1580
1581
1582
1583 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
1584 !work_busy(&fs_info->preempt_reclaim_work) &&
1585 need_preemptive_reclaim(fs_info, space_info)) {
1586 trace_btrfs_trigger_flush(fs_info, space_info->flags,
1587 orig_bytes, flush, "preempt");
1588 queue_work(system_unbound_wq,
1589 &fs_info->preempt_reclaim_work);
1590 }
1591 }
1592 spin_unlock(&space_info->lock);
1593 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
1594 return ret;
1595
1596 return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
1597 orig_bytes, flush);
1598}
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
1616 struct btrfs_block_rsv *block_rsv,
1617 u64 orig_bytes,
1618 enum btrfs_reserve_flush_enum flush)
1619{
1620 int ret;
1621
1622 ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush);
1623 if (ret == -ENOSPC) {
1624 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
1625 block_rsv->space_info->flags,
1626 orig_bytes, 1);
1627
1628 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
1629 btrfs_dump_space_info(fs_info, block_rsv->space_info,
1630 orig_bytes, 0);
1631 }
1632 return ret;
1633}
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
1646 enum btrfs_reserve_flush_enum flush)
1647{
1648 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
1649 int ret;
1650
1651 ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
1652 flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
1653 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
1654
1655 ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
1656 if (ret == -ENOSPC) {
1657 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
1658 data_sinfo->flags, bytes, 1);
1659 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
1660 btrfs_dump_space_info(fs_info, data_sinfo, bytes, 0);
1661 }
1662 return ret;
1663}
1664