// SPDX-License-Identifier: GPL-2.0

#include "misc.h"
#include "ctree.h"
#include "space-info.h"
#include "sysfs.h"
#include "volumes.h"
#include "free-space-cache.h"
#include "ordered-data.h"
#include "transaction.h"
#include "block-group.h"

/*
 * Overview of the reservation system (condensed):
 *
 * Every operation that modifies the filesystem first reserves the worst case
 * amount of metadata (or the exact amount of data) it may need from the
 * matching struct btrfs_space_info.  A reservation is accounted in
 * ->bytes_may_use; once extents are actually allocated the space migrates to
 * ->bytes_reserved and finally ->bytes_used.  Because metadata reservations
 * are worst case estimates, metadata is allowed to overcommit: while
 * unallocated device space remains we may promise more than currently fits
 * in the allocated chunks (see btrfs_can_overcommit()).
 *
 * If a reservation cannot be satisfied immediately, the reserver queues a
 * struct reserve_ticket on the space_info and the flushing machinery starts
 * reclaiming space on its behalf: running delayed items and delayed refs,
 * flushing delalloc, waiting on ordered extents, allocating new chunks and
 * finally committing the transaction, in roughly increasing order of cost
 * (see enum btrfs_flush_state and flush_space()).  As space is freed,
 * btrfs_try_granting_tickets() satisfies tickets in FIFO order, priority
 * tickets first.
 *
 * Independently of tickets, a preemptive background worker
 * (btrfs_preempt_reclaim_metadata_space()) flushes early so that tasks
 * rarely have to block on a ticket at all; how eagerly it kicks in is
 * scaled by space_info->clamp.
 */
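
/*
 * Sum of all space accounted against @s_info.  The individual counters:
 *
 *   bytes_used          - extents currently allocated on disk
 *   bytes_reserved      - space handed out to allocations in flight
 *   bytes_pinned        - freed space that becomes usable again only after
 *                         the current transaction commits
 *   bytes_readonly      - space held in read-only block groups
 *   bytes_zone_unusable - space unusable due to the zoned allocation model
 *   bytes_may_use       - outstanding reservations; only included when
 *                         @may_use_included is true
 */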
u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
			  bool may_use_included)
{
	ASSERT(s_info);
	return s_info->bytes_used + s_info->bytes_reserved +
		s_info->bytes_pinned + s_info->bytes_readonly +
		s_info->bytes_zone_unusable +
		(may_use_included ? s_info->bytes_may_use : 0);
}

/*
 * After adding space to the filesystem, we need to clear the full flags on
 * all the space infos so allocation is retried.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	list_for_each_entry(found, head, list)
		found->full = 0;
}

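/*
 * Allocate, initialize and register the in-memory accounting object for one
 * block group type (data, metadata or system, as selected by the
 * BTRFS_BLOCK_GROUP_TYPE_MASK bits of @flags).
 */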
static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{
	struct btrfs_space_info *space_info;
	int i;
	int ret;

	space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
	if (!space_info)
		return -ENOMEM;

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&space_info->block_groups[i]);
	init_rwsem(&space_info->groups_sem);
	spin_lock_init(&space_info->lock);
	space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
	INIT_LIST_HEAD(&space_info->ro_bgs);
	INIT_LIST_HEAD(&space_info->tickets);
	INIT_LIST_HEAD(&space_info->priority_tickets);
	space_info->clamp = 1;

	ret = btrfs_sysfs_add_space_info_type(info, space_info);
	if (ret)
		return ret;

	list_add(&space_info->list, &info->space_info);
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		info->data_sinfo = space_info;

	return ret;
}

int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
	struct btrfs_super_block *disk_super;
	u64 features;
	u64 flags;
	int mixed = 0;
	int ret;

	disk_super = fs_info->super_copy;
	if (!btrfs_super_root(disk_super))
		return -EINVAL;

	features = btrfs_super_incompat_flags(disk_super);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	flags = BTRFS_BLOCK_GROUP_SYSTEM;
	ret = create_space_info(fs_info, flags);
	if (ret)
		goto out;

	if (mixed) {
		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
		ret = create_space_info(fs_info, flags);
	} else {
		flags = BTRFS_BLOCK_GROUP_METADATA;
		ret = create_space_info(fs_info, flags);
		if (ret)
			goto out;

		flags = BTRFS_BLOCK_GROUP_DATA;
		ret = create_space_info(fs_info, flags);
	}
out:
	return ret;
}

void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     u64 bytes_readonly, u64 bytes_zone_unusable,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;
	int factor;

	factor = btrfs_bg_type_to_factor(flags);

	found = btrfs_find_space_info(info, flags);
	ASSERT(found);
	spin_lock(&found->lock);
	found->total_bytes += total_bytes;
	found->disk_total += total_bytes * factor;
	found->bytes_used += bytes_used;
	found->disk_used += bytes_used * factor;
	found->bytes_readonly += bytes_readonly;
	found->bytes_zone_unusable += bytes_zone_unusable;
	if (total_bytes > 0)
		found->full = 0;
	btrfs_try_granting_tickets(info, found);
	spin_unlock(&found->lock);
	*space_info = found;
}

struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
					       u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	list_for_each_entry(found, head, list) {
		if (found->flags & flags)
			return found;
	}
	return NULL;
}

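/*
 * Estimate how many more bytes this space_info could hand out based on the
 * unallocated chunk space, adjusted for the RAID profile and for how hard
 * the caller is allowed to flush.
 */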
static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     enum btrfs_reserve_flush_enum flush)
{
	u64 profile;
	u64 avail;
	int factor;

	if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
		profile = btrfs_system_alloc_profile(fs_info);
	else
		profile = btrfs_metadata_alloc_profile(fs_info);

	avail = atomic64_read(&fs_info->free_chunk_space);

	/*
	 * If we have dup, raid1 or raid10 then only half of the free space is
	 * actually usable.  For raid56, the space info used doesn't include
	 * the parity drive, so we don't have to change the math.
	 */
	factor = btrfs_bg_type_to_factor(profile);
	avail = div_u64(avail, factor);

	/*
	 * If we aren't flushing all things, let us overcommit up to 1/2 of
	 * the space.  If we can flush, don't let us overcommit too much, let
	 * it overcommit up to 1/8 of the space.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		avail >>= 3;
	else
		avail >>= 1;
	return avail;
}

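/*
 * Returns 1 if used + @bytes still fits within total_bytes plus the estimated
 * available chunk space for this profile.  Data (including mixed block
 * groups) never overcommits, since data reservations are exact.
 */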
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
			 struct btrfs_space_info *space_info, u64 bytes,
			 enum btrfs_reserve_flush_enum flush)
{
	u64 avail;
	u64 used;

	/* Don't overcommit when in mixed mode */
	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
		return 0;

	used = btrfs_space_info_used(space_info, true);
	avail = calc_available_free_space(fs_info, space_info, flush);

	if (used + bytes < space_info->total_bytes + avail)
		return 1;
	return 0;
}

static void remove_ticket(struct btrfs_space_info *space_info,
			  struct reserve_ticket *ticket)
{
	if (!list_empty(&ticket->list)) {
		list_del_init(&ticket->list);
		ASSERT(space_info->reclaim_size >= ticket->bytes);
		space_info->reclaim_size -= ticket->bytes;
	}
}

/*
 * Called whenever space is returned to a space_info: walk the pending
 * reservation tickets, priority tickets first, and satisfy as many as the
 * now-available space (plus allowed overcommit) permits.
 */
void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info)
{
	struct list_head *head;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;

	lockdep_assert_held(&space_info->lock);

	head = &space_info->priority_tickets;
again:
	while (!list_empty(head)) {
		struct reserve_ticket *ticket;
		u64 used = btrfs_space_info_used(space_info, true);

		ticket = list_first_entry(head, struct reserve_ticket, list);

		/* Check and see if our ticket can be satisfied now. */
		if ((used + ticket->bytes <= space_info->total_bytes) ||
		    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
					 flush)) {
			btrfs_space_info_update_bytes_may_use(fs_info,
							      space_info,
							      ticket->bytes);
			remove_ticket(space_info, ticket);
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			break;
		}
	}

	if (head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		flush = BTRFS_RESERVE_FLUSH_ALL;
		goto again;
	}
}

#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;		\
	spin_lock(&__rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __rsv->size, __rsv->reserved);			\
	spin_unlock(&__rsv->lock);					\
} while (0)

static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *info)
{
	lockdep_assert_held(&info->lock);

	/* The free space could be negative in case of overcommit */
	btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull",
		   info->flags,
		   (s64)(info->total_bytes - btrfs_space_info_used(info, true)),
		   info->full ? "" : "not ");
	btrfs_info(fs_info,
		"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
		info->total_bytes, info->bytes_used, info->bytes_pinned,
		info->bytes_reserved, info->bytes_may_use,
		info->bytes_readonly, info->bytes_zone_unusable);

	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}

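/*
 * Dump the state of @info for ENOSPC debugging.  When @dump_block_groups is
 * set, also walk every block group of every raid type in this space_info and
 * print its usage plus the free space entries around @bytes.
 */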
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *info, u64 bytes,
			   int dump_block_groups)
{
	struct btrfs_block_group *cache;
	int index = 0;

	spin_lock(&info->lock);
	__btrfs_dump_space_info(fs_info, info);
	spin_unlock(&info->lock);

	if (!dump_block_groups)
		return;

	down_read(&info->groups_sem);
again:
	list_for_each_entry(cache, &info->block_groups[index], list) {
		spin_lock(&cache->lock);
		btrfs_info(fs_info,
			"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
			cache->start, cache->length, cache->used, cache->pinned,
			cache->reserved, cache->zone_unusable,
			cache->ro ? "[readonly]" : "");
		spin_unlock(&cache->lock);
		btrfs_dump_free_space(cache, bytes);
	}
	if (++index < BTRFS_NR_RAID_TYPES)
		goto again;
	up_read(&info->groups_sem);
}

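/*
 * Convert a byte count to reclaim into a number of tree items to process.
 * One item is costed at btrfs_calc_insert_metadata_size(fs_info, 1), a worst
 * case tree insertion of nodesize * 2 * BTRFS_MAX_LEVEL bytes; with the
 * default 16KiB nodesize that is 256KiB, matching EXTENT_SIZE_PER_ITEM below.
 */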
static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
					u64 to_reclaim)
{
	u64 bytes;
	u64 nr;

	bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
	nr = div64_u64(to_reclaim, bytes);
	if (!nr)
		nr = 1;
	return nr;
}

#define EXTENT_SIZE_PER_ITEM	SZ_256K

/*
 * Shrink metadata reservations for delalloc: write back dirty pages and
 * optionally wait on ordered extents to release reserved metadata space.
 */
static void shrink_delalloc(struct btrfs_fs_info *fs_info,
			    struct btrfs_space_info *space_info,
			    u64 to_reclaim, bool wait_ordered,
			    bool for_preempt)
{
	struct btrfs_trans_handle *trans;
	u64 delalloc_bytes;
	u64 ordered_bytes;
	u64 items;
	long time_left;
	int loops;

	delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
	ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
	if (delalloc_bytes == 0 && ordered_bytes == 0)
		return;

	/* Calc the number of the pages we need to flush for space reservation */
	if (to_reclaim == U64_MAX) {
		items = U64_MAX;
	} else {
		/*
		 * to_reclaim is set to however much metadata we need to
		 * reclaim, but reclaiming that much data doesn't really track
		 * exactly, so aim for at least 1/8 of the outstanding delalloc
		 * and double the item estimate to make it more likely that
		 * flushing this much delalloc actually frees the metadata we
		 * need.
		 */
		to_reclaim = max(to_reclaim, delalloc_bytes >> 3);
		items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
	}

	trans = (struct btrfs_trans_handle *)current->journal_info;

	/*
	 * If we are doing more ordered than delalloc we need to just wait on
	 * ordered extents, otherwise we'll waste time trying to flush delalloc
	 * that likely won't give us the space back we need.
	 */
	if (ordered_bytes > delalloc_bytes && !for_preempt)
		wait_ordered = true;

	loops = 0;
	while ((delalloc_bytes || ordered_bytes) && loops < 3) {
		u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
		long nr_pages = min_t(u64, temp, LONG_MAX);
		int async_pages;

		btrfs_start_delalloc_roots(fs_info, nr_pages, true);

		/*
		 * We need to wait for the compressed pages to start before we
		 * continue.  Compression queues pages for async writeout, so
		 * starting delalloc doesn't guarantee any IO has actually been
		 * submitted yet; without this wait we could loop without the
		 * writeback making any progress.
		 */
		async_pages = atomic_read(&fs_info->async_delalloc_pages);
		if (!async_pages)
			goto skip_async;

		/*
		 * Calculate how many compressed pages we want to be written
		 * before we continue, i.e. if there are more async pages than
		 * we require, wait_event will wait until nr_pages are written.
		 */
		if (async_pages > nr_pages)
			async_pages -= nr_pages;
		else
			async_pages = 0;
		wait_event(fs_info->async_submit_wait,
			   atomic_read(&fs_info->async_delalloc_pages) <=
			   async_pages);
skip_async:
		loops++;
		if (wait_ordered && !trans) {
			btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
		} else {
			time_left = schedule_timeout_killable(1);
			if (time_left)
				break;
		}

		/*
		 * If we are for preemption we just want a one-shot attempt at
		 * flushing so we can quickly check if we need to start over or
		 * if things smoothed out on their own.
		 */
		if (for_preempt)
			break;

		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets) &&
		    list_empty(&space_info->priority_tickets)) {
			spin_unlock(&space_info->lock);
			break;
		}
		spin_unlock(&space_info->lock);

		delalloc_bytes = percpu_counter_sum_positive(
						&fs_info->delalloc_bytes);
		ordered_bytes = percpu_counter_sum_positive(
						&fs_info->ordered_bytes);
	}
}

/*
 * Try to flush some data based on policy set by @state. This is only advisory
 * and may fail for various reasons. The caller is supposed to examine the
 * state of @space_info to detect the outcome.
 */
static void flush_space(struct btrfs_fs_info *fs_info,
			struct btrfs_space_info *space_info, u64 num_bytes,
			enum btrfs_flush_state state, bool for_preempt)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		if (state == FLUSH_DELAYED_ITEMS_NR)
			nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
		else
			nr = -1;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, nr);
		btrfs_end_transaction(trans);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
	case FLUSH_DELALLOC_FULL:
		if (state == FLUSH_DELALLOC_FULL)
			num_bytes = U64_MAX;
		shrink_delalloc(fs_info, space_info, num_bytes,
				state != FLUSH_DELALLOC, for_preempt);
		break;
	case FLUSH_DELAYED_REFS_NR:
	case FLUSH_DELAYED_REFS:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		if (state == FLUSH_DELAYED_REFS_NR)
			nr = calc_reclaim_items_nr(fs_info, num_bytes);
		else
			nr = 0;
		btrfs_run_delayed_refs(trans, nr);
		btrfs_end_transaction(trans);
		break;
	case ALLOC_CHUNK:
	case ALLOC_CHUNK_FORCE:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_chunk_alloc(trans,
				btrfs_get_alloc_profile(fs_info, space_info->flags),
				(state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
					CHUNK_ALLOC_FORCE);
		btrfs_end_transaction(trans);
		if (ret > 0 || ret == -ENOSPC)
			ret = 0;
		break;
	case RUN_DELAYED_IPUTS:
		/*
		 * If we have pending delayed iputs then we could free up a
		 * bunch of pinned space, so make sure we run the iputs before
		 * we do our pinned bytes check below.
		 */
		btrfs_run_delayed_iputs(fs_info);
		btrfs_wait_on_delayed_iputs(fs_info);
		break;
	case COMMIT_TRANS:
		ASSERT(current->journal_info == NULL);
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_commit_transaction(trans);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
				ret, for_preempt);
	return;
}

static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
				 struct btrfs_space_info *space_info)
{
	u64 used;
	u64 avail;
	u64 to_reclaim = space_info->reclaim_size;

	lockdep_assert_held(&space_info->lock);

	avail = calc_available_free_space(fs_info, space_info,
					  BTRFS_RESERVE_FLUSH_ALL);
	used = btrfs_space_info_used(space_info, true);

	/*
	 * We may be flushing because suddenly we have less space than we had
	 * before, and now we're well over-committed based on our current free
	 * space.  If that's the case add in our overage so we make sure to put
	 * appropriate pressure on the flushing state machine.
	 */
	if (space_info->total_bytes + avail < used)
		to_reclaim += used - (space_info->total_bytes + avail);

	return to_reclaim;
}

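/*
 * Heuristics deciding whether the preemptive background flusher should run:
 * bail if the space_info is essentially full (flushing would only slow us
 * down), if everything flushable belongs to the global reserve, if there is
 * too little to be worth reclaiming, or if ticketed flushing is already in
 * progress.
 */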
static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info)
{
	u64 global_rsv_size = fs_info->global_block_rsv.reserved;
	u64 ordered, delalloc;
	u64 thresh = div_factor_fine(space_info->total_bytes, 90);
	u64 used;

	/* If we're just plain full then async reclaim just slows us down. */
	if ((space_info->bytes_used + space_info->bytes_reserved +
	     global_rsv_size) >= thresh)
		return false;

	used = space_info->bytes_may_use + space_info->bytes_pinned;

	/* The total flushable belongs to the global rsv, don't flush. */
	if (global_rsv_size >= used)
		return false;

	/*
	 * 128MiB is 1/4 of the maximum global rsv size.  If we have less than
	 * that devoted to other reservations then there's no sense in flushing,
	 * we don't have a lot of things that need flushing.
	 */
	if (used - global_rsv_size <= SZ_128M)
		return false;

	/*
	 * We have tickets queued, bail so we don't compete with the async
	 * flushers.
	 */
	if (space_info->reclaim_size)
		return false;

	/*
	 * If we have over half of the free space occupied by reservations or
	 * pinned then we want to start flushing.
	 *
	 * We do not do the traditional thing here, which is to say
	 *
	 *   if (used >= ((total_bytes + avail) / 2))
	 *     return 1;
	 *
	 * because this doesn't quite work how we want.  If we had more than 50%
	 * of the space_info used by bytes_used and we had 0 available we'd just
	 * constantly run the background flusher.  Instead we want it to kick in
	 * if our reclaimable space exceeds our clamped free space.
	 *
	 * Our clamping range is 2^1 -> 2^8, so we scale the free space down to
	 * between 1/2 and 1/256 of what calc_available_free_space() reports.
	 * The clamp is raised each time ticketed flushing has to kick in (see
	 * maybe_clamp_preempt()) and lowered again when a single preemptive
	 * pass was enough (see the end of the preempt worker).
	 */
	thresh = calc_available_free_space(fs_info, space_info,
					   BTRFS_RESERVE_FLUSH_ALL);
	used = space_info->bytes_used + space_info->bytes_reserved +
	       space_info->bytes_readonly + global_rsv_size;
	if (used < space_info->total_bytes)
		thresh += space_info->total_bytes - used;
	thresh >>= space_info->clamp;

	used = space_info->bytes_pinned;

	/*
	 * If we have more ordered bytes than delalloc bytes then we're either
	 * doing a lot of DIO, or we simply don't have a lot of delalloc waiting
	 * around.  In that case most of our outstanding metadata is tied to
	 * the running transaction, so count the delayed refs and delayed items
	 * reserves as reclaimable.  Otherwise the bulk of bytes_may_use is
	 * delalloc metadata (minus the untouchable global reserve), so count
	 * that instead.
	 */
	ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
	delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
	if (ordered >= delalloc)
		used += fs_info->delayed_refs_rsv.reserved +
			fs_info->delayed_block_rsv.reserved;
	else
		used += space_info->bytes_may_use - global_rsv_size;

	return (used >= thresh && !btrfs_fs_closing(fs_info) &&
		!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}

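/*
 * Last resort for a ticket that is allowed to steal (see can_steal()): take
 * the bytes straight out of the global block reserve, provided that leaves
 * the global reserve with at least 1/10th of its size.
 */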
static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
				  struct btrfs_space_info *space_info,
				  struct reserve_ticket *ticket)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (!ticket->steal)
		return false;

	if (global_rsv->space_info != space_info)
		return false;

	spin_lock(&global_rsv->lock);
	min_bytes = div_factor(global_rsv->size, 1);
	if (global_rsv->reserved < min_bytes + ticket->bytes) {
		spin_unlock(&global_rsv->lock);
		return false;
	}
	global_rsv->reserved -= ticket->bytes;
	remove_ticket(space_info, ticket);
	ticket->bytes = 0;
	wake_up(&ticket->wait);
	space_info->tickets_id++;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	return true;
}

/*
 * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
 * @fs_info - fs_info for this fs
 * @space_info - the space info we were flushing
 *
 * We call this when we've exhausted our flushing ability and haven't made
 * progress in satisfying tickets.  The reservation code handles tickets in
 * order, so a large ticket at the front can block smaller ones behind it;
 * failing tickets from the front may let those smaller tickets be satisfied.
 * Keep failing tickets until either the list is empty or one gets granted
 * (tickets_id advances).
 *
 * Returns true if a ticket was satisfied in the process, meaning the flushing
 * loop should restart rather than give up.
 */
static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
				   struct btrfs_space_info *space_info)
{
	struct reserve_ticket *ticket;
	u64 tickets_id = space_info->tickets_id;
	const bool aborted = BTRFS_FS_ERROR(fs_info);

	trace_btrfs_fail_all_tickets(fs_info, space_info);

	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
		btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
		__btrfs_dump_space_info(fs_info, space_info);
	}

	while (!list_empty(&space_info->tickets) &&
	       tickets_id == space_info->tickets_id) {
		ticket = list_first_entry(&space_info->tickets,
					  struct reserve_ticket, list);

		if (!aborted && steal_from_global_rsv(fs_info, space_info, ticket))
			return true;

		if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
			btrfs_info(fs_info, "failing ticket with %llu bytes",
				   ticket->bytes);

		remove_ticket(space_info, ticket);
		if (aborted)
			ticket->error = -EIO;
		else
			ticket->error = -ENOSPC;
		wake_up(&ticket->wait);

		/*
		 * We're just throwing tickets away, so more flushing may not
		 * trip over btrfs_try_granting_tickets, so we need to call it
		 * here to see if we can make progress with the next ticket in
		 * the list.
		 */
		if (!aborted)
			btrfs_try_granting_tickets(fs_info, space_info);
	}
	return (tickets_id != space_info->tickets_id);
}

/*
 * This is for normal flushers: we can wait as long as needed, looping and
 * servicing the flush state machine until all tickets are satisfied or we
 * have to start failing them.
 */
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 to_reclaim;
	enum btrfs_flush_state flush_state;
	int commit_cycles = 0;
	u64 last_tickets_id;

	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
	if (!to_reclaim) {
		space_info->flush = 0;
		spin_unlock(&space_info->lock);
		return;
	}
	last_tickets_id = space_info->tickets_id;
	spin_unlock(&space_info->lock);

	flush_state = FLUSH_DELAYED_ITEMS_NR;
	do {
		flush_space(fs_info, space_info, to_reclaim, flush_state, false);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}
		to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
							      space_info);
		if (last_tickets_id == space_info->tickets_id) {
			flush_state++;
		} else {
			last_tickets_id = space_info->tickets_id;
			flush_state = FLUSH_DELAYED_ITEMS_NR;
			if (commit_cycles)
				commit_cycles--;
		}

		/*
		 * We do not want to empty the system of delalloc unless we're
		 * under heavy pressure, so allow one trip through the flushing
		 * logic before we start doing a FLUSH_DELALLOC_FULL.
		 */
		if (flush_state == FLUSH_DELALLOC_FULL && !commit_cycles)
			flush_state++;

		/*
		 * We don't want to force a chunk allocation until we've tried
		 * pretty hard to reclaim space.  Think of the case where we
		 * freed up a bunch of space and so have a lot of pinned space
		 * to reclaim.  We would rather use that than possibly create a
		 * underutilized metadata chunk.  So if this is our first run
		 * through the flushing state machine skip ALLOC_CHUNK_FORCE and
		 * commit the transaction.  If nothing has changed the next go
		 * around then we can force a chunk allocation.
		 */
		if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
			flush_state++;

		if (flush_state > COMMIT_TRANS) {
			commit_cycles++;
			if (commit_cycles > 2) {
				if (maybe_fail_all_tickets(fs_info, space_info)) {
					flush_state = FLUSH_DELAYED_ITEMS_NR;
					commit_cycles--;
				} else {
					space_info->flush = 0;
				}
			} else {
				flush_state = FLUSH_DELAYED_ITEMS_NR;
			}
		}
		spin_unlock(&space_info->lock);
	} while (flush_state <= COMMIT_TRANS);
}

/*
 * This handles pre-flushing of metadata space before we get to the point that
 * we need to start blocking tasks on tickets.  The logic here is different
 * from the other flush paths because it doesn't rely on tickets to tell us
 * how much we need to flush; instead it attempts to keep us below the
 * preemptive flushing threshold while making forward progress.
 */
static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *delayed_block_rsv;
	struct btrfs_block_rsv *delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv;
	struct btrfs_block_rsv *trans_rsv;
	int loops = 0;

	fs_info = container_of(work, struct btrfs_fs_info,
			       preempt_reclaim_work);
	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	delayed_block_rsv = &fs_info->delayed_block_rsv;
	delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	global_rsv = &fs_info->global_block_rsv;
	trans_rsv = &fs_info->trans_block_rsv;

	spin_lock(&space_info->lock);
	while (need_preemptive_reclaim(fs_info, space_info)) {
		enum btrfs_flush_state flush;
		u64 delalloc_size = 0;
		u64 to_reclaim, block_rsv_size;
		u64 global_rsv_size = global_rsv->reserved;

		loops++;

		/*
		 * We don't have a precise counter for the metadata being
		 * reserved for delalloc, so we'll approximate it by subtracting
		 * out the block rsv's space from the bytes_may_use.  If that
		 * amount is higher than the individual reserves, then we can
		 * assume it's tied up in delalloc reservations.
		 */
		block_rsv_size = global_rsv_size +
			delayed_block_rsv->reserved +
			delayed_refs_rsv->reserved +
			trans_rsv->reserved;
		if (block_rsv_size < space_info->bytes_may_use)
			delalloc_size = space_info->bytes_may_use - block_rsv_size;
		spin_unlock(&space_info->lock);

		/*
		 * We don't want to include the global_rsv in our calculation,
		 * because that's space we can't touch.  Subtract it from the
		 * block_rsv_size for the next checks.
		 */
		block_rsv_size -= global_rsv_size;

		/*
		 * We really want to avoid flushing delalloc too much, as it
		 * could result in poor allocation patterns, so only flush it if
		 * it's a larger amount than the other reservations.
		 */
		if (delalloc_size > block_rsv_size) {
			to_reclaim = delalloc_size;
			flush = FLUSH_DELALLOC;
		} else if (space_info->bytes_pinned >
			   (delayed_block_rsv->reserved +
			    delayed_refs_rsv->reserved)) {
			to_reclaim = space_info->bytes_pinned;
			flush = COMMIT_TRANS;
		} else if (delayed_block_rsv->reserved >
			   delayed_refs_rsv->reserved) {
			to_reclaim = delayed_block_rsv->reserved;
			flush = FLUSH_DELAYED_ITEMS_NR;
		} else {
			to_reclaim = delayed_refs_rsv->reserved;
			flush = FLUSH_DELAYED_REFS_NR;
		}

		/*
		 * We don't want to reclaim everything, just a portion, so scale
		 * down the to_reclaim by 1/4.  If it takes us down to 0,
		 * reclaim 1 items worth.
		 */
		to_reclaim >>= 2;
		if (!to_reclaim)
			to_reclaim = btrfs_calc_insert_metadata_size(fs_info, 1);
		flush_space(fs_info, space_info, to_reclaim, flush, true);
		cond_resched();
		spin_lock(&space_info->lock);
	}

	/* We only went through once, back off our clamping. */
	if (loops == 1 && !space_info->reclaim_size)
		space_info->clamp = max(1, space_info->clamp - 1);
	trace_btrfs_done_preemptive_reclaim(fs_info, space_info);
	spin_unlock(&space_info->lock);
}

/*
 * Data flushing differs from metadata flushing in that data reservations are
 * not worst case estimates: they correspond to bytes that will really be
 * written.  The states below can still give space back:
 *
 *   FLUSH_DELALLOC_FULL - flushing delalloc frees reservations when less
 *     space ends up allocated than was reserved (e.g. with compression),
 *   RUN_DELAYED_IPUTS - running delayed iputs can unpin space held by
 *     removed files,
 *   COMMIT_TRANS - a commit turns pinned bytes back into unused space,
 *   ALLOC_CHUNK_FORCE - otherwise allocate a new data chunk from unallocated
 *     device space.
 *
 * While the space_info is not full the async data reclaim worker just
 * force-allocates chunks; only once it is full does it walk the list above.
 */
static const enum btrfs_flush_state data_flush_states[] = {
	FLUSH_DELALLOC_FULL,
	RUN_DELAYED_IPUTS,
	COMMIT_TRANS,
	ALLOC_CHUNK_FORCE,
};

static void btrfs_async_reclaim_data_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 last_tickets_id;
	enum btrfs_flush_state flush_state = 0;

	fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work);
	space_info = fs_info->data_sinfo;

	spin_lock(&space_info->lock);
	if (list_empty(&space_info->tickets)) {
		space_info->flush = 0;
		spin_unlock(&space_info->lock);
		return;
	}
	last_tickets_id = space_info->tickets_id;
	spin_unlock(&space_info->lock);

	while (!space_info->full) {
		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}

		/* Something happened, fail everything and bail. */
		if (BTRFS_FS_ERROR(fs_info))
			goto aborted_fs;
		last_tickets_id = space_info->tickets_id;
		spin_unlock(&space_info->lock);
	}

	while (flush_state < ARRAY_SIZE(data_flush_states)) {
		flush_space(fs_info, space_info, U64_MAX,
			    data_flush_states[flush_state], false);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}

		if (last_tickets_id == space_info->tickets_id) {
			flush_state++;
		} else {
			last_tickets_id = space_info->tickets_id;
			flush_state = 0;
		}

		if (flush_state >= ARRAY_SIZE(data_flush_states)) {
			if (space_info->full) {
				if (maybe_fail_all_tickets(fs_info, space_info))
					flush_state = 0;
				else
					space_info->flush = 0;
			} else {
				flush_state = 0;
			}

			/* Something happened, fail everything and bail. */
			if (BTRFS_FS_ERROR(fs_info))
				goto aborted_fs;
		}
		spin_unlock(&space_info->lock);
	}
	return;

aborted_fs:
	maybe_fail_all_tickets(fs_info, space_info);
	space_info->flush = 0;
	spin_unlock(&space_info->lock);
}

void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
{
	INIT_WORK(&fs_info->async_reclaim_work, btrfs_async_reclaim_metadata_space);
	INIT_WORK(&fs_info->async_data_reclaim_work, btrfs_async_reclaim_data_space);
	INIT_WORK(&fs_info->preempt_reclaim_work,
		  btrfs_preempt_reclaim_metadata_space);
}

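/*
 * Flush state lists for the priority (BTRFS_RESERVE_FLUSH_LIMIT) and evict
 * reclaim paths.  The priority list deliberately stops short of anything
 * that forces a transaction commit; the evict list runs the full set,
 * commit included.
 */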
static const enum btrfs_flush_state priority_flush_states[] = {
	FLUSH_DELAYED_ITEMS_NR,
	FLUSH_DELAYED_ITEMS,
	ALLOC_CHUNK,
};

static const enum btrfs_flush_state evict_flush_states[] = {
	FLUSH_DELAYED_ITEMS_NR,
	FLUSH_DELAYED_ITEMS,
	FLUSH_DELAYED_REFS_NR,
	FLUSH_DELAYED_REFS,
	FLUSH_DELALLOC,
	FLUSH_DELALLOC_WAIT,
	FLUSH_DELALLOC_FULL,
	ALLOC_CHUNK,
	COMMIT_TRANS,
};

static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info,
				struct reserve_ticket *ticket,
				const enum btrfs_flush_state *states,
				int states_nr)
{
	u64 to_reclaim;
	int flush_state = 0;

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);

	/*
	 * This is the priority reclaim path, so to_reclaim could be >0 still
	 * because we may have only satisfied the priority tickets and still
	 * left non priority tickets on the list.  We would then have
	 * to_reclaim but ticket->bytes == 0.
	 */
	if (ticket->bytes == 0) {
		spin_unlock(&space_info->lock);
		return;
	}

	while (flush_state < states_nr) {
		spin_unlock(&space_info->lock);
		flush_space(fs_info, space_info, to_reclaim, states[flush_state],
			    false);
		flush_state++;
		spin_lock(&space_info->lock);
		if (ticket->bytes == 0) {
			spin_unlock(&space_info->lock);
			return;
		}
	}

	/* Attempt to steal from the global rsv if we can. */
	if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
		ticket->error = -ENOSPC;
		remove_ticket(space_info, ticket);
	}

	/*
	 * We must run try_granting_tickets here because we could be a large
	 * ticket in front of a smaller ticket that can now be satisfied with
	 * the available space.
	 */
	btrfs_try_granting_tickets(fs_info, space_info);
	spin_unlock(&space_info->lock);
}

static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
					struct btrfs_space_info *space_info,
					struct reserve_ticket *ticket)
{
	spin_lock(&space_info->lock);

	/* We could have been granted before we got here. */
	if (ticket->bytes == 0) {
		spin_unlock(&space_info->lock);
		return;
	}

	while (!space_info->full) {
		spin_unlock(&space_info->lock);
		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
		spin_lock(&space_info->lock);
		if (ticket->bytes == 0) {
			spin_unlock(&space_info->lock);
			return;
		}
	}

	ticket->error = -ENOSPC;
	remove_ticket(space_info, ticket);
	btrfs_try_granting_tickets(fs_info, space_info);
	spin_unlock(&space_info->lock);
}

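/*
 * Sleep until the ticket is either satisfied (ticket->bytes reaches 0) or
 * failed.  A fatal signal removes the ticket and fails it with -EINTR so the
 * reserved space isn't leaked into bytes_may_use.
 */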
static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info,
				struct reserve_ticket *ticket)
{
	DEFINE_WAIT(wait);
	int ret = 0;

	spin_lock(&space_info->lock);
	while (ticket->bytes > 0 && ticket->error == 0) {
		ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
		if (ret) {
			/*
			 * Delete us from the list. After we unlock the space
			 * info, we don't want the async reclaim job to reserve
			 * space for this ticket. If that would happen, then the
			 * ticket's task would not know that space was reserved
			 * despite getting an error, resulting in a space leak
			 * (bytes_may_use counter of our space_info).
			 */
			remove_ticket(space_info, ticket);
			ticket->error = -EINTR;
			break;
		}
		spin_unlock(&space_info->lock);

		schedule();

		finish_wait(&ticket->wait, &wait);
		spin_lock(&space_info->lock);
	}
	spin_unlock(&space_info->lock);
}

/*
 * Do the appropriate flushing and waiting for a ticket.
 *
 * @fs_info:    the filesystem
 * @space_info: space info for the reservation
 * @ticket:     ticket for the reservation
 * @start_ns:   timestamp when the reservation started
 * @orig_bytes: amount of bytes originally reserved
 * @flush:      how much we can flush
 *
 * This does the work of figuring out how to flush for the ticket, waiting for
 * the reservation, and returning the appropriate error if there is one.
 */
static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
				 struct btrfs_space_info *space_info,
				 struct reserve_ticket *ticket,
				 u64 start_ns, u64 orig_bytes,
				 enum btrfs_reserve_flush_enum flush)
{
	int ret;

	switch (flush) {
	case BTRFS_RESERVE_FLUSH_DATA:
	case BTRFS_RESERVE_FLUSH_ALL:
	case BTRFS_RESERVE_FLUSH_ALL_STEAL:
		wait_reserve_ticket(fs_info, space_info, ticket);
		break;
	case BTRFS_RESERVE_FLUSH_LIMIT:
		priority_reclaim_metadata_space(fs_info, space_info, ticket,
						priority_flush_states,
						ARRAY_SIZE(priority_flush_states));
		break;
	case BTRFS_RESERVE_FLUSH_EVICT:
		priority_reclaim_metadata_space(fs_info, space_info, ticket,
						evict_flush_states,
						ARRAY_SIZE(evict_flush_states));
		break;
	case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
		priority_reclaim_data_space(fs_info, space_info, ticket);
		break;
	default:
		ASSERT(0);
		break;
	}

	ret = ticket->error;
	ASSERT(list_empty(&ticket->list));
	/*
	 * Check that we can't have an error set if the reservation succeeded,
	 * as that would confuse tasks and lead them to error out without
	 * releasing reserved space (if an error happens the expectation is that
	 * space wasn't reserved at all).
	 */
	ASSERT(!(ticket->bytes == 0 && ticket->error));
	trace_btrfs_reserve_ticket(fs_info, space_info->flags, orig_bytes,
				   start_ns, flush, ticket->error);
	return ret;
}

/*
 * This returns true if this flush state will go through the ordinary flushing
 * code.
 */
static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
{
	return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
		(flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
}

static inline void maybe_clamp_preempt(struct btrfs_fs_info *fs_info,
				       struct btrfs_space_info *space_info)
{
	u64 ordered = percpu_counter_sum_positive(&fs_info->ordered_bytes);
	u64 delalloc = percpu_counter_sum_positive(&fs_info->delalloc_bytes);

	/*
	 * If we're heavy on ordered operations then clamping won't help us.
	 * Clamping is only useful for keeping up with buffered writers that
	 * are dirtying delalloc faster than we can flush it, so only bump the
	 * clamp when delalloc dominates.  Each bump makes the preemptive
	 * flusher kick in earlier (see need_preemptive_reclaim()), up to a
	 * factor of 2^8.
	 */
	if (ordered < delalloc)
		space_info->clamp = min(space_info->clamp + 1, 8);
}

static inline bool can_steal(enum btrfs_reserve_flush_enum flush)
{
	return (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL ||
		flush == BTRFS_RESERVE_FLUSH_EVICT);
}

/*
 * Try to reserve bytes from the space_info.
 *
 * @fs_info:    the filesystem
 * @space_info: space info we want to allocate from
 * @orig_bytes: number of bytes we want
 * @flush:      whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the given space_info.  If
 * there is not enough space it will make an attempt to flush some space,
 * depending on flush.
 *
 * Returns 0 on success, a negative error otherwise (typically -ENOSPC).
 */
static int __reserve_bytes(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *space_info, u64 orig_bytes,
			   enum btrfs_reserve_flush_enum flush)
{
	struct work_struct *async_work;
	struct reserve_ticket ticket;
	u64 start_ns = 0;
	u64 used;
	int ret = 0;
	bool pending_tickets;

	ASSERT(orig_bytes);
	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);

	if (flush == BTRFS_RESERVE_FLUSH_DATA)
		async_work = &fs_info->async_data_reclaim_work;
	else
		async_work = &fs_info->async_reclaim_work;

	spin_lock(&space_info->lock);
	ret = -ENOSPC;
	used = btrfs_space_info_used(space_info, true);

	/*
	 * We don't want NO_FLUSH allocations to jump everybody, they can
	 * generally handle ENOSPC in a different way, so treat them the same as
	 * normal flushers when it comes to skipping pending tickets.
	 */
	if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
		pending_tickets = !list_empty(&space_info->tickets) ||
			!list_empty(&space_info->priority_tickets);
	else
		pending_tickets = !list_empty(&space_info->priority_tickets);

	/*
	 * Carry on if we have enough space (short-circuit) or call
	 * can_overcommit() to ensure we can overcommit to continue.
	 */
	if (!pending_tickets &&
	    ((used + orig_bytes <= space_info->total_bytes) ||
	     btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
		btrfs_space_info_update_bytes_may_use(fs_info, space_info,
						      orig_bytes);
		ret = 0;
	}

	/*
	 * If we couldn't make a reservation then setup our reservation ticket
	 * and kick the async worker if it's not already running.
	 *
	 * If we are a priority flusher then we just need to add our ticket to
	 * the list and we will do our own flushing further down.
	 */
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
		ticket.bytes = orig_bytes;
		ticket.error = 0;
		space_info->reclaim_size += ticket.bytes;
		init_waitqueue_head(&ticket.wait);
		ticket.steal = can_steal(flush);
		if (trace_btrfs_reserve_ticket_enabled())
			start_ns = ktime_get_ns();

		if (flush == BTRFS_RESERVE_FLUSH_ALL ||
		    flush == BTRFS_RESERVE_FLUSH_ALL_STEAL ||
		    flush == BTRFS_RESERVE_FLUSH_DATA) {
			list_add_tail(&ticket.list, &space_info->tickets);
			if (!space_info->flush) {
				/*
				 * We were forced to add a reserve ticket, so
				 * our preemptive flushing is unable to keep
				 * up.  Clamp down on the threshold for the
				 * preemptive flushing in order to keep up with
				 * the workload.
				 */
				maybe_clamp_preempt(fs_info, space_info);

				space_info->flush = 1;
				trace_btrfs_trigger_flush(fs_info,
							  space_info->flags,
							  orig_bytes, flush,
							  "enospc");
				queue_work(system_unbound_wq, async_work);
			}
		} else {
			list_add_tail(&ticket.list,
				      &space_info->priority_tickets);
		}
	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		used += orig_bytes;
		/*
		 * We will do the space reservation dance during log replay,
		 * which means we won't have fs_info->fs_root set, so don't do
		 * the async reclaim as we will panic.
		 */
		if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
		    !work_busy(&fs_info->preempt_reclaim_work) &&
		    need_preemptive_reclaim(fs_info, space_info)) {
			trace_btrfs_trigger_flush(fs_info, space_info->flags,
						  orig_bytes, flush, "preempt");
			queue_work(system_unbound_wq,
				   &fs_info->preempt_reclaim_work);
		}
	}
	spin_unlock(&space_info->lock);
	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
		return ret;

	return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
				     orig_bytes, flush);
}

/*
 * Try to reserve metadata bytes from the block_rsv's space.
 *
 * @fs_info:    the filesystem
 * @block_rsv:  block_rsv we're allocating for
 * @orig_bytes: number of bytes we want
 * @flush:      whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info associated
 * with the block_rsv.  If there is not enough space it will make an attempt to
 * flush some space, depending on flush.
 */
int btrfs_reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_rsv *block_rsv,
				 u64 orig_bytes,
				 enum btrfs_reserve_flush_enum flush)
{
	int ret;

	ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush);
	if (ret == -ENOSPC) {
		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
					      block_rsv->space_info->flags,
					      orig_bytes, 1);

		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
			btrfs_dump_space_info(fs_info, block_rsv->space_info,
					      orig_bytes, 0);
	}
	return ret;
}
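
/*
 * A minimal sketch of the reservation cycle from a caller's perspective.
 * This is illustrative only: error handling is trimmed, and the helpers
 * named here (btrfs_block_rsv_add_bytes(), btrfs_block_rsv_release()) live
 * in block-rsv.c, not in this file:
 *
 *	u64 bytes = btrfs_calc_insert_metadata_size(fs_info, nr_items);
 *
 *	ret = btrfs_reserve_metadata_bytes(fs_info, rsv, bytes,
 *					   BTRFS_RESERVE_FLUSH_ALL);
 *	if (ret)
 *		return ret;			(usually -ENOSPC)
 *	btrfs_block_rsv_add_bytes(rsv, bytes, true);
 *	... modify metadata covered by the reservation ...
 *	btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
 *
 * On success the bytes are accounted in space_info->bytes_may_use until the
 * corresponding extents are allocated or the reservation is released.
 */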

/*
 * Try to reserve data bytes for an allocation.
 *
 * @fs_info: the filesystem
 * @bytes:   number of bytes we need
 * @flush:   how we are allowed to flush
 *
 * This will reserve bytes from the data space info.  If there is not enough
 * space it will make an attempt to flush some space, depending on flush.
 */
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
			     enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
	int ret;

	ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
	       flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);

	ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
	if (ret == -ENOSPC) {
		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
					      data_sinfo->flags, bytes, 1);
		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
			btrfs_dump_space_info(fs_info, data_sinfo, bytes, 0);
	}
	return ret;
}