// SPDX-License-Identifier: GPL-2.0

#include "misc.h"
#include "ctree.h"
#include "space-info.h"
#include "sysfs.h"
#include "volumes.h"
#include "free-space-cache.h"
#include "ordered-data.h"
#include "transaction.h"
#include "block-group.h"

/*
 * Overview of the ENOSPC/reservation machinery in this file:
 *
 * Each block group type (data, metadata, system) is tracked by a
 * btrfs_space_info.  Metadata space is reserved up front, before the
 * corresponding tree blocks are allocated, by bumping ->bytes_may_use, so
 * the invariant
 *
 *	bytes_used + bytes_reserved + bytes_pinned + bytes_readonly +
 *	bytes_zone_unusable + bytes_may_use <= total_bytes (+ overcommit)
 *
 * must hold at all times.  Metadata may overcommit against still
 * unallocated device space (see calc_available_free_space()); data never
 * overcommits.
 *
 * When a reservation cannot be satisfied immediately, the reserver queues a
 * reserve_ticket on the space_info and the flushing state machine (enum
 * btrfs_flush_state) is run to free space: delayed items and delayed refs
 * are run, delalloc is flushed, ordered extents are waited on, chunks are
 * allocated, and finally the transaction is committed.  As space is
 * returned, btrfs_try_granting_tickets() wakes tickets in FIFO order.  A
 * separate preemptive worker flushes in the background before tasks have to
 * block, throttled by space_info->clamp.
 */
u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
				 bool may_use_included)
{
	ASSERT(s_info);
	return s_info->bytes_used + s_info->bytes_reserved +
		s_info->bytes_pinned + s_info->bytes_readonly +
		s_info->bytes_zone_unusable +
		(may_use_included ? s_info->bytes_may_use : 0);
}
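
/*
 * Note (illustrative): a space_info's free space follows directly from this
 * helper, e.g.
 *
 *	free = info->total_bytes - btrfs_space_info_used(info, true);
 *
 * and may be negative while metadata is overcommitted, which is why the
 * dump code below prints it as a signed value.
 */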

/*
 * After adding new space to the filesystem, we need to clear the full flag
 * on all the space infos so allocation can be retried in them.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	list_for_each_entry(found, head, list)
		found->full = 0;
}

static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{
	struct btrfs_space_info *space_info;
	int i;
	int ret;

	space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
	if (!space_info)
		return -ENOMEM;

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&space_info->block_groups[i]);
	init_rwsem(&space_info->groups_sem);
	spin_lock_init(&space_info->lock);
	space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
	INIT_LIST_HEAD(&space_info->ro_bgs);
	INIT_LIST_HEAD(&space_info->tickets);
	INIT_LIST_HEAD(&space_info->priority_tickets);
	space_info->clamp = 1;

	ret = btrfs_sysfs_add_space_info_type(info, space_info);
	if (ret)
		return ret;

	list_add(&space_info->list, &info->space_info);
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		info->data_sinfo = space_info;

	return ret;
}

int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
	struct btrfs_super_block *disk_super;
	u64 features;
	u64 flags;
	int mixed = 0;
	int ret;

	disk_super = fs_info->super_copy;
	if (!btrfs_super_root(disk_super))
		return -EINVAL;

	features = btrfs_super_incompat_flags(disk_super);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	flags = BTRFS_BLOCK_GROUP_SYSTEM;
	ret = create_space_info(fs_info, flags);
	if (ret)
		goto out;

	if (mixed) {
		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
		ret = create_space_info(fs_info, flags);
	} else {
		flags = BTRFS_BLOCK_GROUP_METADATA;
		ret = create_space_info(fs_info, flags);
		if (ret)
			goto out;

		flags = BTRFS_BLOCK_GROUP_DATA;
		ret = create_space_info(fs_info, flags);
	}
out:
	return ret;
}

void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     u64 bytes_readonly, u64 bytes_zone_unusable,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;
	int factor;

	factor = btrfs_bg_type_to_factor(flags);

	found = btrfs_find_space_info(info, flags);
	ASSERT(found);
	spin_lock(&found->lock);
	found->total_bytes += total_bytes;
	found->disk_total += total_bytes * factor;
	found->bytes_used += bytes_used;
	found->disk_used += bytes_used * factor;
	found->bytes_readonly += bytes_readonly;
	found->bytes_zone_unusable += bytes_zone_unusable;
	if (total_bytes > 0)
		found->full = 0;
	btrfs_try_granting_tickets(info, found);
	spin_unlock(&found->lock);
	*space_info = found;
}

struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
					       u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	list_for_each_entry(found, head, list) {
		if (found->flags & flags)
			return found;
	}
	return NULL;
}

static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
				     struct btrfs_space_info *space_info,
				     enum btrfs_reserve_flush_enum flush)
{
	u64 profile;
	u64 avail;
	int factor;

	if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
		profile = btrfs_system_alloc_profile(fs_info);
	else
		profile = btrfs_metadata_alloc_profile(fs_info);

	avail = atomic64_read(&fs_info->free_chunk_space);

	/*
	 * If we have dup, raid1 or raid10 then only half of the free space
	 * is actually usable.  For raid56 the space info used doesn't
	 * include the parity drive, so we don't have to change the math.
	 */
	factor = btrfs_bg_type_to_factor(profile);
	avail = div_u64(avail, factor);

	/*
	 * If we aren't flushing all things, let the caller overcommit up to
	 * half of the remaining space.  If we can flush, be more conservative
	 * (1/8th) and reclaim via flushing instead.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		avail >>= 3;
	else
		avail >>= 1;
	return avail;
}

int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
			 struct btrfs_space_info *space_info, u64 bytes,
			 enum btrfs_reserve_flush_enum flush)
{
	u64 avail;
	u64 used;

	/* No overcommit for data (or mixed) space infos, data must fit on disk. */
	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
		return 0;

	used = btrfs_space_info_used(space_info, true);
	avail = calc_available_free_space(fs_info, space_info, flush);

	if (used + bytes < space_info->total_bytes + avail)
		return 1;
	return 0;
}
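
/*
 * Worked example (illustrative only): on raid1 metadata with 10GiB of
 * unallocated device space, factor is 2, so 5GiB of chunk space could still
 * be created.  Under BTRFS_RESERVE_FLUSH_ALL that is shifted down to
 * 5GiB >> 3 = 640MiB of allowed overcommit, so the reservation succeeds as
 * long as used + bytes < total_bytes + 640MiB.
 */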

static void remove_ticket(struct btrfs_space_info *space_info,
			  struct reserve_ticket *ticket)
{
	if (!list_empty(&ticket->list)) {
		list_del_init(&ticket->list);
		ASSERT(space_info->reclaim_size >= ticket->bytes);
		space_info->reclaim_size -= ticket->bytes;
	}
}

/*
 * This is for space we already have accounted in space_info->bytes_may_use,
 * so basically when we're returning space to the pool we can grant waiting
 * tickets in order.
 */
void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info)
{
	struct list_head *head;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;

	lockdep_assert_held(&space_info->lock);

	head = &space_info->priority_tickets;
again:
	while (!list_empty(head)) {
		struct reserve_ticket *ticket;
		u64 used = btrfs_space_info_used(space_info, true);

		ticket = list_first_entry(head, struct reserve_ticket, list);

		/* Check and see if our ticket can be satisfied now. */
		if ((used + ticket->bytes <= space_info->total_bytes) ||
		    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
					 flush)) {
			btrfs_space_info_update_bytes_may_use(fs_info,
							      space_info,
							      ticket->bytes);
			remove_ticket(space_info, ticket);
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			break;
		}
	}

	if (head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		flush = BTRFS_RESERVE_FLUSH_ALL;
		goto again;
	}
}
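
/*
 * Illustrative scenario: if the head ticket wants 1GiB but only 512MiB has
 * just been returned, nothing is granted, even if a smaller ticket further
 * back in the list would fit; each list is strictly FIFO.
 * maybe_fail_all_tickets() further below exists to break out of exactly
 * that situation.
 */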

#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;		\
	spin_lock(&__rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __rsv->size, __rsv->reserved);			\
	spin_unlock(&__rsv->lock);					\
} while (0)

static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *info)
{
	lockdep_assert_held(&info->lock);

	/* The free space could be negative in case of overcommit. */
	btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull",
		   info->flags,
		   (s64)(info->total_bytes - btrfs_space_info_used(info, true)),
		   info->full ? "" : "not ");
	btrfs_info(fs_info,
		"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
		info->total_bytes, info->bytes_used, info->bytes_pinned,
		info->bytes_reserved, info->bytes_may_use,
		info->bytes_readonly, info->bytes_zone_unusable);

	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}

void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *info, u64 bytes,
			   int dump_block_groups)
{
	struct btrfs_block_group *cache;
	int index = 0;

	spin_lock(&info->lock);
	__btrfs_dump_space_info(fs_info, info);
	spin_unlock(&info->lock);

	if (!dump_block_groups)
		return;

	down_read(&info->groups_sem);
again:
	list_for_each_entry(cache, &info->block_groups[index], list) {
		spin_lock(&cache->lock);
		btrfs_info(fs_info,
			"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
			cache->start, cache->length, cache->used, cache->pinned,
			cache->reserved, cache->zone_unusable,
			cache->ro ? "[readonly]" : "");
		spin_unlock(&cache->lock);
		btrfs_dump_free_space(cache, bytes);
	}
	if (++index < BTRFS_NR_RAID_TYPES)
		goto again;
	up_read(&info->groups_sem);
}

static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
					u64 to_reclaim)
{
	u64 bytes;
	u64 nr;

	bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
	nr = div64_u64(to_reclaim, bytes);
	if (!nr)
		nr = 1;
	return nr;
}
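
/*
 * Rough numbers (illustrative, assuming the common 16KiB nodesize, for
 * which btrfs_calc_insert_metadata_size() comes to 256KiB per item): a
 * request to reclaim 4MiB maps to flushing 16 items' worth of metadata.
 */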

#define EXTENT_SIZE_PER_ITEM	SZ_256K

/*
 * Shrink metadata reservations by flushing delalloc and/or waiting on
 * ordered extents.
 */
static void shrink_delalloc(struct btrfs_fs_info *fs_info,
			    struct btrfs_space_info *space_info,
			    u64 to_reclaim, bool wait_ordered,
			    bool for_preempt)
{
	struct btrfs_trans_handle *trans;
	u64 delalloc_bytes;
	u64 ordered_bytes;
	u64 items;
	long time_left;
	int loops;

	delalloc_bytes = percpu_counter_sum_positive(&fs_info->delalloc_bytes);
	ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes);
	if (delalloc_bytes == 0 && ordered_bytes == 0)
		return;

	/* Calc the number of items we need to flush for this reservation. */
	if (to_reclaim == U64_MAX) {
		items = U64_MAX;
	} else {
		/*
		 * to_reclaim is set to however much metadata we need to
		 * reclaim, but reclaiming that much data doesn't really
		 * track exactly.  Increase the amount to reclaim by 2x in
		 * order to make sure we're flushing enough delalloc to
		 * hopefully reclaim some metadata reservations.
		 */
		to_reclaim = max(to_reclaim, delalloc_bytes >> 3);
		items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2;
	}

	trans = (struct btrfs_trans_handle *)current->journal_info;

	/*
	 * If we are doing more ordered than delalloc we need to just wait on
	 * ordered extents, otherwise we'll waste time trying to flush
	 * delalloc that likely won't give us the space back we need.
	 */
	if (ordered_bytes > delalloc_bytes && !for_preempt)
		wait_ordered = true;

	loops = 0;
	while ((delalloc_bytes || ordered_bytes) && loops < 3) {
		u64 temp = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
		long nr_pages = min_t(u64, temp, LONG_MAX);
		int async_pages;

		btrfs_start_delalloc_roots(fs_info, nr_pages, true);

		/*
		 * We need to wait for the compressed pages to start before
		 * we continue.
		 */
		async_pages = atomic_read(&fs_info->async_delalloc_pages);
		if (!async_pages)
			goto skip_async;

		/*
		 * Calculate how many compressed pages we want to be written
		 * before we continue, i.e. if there are more async pages
		 * than we require, wait_event will wait until nr_pages are
		 * written.
		 */
		if (async_pages > nr_pages)
			async_pages -= nr_pages;
		else
			async_pages = 0;
		wait_event(fs_info->async_submit_wait,
			   atomic_read(&fs_info->async_delalloc_pages) <=
			   async_pages);
skip_async:
		loops++;
		if (wait_ordered && !trans) {
			btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
		} else {
			time_left = schedule_timeout_killable(1);
			if (time_left)
				break;
		}

		/*
		 * If we are for preemption we just want a one-shot attempt
		 * at flushing delalloc to generate any extents that might
		 * be free'd.
		 */
		if (for_preempt)
			break;

		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets) &&
		    list_empty(&space_info->priority_tickets)) {
			spin_unlock(&space_info->lock);
			break;
		}
		spin_unlock(&space_info->lock);

		delalloc_bytes = percpu_counter_sum_positive(
						&fs_info->delalloc_bytes);
		ordered_bytes = percpu_counter_sum_positive(
						&fs_info->ordered_bytes);
	}
}

/*
 * Try to flush some data based on policy set by @state.  This is only
 * advisory and may fail for various reasons.  The caller is supposed to
 * examine the state of @space_info to detect the outcome.
 */
static void flush_space(struct btrfs_fs_info *fs_info,
			struct btrfs_space_info *space_info, u64 num_bytes,
			enum btrfs_flush_state state, bool for_preempt)
{
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_trans_handle *trans;
	int nr;
	int ret = 0;

	switch (state) {
	case FLUSH_DELAYED_ITEMS_NR:
	case FLUSH_DELAYED_ITEMS:
		if (state == FLUSH_DELAYED_ITEMS_NR)
			nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
		else
			nr = -1;

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_run_delayed_items_nr(trans, nr);
		btrfs_end_transaction(trans);
		break;
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
	case FLUSH_DELALLOC_FULL:
		if (state == FLUSH_DELALLOC_FULL)
			num_bytes = U64_MAX;
		shrink_delalloc(fs_info, space_info, num_bytes,
				state != FLUSH_DELALLOC, for_preempt);
		break;
	case FLUSH_DELAYED_REFS_NR:
	case FLUSH_DELAYED_REFS:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		if (state == FLUSH_DELAYED_REFS_NR)
			nr = calc_reclaim_items_nr(fs_info, num_bytes);
		else
			nr = 0;
		btrfs_run_delayed_refs(trans, nr);
		btrfs_end_transaction(trans);
		break;
	case ALLOC_CHUNK:
	case ALLOC_CHUNK_FORCE:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_chunk_alloc(trans,
				btrfs_get_alloc_profile(fs_info, space_info->flags),
				(state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
					CHUNK_ALLOC_FORCE);
		btrfs_end_transaction(trans);
		if (ret > 0 || ret == -ENOSPC)
			ret = 0;
		break;
	case RUN_DELAYED_IPUTS:
		/*
		 * If we have pending delayed iputs then we could free up a
		 * bunch of pinned space, so make sure we run the iputs before
		 * we do our pinned bytes check below.
		 */
		btrfs_run_delayed_iputs(fs_info);
		btrfs_wait_on_delayed_iputs(fs_info);
		break;
	case COMMIT_TRANS:
		ASSERT(current->journal_info == NULL);
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}
		ret = btrfs_commit_transaction(trans);
		break;
	default:
		ret = -ENOSPC;
		break;
	}

	trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
				ret, for_preempt);
}

static inline u64
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
				 struct btrfs_space_info *space_info)
{
	u64 used;
	u64 avail;
	u64 to_reclaim = space_info->reclaim_size;

	lockdep_assert_held(&space_info->lock);

	avail = calc_available_free_space(fs_info, space_info,
					  BTRFS_RESERVE_FLUSH_ALL);
	used = btrfs_space_info_used(space_info, true);

	/*
	 * We may be flushing because suddenly we have less space than we had
	 * before, and now we're well over-committed based on our current
	 * free space.  If that's the case add in our overage so we make sure
	 * to put the appropriate amount of ->reclaim_size space in flight.
	 */
	if (space_info->total_bytes + avail < used)
		to_reclaim += used - (space_info->total_bytes + avail);

	return to_reclaim;
}
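
/*
 * Numeric sketch (illustrative only): with total_bytes = 8GiB, a computed
 * overcommit allowance of 1GiB and used = 10GiB, the pool is 1GiB over its
 * allowed ceiling, so that 1GiB of overage is added on top of the queued
 * reclaim_size.
 */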

static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info)
{
	u64 global_rsv_size = fs_info->global_block_rsv.reserved;
	u64 ordered, delalloc;
	u64 thresh = div_factor_fine(space_info->total_bytes, 90);
	u64 used;

	/* If we're just plain full then async reclaim just slows us down. */
	if ((space_info->bytes_used + space_info->bytes_reserved +
	     global_rsv_size) >= thresh)
		return false;

	used = space_info->bytes_may_use + space_info->bytes_pinned;

	/* The total flushable belongs to the global rsv, don't flush. */
	if (global_rsv_size >= used)
		return false;

	/*
	 * 128MiB is 1/4 of the maximum global rsv size.  If we have less
	 * than that devoted to other reservations then there's no sense in
	 * flushing, we don't have a lot of things that need flushing.
	 */
	if (used - global_rsv_size <= SZ_128M)
		return false;

	/*
	 * We have tickets queued, bail so we don't compete with the async
	 * flushers.
	 */
	if (space_info->reclaim_size)
		return false;

	/*
	 * If we have over half of the free space occupied by reservations or
	 * pinned then we want to start flushing.
	 *
	 * The headroom is essentially (total - used) plus the overcommit
	 * allowance, and ->clamp scales down how much of that headroom may
	 * be consumed before we preemptively flush.  The clamp is raised
	 * every time the blocking flushers have to be kicked off, so
	 * sustained pressure makes preemptive flushing progressively more
	 * aggressive.
	 */
	thresh = calc_available_free_space(fs_info, space_info,
					   BTRFS_RESERVE_FLUSH_ALL);
	used = space_info->bytes_used + space_info->bytes_reserved +
	       space_info->bytes_readonly + global_rsv_size;
	if (used < space_info->total_bytes)
		thresh += space_info->total_bytes - used;
	thresh >>= space_info->clamp;

	used = space_info->bytes_pinned;

	/*
	 * If we have more ordered bytes than delalloc bytes then we're
	 * either doing a lot of DIO, or we simply don't have a lot of
	 * delalloc waiting to be written out.  In that case a transaction
	 * commit (freeing delayed refs and delayed items) is the better
	 * lever, so count those reservations as flushable.  Otherwise most
	 * of the flushable space is delalloc metadata reservations, i.e.
	 * bytes_may_use minus the part owned by the global reserve.  Ordered
	 * bytes are halved here to bias the comparison toward the delalloc
	 * accounting.
	 */
	ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
	delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
	if (ordered >= delalloc)
		used += fs_info->delayed_refs_rsv.reserved +
			fs_info->delayed_block_rsv.reserved;
	else
		used += space_info->bytes_may_use - global_rsv_size;

	return (used >= thresh && !btrfs_fs_closing(fs_info) &&
		!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
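
/*
 * Example of the clamp (illustrative only): with 4GiB of effective free
 * space and clamp == 1, the preemptive threshold is 2GiB of flushable
 * reservations; after two escalations (clamp == 3) it drops to 512MiB, so
 * background flushing kicks in far sooner.
 */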

static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
				  struct btrfs_space_info *space_info,
				  struct reserve_ticket *ticket)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 min_bytes;

	if (global_rsv->space_info != space_info)
		return false;

	spin_lock(&global_rsv->lock);
	/* Only steal if at least 1/10th of the rsv's size stays reserved. */
	min_bytes = div_factor(global_rsv->size, 1);
	if (global_rsv->reserved < min_bytes + ticket->bytes) {
		spin_unlock(&global_rsv->lock);
		return false;
	}
	global_rsv->reserved -= ticket->bytes;
	remove_ticket(space_info, ticket);
	ticket->bytes = 0;
	wake_up(&ticket->wait);
	space_info->tickets_id++;
	if (global_rsv->reserved < global_rsv->size)
		global_rsv->full = 0;
	spin_unlock(&global_rsv->lock);

	return true;
}
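
/*
 * Worked example (illustrative only): with global_rsv->size = 512MiB,
 * min_bytes comes to ~51MiB.  With 400MiB currently reserved, a 300MiB
 * ticket can be stolen (100MiB >= min_bytes is left), while a 360MiB ticket
 * is refused because it would dip below the floor.
 */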

/*
 * We've exhausted our flushing, start failing tickets.
 *
 * @fs_info:    fs_info for this fs
 * @space_info: the space info we were flushing
 *
 * We call this when we've exhausted our flushing ability and haven't made
 * progress in satisfying tickets.  The reservation code handles tickets in
 * order, so if there is a large ticket first and then smaller ones we could
 * very well satisfy the smaller tickets.  This will attempt to wake up any
 * tickets in the list to catch this case.
 *
 * This function returns true if it was able to make progress by clearing
 * out other tickets, or if it stumbles across a ticket that was smaller
 * than the first ticket.
 */
static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
				   struct btrfs_space_info *space_info)
{
	struct reserve_ticket *ticket;
	u64 tickets_id = space_info->tickets_id;

	trace_btrfs_fail_all_tickets(fs_info, space_info);

	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
		btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
		__btrfs_dump_space_info(fs_info, space_info);
	}

	while (!list_empty(&space_info->tickets) &&
	       tickets_id == space_info->tickets_id) {
		ticket = list_first_entry(&space_info->tickets,
					  struct reserve_ticket, list);

		if (ticket->steal &&
		    steal_from_global_rsv(fs_info, space_info, ticket))
			return true;

		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
			btrfs_info(fs_info, "failing ticket with %llu bytes",
				   ticket->bytes);

		remove_ticket(space_info, ticket);
		ticket->error = -ENOSPC;
		wake_up(&ticket->wait);

		/*
		 * We're just throwing tickets away, so more flushing may not
		 * trip over btrfs_try_granting_tickets, so we need to call it
		 * here to see if we can make progress with the next ticket in
		 * the list.
		 */
		btrfs_try_granting_tickets(fs_info, space_info);
	}
	return (tickets_id != space_info->tickets_id);
}

/*
 * This is for normal flushers; we can wait all day if we want to.  We will
 * loop and continuously try to flush as long as we are making progress.  We
 * count progress as clearing off tickets each time we have to loop.
 */
static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 to_reclaim;
	enum btrfs_flush_state flush_state;
	int commit_cycles = 0;
	u64 last_tickets_id;

	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
	if (!to_reclaim) {
		space_info->flush = 0;
		spin_unlock(&space_info->lock);
		return;
	}
	last_tickets_id = space_info->tickets_id;
	spin_unlock(&space_info->lock);

	flush_state = FLUSH_DELAYED_ITEMS_NR;
	do {
		flush_space(fs_info, space_info, to_reclaim, flush_state, false);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}
		to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
							      space_info);
		if (last_tickets_id == space_info->tickets_id) {
			flush_state++;
		} else {
			last_tickets_id = space_info->tickets_id;
			flush_state = FLUSH_DELAYED_ITEMS_NR;
			if (commit_cycles)
				commit_cycles--;
		}

		/*
		 * We do not want to empty the system of delalloc unless
		 * we're under heavy pressure, so allow one trip through the
		 * flushing logic before we start doing a FLUSH_DELALLOC_FULL.
		 */
		if (flush_state == FLUSH_DELALLOC_FULL && !commit_cycles)
			flush_state++;

		/*
		 * We don't want to force a chunk allocation until we've
		 * tried pretty hard to reclaim space.  Think of the case
		 * where we freed up a bunch of space and so have a lot of
		 * pinned space to reclaim.  We would rather use that than
		 * possibly create an under-utilized metadata chunk.  So if
		 * this is our first run through the flushing state machine,
		 * skip ALLOC_CHUNK_FORCE and commit the transaction.  If
		 * nothing has changed the next go around then we can force a
		 * chunk allocation.
		 */
		if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
			flush_state++;

		if (flush_state > COMMIT_TRANS) {
			commit_cycles++;
			if (commit_cycles > 2) {
				if (maybe_fail_all_tickets(fs_info, space_info)) {
					flush_state = FLUSH_DELAYED_ITEMS_NR;
					commit_cycles--;
				} else {
					space_info->flush = 0;
				}
			} else {
				flush_state = FLUSH_DELAYED_ITEMS_NR;
			}
		}
		spin_unlock(&space_info->lock);
	} while (flush_state <= COMMIT_TRANS);
}

/*
 * This handles pre-flushing of metadata space before we get to the point
 * that we need to start blocking threads on tickets.  The logic here is
 * different from the other flush paths because it doesn't rely on tickets
 * to tell us how much we need to flush, instead it attempts to keep us
 * below the preemptive flushing threshold while making progress.
 */
static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	struct btrfs_block_rsv *delayed_block_rsv;
	struct btrfs_block_rsv *delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv;
	struct btrfs_block_rsv *trans_rsv;
	int loops = 0;

	fs_info = container_of(work, struct btrfs_fs_info,
			       preempt_reclaim_work);
	space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	delayed_block_rsv = &fs_info->delayed_block_rsv;
	delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	global_rsv = &fs_info->global_block_rsv;
	trans_rsv = &fs_info->trans_block_rsv;

	spin_lock(&space_info->lock);
	while (need_preemptive_reclaim(fs_info, space_info)) {
		enum btrfs_flush_state flush;
		u64 delalloc_size = 0;
		u64 to_reclaim, block_rsv_size;
		u64 global_rsv_size = global_rsv->reserved;

		loops++;

		/*
		 * We don't have a precise counter for the metadata being
		 * reserved for delalloc, so we'll approximate it by
		 * subtracting out the block rsv's space from bytes_may_use.
		 * If that amount is higher than the individual reserves,
		 * then we can assume it's tied up in delalloc reservations.
		 */
		block_rsv_size = global_rsv_size +
			delayed_block_rsv->reserved +
			delayed_refs_rsv->reserved +
			trans_rsv->reserved;
		if (block_rsv_size < space_info->bytes_may_use)
			delalloc_size = space_info->bytes_may_use - block_rsv_size;
		spin_unlock(&space_info->lock);

		/*
		 * We don't want to include the global_rsv in our calculation,
		 * because that's space we can't touch.  Subtract it from the
		 * block_rsv_size for the next checks.
		 */
		block_rsv_size -= global_rsv_size;

		/*
		 * We really want to avoid flushing delalloc too much, as it
		 * could result in poor allocation patterns, so only flush it
		 * if it's a larger amount than the block reserves.
		 */
		if (delalloc_size > block_rsv_size) {
			to_reclaim = delalloc_size;
			flush = FLUSH_DELALLOC;
		} else if (space_info->bytes_pinned >
			   (delayed_block_rsv->reserved +
			    delayed_refs_rsv->reserved)) {
			to_reclaim = space_info->bytes_pinned;
			flush = COMMIT_TRANS;
		} else if (delayed_block_rsv->reserved >
			   delayed_refs_rsv->reserved) {
			to_reclaim = delayed_block_rsv->reserved;
			flush = FLUSH_DELAYED_ITEMS_NR;
		} else {
			to_reclaim = delayed_refs_rsv->reserved;
			flush = FLUSH_DELAYED_REFS_NR;
		}

		/*
		 * We don't want to reclaim everything, just a portion, so
		 * scale down to_reclaim by 1/4.  If it takes us down to 0,
		 * reclaim one item's worth.
		 */
		to_reclaim >>= 2;
		if (!to_reclaim)
			to_reclaim = btrfs_calc_insert_metadata_size(fs_info, 1);
		flush_space(fs_info, space_info, to_reclaim, flush, true);
		cond_resched();
		spin_lock(&space_info->lock);
	}

	/* We only went through once, back off our clamping. */
	if (loops == 1 && !space_info->reclaim_size)
		space_info->clamp = max(1, space_info->clamp - 1);
	trace_btrfs_done_preemptive_reclaim(fs_info, space_info);
	spin_unlock(&space_info->lock);
}

/*
 * Data flushing differs from metadata flushing: data chunks hold the actual
 * file extents, so there is no metadata state machine to drive.  The async
 * data worker first tries hard to allocate new data chunks, and only then
 * walks the states below:
 *
 * FLUSH_DELALLOC_FULL
 *   Flush all delalloc and wait on the ordered extents; completed ordered
 *   extents release their data reservations.
 *
 * RUN_DELAYED_IPUTS
 *   Processing delayed iputs can drop the final reference on inodes whose
 *   extents are then freed back into the data pool.
 *
 * COMMIT_TRANS
 *   A commit returns pinned data extents to the free space trees.
 *
 * ALLOC_CHUNK_FORCE
 *   One last attempt to carve a data chunk out of the remaining
 *   unallocated device space.
 */
static const enum btrfs_flush_state data_flush_states[] = {
	FLUSH_DELALLOC_FULL,
	RUN_DELAYED_IPUTS,
	COMMIT_TRANS,
	ALLOC_CHUNK_FORCE,
};

static void btrfs_async_reclaim_data_space(struct work_struct *work)
{
	struct btrfs_fs_info *fs_info;
	struct btrfs_space_info *space_info;
	u64 last_tickets_id;
	enum btrfs_flush_state flush_state = 0;

	fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work);
	space_info = fs_info->data_sinfo;

	spin_lock(&space_info->lock);
	if (list_empty(&space_info->tickets)) {
		space_info->flush = 0;
		spin_unlock(&space_info->lock);
		return;
	}
	last_tickets_id = space_info->tickets_id;
	spin_unlock(&space_info->lock);

	/* Easy case: keep allocating chunks while the space isn't full. */
	while (!space_info->full) {
		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}
		last_tickets_id = space_info->tickets_id;
		spin_unlock(&space_info->lock);
	}

	while (flush_state < ARRAY_SIZE(data_flush_states)) {
		flush_space(fs_info, space_info, U64_MAX,
			    data_flush_states[flush_state], false);
		spin_lock(&space_info->lock);
		if (list_empty(&space_info->tickets)) {
			space_info->flush = 0;
			spin_unlock(&space_info->lock);
			return;
		}

		/* Advance only if the last flush made no progress. */
		if (last_tickets_id == space_info->tickets_id) {
			flush_state++;
		} else {
			last_tickets_id = space_info->tickets_id;
			flush_state = 0;
		}

		if (flush_state >= ARRAY_SIZE(data_flush_states)) {
			if (space_info->full) {
				if (maybe_fail_all_tickets(fs_info, space_info))
					flush_state = 0;
				else
					space_info->flush = 0;
			} else {
				flush_state = 0;
			}
		}
		spin_unlock(&space_info->lock);
	}
}

void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
{
	INIT_WORK(&fs_info->async_reclaim_work, btrfs_async_reclaim_metadata_space);
	INIT_WORK(&fs_info->async_data_reclaim_work, btrfs_async_reclaim_data_space);
	INIT_WORK(&fs_info->preempt_reclaim_work,
		  btrfs_preempt_reclaim_metadata_space);
}

static const enum btrfs_flush_state priority_flush_states[] = {
	FLUSH_DELAYED_ITEMS_NR,
	FLUSH_DELAYED_ITEMS,
	ALLOC_CHUNK,
};

static const enum btrfs_flush_state evict_flush_states[] = {
	FLUSH_DELAYED_ITEMS_NR,
	FLUSH_DELAYED_ITEMS,
	FLUSH_DELAYED_REFS_NR,
	FLUSH_DELAYED_REFS,
	FLUSH_DELALLOC,
	FLUSH_DELALLOC_WAIT,
	FLUSH_DELALLOC_FULL,
	ALLOC_CHUNK,
	COMMIT_TRANS,
};

static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info,
				struct reserve_ticket *ticket,
				const enum btrfs_flush_state *states,
				int states_nr)
{
	u64 to_reclaim;
	int flush_state;

	spin_lock(&space_info->lock);
	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
	if (!to_reclaim) {
		spin_unlock(&space_info->lock);
		return;
	}
	spin_unlock(&space_info->lock);

	flush_state = 0;
	do {
		flush_space(fs_info, space_info, to_reclaim,
			    states[flush_state], false);
		flush_state++;
		spin_lock(&space_info->lock);
		if (ticket->bytes == 0) {
			spin_unlock(&space_info->lock);
			return;
		}
		spin_unlock(&space_info->lock);
	} while (flush_state < states_nr);
}

static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
					struct btrfs_space_info *space_info,
					struct reserve_ticket *ticket)
{
	while (!space_info->full) {
		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
		spin_lock(&space_info->lock);
		if (ticket->bytes == 0) {
			spin_unlock(&space_info->lock);
			return;
		}
		spin_unlock(&space_info->lock);
	}
}

static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
				struct btrfs_space_info *space_info,
				struct reserve_ticket *ticket)
{
	DEFINE_WAIT(wait);
	int ret = 0;

	spin_lock(&space_info->lock);
	while (ticket->bytes > 0 && ticket->error == 0) {
		ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
		if (ret) {
			/*
			 * Delete us from the list.  After we unlock the space
			 * info, we don't want the async reclaim job to
			 * reserve space for this ticket.  If that would
			 * happen, then the ticket's task would not know that
			 * space was reserved despite getting an error,
			 * resulting in a space leak (bytes_may_use counter of
			 * our space_info).
			 */
			remove_ticket(space_info, ticket);
			ticket->error = -EINTR;
			break;
		}
		spin_unlock(&space_info->lock);

		schedule();

		finish_wait(&ticket->wait, &wait);
		spin_lock(&space_info->lock);
	}
	spin_unlock(&space_info->lock);
}

/*
 * Do the appropriate flushing and waiting for a ticket.
 *
 * @fs_info:    the filesystem
 * @space_info: space info for the reservation
 * @ticket:     ticket for the reservation
 * @start_ns:   timestamp when the reservation started
 * @orig_bytes: amount of bytes originally reserved
 * @flush:      how much we can flush
 *
 * This does the work of figuring out how to flush for the ticket, waiting
 * for the reservation, and returning the appropriate error if there is one.
 */
static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
				 struct btrfs_space_info *space_info,
				 struct reserve_ticket *ticket,
				 u64 start_ns, u64 orig_bytes,
				 enum btrfs_reserve_flush_enum flush)
{
	int ret;

	switch (flush) {
	case BTRFS_RESERVE_FLUSH_DATA:
	case BTRFS_RESERVE_FLUSH_ALL:
	case BTRFS_RESERVE_FLUSH_ALL_STEAL:
		wait_reserve_ticket(fs_info, space_info, ticket);
		break;
	case BTRFS_RESERVE_FLUSH_LIMIT:
		priority_reclaim_metadata_space(fs_info, space_info, ticket,
						priority_flush_states,
						ARRAY_SIZE(priority_flush_states));
		break;
	case BTRFS_RESERVE_FLUSH_EVICT:
		priority_reclaim_metadata_space(fs_info, space_info, ticket,
						evict_flush_states,
						ARRAY_SIZE(evict_flush_states));
		break;
	case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
		priority_reclaim_data_space(fs_info, space_info, ticket);
		break;
	default:
		ASSERT(0);
		break;
	}

	spin_lock(&space_info->lock);
	ret = ticket->error;
	if (ticket->bytes || ticket->error) {
		/*
		 * We were a priority ticket, so we need to delete ourselves
		 * from the list.  Because we could have other priority
		 * tickets behind us that require less space, run
		 * btrfs_try_granting_tickets() to see if their reservations
		 * can now be made.
		 */
		if (!list_empty(&ticket->list)) {
			remove_ticket(space_info, ticket);
			btrfs_try_granting_tickets(fs_info, space_info);
		}

		if (!ret)
			ret = -ENOSPC;
	}
	spin_unlock(&space_info->lock);
	ASSERT(list_empty(&ticket->list));

	/*
	 * Check that we can't have an error set if the reservation succeeded,
	 * as that would confuse tasks and lead them to error out without
	 * releasing reserved space (if an error happens the expectation is
	 * that space wasn't reserved at all).
	 */
	ASSERT(!(ticket->bytes == 0 && ticket->error));
	trace_btrfs_reserve_ticket(fs_info, space_info->flags, orig_bytes,
				   start_ns, flush, ticket->error);
	return ret;
}

/*
 * This returns true if this flush state will go through the ordinary
 * flushing code.
 */
static inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush)
{
	return (flush == BTRFS_RESERVE_FLUSH_ALL) ||
		(flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
}

static inline void maybe_clamp_preempt(struct btrfs_fs_info *fs_info,
				       struct btrfs_space_info *space_info)
{
	u64 ordered = percpu_counter_sum_positive(&fs_info->ordered_bytes);
	u64 delalloc = percpu_counter_sum_positive(&fs_info->delalloc_bytes);

	/*
	 * If we're heavy on ordered operations then clamping won't help us.
	 * We need to clamp specifically to keep up with dirtying buffered
	 * writers, because there's not a 1:1 correlation of writing delalloc
	 * and freeing space, like there is with flushing delayed refs or
	 * delayed nodes.  If we're already more ordered than delalloc then
	 * we're keeping up, otherwise we aren't and should probably clamp.
	 */
	if (ordered < delalloc)
		space_info->clamp = min(space_info->clamp + 1, 8);
}
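
/*
 * Illustrative example: each time a blocking flush is triggered while
 * delalloc dominates, clamp steps 1 -> 2 -> ... -> 8, and since
 * need_preemptive_reclaim() shifts its threshold right by clamp bits, the
 * preemptive worker becomes up to 128x more eager before the clamp is
 * backed off again in btrfs_preempt_reclaim_metadata_space().
 */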

/*
 * Try to reserve bytes from the block_rsv's space.
 *
 * @fs_info:    the filesystem
 * @space_info: space info we want to allocate from
 * @orig_bytes: number of bytes we want
 * @flush:      whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info
 * associated with the block_rsv.  If there is not enough space it will make
 * an attempt to flush out space to make room.  It will do this by flushing
 * delalloc if possible or committing the transaction.  If flush is
 * BTRFS_RESERVE_NO_FLUSH then we fail immediately if there is not enough
 * space.
 */
static int __reserve_bytes(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *space_info, u64 orig_bytes,
			   enum btrfs_reserve_flush_enum flush)
{
	struct work_struct *async_work;
	struct reserve_ticket ticket;
	u64 start_ns = 0;
	u64 used;
	int ret = 0;
	bool pending_tickets;

	ASSERT(orig_bytes);
	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);

	if (flush == BTRFS_RESERVE_FLUSH_DATA)
		async_work = &fs_info->async_data_reclaim_work;
	else
		async_work = &fs_info->async_reclaim_work;

	spin_lock(&space_info->lock);
	ret = -ENOSPC;
	used = btrfs_space_info_used(space_info, true);

	/*
	 * Normal flushers (and NO_FLUSH reservations) must queue behind all
	 * existing tickets to keep the FIFO ordering fair; priority flushers
	 * only have to wait behind other priority tickets.
	 */
	if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH))
		pending_tickets = !list_empty(&space_info->tickets) ||
			!list_empty(&space_info->priority_tickets);
	else
		pending_tickets = !list_empty(&space_info->priority_tickets);

	/*
	 * If we have enough space then hooray, make our reservation and
	 * carry on.  If not see if we can overcommit, and if we can, hooray
	 * carry on.  If not things get more complicated.
	 */
	if (!pending_tickets &&
	    ((used + orig_bytes <= space_info->total_bytes) ||
	     btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
		btrfs_space_info_update_bytes_may_use(fs_info, space_info,
						      orig_bytes);
		ret = 0;
	}

	/*
	 * If we couldn't make a reservation then setup our reservation
	 * ticket and kick the async worker if it's not already running.
	 *
	 * If we are a priority flusher then we just need to add our ticket
	 * to the list and we will do our own flushing further on.
	 */
	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
		ticket.bytes = orig_bytes;
		ticket.error = 0;
		space_info->reclaim_size += ticket.bytes;
		init_waitqueue_head(&ticket.wait);
		ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
		if (trace_btrfs_reserve_ticket_enabled())
			start_ns = ktime_get_ns();

		if (flush == BTRFS_RESERVE_FLUSH_ALL ||
		    flush == BTRFS_RESERVE_FLUSH_ALL_STEAL ||
		    flush == BTRFS_RESERVE_FLUSH_DATA) {
			list_add_tail(&ticket.list, &space_info->tickets);
			if (!space_info->flush) {
				/*
				 * We were forced to add a reserve ticket, so
				 * our preemptive flushing is unable to keep
				 * up.  Clamp down on the threshold for the
				 * preemptive flushing in order to keep up
				 * with the workload.
				 */
				maybe_clamp_preempt(fs_info, space_info);

				space_info->flush = 1;
				trace_btrfs_trigger_flush(fs_info,
							  space_info->flags,
							  orig_bytes, flush,
							  "enospc");
				queue_work(system_unbound_wq, async_work);
			}
		} else {
			list_add_tail(&ticket.list,
				      &space_info->priority_tickets);
		}
	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
		used += orig_bytes;
		/*
		 * We will do the space reservation dance during log replay,
		 * which means we won't have fs_info->fs_root set, so don't do
		 * the async reclaim as we will panic.
		 */
		if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
		    !work_busy(&fs_info->preempt_reclaim_work) &&
		    need_preemptive_reclaim(fs_info, space_info)) {
			trace_btrfs_trigger_flush(fs_info, space_info->flags,
						  orig_bytes, flush, "preempt");
			queue_work(system_unbound_wq,
				   &fs_info->preempt_reclaim_work);
		}
	}
	spin_unlock(&space_info->lock);
	if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
		return ret;

	return handle_reserve_ticket(fs_info, space_info, &ticket, start_ns,
				     orig_bytes, flush);
}

/*
 * Try to reserve metadata bytes from the block_rsv's space.
 *
 * @root:       the root we're allocating for
 * @block_rsv:  block_rsv we're allocating for
 * @orig_bytes: number of bytes we want
 * @flush:      whether or not we can flush to make our reservation
 *
 * This will reserve orig_bytes number of bytes from the space info
 * associated with the block_rsv.  If there is not enough space it will make
 * an attempt to flush out space to make room.  It will do this by flushing
 * delalloc if possible or committing the transaction.  If flush is
 * BTRFS_RESERVE_NO_FLUSH then we fail immediately if there is not enough
 * space.
 */
int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
				 struct btrfs_block_rsv *block_rsv,
				 u64 orig_bytes,
				 enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	int ret;

	ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush);
	if (ret == -ENOSPC &&
	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
		/* Orphan cleanup may fall back to the global reserve. */
		if (block_rsv != global_rsv &&
		    !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes))
			ret = 0;
	}
	if (ret == -ENOSPC) {
		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
					      block_rsv->space_info->flags,
					      orig_bytes, 1);

		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
			btrfs_dump_space_info(fs_info, block_rsv->space_info,
					      orig_bytes, 0);
	}
	return ret;
}
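
/*
 * Typical usage, for illustration only (the caller's variable names are
 * hypothetical, not from this file):
 *
 *	ret = btrfs_reserve_metadata_bytes(root, rsv, num_bytes,
 *					   BTRFS_RESERVE_FLUSH_ALL);
 *	if (ret)
 *		return ret;
 *	btrfs_block_rsv_add_bytes(rsv, num_bytes, true);
 *
 * i.e. a successful reservation is expected to be credited to the caller's
 * block reserve and released later via the block rsv helpers, rather than
 * by touching the space_info counters directly.
 */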

/*
 * Try to reserve data bytes for an allocation.
 *
 * @fs_info: the filesystem
 * @bytes:   number of bytes we need
 * @flush:   how we are allowed to flush
 *
 * This will reserve bytes from the data space info.  If there is not enough
 * space it will make an attempt to flush out space to make room.  Only
 * BTRFS_RESERVE_FLUSH_DATA and BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE are
 * allowed here.
 */
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
			     enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
	int ret;

	ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
	       flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);

	ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
	if (ret == -ENOSPC) {
		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
					      data_sinfo->flags, bytes, 1);
		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
			btrfs_dump_space_info(fs_info, data_sinfo, bytes, 0);
	}
	return ret;
}