// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 */

/*
 * mballoc.c contains the multiblock allocation routines
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>
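/*
 * Overview: for each block group the multiblock allocator keeps an
 * in-memory buddy structure derived from the on-disk block bitmap.
 * bb_counters[order] counts the free chunks of each power-of-two order,
 * and a per-order bitmap records which chunks are free.  Allocation
 * requests are served by scanning groups under progressively weaker
 * criteria (cr 0 through 3), from "one aligned power-of-two chunk"
 * down to "any free blocks at all".
 */
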
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;
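
/*
 * struct ext4_group_info scales with the filesystem block size, so the
 * group-info objects come from one of these per-blocksize slab caches.
 */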
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
			       ext4_group_t group, int cr);

static int ext4_try_to_trim_range(struct super_block *sb,
		struct ext4_buddy *e4b, ext4_grpblk_t start,
		ext4_grpblk_t max, ext4_grpblk_t minblocks);
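
/*
 * Per-cpu sequence count, bumped every time blocks are freed or marked used
 * in a buddy.  Summing it across CPUs before and after a failed allocation
 * tells callers whether any preallocations may have been discarded in the
 * meantime, in which case the allocation is worth retrying.
 */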
static DEFINE_PER_CPU(u64, discard_pa_seq);
static inline u64 ext4_get_discard_pa_seq_sum(void)
{
	int __cpu;
	u64 __seq = 0;

	for_each_possible_cpu(__cpu)
		__seq += per_cpu(discard_pa_seq, __cpu);
	return __seq;
}

static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "Unsupported BITS_PER_LONG"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
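	/*
	 * The ext4_*_bit() helpers need an unsigned-long-aligned address on
	 * some architectures, so fold any misalignment into the bit index.
	 */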
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}
535
#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
				  int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed (bit %u)",
					      first + i);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u at byte %u(%u): %x in copy != %x on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

static void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	struct buffer_head *bh;

	grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
	if (!grp->bb_bitmap)
		return;

	bh = ext4_read_block_bitmap(sb, group);
	if (IS_ERR_OR_NULL(bh)) {
		kfree(grp->bb_bitmap);
		grp->bb_bitmap = NULL;
		return;
	}

	memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
	put_bh(bh);
}

static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	kfree(grp->bb_bitmap);
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}

static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	return;
}

static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	return;
}
#endif
652
#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	if (e4b->bd_info->bb_check_counter++ % 10)
		return 0;

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {
			if (mb_test_bit(i, buddy)) {
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;

		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
765
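/*
 * Mark the free range [first, first + len) in the buddy structure: split it
 * into naturally aligned power-of-two chunks, and for each chunk bump
 * bb_counters[order] and clear the chunk's bit in that order's buddy bitmap.
 */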
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
				struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		max = ffs(first | border) - 1;
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}
807
static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new,
			int (*cmp)(struct rb_node *, struct rb_node *))
{
	struct rb_node **iter = &root->rb_node, *parent = NULL;

	while (*iter) {
		parent = *iter;
		if (cmp(new, *iter) > 0)
			iter = &((*iter)->rb_left);
		else
			iter = &((*iter)->rb_right);
	}

	rb_link_node(new, parent, iter);
	rb_insert_color(new, root);
}

static int
ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2)
{
	struct ext4_group_info *grp1 = rb_entry(rb1,
						struct ext4_group_info,
						bb_avg_fragment_size_rb);
	struct ext4_group_info *grp2 = rb_entry(rb2,
						struct ext4_group_info,
						bb_avg_fragment_size_rb);
	int num_frags_1, num_frags_2;

	num_frags_1 = grp1->bb_fragments ?
		grp1->bb_free / grp1->bb_fragments : 0;
	num_frags_2 = grp2->bb_fragments ?
		grp2->bb_free / grp2->bb_fragments : 0;

	return (num_frags_2 - num_frags_1);
}
843
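/*
 * Re-insert the group into the average-fragment-size rb-tree so its position
 * matches the current bb_free / bb_fragments ratio.
 */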
static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
		return;

	write_lock(&sbi->s_mb_rb_lock);
	if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) {
		rb_erase(&grp->bb_avg_fragment_size_rb,
				&sbi->s_mb_avg_fragment_size_root);
		RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb);
	}

	ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root,
		&grp->bb_avg_fragment_size_rb,
		ext4_mb_avg_fragment_size_cmp);
	write_unlock(&sbi->s_mb_rb_lock);
}
868
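/*
 * Choose the next group at cr 0: walk the largest-free-order lists from the
 * request's order upward and take the first group that passes
 * ext4_mb_good_group(); if none does, bump *new_cr to 1.
 */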
static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
			int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *iter, *grp;
	int i;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;

	if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR0_OPTIMIZED))
		atomic_inc(&sbi->s_bal_cr0_bad_suggestions);

	grp = NULL;
	for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
		if (list_empty(&sbi->s_mb_largest_free_orders[i]))
			continue;
		read_lock(&sbi->s_mb_largest_free_orders_locks[i]);
		if (list_empty(&sbi->s_mb_largest_free_orders[i])) {
			read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
			continue;
		}
		grp = NULL;
		list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
				    bb_largest_free_order_node) {
			if (sbi->s_mb_stats)
				atomic64_inc(&sbi->s_bal_cX_groups_considered[0]);
			if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) {
				grp = iter;
				break;
			}
		}
		read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
		if (grp)
			break;
	}

	if (!grp) {
		*new_cr = 1;
	} else {
		*group = grp->bb_group;
		ac->ac_last_optimal_group = *group;
		ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
	}
}
919
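/*
 * Choose the next group at cr 1: binary-search the avg-fragment-size rb-tree
 * for the group whose average fragment just covers the request.  If the tree
 * lock is contended, ask for one linear scan step instead of waiting.
 */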
static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
		int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int avg_fragment_size, best_so_far;
	struct rb_node *node, *found;
	struct ext4_group_info *grp;

	if (!read_trylock(&sbi->s_mb_rb_lock)) {
		ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR;
		return;
	}

	if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
		if (sbi->s_mb_stats)
			atomic_inc(&sbi->s_bal_cr1_bad_suggestions);

		grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group);
		for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL;
		     found = rb_next(found)) {
			grp = rb_entry(found, struct ext4_group_info,
				       bb_avg_fragment_size_rb);
			if (sbi->s_mb_stats)
				atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
			if (likely(ext4_mb_good_group(ac, grp->bb_group, 1)))
				break;
		}
		goto done;
	}

	node = sbi->s_mb_avg_fragment_size_root.rb_node;
	best_so_far = 0;
	found = NULL;

	while (node) {
		grp = rb_entry(node, struct ext4_group_info,
			       bb_avg_fragment_size_rb);
		avg_fragment_size = 0;
		if (ext4_mb_good_group(ac, grp->bb_group, 1)) {
			avg_fragment_size = grp->bb_fragments ?
				grp->bb_free / grp->bb_fragments : 0;
			if (!best_so_far || avg_fragment_size < best_so_far) {
				best_so_far = avg_fragment_size;
				found = node;
			}
		}
		if (avg_fragment_size > ac->ac_g_ex.fe_len)
			node = node->rb_right;
		else
			node = node->rb_left;
	}

done:
	if (found) {
		grp = rb_entry(found, struct ext4_group_info,
			       bb_avg_fragment_size_rb);
		*group = grp->bb_group;
		ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
	} else {
		*new_cr = 2;
	}

	read_unlock(&sbi->s_mb_rb_lock);
	ac->ac_last_optimal_group = *group;
}
996
static inline int should_optimize_scan(struct ext4_allocation_context *ac)
{
	if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN)))
		return 0;
	if (ac->ac_criteria >= 2)
		return 0;
	if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
		return 0;
	return 1;
}
1007
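/*
 * Return the next group for a linear scan, wrapping around at ngroups.
 * When the optimized scan is in effect and no linear passes remain, return
 * @group unchanged so that ext4_mb_choose_next_group() makes the choice.
 */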
static int
next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
{
	if (!should_optimize_scan(ac))
		goto inc_and_return;

	if (ac->ac_groups_linear_remaining) {
		ac->ac_groups_linear_remaining--;
		goto inc_and_return;
	}

	if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) {
		ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR;
		goto inc_and_return;
	}

	return group;
inc_and_return:
	return group + 1 >= ngroups ? 0 : group + 1;
}
1036
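/*
 * ext4_mb_choose_next_group: choose the next group for allocation.
 *
 * @ac       allocation context
 * @new_cr   out: set to a higher cr level when the current level has no
 *           suitable group left
 * @group    in/out: last group scanned on input, next group to scan on output
 * @ngroups  total number of groups
 */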
static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
		int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
	*new_cr = ac->ac_criteria;

	if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
		return;

	if (*new_cr == 0) {
		ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
	} else if (*new_cr == 1) {
		ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups);
	} else {
		WARN_ON(1);
	}
}
1070
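/*
 * Cache the order of the largest free extent in the group and, when
 * mb_optimize_scan is enabled, move the group onto the matching
 * s_mb_largest_free_orders list.
 */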
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int i;

	if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
		write_lock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
		list_del_init(&grp->bb_largest_free_order_node);
		write_unlock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
	}
	grp->bb_largest_free_order = -1;

	for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
	if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
	    grp->bb_largest_free_order >= 0 && grp->bb_free) {
		write_lock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
		list_add_tail(&grp->bb_largest_free_order_node,
		      &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]);
		write_unlock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
			    void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "block bitmap and bg descriptor inconsistent: %u vs %u free clusters",
				      free, grp->bb_free);
		grp->bb_free = free;
		ext4_mark_group_bitmap_corrupted(sb, group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	atomic_inc(&sbi->s_mb_buddies_generated);
	atomic64_add(period, &sbi->s_mb_generation_time);
	mb_update_avg_fragment_size(sb, grp);
}
1162
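/*
 * The buddy cache is an address space attached to s_buddy_cache: each page
 * holds the data for blocks_per_page/2 groups, two blocks per group.  For
 * group g, block 2*g carries a copy of the on-disk block bitmap and block
 * 2*g + 1 carries the generated buddy bitmap.  This function brings one such
 * page up to date; @incore, when non-NULL, is the bitmap block produced
 * earlier in the same pass, from which the following buddy block is built.
 */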
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = i_blocksize(inode);
	blocks_per_page = PAGE_SIZE / blocksize;

	mb_debug(sb, "init page %lu\n", page->index);

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, gfp);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
		if (IS_ERR(bh[i])) {
			err = PTR_ERR(bh[i]);
			bh[i] = NULL;
			goto out;
		}
		mb_debug(sb, "read bitmap for group %u\n", group);
	}

	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		int err2;

		if (!bh[i])
			continue;
		err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
		if (!err)
			err = err2;
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			continue;

		if (!buffer_verified(bh[group - first_group]))
			continue;
		err = 0;

		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		if ((first_block + i) & 1) {
			BUG_ON(incore == NULL);
			mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
			       (MB_NUM_ORDERS(sb)));
			ext4_lock_group(sb, group);
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			BUG_ON(incore != NULL);
			mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
1343
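/*
 * Lock the page(s) that carry this group's bitmap and buddy blocks, so that a
 * concurrent ext4_mb_init_group() on the same page cannot race with us.  When
 * both blocks live on one page, only bd_bitmap_page is set and bd_buddy_page
 * stays NULL.
 */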
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;

	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2)
		return 0;

	block++;
	pnum = block / blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}

static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page) {
		unlock_page(e4b->bd_bitmap_page);
		put_page(e4b->bd_bitmap_page);
	}
	if (e4b->bd_buddy_page) {
		unlock_page(e4b->bd_buddy_page);
		put_page(e4b->bd_buddy_page);
	}
}
1403
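/*
 * Initialize the group's buddy cache under the buddy page lock(s).  A racing
 * thread may already have initialized the group, which the NEED_INIT re-check
 * below turns into a harmless no-op.
 */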
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{
	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	might_sleep();
	mb_debug(sb, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);

	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp))
		goto err;

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	if (e4b.bd_buddy_page == NULL) {
		ret = 0;
		goto err;
	}

	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}
1470
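/*
 * Pin the buddy-cache pages for @group, reading and initializing them on
 * demand, and point e4b at the group's bitmap and buddy data.  Dropped again
 * by ext4_mb_unload_buddy().
 */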
static noinline_for_stack int
ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
		       struct ext4_buddy *e4b, gfp_t gfp)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	might_sleep();
	mb_debug(sb, "load group %u\n", group);

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = grp;
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		ret = ext4_mb_init_group(sb, group, gfp);
		if (ret)
			return ret;
	}

	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL, gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
							 gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);

	return 0;

err:
	if (page)
		put_page(page);
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}
1612
static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
			      struct ext4_buddy *e4b)
{
	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
}

static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1, max;
	void *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	while (order <= e4b->bd_blkbits + 1) {
		bb = mb_find_buddy(e4b, order, &max);
		if (!mb_test_bit(block >> order, bb))
			return order;
		order++;
	}
	return 0;
}
1646
static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}
1664
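/*
 * Clear the given bit range; return the position of the first bit that was
 * already clear (a sign of a double free), or -1 if all bits were set.
 */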
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;
	int zero_bit = -1;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			addr = bm + (cur >> 3);
			if (*addr != (__u32)(-1) && zero_bit == -1)
				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
			*addr = 0;
			cur += 32;
			continue;
		}
		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
			zero_bit = cur;
		cur++;
	}

	return zero_bit;
}
1691
void ext4_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}

static inline int mb_buddy_adjust_border(int *bit, void *bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	} else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}
1723
static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
	int max;
	int order = 1;
	void *buddy = mb_find_buddy(e4b, order, &max);

	while (buddy) {
		void *buddy2;

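		/*
		 * [first, last] is the bit range that may become free at this
		 * order.  A border that is only half of a buddy pair is
		 * handled by mb_buddy_adjust_border(): if the neighbouring
		 * half is busy, our half becomes a free chunk of this order
		 * (counter goes up by one); if the neighbour was free, the
		 * two halves merge into the next order (counter goes down by
		 * one) and the border moves outward.
		 */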
		if (first & 1)
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
		if (!(last & 1))
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
		if (first > last)
			break;
		order++;

		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
			mb_clear_bits(buddy, first, last - first + 1);
			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
			break;
		}
		first >>= 1;
		last >>= 1;
		buddy = buddy2;
	}
}
1780
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int left_is_free = 0;
	int right_is_free = 0;
	int block;
	int last = first + count - 1;
	struct super_block *sb = e4b->bd_sb;

	if (WARN_ON(count == 0))
		return;
	BUG_ON(last >= (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
		return;

	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	this_cpu_inc(discard_pa_seq);
	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	if (first != 0)
		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);

	if (unlikely(block != -1)) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		ext4_fsblk_t blocknr;

		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
		blocknr += EXT4_C2B(sbi, block);
		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing already freed block (bit %u); block bitmap corrupt.",
					      block);
			ext4_mark_group_bitmap_corrupted(
				sb, e4b->bd_group,
				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		goto done;
	}

	if (left_is_free && right_is_free)
		e4b->bd_info->bb_fragments--;
	else if (!left_is_free && !right_is_free)
		e4b->bd_info->bb_fragments++;

	if (first & 1) {
		first += !left_is_free;
		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
	}
	if (!(last & 1)) {
		last -= !right_is_free;
		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
	}

	if (first <= last)
		mb_buddy_mark_free(e4b, first >> 1, last >> 1);

done:
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_update_avg_fragment_size(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}
1863
static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       mb_find_buddy(e4b, order, &max)) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
		WARN_ON(1);
		ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0,
			"corruption or bug in mb_find_extent block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
			block, order, needed, ex->fe_group, ex->fe_start,
			ex->fe_len, ex->fe_logical);
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
	}
	return ex->fe_len;
}
1927
static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	this_cpu_inc(discard_pa_seq);
	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		if (ret == 0)
			ret = len | (ord << 16);

		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info);
	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}
2004
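/*
 * Claim ac->ac_b_ex in the buddy and record it as the final allocation.
 * Must be called under the group lock.
 */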
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);

	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}

	if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
		ext4_mb_new_preallocation(ac);
}
2056
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;

	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}
2096
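/*
 * Measure a found free extent against the goal and remember the best
 * candidate so far in ac->ac_b_ex.  An exact-length match (or the "take the
 * first extent" hint) ends the search right away; otherwise prefer a larger
 * extent while the goal is unmet, and the smallest sufficient one after.
 */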
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	if (bex->fe_len < gex->fe_len) {
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}
2165
static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
2193
static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;
	if (grp->bb_free == 0)
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
		ext4_mb_unload_buddy(e4b);
		return 0;
	}

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);
	ex.fe_logical = 0xDEADFA11;

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
2257
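/*
 * cr 0 scan: consult only the per-order buddy bitmaps, looking for a single
 * aligned power-of-two chunk that satisfies the whole request.
 */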
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i < MB_NUM_ORDERS(sb); i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		if (k >= max) {
			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
				"%d free clusters of order %d. But found 0",
				grp->bb_counters[i], i);
			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
					e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			break;
		}
		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}
2307
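/*
 * Exhaustive scan: walk the block bitmap from bb_first_free and measure
 * every free extent until the allocation context tells us to stop.
 */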
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	if (WARN_ON(free <= 0))
		return;

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_CLUSTERS_PER_GROUP(sb), i);
		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per group info. But bitmap says 0",
					free);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			break;
		}

		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
		if (WARN_ON(ex.fe_len <= 0))
			break;
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per group info. But got %d blocks",
					free, ex.fe_len);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			break;
		}
		ex.fe_logical = 0xDEADC0DE;
		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}
2373
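/*
 * Special case for RAID-style storage: for requests that are a multiple of
 * the stripe size, only accept free extents starting on a stripe boundary.
 */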
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ex.fe_logical = 0xDEADF00D;
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}
2414
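/*
 * Cheap suitability check for @group at criteria level @cr, based only on
 * the cached free-cluster and fragment counts; no buddy data is touched.
 */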
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	ext4_grpblk_t free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return false;

	free = grp->bb_free;
	if (free == 0)
		return false;

	fragments = grp->bb_fragments;
	if (fragments == 0)
		return false;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return false;

		if (free < ac->ac_g_ex.fe_len)
			return false;

		if (ac->ac_2order >= MB_NUM_ORDERS(ac->ac_sb))
			return true;

		if (grp->bb_largest_free_order < ac->ac_2order)
			return false;

		return true;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return true;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return true;
		break;
	case 3:
		return true;
	default:
		BUG();
	}

	return false;
}
2476
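/*
 * Lockless wrapper around ext4_mb_good_group(): perform racy pre-checks
 * first, initialize the group's buddy data if needed, and take the group
 * lock only when EXT4_MB_STRICT_CHECK is set.  Returns 1 (good), 0 (bad),
 * or a negative error from ext4_mb_init_group().
 */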
static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
				     ext4_group_t group, int cr)
{
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
	ext4_grpblk_t free;
	int ret = 0;

	if (sbi->s_mb_stats)
		atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
	if (should_lock) {
		ext4_lock_group(sb, group);
		__release(ext4_group_lock_ptr(sb, group));
	}
	free = grp->bb_free;
	if (free == 0)
		goto out;
	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
		goto out;
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		goto out;
	if (should_lock) {
		__acquire(ext4_group_lock_ptr(sb, group));
		ext4_unlock_group(sb, group);
	}

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		struct ext4_group_desc *gdp =
			ext4_get_group_desc(sb, group, NULL);
		int ret;

		if (cr < 2 &&
		    (!sbi->s_log_groups_per_flex ||
		     ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
		    !(ext4_has_group_desc_csum(sb) &&
		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
			return 0;
		ret = ext4_mb_init_group(sb, group, GFP_NOFS);
		if (ret)
			return ret;
	}

	if (should_lock) {
		ext4_lock_group(sb, group);
		__release(ext4_group_lock_ptr(sb, group));
	}
	ret = ext4_mb_good_group(ac, group, cr);
out:
	if (should_lock) {
		__acquire(ext4_group_lock_ptr(sb, group));
		ext4_unlock_group(sb, group);
	}
	return ret;
}
2553
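/*
 * Start non-blocking prefetch reads of @nr block bitmaps beginning at
 * @group.  *cnt is bumped for each read actually submitted; the return
 * value is the next group to prefetch.
 */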
ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
			      unsigned int nr, int *cnt)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	struct buffer_head *bh;
	struct blk_plug plug;

	blk_start_plug(&plug);
	while (nr-- > 0) {
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
								  NULL);
		struct ext4_group_info *grp = ext4_get_group_info(sb, group);

		if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
		    EXT4_MB_GRP_NEED_INIT(grp) &&
		    ext4_free_group_clusters(sb, gdp) > 0 &&
		    !(ext4_has_group_desc_csum(sb) &&
		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
			bh = ext4_read_block_bitmap_nowait(sb, group, true);
			if (bh && !IS_ERR(bh)) {
				if (!buffer_uptodate(bh) && cnt)
					(*cnt)++;
				brelse(bh);
			}
		}
		if (++group >= ngroups)
			group = 0;
	}
	blk_finish_plug(&plug);
	return group;
}
2596
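/*
 * Second pass of prefetching: walk backwards over the @nr groups whose
 * bitmap reads were started above and build their buddy structures now that
 * the I/O has had time to complete.
 */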
void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
			   unsigned int nr)
{
	struct ext4_group_desc *gdp;
	struct ext4_group_info *grp;

	while (nr-- > 0) {
		if (!group)
			group = ext4_get_groups_count(sb);
		group--;
		/* Look up the descriptor only after stepping back to the group. */
		gdp = ext4_get_group_desc(sb, group, NULL);
		grp = ext4_get_group_info(sb, group);

		if (EXT4_MB_GRP_NEED_INIT(grp) &&
		    ext4_free_group_clusters(sb, gdp) > 0 &&
		    !(ext4_has_group_desc_csum(sb) &&
		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
			if (ext4_mb_init_group(sb, group, GFP_NOFS))
				break;
		}
	}
}
2631
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t prefetch_grp = 0, ngroups, group, i;
	int cr = -1;
	int err = 0, first_err = 0;
	unsigned int nr = 0, prefetch_ios = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;
	int lost;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);

	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;

	if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) {
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = array_index_nospec(i - 1,
							   MB_NUM_ORDERS(sb));
	}

	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	cr = ac->ac_2order ? 0 : 1;
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;

		group = ac->ac_g_ex.fe_group;
		ac->ac_last_optimal_group = group;
		ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
		prefetch_grp = group;

		for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
			     i++) {
			int ret = 0, new_cr;

			cond_resched();

			ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
			if (new_cr != cr) {
				cr = new_cr;
				goto repeat;
			}

			if ((prefetch_grp == group) &&
			    (cr > 1 ||
			     prefetch_ios < sbi->s_mb_prefetch_limit)) {
				unsigned int curr_ios = prefetch_ios;

				nr = sbi->s_mb_prefetch;
				if (ext4_has_feature_flex_bg(sb)) {
					nr = 1 << sbi->s_log_groups_per_flex;
					nr -= group & (nr - 1);
					nr = min(nr, sbi->s_mb_prefetch);
				}
				prefetch_grp = ext4_mb_prefetch(sb, group,
							nr, &prefetch_ios);
				if (prefetch_ios == curr_ios)
					nr = 0;
			}

			ret = ext4_mb_good_group_nolock(ac, group, cr);
			if (ret <= 0) {
				if (!first_err)
					first_err = ret;
				continue;
			}

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			ret = ext4_mb_good_group(ac, group, cr);
			if (ret == 0) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}

		if (sbi->s_mb_stats && i == ngroups)
			atomic64_inc(&sbi->s_bal_cX_failed[cr]);
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
			mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
				 ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
				 ac->ac_b_ex.fe_len, lost);

			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			goto repeat;
		}
	}

	if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND)
		atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]);
out:
	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
		err = first_err;

	mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
		 ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
		 ac->ac_flags, cr, err);

	if (nr)
		ext4_mb_prefetch_fini(sb, prefetch_grp, nr);

	return err;
}
2834
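/*
 * seq_file interface for /proc/fs/ext4/<disk>/mb_groups: one line per
 * block group with its free-cluster count, fragment count, first free
 * offset and per-order buddy counters.  The cursor is the group number
 * biased by one so that a NULL return can signal end-of-file.
 */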
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	ext4_group_t group;

	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	ext4_group_t group;

	++*pos;
	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err, buddy_loaded = 0;
	struct ext4_buddy e4b;
	struct ext4_group_info *grinfo;
	unsigned char blocksize_bits = min_t(unsigned char,
					     sb->s_blocksize_bits,
					     EXT4_MAX_BLOCK_LOG_SIZE);
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
	} sg;

	group--;
	if (group == 0)
		seq_puts(seq, "#group: free  frags first ["
			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");

	i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);

	grinfo = ext4_get_group_info(sb, group);
	/* Load the group info in memory only if not already loaded. */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			seq_printf(seq, "#%-5u: I/O error\n", group);
			return 0;
		}
		buddy_loaded = 1;
	}

	memcpy(&sg, ext4_get_group_info(sb, group), i);

	if (buddy_loaded)
		ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_puts(seq, " ]\n");

	return 0;
}

static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}

const struct seq_operations ext4_mb_seq_groups_ops = {
	.start	= ext4_mb_seq_groups_start,
	.next	= ext4_mb_seq_groups_next,
	.stop	= ext4_mb_seq_groups_stop,
	.show	= ext4_mb_seq_groups_show,
};

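/*
 * /proc/fs/ext4/<disk>/mb_stats dump.  Collection is off by default and
 * can be enabled at run time via the sysfs knob, e.g. (illustrative):
 *	echo 1 > /sys/fs/ext4/<disk>/mb_stats
 */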
int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
{
	struct super_block *sb = (struct super_block *)seq->private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	seq_puts(seq, "mballoc:\n");
	if (!sbi->s_mb_stats) {
		seq_puts(seq, "\tmb stats collection turned off.\n");
		seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n");
		return 0;
	}
	seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
	seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));

	seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned));

	seq_puts(seq, "\tcr0_stats:\n");
	seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0]));
	seq_printf(seq, "\t\tgroups_considered: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_groups_considered[0]));
	seq_printf(seq, "\t\tuseless_loops: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_failed[0]));
	seq_printf(seq, "\t\tbad_suggestions: %u\n",
		   atomic_read(&sbi->s_bal_cr0_bad_suggestions));

	seq_puts(seq, "\tcr1_stats:\n");
	seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1]));
	seq_printf(seq, "\t\tgroups_considered: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_groups_considered[1]));
	seq_printf(seq, "\t\tuseless_loops: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_failed[1]));
	seq_printf(seq, "\t\tbad_suggestions: %u\n",
		   atomic_read(&sbi->s_bal_cr1_bad_suggestions));

	seq_puts(seq, "\tcr2_stats:\n");
	seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2]));
	seq_printf(seq, "\t\tgroups_considered: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_groups_considered[2]));
	seq_printf(seq, "\t\tuseless_loops: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_failed[2]));

	seq_puts(seq, "\tcr3_stats:\n");
	seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3]));
	seq_printf(seq, "\t\tgroups_considered: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_groups_considered[3]));
	seq_printf(seq, "\t\tuseless_loops: %llu\n",
		   atomic64_read(&sbi->s_bal_cX_failed[3]));
	seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned));
	seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
	seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
	seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
	seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));

	seq_printf(seq, "\tbuddies_generated: %u/%u\n",
		   atomic_read(&sbi->s_mb_buddies_generated),
		   ext4_get_groups_count(sb));
	seq_printf(seq, "\tbuddies_time_used: %llu\n",
		   atomic64_read(&sbi->s_mb_generation_time));
	seq_printf(seq, "\tpreallocated: %u\n",
		   atomic_read(&sbi->s_mb_preallocated));
	seq_printf(seq, "\tdiscarded: %u\n",
		   atomic_read(&sbi->s_mb_discarded));
	return 0;
}

static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	unsigned long position;

	read_lock(&EXT4_SB(sb)->s_mb_rb_lock);

	if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
		return NULL;
	position = *pos + 1;
	return (void *) ((unsigned long) position);
}

static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	unsigned long position;

	++*pos;
	if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
		return NULL;
	position = *pos + 1;
	return (void *) ((unsigned long) position);
}

static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned long position = ((unsigned long) v);
	struct ext4_group_info *grp;
	struct rb_node *n;
	unsigned int count, min, max;

	position--;
	if (position >= MB_NUM_ORDERS(sb)) {
		seq_puts(seq, "fragment_size_tree:\n");
		n = rb_first(&sbi->s_mb_avg_fragment_size_root);
		if (!n) {
			seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 0\n");
			return 0;
		}
		grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
		min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
		count = 1;
		while (rb_next(n)) {
			count++;
			n = rb_next(n);
		}
		grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
		max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;

		seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n",
			   min, max, count);
		return 0;
	}

	if (position == 0) {
		seq_printf(seq, "optimize_scan: %d\n",
			   test_opt2(sb, MB_OPTIMIZE_SCAN) ? 1 : 0);
		seq_puts(seq, "max_free_order_lists:\n");
	}
	count = 0;
	list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
			    bb_largest_free_order_node)
		count++;
	seq_printf(seq, "\tlist_order_%u_groups: %u\n",
		   (unsigned int)position, count);

	return 0;
}

static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
__releases(&EXT4_SB(sb)->s_mb_rb_lock)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));

	read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
}

const struct seq_operations ext4_mb_seq_structs_summary_ops = {
	.start	= ext4_mb_seq_structs_summary_start,
	.next	= ext4_mb_seq_structs_summary_next,
	.stop	= ext4_mb_seq_structs_summary_stop,
	.show	= ext4_mb_seq_structs_summary_show,
};

static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];

	BUG_ON(!cachep);
	return cachep;
}

/*
 * Allocate the top-level s_group_info array for the specified number of
 * groups
 */
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned size;
	struct ext4_group_info ***old_groupinfo, ***new_groupinfo;

	size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
		EXT4_DESC_PER_BLOCK_BITS(sb);
	if (size <= sbi->s_group_info_size)
		return 0;

	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
	new_groupinfo = kvzalloc(size, GFP_KERNEL);
	if (!new_groupinfo) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
		return -ENOMEM;
	}
	rcu_read_lock();
	old_groupinfo = rcu_dereference(sbi->s_group_info);
	if (old_groupinfo)
		memcpy(new_groupinfo, old_groupinfo,
		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
	rcu_read_unlock();
	rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
	if (old_groupinfo)
		ext4_kvfree_array_rcu(old_groupinfo);
	ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
		   sbi->s_group_info_size);
	return 0;
}
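
/*
 * Worked example (illustrative): with 4k blocks and 32-byte group
 * descriptors, EXT4_DESC_PER_BLOCK(sb) == 128, so each second-level
 * meta_group_info table covers 128 groups.  A 1 TiB filesystem with
 * 32768-block groups has 8192 groups and therefore needs 64 pointers
 * in the top-level array allocated above.
 */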

/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
			  struct ext4_group_desc *desc)
{
	int i;
	int metalen = 0;
	int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info **meta_group_info;
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	/*
	 * First check if this group is the first of a reserved block.
	 * If it's true, we have to allocate a new table of pointers
	 * to ext4_group_info structures
	 */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		metalen = sizeof(*meta_group_info) <<
			EXT4_DESC_PER_BLOCK_BITS(sb);
		meta_group_info = kmalloc(metalen, GFP_NOFS);
		if (meta_group_info == NULL) {
			ext4_msg(sb, KERN_ERR, "can't allocate mem "
				 "for a buddy group");
			goto exit_meta_group_info;
		}
		rcu_read_lock();
		rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
		rcu_read_unlock();
	}

	meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
	if (meta_group_info[i] == NULL) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
		goto exit_group_info;
	}
	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
		&(meta_group_info[i]->bb_state));

	/*
	 * initialize bb_free to be able to skip
	 * empty groups without initialization
	 */
	if (ext4_has_group_desc_csum(sb) &&
	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
		meta_group_info[i]->bb_free =
			ext4_free_clusters_after_init(sb, group, desc);
	} else {
		meta_group_info[i]->bb_free =
			ext4_free_group_clusters(sb, desc);
	}

	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
	RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb);
	meta_group_info[i]->bb_largest_free_order = -1;	/* uninit */
	meta_group_info[i]->bb_group = group;

	mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
	return 0;

exit_group_info:
	/* If a meta_group_info table has been allocated, release it now */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		struct ext4_group_info ***group_info;

		rcu_read_lock();
		group_info = rcu_dereference(sbi->s_group_info);
		kfree(group_info[idx]);
		group_info[idx] = NULL;
		rcu_read_unlock();
	}
exit_meta_group_info:
	return -ENOMEM;
}

static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;
	struct ext4_group_desc *desc;
	struct ext4_group_info ***group_info;
	struct kmem_cache *cachep;

	err = ext4_mb_alloc_groupinfo(sb, ngroups);
	if (err)
		return err;

	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		ext4_msg(sb, KERN_ERR, "can't get new inode");
		goto err_freesgi;
	}
	/*
	 * To avoid potentially colliding with a valid on-disk inode number,
	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
	 * not in the inode hash, so it should never be found by iget(), but
	 * any future change has to take this into consideration.
	 */
	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		cond_resched();
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	if (ext4_has_feature_flex_bg(sb)) {
		/* a single flex group is supposed to be read by a single IO.
		 * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is
		 * unsigned integer, so the maximum shift is 32.
		 */
		if (sbi->s_es->s_log_groups_per_flex >= 32) {
			ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
			goto err_freebuddy;
		}
		sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
			BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
		sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
	} else {
		sbi->s_mb_prefetch = 32;
	}
	if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
		sbi->s_mb_prefetch = ext4_get_groups_count(sb);

	/* now many real IOs to prefetch within a single allocation at cr=0
	 * given cr=0 is an CPU-related optimization we shouldn't try to
	 * load too many groups, at some point we should start to use what
	 * we've got in memory.
	 * with an average random access time 5ms, it'd take a second to get
	 * 200 groups (* N with flex_bg), so let's make this limit 4
	 */
	sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
	if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
		sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);

	return 0;

err_freebuddy:
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = sbi->s_group_info_size;
	rcu_read_lock();
	group_info = rcu_dereference(sbi->s_group_info);
	while (i-- > 0)
		kfree(group_info[i]);
	rcu_read_unlock();
	iput(sbi->s_buddy_cache);
err_freesgi:
	rcu_read_lock();
	kvfree(rcu_dereference(sbi->s_group_info));
	rcu_read_unlock();
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	ext4_groupinfo_caches[cache_index] = cachep;

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG
		       "EXT4-fs: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	return 0;
}
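
/*
 * Sizing example (illustrative): for a 4k block size, order_base_2(4096)
 * == 12, so the slab created above is "ext4_groupinfo_4k" and holds a
 * struct ext4_group_info with bb_counters[0..13] -- matching
 * MB_NUM_ORDERS for that block size.
 */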

/*
 * Runs from a workqueue and issues the actual discards for ranges that
 * were queued on sbi->s_discard_list at transaction commit time.
 */
static void ext4_discard_work(struct work_struct *work)
{
	struct ext4_sb_info *sbi = container_of(work,
			struct ext4_sb_info, s_discard_work);
	struct super_block *sb = sbi->s_sb;
	struct ext4_free_data *fd, *nfd;
	struct ext4_buddy e4b;
	struct list_head discard_list;
	ext4_group_t grp, load_grp;
	int err = 0;

	INIT_LIST_HEAD(&discard_list);
	spin_lock(&sbi->s_md_lock);
	list_splice_init(&sbi->s_discard_list, &discard_list);
	spin_unlock(&sbi->s_md_lock);

	load_grp = UINT_MAX;
	list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) {
		/*
		 * If filesystem is umounting or no memory or suffering
		 * from no space, give up the discard
		 */
		if ((sb->s_flags & SB_ACTIVE) && !err &&
		    !atomic_read(&sbi->s_retry_alloc_pending)) {
			grp = fd->efd_group;
			if (grp != load_grp) {
				if (load_grp != UINT_MAX)
					ext4_mb_unload_buddy(&e4b);

				err = ext4_mb_load_buddy(sb, grp, &e4b);
				if (err) {
					kmem_cache_free(ext4_free_data_cachep, fd);
					load_grp = UINT_MAX;
					continue;
				} else {
					load_grp = grp;
				}
			}

			ext4_lock_group(sb, grp);
			ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster,
						fd->efd_start_cluster + fd->efd_count - 1, 1);
			ext4_unlock_group(sb, grp);
		}
		kmem_cache_free(ext4_free_data_cachep, fd);
	}

	if (load_grp != UINT_MAX)
		ext4_mb_unload_buddy(&e4b);
}

int ext4_mb_init(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset, offset_incr;
	unsigned max;
	int ret;

	i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	i = 1;
	offset = 0;
	offset_incr = 1 << (sb->s_blocksize_bits - 1);
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += offset_incr;
		offset_incr = offset_incr >> 1;
		max = max >> 1;
		i++;
	} while (i < MB_NUM_ORDERS(sb));
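
	/*
	 * Worked layout (illustrative, 4k blocks, MB_NUM_ORDERS == 14):
	 * the order-1 buddy bitmap sits at byte offset 0 and can describe
	 * up to 16384 blocks, order 2 at offset 2048 for up to 8192,
	 * order 3 at 3072 for 4096, and so on -- each order takes half
	 * the space of the previous one, so the whole buddy fits in a
	 * single block.
	 */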

	sbi->s_mb_avg_fragment_size_root = RB_ROOT;
	sbi->s_mb_largest_free_orders =
		kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
			GFP_KERNEL);
	if (!sbi->s_mb_largest_free_orders) {
		ret = -ENOMEM;
		goto out;
	}
	sbi->s_mb_largest_free_orders_locks =
		kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
			GFP_KERNEL);
	if (!sbi->s_mb_largest_free_orders_locks) {
		ret = -ENOMEM;
		goto out;
	}
	for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
		INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
		rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
	}
	rwlock_init(&sbi->s_mb_rb_lock);

	spin_lock_init(&sbi->s_md_lock);
	sbi->s_mb_free_pending = 0;
	INIT_LIST_HEAD(&sbi->s_freed_data_list);
	INIT_LIST_HEAD(&sbi->s_discard_list);
	INIT_WORK(&sbi->s_discard_work, ext4_discard_work);
	atomic_set(&sbi->s_retry_alloc_pending, 0);

	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
	sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
	/*
	 * The default group preallocation is 512, which for 4k block
	 * sizes translates to 2 megabytes.  However for bigalloc file
	 * systems, this is clusters rather than blocks, so scale the
	 * default down by the cluster ratio (with a floor of 32) to
	 * keep the preallocation size roughly the same regardless of
	 * cluster size.
	 */
	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
				       sbi->s_cluster_bits, 32);
	/*
	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
	 * to the lowest multiple of s_stripe which is bigger than
	 * the s_mb_group_prealloc as determined above.
	 */
	if (sbi->s_stripe > 1) {
		sbi->s_mb_group_prealloc = roundup(
			sbi->s_mb_group_prealloc, sbi->s_stripe);
	}
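
	/*
	 * Example (illustrative): with MB_DEFAULT_GROUP_PREALLOC == 512 on
	 * a bigalloc filesystem using 16-block clusters (s_cluster_bits ==
	 * 4), the group preallocation becomes max(512 >> 4, 32) == 32
	 * clusters; a hypothetical s_stripe of 24 would then round it up
	 * to 48.
	 */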

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
		sbi->s_mb_max_linear_groups = 0;
	else
		sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;

	ret = ext4_mb_init_backend(sb);
	if (ret != 0)
		goto out_free_locality_groups;

	return 0;

out_free_locality_groups:
	free_percpu(sbi->s_locality_groups);
	sbi->s_locality_groups = NULL;
out:
	kfree(sbi->s_mb_largest_free_orders);
	kfree(sbi->s_mb_largest_free_orders_locks);
	kfree(sbi->s_mb_offsets);
	sbi->s_mb_offsets = NULL;
	kfree(sbi->s_mb_maxs);
	sbi->s_mb_maxs = NULL;
	return ret;
}

/* need to be called with the ext4 group lock held */
static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
{
	struct ext4_prealloc_space *pa;
	struct list_head *cur, *tmp;
	int count = 0;

	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		list_del(&pa->pa_group_list);
		count++;
		kmem_cache_free(ext4_pspace_cachep, pa);
	}
	return count;
}

int ext4_mb_release(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	int num_meta_group_infos;
	struct ext4_group_info *grinfo, ***group_info;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	int count;

	if (test_opt(sb, DISCARD)) {
		/*
		 * wait for the discard work to drain all of ext4_free_data
		 */
		flush_work(&sbi->s_discard_work);
		WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
	}

	if (sbi->s_group_info) {
		for (i = 0; i < ngroups; i++) {
			cond_resched();
			grinfo = ext4_get_group_info(sb, i);
			mb_group_bb_bitmap_free(grinfo);
			ext4_lock_group(sb, i);
			count = ext4_mb_cleanup_pa(grinfo);
			if (count)
				mb_debug(sb, "mballoc: %d PAs left\n",
					 count);
			ext4_unlock_group(sb, i);
			kmem_cache_free(cachep, grinfo);
		}
		num_meta_group_infos = (ngroups +
				EXT4_DESC_PER_BLOCK(sb) - 1) >>
			EXT4_DESC_PER_BLOCK_BITS(sb);
		rcu_read_lock();
		group_info = rcu_dereference(sbi->s_group_info);
		for (i = 0; i < num_meta_group_infos; i++)
			kfree(group_info[i]);
		kvfree(group_info);
		rcu_read_unlock();
	}
	kfree(sbi->s_mb_largest_free_orders);
	kfree(sbi->s_mb_largest_free_orders_locks);
	kfree(sbi->s_mb_offsets);
	kfree(sbi->s_mb_maxs);
	iput(sbi->s_buddy_cache);
	if (sbi->s_mb_stats) {
		ext4_msg(sb, KERN_INFO,
		       "mballoc: %u blocks %u reqs (%u success)",
				atomic_read(&sbi->s_bal_allocated),
				atomic_read(&sbi->s_bal_reqs),
				atomic_read(&sbi->s_bal_success));
		ext4_msg(sb, KERN_INFO,
		      "mballoc: %u extents scanned, %u groups scanned, %u goal hits, "
				"%u 2^N hits, %u breaks, %u lost",
				atomic_read(&sbi->s_bal_ex_scanned),
				atomic_read(&sbi->s_bal_groups_scanned),
				atomic_read(&sbi->s_bal_goals),
				atomic_read(&sbi->s_bal_2orders),
				atomic_read(&sbi->s_bal_breaks),
				atomic_read(&sbi->s_mb_lost_chunks));
		ext4_msg(sb, KERN_INFO,
		       "mballoc: %u generated and it took %llu",
				atomic_read(&sbi->s_mb_buddies_generated),
				atomic64_read(&sbi->s_mb_generation_time));
		ext4_msg(sb, KERN_INFO,
		       "mballoc: %u preallocated, %u discarded",
				atomic_read(&sbi->s_mb_preallocated),
				atomic_read(&sbi->s_mb_discarded));
	}

	free_percpu(sbi->s_locality_groups);

	return 0;
}

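/*
 * The helper below converts a cluster-relative range into 512-byte
 * sectors before calling into the block layer: with 4k blocks the shift
 * (s_blocksize_bits - 9) is 3, so block N maps to sector N * 8.
 */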
static inline int ext4_issue_discard(struct super_block *sb,
		ext4_group_t block_group, ext4_grpblk_t cluster, int count,
		struct bio **biop)
{
	ext4_fsblk_t discard_block;

	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
			 ext4_group_first_block_no(sb, block_group));
	count = EXT4_C2B(EXT4_SB(sb), count);
	trace_ext4_discard_blocks(sb,
			(unsigned long long) discard_block, count);
	if (biop) {
		return __blkdev_issue_discard(sb->s_bdev,
			(sector_t)discard_block << (sb->s_blocksize_bits - 9),
			(sector_t)count << (sb->s_blocksize_bits - 9),
			GFP_NOFS, 0, biop);
	} else
		return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}

static void ext4_free_data_in_buddy(struct super_block *sb,
				    struct ext4_free_data *entry)
{
	struct ext4_buddy e4b;
	struct ext4_group_info *db;
	int err, count = 0, count2 = 0;

	mb_debug(sb, "gonna free %u blocks in group %u (0x%p):",
		 entry->efd_count, entry->efd_group, entry);

	err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
	/* we expect to find existing buddy because it's pinned */
	BUG_ON(err != 0);

	spin_lock(&EXT4_SB(sb)->s_md_lock);
	EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
	spin_unlock(&EXT4_SB(sb)->s_md_lock);

	db = e4b.bd_info;
	/* there are blocks to put in buddy to make them really free */
	count += entry->efd_count;
	count2++;
	ext4_lock_group(sb, entry->efd_group);
	/* Take it out of per group rb tree */
	rb_erase(&entry->efd_node, &(db->bb_free_root));
	mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);

	/*
	 * Clear the trimmed flag for the group so that the next
	 * ext4_trim_fs can trim it.
	 * If the volume is mounted with -o discard, online discard
	 * is supported and the free blocks will be trimmed online.
	 */
	if (!test_opt(sb, DISCARD))
		EXT4_MB_GRP_CLEAR_TRIMMED(db);

	if (!db->bb_free_root.rb_node) {
		/* No more items in the per group rb tree
		 * balance refcounts from ext4_mb_free_metadata()
		 */
		put_page(e4b.bd_buddy_page);
		put_page(e4b.bd_bitmap_page);
	}
	ext4_unlock_group(sb, entry->efd_group);
	ext4_mb_unload_buddy(&e4b);

	mb_debug(sb, "freed %d blocks in %d structures\n", count,
		 count2);
}

/*
 * This function is called by the jbd2 layer once the commit has finished,
 * so we know we can free the blocks that were released with that commit.
 */
void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_free_data *entry, *tmp;
	struct list_head freed_data_list;
	struct list_head *cut_pos = NULL;
	bool wake;

	INIT_LIST_HEAD(&freed_data_list);

	spin_lock(&sbi->s_md_lock);
	list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
		if (entry->efd_tid != commit_tid)
			break;
		cut_pos = &entry->efd_list;
	}
	if (cut_pos)
		list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
				  cut_pos);
	spin_unlock(&sbi->s_md_lock);

	list_for_each_entry(entry, &freed_data_list, efd_list)
		ext4_free_data_in_buddy(sb, entry);

	if (test_opt(sb, DISCARD)) {
		spin_lock(&sbi->s_md_lock);
		wake = list_empty(&sbi->s_discard_list);
		list_splice_tail(&freed_data_list, &sbi->s_discard_list);
		spin_unlock(&sbi->s_md_lock);
		if (wake)
			queue_work(system_unbound_wq, &sbi->s_discard_work);
	} else {
		list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
			kmem_cache_free(ext4_free_data_cachep, entry);
	}
}

int __init ext4_init_mballoc(void)
{
	ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
					SLAB_RECLAIM_ACCOUNT);
	if (ext4_pspace_cachep == NULL)
		goto out;

	ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
				    SLAB_RECLAIM_ACCOUNT);
	if (ext4_ac_cachep == NULL)
		goto out_pa_free;

	ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
					   SLAB_RECLAIM_ACCOUNT);
	if (ext4_free_data_cachep == NULL)
		goto out_ac_free;

	return 0;

out_ac_free:
	kmem_cache_destroy(ext4_ac_cachep);
out_pa_free:
	kmem_cache_destroy(ext4_pspace_cachep);
out:
	return -ENOMEM;
}

void ext4_exit_mballoc(void)
{
	/*
	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
	 * before destroying the slab cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_pspace_cachep);
	kmem_cache_destroy(ext4_ac_cachep);
	kmem_cache_destroy(ext4_free_data_cachep);
	ext4_groupinfo_destroy_slabs();
}


/*
 * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
 * Returns 0 if success or error code
 */
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
				handle_t *handle, unsigned int reserv_clstrs)
{
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_group_desc *gdp;
	struct buffer_head *gdp_bh;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block;
	int err, len;

	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(ac->ac_b_ex.fe_len <= 0);

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);

	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
	if (IS_ERR(bitmap_bh)) {
		err = PTR_ERR(bitmap_bh);
		bitmap_bh = NULL;
		goto out_err;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
					    EXT4_JTR_NONE);
	if (err)
		goto out_err;

	err = -EIO;
	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
	if (!gdp)
		goto out_err;

	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
			ext4_free_group_clusters(sb, gdp));

	BUFFER_TRACE(gdp_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, sb, gdp_bh, EXT4_JTR_NONE);
	if (err)
		goto out_err;

	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);

	len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
	if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
			   "fs metadata", block, block+len);
		/* File system mounted not to panic on error
		 * Fix the bitmap and return EFSCORRUPTED
		 * We leak some of the blocks here.
		 */
		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
		ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
			      ac->ac_b_ex.fe_len);
		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!err)
			err = -EFSCORRUPTED;
		goto out_err;
	}

	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
#ifdef AGGRESSIVE_CHECK
	{
		int i;
		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
						bitmap_bh->b_data));
		}
	}
#endif
	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
		      ac->ac_b_ex.fe_len);
	if (ext4_has_group_desc_csum(sb) &&
	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
		ext4_free_group_clusters_set(sb, gdp,
					     ext4_free_clusters_after_init(sb,
						ac->ac_b_ex.fe_group, gdp));
	}
	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
	ext4_free_group_clusters_set(sb, gdp, len);
	ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
	ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);

	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
	percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
	/*
	 * Now reduce the dirty block count also. Should not go negative
	 */
	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release all the reserved blocks if non delalloc */
		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
				   reserv_clstrs);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi,
							  ac->ac_b_ex.fe_group);
		atomic64_sub(ac->ac_b_ex.fe_len,
			     &sbi_array_rcu_deref(sbi, s_flex_groups,
						  flex_group)->free_clusters);
	}

	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
	if (err)
		goto out_err;
	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
	brelse(bitmap_bh);
	return err;
}

/*
 * Idempotent helper for ext4 fast commit replay path to set the state of
 * blocks in bitmaps and update counters.
 */
void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
			int len, int state)
{
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_group_desc *gdp;
	struct buffer_head *gdp_bh;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t group;
	ext4_grpblk_t blkoff;
	int i, clen, err;
	int already;

	clen = EXT4_B2C(sbi, len);

	ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
	bitmap_bh = ext4_read_block_bitmap(sb, group);
	if (IS_ERR(bitmap_bh)) {
		err = PTR_ERR(bitmap_bh);
		bitmap_bh = NULL;
		goto out_err;
	}

	err = -EIO;
	gdp = ext4_get_group_desc(sb, group, &gdp_bh);
	if (!gdp)
		goto out_err;

	ext4_lock_group(sb, group);
	already = 0;
	for (i = 0; i < clen; i++)
		if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
			already++;

	if (state)
		ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
	else
		mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
	if (ext4_has_group_desc_csum(sb) &&
	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
		ext4_free_group_clusters_set(sb, gdp,
					     ext4_free_clusters_after_init(sb,
						group, gdp));
	}
	if (state)
		clen = ext4_free_group_clusters(sb, gdp) - clen + already;
	else
		clen = ext4_free_group_clusters(sb, gdp) + clen - already;

	ext4_free_group_clusters_set(sb, gdp, clen);
	ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
	ext4_group_desc_csum_set(sb, group, gdp);

	ext4_unlock_group(sb, group);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, group);

		atomic64_sub(len,
			     &sbi_array_rcu_deref(sbi, s_flex_groups,
						  flex_group)->free_clusters);
	}

	err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
	if (err)
		goto out_err;
	sync_dirty_buffer(bitmap_bh);
	err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
	sync_dirty_buffer(gdp_bh);

out_err:
	brelse(bitmap_bh);
}

/*
 * here we normalize request for locality group
 * Group requests are normalized to s_mb_group_prealloc, which goes to
 * s_stripe if we set the same via mount option.
 * s_mb_group_prealloc can be configured via
 * /sys/fs/ext4/<partition>/mb_group_prealloc
 *
 * XXX: should we try to preallocate more than the group has now?
 */
static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_locality_group *lg = ac->ac_lg;

	BUG_ON(lg == NULL);
	ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
	mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
}

/*
 * Normalization means making request better in terms of
 * size and alignment
 */
static noinline_for_stack void
ext4_mb_normalize_request(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int bsbits, max;
	ext4_lblk_t end;
	loff_t size, start_off;
	loff_t orig_size __maybe_unused;
	ext4_lblk_t start;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_prealloc_space *pa;

	/* do normalize only for data requests, metadata requests
	   do not need preallocation */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	/* sometimes the caller may want exact blocks */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	/* caller may indicate that preallocation isn't
	 * required (it's a tail, for example) */
	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
		return;

	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
		ext4_mb_normalize_group_request(ac);
		return;
	}

	bsbits = ac->ac_sb->s_blocksize_bits;

	/* first, let's learn the actual file size
	 * assuming the current request is allocated */
	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
	size = size << bsbits;
	if (size < i_size_read(ac->ac_inode))
		size = i_size_read(ac->ac_inode);
	orig_size = size;

	/* max size of free chunks */
	max = 2 << bsbits;

#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
		(req <= (size) || max <= (chunk_size))

	/* first, try to predict filesize */
	/* XXX: should this table be tunable? */
	start_off = 0;
	if (size <= 16 * 1024) {
		size = 16 * 1024;
	} else if (size <= 32 * 1024) {
		size = 32 * 1024;
	} else if (size <= 64 * 1024) {
		size = 64 * 1024;
	} else if (size <= 128 * 1024) {
		size = 128 * 1024;
	} else if (size <= 256 * 1024) {
		size = 256 * 1024;
	} else if (size <= 512 * 1024) {
		size = 512 * 1024;
	} else if (size <= 1024 * 1024) {
		size = 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
						(21 - bsbits)) << 21;
		size = 2 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(22 - bsbits)) << 22;
		size = 4 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
					(8<<20)>>bsbits, max, 8 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(23 - bsbits)) << 23;
		size = 8 * 1024 * 1024;
	} else {
		start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
		size	  = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
					      ac->ac_o_ex.fe_len) << bsbits;
	}
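
	/*
	 * Example (illustrative): a 40k write into an empty file on a 4k
	 * block filesystem gives size == 40960 above, which the table
	 * rounds up to the 64k bucket with start_off == 0; after the
	 * shifts below, the goal request becomes 16 blocks at logical 0.
	 */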
	size = size >> bsbits;
	start = start_off >> bsbits;

	/* don't cover already allocated blocks in selected range */
	if (ar->pleft && start <= ar->lleft) {
		size -= ar->lleft + 1 - start;
		start = ar->lleft + 1;
	}
	if (ar->pright && start + size - 1 >= ar->lright)
		size -= start + size - ar->lright;

	/*
	 * Trim allocation request for filesystems with artificially small
	 * groups.
	 */
	if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
		size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);

	end = start + size;

	/* check we don't cross already preallocated blocks */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		ext4_lblk_t pa_end;

		if (pa->pa_deleted)
			continue;
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}

		pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
						  pa->pa_len);

		/* PA must not overlap original request */
		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
			ac->ac_o_ex.fe_logical < pa->pa_lstart));

		/* skip PAs this normalized request doesn't overlap with */
		if (pa->pa_lstart >= end || pa_end <= start) {
			spin_unlock(&pa->pa_lock);
			continue;
		}
		BUG_ON(pa->pa_lstart <= start && pa_end >= end);

		/* adjust start or end to be adjacent to this pa */
		if (pa_end <= ac->ac_o_ex.fe_logical) {
			BUG_ON(pa_end < start);
			start = pa_end;
		} else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
			BUG_ON(pa->pa_lstart > end);
			end = pa->pa_lstart;
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();
	size = end - start;

	/* XXX: extra loop to check we really don't overlap preallocations */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		ext4_lblk_t pa_end;

		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0) {
			pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
							  pa->pa_len);
			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();

	if (start + size <= ac->ac_o_ex.fe_logical &&
			start > ac->ac_o_ex.fe_logical) {
		ext4_msg(ac->ac_sb, KERN_ERR,
			 "start %lu, size %lu, fe_logical %lu",
			 (unsigned long) start, (unsigned long) size,
			 (unsigned long) ac->ac_o_ex.fe_logical);
		BUG();
	}
	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));

	/* now prepare goal request */

	/* XXX: is it better to align blocks WRT to logical
	 * placement or satisfy big request as is */
	ac->ac_g_ex.fe_logical = start;
	ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);

	/* define goal start in order to merge */
	if (ar->pright && (ar->lright == (start + size))) {
		/* merge to the right */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
						&ac->ac_f_ex.fe_group,
						&ac->ac_f_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}
	if (ar->pleft && (ar->lleft + 1 == start)) {
		/* merge to the left */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
						&ac->ac_f_ex.fe_group,
						&ac->ac_f_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}

	mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
		 orig_size, start);
}

static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);

	if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) {
		atomic_inc(&sbi->s_bal_reqs);
		atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
		if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
			atomic_inc(&sbi->s_bal_success);
		atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
		atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned);
		if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
				ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
			atomic_inc(&sbi->s_bal_goals);
		if (ac->ac_found > sbi->s_mb_max_to_scan)
			atomic_inc(&sbi->s_bal_breaks);
	}

	if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
		trace_ext4_mballoc_alloc(ac);
	else
		trace_ext4_mballoc_prealloc(ac);
}

/*
 * Discard blocks that were picked by the allocator but will not be
 * used: hand them back to the buddy cache (no pa), or back to the
 * inode pa they came from.
 */
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
	struct ext4_prealloc_space *pa = ac->ac_pa;
	struct ext4_buddy e4b;
	int err;

	if (pa == NULL) {
		if (ac->ac_f_ex.fe_len == 0)
			return;
		err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
		if (err) {
			/*
			 * This should never happen since we pin the
			 * pages in the ext4_allocation_context so
			 * ext4_mb_load_buddy() should never fail.
			 */
			WARN(1, "mb_load_buddy failed (%d)", err);
			return;
		}
		ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
		mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
			       ac->ac_f_ex.fe_len);
		ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
		ext4_mb_unload_buddy(&e4b);
		return;
	}
	if (pa->pa_type == MB_INODE_PA)
		pa->pa_free += ac->ac_b_ex.fe_len;
}

/*
 * use blocks preallocated to inode
 */
static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	ext4_fsblk_t start;
	ext4_fsblk_t end;
	int len;

	/* found preallocated blocks, use them */
	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
	end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
		  start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
	len = EXT4_NUM_B2C(sbi, end - start);
	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
					&ac->ac_b_ex.fe_start);
	ac->ac_b_ex.fe_len = len;
	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_pa = pa;

	BUG_ON(start < pa->pa_pstart);
	BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
	BUG_ON(pa->pa_free < len);
	pa->pa_free -= len;

	mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
}
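
/*
 * Worked example for ext4_mb_use_inode_pa() (illustrative, assuming a
 * 1:1 cluster-to-block ratio): a pa with pa_lstart 100, pa_pstart 5000
 * and pa_len 16, serving a 4-block request at logical 110, gives
 * start = 5010 and end = min(5016, 5014) = 5014, so the context gets 4
 * blocks at physical 5010 and pa_free drops from 16 to 12.
 */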

/*
 * use blocks preallocated to locality group
 */
static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	unsigned int len = ac->ac_o_ex.fe_len;

	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
					&ac->ac_b_ex.fe_group,
					&ac->ac_b_ex.fe_start);
	ac->ac_b_ex.fe_len = len;
	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_pa = pa;

	/* we don't correct pa_pstart or pa_plen here to avoid
	 * possible race when the group is being loaded concurrently
	 * instead we correct pa later, after blocks are marked
	 * in on-disk bitmap -- see ext4_mb_release_context()
	 * Other CPUs are prevented from allocating from this pa by lg_mutex
	 */
	mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
		 pa->pa_lstart-len, len, pa);
}

/*
 * Return the prealloc space that has minimal distance
 * from the goal block. @cpa is the prealloc
 * space that is having currently known minimal distance
 * from the goal block.
 */
static struct ext4_prealloc_space *
ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
			struct ext4_prealloc_space *pa,
			struct ext4_prealloc_space *cpa)
{
	ext4_fsblk_t cur_distance, new_distance;

	if (cpa == NULL) {
		atomic_inc(&pa->pa_count);
		return pa;
	}
	cur_distance = abs(goal_block - cpa->pa_pstart);
	new_distance = abs(goal_block - pa->pa_pstart);

	if (cur_distance <= new_distance)
		return cpa;

	/* drop the previous reference */
	atomic_dec(&cpa->pa_count);
	atomic_inc(&pa->pa_count);
	return pa;
}

/*
 * search goal blocks in preallocated space
 */
static noinline_for_stack bool
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int order, i;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *pa, *cpa = NULL;
	ext4_fsblk_t goal_block;

	/* only data can be preallocated */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return false;

	/* first, try per-file preallocation */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {

		/* all fields in this condition don't change,
		 * so we can skip locking for them */
		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
		    ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
					       EXT4_C2B(sbi, pa->pa_len)))
			continue;

		/* non-extent files can't have physical blocks past 2^32 */
		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
		    (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
		     EXT4_MAX_BLOCK_FILE_PHYS))
			continue;

		/* found preallocated blocks, use them */
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0 && pa->pa_free) {
			atomic_inc(&pa->pa_count);
			ext4_mb_use_inode_pa(ac, pa);
			spin_unlock(&pa->pa_lock);
			ac->ac_criteria = 10;
			rcu_read_unlock();
			return true;
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();

	/* can we use group allocation? */
	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
		return false;

	/* inode may have no locality group for some reason */
	lg = ac->ac_lg;
	if (lg == NULL)
		return false;
	order = fls(ac->ac_o_ex.fe_len) - 1;
	if (order > PREALLOC_TB_SIZE - 1)
		/* The max size of hash table is PREALLOC_TB_SIZE */
		order = PREALLOC_TB_SIZE - 1;

	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
	/*
	 * search for the prealloc space that is having
	 * minimal distance from the goal block.
	 */
	for (i = order; i < PREALLOC_TB_SIZE; i++) {
		rcu_read_lock();
		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
					pa_inode_list) {
			spin_lock(&pa->pa_lock);
			if (pa->pa_deleted == 0 &&
					pa->pa_free >= ac->ac_o_ex.fe_len) {

				cpa = ext4_mb_check_group_pa(goal_block,
								pa, cpa);
			}
			spin_unlock(&pa->pa_lock);
		}
		rcu_read_unlock();
	}
	if (cpa) {
		ext4_mb_use_group_pa(ac, cpa);
		ac->ac_criteria = 20;
		return true;
	}
	return false;
}

/*
 * the function goes through all blocks freed in the group
 * but not yet committed and marks them used in the in-core bitmap.
 * buddy must be generated from this bitmap
 * Need to be called with the ext4 group lock held
 */
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group)
{
	struct rb_node *n;
	struct ext4_group_info *grp;
	struct ext4_free_data *entry;

	grp = ext4_get_group_info(sb, group);
	n = rb_first(&(grp->bb_free_root));

	while (n) {
		entry = rb_entry(n, struct ext4_free_data, efd_node);
		ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
		n = rb_next(n);
	}
	return;
}

/*
 * the function goes through all preallocations in this group and marks them
 * used in the in-core bitmap. buddy must be generated from this bitmap
 * Need to be called with ext4 group lock held
 */
static noinline_for_stack
void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_prealloc_space *pa;
	struct list_head *cur;
	ext4_group_t groupnr;
	ext4_grpblk_t start;
	int preallocated = 0;
	int len;

	/* all forms of preallocation discard first load the group,
	 * so the only competing code is preallocation use.
	 * we don't need any locking here
	 * notice we do NOT ignore preallocations with pa_deleted
	 * otherwise we could leave used blocks available for
	 * allocation in buddy when concurrent ext4_mb_put_pa()
	 * is killing that pa
	 */
	list_for_each(cur, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		spin_lock(&pa->pa_lock);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart,
					     &groupnr, &start);
		len = pa->pa_len;
		spin_unlock(&pa->pa_lock);
		if (unlikely(len == 0))
			continue;
		BUG_ON(groupnr != group);
		ext4_set_bits(bitmap, start, len);
		preallocated += len;
	}
	mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
}

static void ext4_mb_mark_pa_deleted(struct super_block *sb,
				    struct ext4_prealloc_space *pa)
{
	struct ext4_inode_info *ei;

	if (pa->pa_deleted) {
		ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
			     pa->pa_type, pa->pa_pstart, pa->pa_lstart,
			     pa->pa_len);
		return;
	}

	pa->pa_deleted = 1;

	if (pa->pa_type == MB_INODE_PA) {
		ei = EXT4_I(pa->pa_inode);
		atomic_dec(&ei->i_prealloc_active);
	}
}

static void ext4_mb_pa_callback(struct rcu_head *head)
{
	struct ext4_prealloc_space *pa;
	pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);

	BUG_ON(atomic_read(&pa->pa_count));
	BUG_ON(pa->pa_deleted == 0);
	kmem_cache_free(ext4_pspace_cachep, pa);
}

/*
 * drops a reference to preallocated space descriptor
 * if this was the last reference and the space is consumed
 */
static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
			struct super_block *sb, struct ext4_prealloc_space *pa)
{
	ext4_group_t grp;
	ext4_fsblk_t grp_blk;

	/* in this short window concurrent discard can set pa_deleted */
	spin_lock(&pa->pa_lock);
	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
		spin_unlock(&pa->pa_lock);
		return;
	}

	if (pa->pa_deleted == 1) {
		spin_unlock(&pa->pa_lock);
		return;
	}

	ext4_mb_mark_pa_deleted(sb, pa);
	spin_unlock(&pa->pa_lock);

	grp_blk = pa->pa_pstart;
	/*
	 * If doing group-based preallocation, pa_pstart may be in the
	 * next group when pa is used up
	 */
	if (pa->pa_type == MB_GROUP_PA)
		grp_blk--;

	grp = ext4_get_group_number(sb, grp_blk);

	/*
	 * possible race:
	 *
	 *  P1 (buddy init)			P2 (regular allocation)
	 *					find block B in PA
	 *  copy on-disk bitmap to buddy
	 *					mark B in on-disk bitmap
	 *					drop PA from group
	 *  mark all PAs in buddy
	 *
	 * thus, P1 initializes buddy with B available. to prevent this
	 * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
	 * against that pair
	 */
	ext4_lock_group(sb, grp);
	list_del(&pa->pa_group_list);
	ext4_unlock_group(sb, grp);

	spin_lock(pa->pa_obj_lock);
	list_del_rcu(&pa->pa_inode_list);
	spin_unlock(pa->pa_obj_lock);

	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}

/*
 * creates new preallocated space for given inode
 */
static noinline_for_stack void
ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;
	struct ext4_inode_info *ei;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
	BUG_ON(ac->ac_pa == NULL);

	pa = ac->ac_pa;

	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
		int winl;
		int wins;
		int win;
		int offs;

		/* we can't allocate as much as the normalizer wants,
		 * so the found space must get a proper lstart
		 * to cover the original request */
		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);

		/* we're limited by the original request in that
		 * the logical block must be covered any way;
		 * winl is the window we can move our chunk within */
		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;

		/* also, we should cover the whole original request */
		wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);

		/* the smaller one defines the real window */
		win = min(winl, wins);

		offs = ac->ac_o_ex.fe_logical %
			EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
		if (offs && offs < win)
			win = offs;

		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
			EXT4_NUM_B2C(sbi, win);
		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
	}
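
	/*
	 * Example (illustrative): goal 16 blocks at logical 0, original
	 * request 4 blocks at logical 10, but only 8 blocks found: winl =
	 * 10, wins = 4, offs = 10 % 8 = 2, so win = 2 and the pa is placed
	 * at logical 8 -- it still covers the original request at 10..13.
	 */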
4626
4627
4628
4629 ac->ac_f_ex = ac->ac_b_ex;
4630
4631 pa->pa_lstart = ac->ac_b_ex.fe_logical;
4632 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4633 pa->pa_len = ac->ac_b_ex.fe_len;
4634 pa->pa_free = pa->pa_len;
4635 spin_lock_init(&pa->pa_lock);
4636 INIT_LIST_HEAD(&pa->pa_inode_list);
4637 INIT_LIST_HEAD(&pa->pa_group_list);
4638 pa->pa_deleted = 0;
4639 pa->pa_type = MB_INODE_PA;
4640
4641 mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4642 pa->pa_len, pa->pa_lstart);
4643 trace_ext4_mb_new_inode_pa(ac, pa);
4644
4645 ext4_mb_use_inode_pa(ac, pa);
4646 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
4647
4648 ei = EXT4_I(ac->ac_inode);
4649 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4650
4651 pa->pa_obj_lock = &ei->i_prealloc_lock;
4652 pa->pa_inode = ac->ac_inode;
4653
4654 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4655
4656 spin_lock(pa->pa_obj_lock);
4657 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
4658 spin_unlock(pa->pa_obj_lock);
4659 atomic_inc(&ei->i_prealloc_active);
4660}
4661
4662
4663
4664
4665static noinline_for_stack void
4666ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
4667{
4668 struct super_block *sb = ac->ac_sb;
4669 struct ext4_locality_group *lg;
4670 struct ext4_prealloc_space *pa;
4671 struct ext4_group_info *grp;
4672
4673
4674 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4675 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4676 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4677 BUG_ON(ac->ac_pa == NULL);
4678
4679 pa = ac->ac_pa;
4680
4681
4682
4683 ac->ac_f_ex = ac->ac_b_ex;
4684
4685 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4686 pa->pa_lstart = pa->pa_pstart;
4687 pa->pa_len = ac->ac_b_ex.fe_len;
4688 pa->pa_free = pa->pa_len;
4689 spin_lock_init(&pa->pa_lock);
4690 INIT_LIST_HEAD(&pa->pa_inode_list);
4691 INIT_LIST_HEAD(&pa->pa_group_list);
4692 pa->pa_deleted = 0;
4693 pa->pa_type = MB_GROUP_PA;
4694
4695 mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4696 pa->pa_len, pa->pa_lstart);
4697 trace_ext4_mb_new_group_pa(ac, pa);
4698
4699 ext4_mb_use_group_pa(ac, pa);
4700 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
4701
4702 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4703 lg = ac->ac_lg;
4704 BUG_ON(lg == NULL);
4705
4706 pa->pa_obj_lock = &lg->lg_prealloc_lock;
4707 pa->pa_inode = NULL;
4708
4709 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4710
4711
4712
4713
4714
4715}
4716
4717static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
4718{
4719 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4720 ext4_mb_new_group_pa(ac);
4721 else
4722 ext4_mb_new_inode_pa(ac);
4723}
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733static noinline_for_stack int
4734ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
4735 struct ext4_prealloc_space *pa)
4736{
4737 struct super_block *sb = e4b->bd_sb;
4738 struct ext4_sb_info *sbi = EXT4_SB(sb);
4739 unsigned int end;
4740 unsigned int next;
4741 ext4_group_t group;
4742 ext4_grpblk_t bit;
4743 unsigned long long grp_blk_start;
4744 int free = 0;
4745
4746 BUG_ON(pa->pa_deleted == 0);
4747 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4748 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
4749 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4750 end = bit + pa->pa_len;
4751
4752 while (bit < end) {
4753 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
4754 if (bit >= end)
4755 break;
4756 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
4757 mb_debug(sb, "free preallocated %u/%u in group %u\n",
4758 (unsigned) ext4_group_first_block_no(sb, group) + bit,
4759 (unsigned) next - bit, (unsigned) group);
4760 free += next - bit;
4761
4762 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
4763 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
4764 EXT4_C2B(sbi, bit)),
4765 next - bit);
4766 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
4767 bit = next + 1;
4768 }
4769 if (free != pa->pa_free) {
4770 ext4_msg(e4b->bd_sb, KERN_CRIT,
4771 "pa %p: logic %lu, phys. %lu, len %d",
4772 pa, (unsigned long) pa->pa_lstart,
4773 (unsigned long) pa->pa_pstart,
4774 pa->pa_len);
4775 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
4776 free, pa->pa_free);
4777
4778
4779
4780
4781 }
4782 atomic_add(free, &sbi->s_mb_discarded);
4783
4784 return 0;
4785}
4786
4787static noinline_for_stack int
4788ext4_mb_release_group_pa(struct ext4_buddy *e4b,
4789 struct ext4_prealloc_space *pa)
4790{
4791 struct super_block *sb = e4b->bd_sb;
4792 ext4_group_t group;
4793 ext4_grpblk_t bit;
4794
4795 trace_ext4_mb_release_group_pa(sb, pa);
4796 BUG_ON(pa->pa_deleted == 0);
4797 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4798 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4799 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
4800 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
4801 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
4802
4803 return 0;
4804}
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815static noinline_for_stack int
4816ext4_mb_discard_group_preallocations(struct super_block *sb,
4817 ext4_group_t group, int needed)
4818{
4819 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4820 struct buffer_head *bitmap_bh = NULL;
4821 struct ext4_prealloc_space *pa, *tmp;
4822 struct list_head list;
4823 struct ext4_buddy e4b;
4824 int err;
4825 int busy = 0;
4826 int free, free_total = 0;
4827
4828 mb_debug(sb, "discard preallocation for group %u\n", group);
4829 if (list_empty(&grp->bb_prealloc_list))
4830 goto out_dbg;
4831
4832 bitmap_bh = ext4_read_block_bitmap(sb, group);
4833 if (IS_ERR(bitmap_bh)) {
4834 err = PTR_ERR(bitmap_bh);
4835 ext4_error_err(sb, -err,
4836 "Error %d reading block bitmap for %u",
4837 err, group);
4838 goto out_dbg;
4839 }
4840
4841 err = ext4_mb_load_buddy(sb, group, &e4b);
4842 if (err) {
4843 ext4_warning(sb, "Error %d loading buddy information for %u",
4844 err, group);
4845 put_bh(bitmap_bh);
4846 goto out_dbg;
4847 }
4848
4849 if (needed == 0)
4850 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
4851
4852 INIT_LIST_HEAD(&list);
4853repeat:
4854 free = 0;
4855 ext4_lock_group(sb, group);
4856 list_for_each_entry_safe(pa, tmp,
4857 &grp->bb_prealloc_list, pa_group_list) {
4858 spin_lock(&pa->pa_lock);
4859 if (atomic_read(&pa->pa_count)) {
4860 spin_unlock(&pa->pa_lock);
4861 busy = 1;
4862 continue;
4863 }
4864 if (pa->pa_deleted) {
4865 spin_unlock(&pa->pa_lock);
4866 continue;
4867 }
4868
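		/* seems this one can be freed ... */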
4870 ext4_mb_mark_pa_deleted(sb, pa);
4871
4872 if (!free)
4873 this_cpu_inc(discard_pa_seq);
4874
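		/* we can trust pa_free ... */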
4876 free += pa->pa_free;
4877
4878 spin_unlock(&pa->pa_lock);
4879
4880 list_del(&pa->pa_group_list);
4881 list_add(&pa->u.pa_tmp_list, &list);
4882 }
4883
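	/* now free all the selected PAs */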
4885 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4886
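		/* remove from object (inode or locality group) */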
4888 spin_lock(pa->pa_obj_lock);
4889 list_del_rcu(&pa->pa_inode_list);
4890 spin_unlock(pa->pa_obj_lock);
4891
4892 if (pa->pa_type == MB_GROUP_PA)
4893 ext4_mb_release_group_pa(&e4b, pa);
4894 else
4895 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4896
4897 list_del(&pa->u.pa_tmp_list);
4898 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4899 }
4900
4901 free_total += free;
4902
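	/* if we still need more blocks and some PAs were busy, retry */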
4904 if (free_total < needed && busy) {
4905 ext4_unlock_group(sb, group);
4906 cond_resched();
4907 busy = 0;
4908 goto repeat;
4909 }
4910 ext4_unlock_group(sb, group);
4911 ext4_mb_unload_buddy(&e4b);
4912 put_bh(bitmap_bh);
4913out_dbg:
4914 mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
4915 free_total, group, grp->bb_free);
4916 return free_total;
4917}
4918
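/*
 * releases all non-used preallocated blocks for given inode
 *
 * It's important to discard preallocations under i_data_sem;
 * we don't have to worry that the inode's preallocation list
 * can change under us, since the caller is expected to hold it.
 */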
4928void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
4929{
4930 struct ext4_inode_info *ei = EXT4_I(inode);
4931 struct super_block *sb = inode->i_sb;
4932 struct buffer_head *bitmap_bh = NULL;
4933 struct ext4_prealloc_space *pa, *tmp;
4934 ext4_group_t group = 0;
4935 struct list_head list;
4936 struct ext4_buddy e4b;
4937 int err;
4938
4939 if (!S_ISREG(inode->i_mode)) {
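		/* only regular files carry preallocated blocks */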
4941 return;
4942 }
4943
4944 if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
4945 return;
4946
4947 mb_debug(sb, "discard preallocation for inode %lu\n",
4948 inode->i_ino);
4949 trace_ext4_discard_preallocations(inode,
4950 atomic_read(&ei->i_prealloc_active), needed);
4951
4952 INIT_LIST_HEAD(&list);
4953
4954 if (needed == 0)
4955 needed = UINT_MAX;
4956
4957repeat:
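	/* first, collect all pa's in the inode */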
4959 spin_lock(&ei->i_prealloc_lock);
4960 while (!list_empty(&ei->i_prealloc_list) && needed) {
4961 pa = list_entry(ei->i_prealloc_list.prev,
4962 struct ext4_prealloc_space, pa_inode_list);
4963 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4964 spin_lock(&pa->pa_lock);
4965 if (atomic_read(&pa->pa_count)) {
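			/* this shouldn't happen often - nobody should
			 * use preallocation while we're discarding it */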
4968 spin_unlock(&pa->pa_lock);
4969 spin_unlock(&ei->i_prealloc_lock);
4970 ext4_msg(sb, KERN_ERR,
4971 "uh-oh! used pa while discarding");
4972 WARN_ON(1);
4973 schedule_timeout_uninterruptible(HZ);
4974 goto repeat;
4975
4976 }
4977 if (pa->pa_deleted == 0) {
4978 ext4_mb_mark_pa_deleted(sb, pa);
4979 spin_unlock(&pa->pa_lock);
4980 list_del_rcu(&pa->pa_inode_list);
4981 list_add(&pa->u.pa_tmp_list, &list);
4982 needed--;
4983 continue;
4984 }
4985
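		/* someone is deleting pa right now */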
4987 spin_unlock(&pa->pa_lock);
4988 spin_unlock(&ei->i_prealloc_lock);
4989
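		/* we have to wait here because pa_deleted
		 * doesn't mean pa is already unlinked from
		 * the list. as we might be called from
		 * ->clear_inode() the inode will get freed
		 * and concurrent thread which is unlinking
		 * pa from inode's list may access already
		 * freed memory, bad-bad-bad */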
5002 schedule_timeout_uninterruptible(HZ);
5003 goto repeat;
5004 }
5005 spin_unlock(&ei->i_prealloc_lock);
5006
5007 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
5008 BUG_ON(pa->pa_type != MB_INODE_PA);
5009 group = ext4_get_group_number(sb, pa->pa_pstart);
5010
5011 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
5012 GFP_NOFS|__GFP_NOFAIL);
5013 if (err) {
5014 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
5015 err, group);
5016 continue;
5017 }
5018
5019 bitmap_bh = ext4_read_block_bitmap(sb, group);
5020 if (IS_ERR(bitmap_bh)) {
5021 err = PTR_ERR(bitmap_bh);
5022 ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
5023 err, group);
5024 ext4_mb_unload_buddy(&e4b);
5025 continue;
5026 }
5027
5028 ext4_lock_group(sb, group);
5029 list_del(&pa->pa_group_list);
5030 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
5031 ext4_unlock_group(sb, group);
5032
5033 ext4_mb_unload_buddy(&e4b);
5034 put_bh(bitmap_bh);
5035
5036 list_del(&pa->u.pa_tmp_list);
5037 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
5038 }
5039}
5040
5041static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
5042{
5043 struct ext4_prealloc_space *pa;
5044
5045 BUG_ON(ext4_pspace_cachep == NULL);
5046 pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
5047 if (!pa)
5048 return -ENOMEM;
5049 atomic_set(&pa->pa_count, 1);
5050 ac->ac_pa = pa;
5051 return 0;
5052}
5053
5054static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
5055{
5056 struct ext4_prealloc_space *pa = ac->ac_pa;
5057
5058 BUG_ON(!pa);
5059 ac->ac_pa = NULL;
5060 WARN_ON(!atomic_dec_and_test(&pa->pa_count));
5061 kmem_cache_free(ext4_pspace_cachep, pa);
5062}
5063
5064#ifdef CONFIG_EXT4_DEBUG
5065static inline void ext4_mb_show_pa(struct super_block *sb)
5066{
5067 ext4_group_t i, ngroups;
5068
5069 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5070 return;
5071
5072 ngroups = ext4_get_groups_count(sb);
5073 mb_debug(sb, "groups: ");
5074 for (i = 0; i < ngroups; i++) {
5075 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
5076 struct ext4_prealloc_space *pa;
5077 ext4_grpblk_t start;
5078 struct list_head *cur;
5079 ext4_lock_group(sb, i);
5080 list_for_each(cur, &grp->bb_prealloc_list) {
5081 pa = list_entry(cur, struct ext4_prealloc_space,
5082 pa_group_list);
5083 spin_lock(&pa->pa_lock);
5084 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
5085 NULL, &start);
5086 spin_unlock(&pa->pa_lock);
5087 mb_debug(sb, "PA:%u:%d:%d\n", i, start,
5088 pa->pa_len);
5089 }
5090 ext4_unlock_group(sb, i);
5091 mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
5092 grp->bb_fragments);
5093 }
5094}
5095
5096static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5097{
5098 struct super_block *sb = ac->ac_sb;
5099
5100 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5101 return;
5102
5103 mb_debug(sb, "Can't allocate:"
5104 " Allocation context details:");
5105 mb_debug(sb, "status %u flags 0x%x",
5106 ac->ac_status, ac->ac_flags);
5107 mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
5108 "goal %lu/%lu/%lu@%lu, "
5109 "best %lu/%lu/%lu@%lu cr %d",
5110 (unsigned long)ac->ac_o_ex.fe_group,
5111 (unsigned long)ac->ac_o_ex.fe_start,
5112 (unsigned long)ac->ac_o_ex.fe_len,
5113 (unsigned long)ac->ac_o_ex.fe_logical,
5114 (unsigned long)ac->ac_g_ex.fe_group,
5115 (unsigned long)ac->ac_g_ex.fe_start,
5116 (unsigned long)ac->ac_g_ex.fe_len,
5117 (unsigned long)ac->ac_g_ex.fe_logical,
5118 (unsigned long)ac->ac_b_ex.fe_group,
5119 (unsigned long)ac->ac_b_ex.fe_start,
5120 (unsigned long)ac->ac_b_ex.fe_len,
5121 (unsigned long)ac->ac_b_ex.fe_logical,
5122 (int)ac->ac_criteria);
5123 mb_debug(sb, "%u found", ac->ac_found);
5124 ext4_mb_show_pa(sb);
5125}
5126#else
5127static inline void ext4_mb_show_pa(struct super_block *sb)
5128{
5129 return;
5130}
5131static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5132{
5133 ext4_mb_show_pa(ac->ac_sb);
5134 return;
5135}
5136#endif
5137
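/*
 * We use locality group preallocation for small size files. The size of the
 * file is determined by the current size or the resulting size after
 * allocation, whichever is larger.
 *
 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
 */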
5145static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
5146{
5147 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5148 int bsbits = ac->ac_sb->s_blocksize_bits;
5149 loff_t size, isize;
5150
5151 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
5152 return;
5153
5154 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
5155 return;
5156
5157 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
5158 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
5159 >> bsbits;
5160
5161 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
5162 !inode_is_open_for_write(ac->ac_inode)) {
5163 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
5164 return;
5165 }
5166
5167 if (sbi->s_mb_group_prealloc <= 0) {
5168 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5169 return;
5170 }
5171
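	/* don't use group allocation for large files */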
5173 size = max(size, isize);
5174 if (size > sbi->s_mb_stream_request) {
5175 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5176 return;
5177 }
5178
5179 BUG_ON(ac->ac_lg != NULL);
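	/*
	 * locality group prealloc space is per cpu. The reason for having
	 * per cpu locality group is to reduce the contention between block
	 * requests from multiple CPUs.
	 */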
5185 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
5186
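	/* we're going to use group allocation */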
5188 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
5189
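	/* serialize all allocations in the group */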
5191 mutex_lock(&ac->ac_lg->lg_mutex);
5192}
5193
5194static noinline_for_stack int
5195ext4_mb_initialize_context(struct ext4_allocation_context *ac,
5196 struct ext4_allocation_request *ar)
5197{
5198 struct super_block *sb = ar->inode->i_sb;
5199 struct ext4_sb_info *sbi = EXT4_SB(sb);
5200 struct ext4_super_block *es = sbi->s_es;
5201 ext4_group_t group;
5202 unsigned int len;
5203 ext4_fsblk_t goal;
5204 ext4_grpblk_t block;
5205
5206
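	/* we can't allocate > group size */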
5207 len = ar->len;
5208
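	/* just a dirty hack to filter too big requests */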
5210 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
5211 len = EXT4_CLUSTERS_PER_GROUP(sb);
5212
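	/* start searching from the goal */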
5214 goal = ar->goal;
5215 if (goal < le32_to_cpu(es->s_first_data_block) ||
5216 goal >= ext4_blocks_count(es))
5217 goal = le32_to_cpu(es->s_first_data_block);
5218 ext4_get_group_no_and_offset(sb, goal, &group, &block);
5219
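	/* set up allocation goals */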
5221 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
5222 ac->ac_status = AC_STATUS_CONTINUE;
5223 ac->ac_sb = sb;
5224 ac->ac_inode = ar->inode;
5225 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
5226 ac->ac_o_ex.fe_group = group;
5227 ac->ac_o_ex.fe_start = block;
5228 ac->ac_o_ex.fe_len = len;
5229 ac->ac_g_ex = ac->ac_o_ex;
5230 ac->ac_flags = ar->flags;
5231
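	/* we have to define context: we'll work with a file or
	 * locality group. this is a policy, actually */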
5234 ext4_mb_group_or_file(ac);
5235
5236 mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
5237 "left: %u/%u, right %u/%u to %swritable\n",
5238 (unsigned) ar->len, (unsigned) ar->logical,
5239 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
5240 (unsigned) ar->lleft, (unsigned) ar->pleft,
5241 (unsigned) ar->lright, (unsigned) ar->pright,
5242 inode_is_open_for_write(ar->inode) ? "" : "non-");
5243 return 0;
5244
5245}
5246
5247static noinline_for_stack void
5248ext4_mb_discard_lg_preallocations(struct super_block *sb,
5249 struct ext4_locality_group *lg,
5250 int order, int total_entries)
5251{
5252 ext4_group_t group = 0;
5253 struct ext4_buddy e4b;
5254 struct list_head discard_list;
5255 struct ext4_prealloc_space *pa, *tmp;
5256
5257 mb_debug(sb, "discard locality group preallocation\n");
5258
5259 INIT_LIST_HEAD(&discard_list);
5260
5261 spin_lock(&lg->lg_prealloc_lock);
5262 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
5263 pa_inode_list,
5264 lockdep_is_held(&lg->lg_prealloc_lock)) {
5265 spin_lock(&pa->pa_lock);
5266 if (atomic_read(&pa->pa_count)) {
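			/*
			 * This is the pa that we just used
			 * for block allocation. So don't
			 * free that
			 */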
5272 spin_unlock(&pa->pa_lock);
5273 continue;
5274 }
5275 if (pa->pa_deleted) {
5276 spin_unlock(&pa->pa_lock);
5277 continue;
5278 }
5279
5280 BUG_ON(pa->pa_type != MB_GROUP_PA);
5281
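		/* seems this one can be freed ... */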
5283 ext4_mb_mark_pa_deleted(sb, pa);
5284 spin_unlock(&pa->pa_lock);
5285
5286 list_del_rcu(&pa->pa_inode_list);
5287 list_add(&pa->u.pa_tmp_list, &discard_list);
5288
5289 total_entries--;
5290 if (total_entries <= 5) {
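			/*
			 * we want to keep only 5 entries
			 * allowing it to grow to 8. This
			 * makes sure lg_prealloc_list has
			 * room for only 8 entries.
			 */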
5297 break;
5298 }
5299 }
5300 spin_unlock(&lg->lg_prealloc_lock);
5301
5302 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
5303 int err;
5304
5305 group = ext4_get_group_number(sb, pa->pa_pstart);
5306 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
5307 GFP_NOFS|__GFP_NOFAIL);
5308 if (err) {
5309 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
5310 err, group);
5311 continue;
5312 }
5313 ext4_lock_group(sb, group);
5314 list_del(&pa->pa_group_list);
5315 ext4_mb_release_group_pa(&e4b, pa);
5316 ext4_unlock_group(sb, group);
5317
5318 ext4_mb_unload_buddy(&e4b);
5319 list_del(&pa->u.pa_tmp_list);
5320 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
5321 }
5322}
5323
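/*
 * We have incremented pa_count. So it cannot be freed at this
 * point. Also we hold lg_mutex. So no parallel allocation is
 * possible from this lg. That means pa_free cannot be updated.
 *
 * A parallel ext4_mb_discard_group_preallocations is possible,
 * which can cause the lg_prealloc_list to be updated.
 */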
5333static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
5334{
5335 int order, added = 0, lg_prealloc_count = 1;
5336 struct super_block *sb = ac->ac_sb;
5337 struct ext4_locality_group *lg = ac->ac_lg;
5338 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
5339
5340 order = fls(pa->pa_free) - 1;
5341 if (order > PREALLOC_TB_SIZE - 1)
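		/* The max size of hash table is PREALLOC_TB_SIZE */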
5343 order = PREALLOC_TB_SIZE - 1;
5344
5345 spin_lock(&lg->lg_prealloc_lock);
5346 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
5347 pa_inode_list,
5348 lockdep_is_held(&lg->lg_prealloc_lock)) {
5349 spin_lock(&tmp_pa->pa_lock);
5350 if (tmp_pa->pa_deleted) {
5351 spin_unlock(&tmp_pa->pa_lock);
5352 continue;
5353 }
5354 if (!added && pa->pa_free < tmp_pa->pa_free) {
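			/* Add to the tail of the previous entry */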
5356 list_add_tail_rcu(&pa->pa_inode_list,
5357 &tmp_pa->pa_inode_list);
5358 added = 1;
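			/*
			 * we want to count the total
			 * number of entries in the list
			 */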
5363 }
5364 spin_unlock(&tmp_pa->pa_lock);
5365 lg_prealloc_count++;
5366 }
5367 if (!added)
5368 list_add_tail_rcu(&pa->pa_inode_list,
5369 &lg->lg_prealloc_list[order]);
5370 spin_unlock(&lg->lg_prealloc_lock);
5371
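	/* Now trim the list to be not more than 8 elements */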
5373 if (lg_prealloc_count > 8) {
5374 ext4_mb_discard_lg_preallocations(sb, lg,
5375 order, lg_prealloc_count);
5376 return;
5377 }
	return;
5379}
5380
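/*
 * if per-inode prealloc list is too long, trim some PA
 */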
5384static void ext4_mb_trim_inode_pa(struct inode *inode)
5385{
5386 struct ext4_inode_info *ei = EXT4_I(inode);
5387 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5388 int count, delta;
5389
5390 count = atomic_read(&ei->i_prealloc_active);
5391 delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
5392 if (count > sbi->s_mb_max_inode_prealloc + delta) {
5393 count -= sbi->s_mb_max_inode_prealloc;
5394 ext4_discard_preallocations(inode, count);
5395 }
5396}
5397
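/*
 * release all resources used in allocation
 */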
5401static int ext4_mb_release_context(struct ext4_allocation_context *ac)
5402{
5403 struct inode *inode = ac->ac_inode;
5404 struct ext4_inode_info *ei = EXT4_I(inode);
5405 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5406 struct ext4_prealloc_space *pa = ac->ac_pa;
5407 if (pa) {
5408 if (pa->pa_type == MB_GROUP_PA) {
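			/* see comment in ext4_mb_use_group_pa() */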
5410 spin_lock(&pa->pa_lock);
5411 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5412 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5413 pa->pa_free -= ac->ac_b_ex.fe_len;
5414 pa->pa_len -= ac->ac_b_ex.fe_len;
5415 spin_unlock(&pa->pa_lock);
5416
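			/*
			 * We want to add the pa to the right bucket.
			 * Remove it from the list and while adding
			 * make sure the list to which we are adding
			 * doesn't grow big.
			 */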
5423 if (likely(pa->pa_free)) {
5424 spin_lock(pa->pa_obj_lock);
5425 list_del_rcu(&pa->pa_inode_list);
5426 spin_unlock(pa->pa_obj_lock);
5427 ext4_mb_add_n_trim(ac);
5428 }
5429 }
5430
5431 if (pa->pa_type == MB_INODE_PA) {
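			/*
			 * treat per-inode prealloc list as a lru list, then try
			 * to trim the least recently used PA.
			 */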
5436 spin_lock(pa->pa_obj_lock);
5437 list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
5438 spin_unlock(pa->pa_obj_lock);
5439 }
5440
5441 ext4_mb_put_pa(ac, ac->ac_sb, pa);
5442 }
5443 if (ac->ac_bitmap_page)
5444 put_page(ac->ac_bitmap_page);
5445 if (ac->ac_buddy_page)
5446 put_page(ac->ac_buddy_page);
5447 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
5448 mutex_unlock(&ac->ac_lg->lg_mutex);
5449 ext4_mb_collect_stats(ac);
5450 ext4_mb_trim_inode_pa(inode);
5451 return 0;
5452}
5453
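/*
 * Walk the block groups and discard their preallocations, until at
 * least 'needed' clusters have been freed or all groups were scanned.
 */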
5454static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
5455{
5456 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
5457 int ret;
5458 int freed = 0;
5459
5460 trace_ext4_mb_discard_preallocations(sb, needed);
5461 for (i = 0; i < ngroups && needed > 0; i++) {
5462 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
5463 freed += ret;
5464 needed -= ret;
5465 }
5466
5467 return freed;
5468}
5469
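/*
 * Decide whether an ENOSPC allocation should be retried: retry if we
 * freed anything, or if some other context discarded preallocations
 * concurrently (the summed per-cpu discard_pa_seq changed). The first
 * such retry also turns on EXT4_MB_STRICT_CHECK.
 */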
5470static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
5471 struct ext4_allocation_context *ac, u64 *seq)
5472{
5473 int freed;
5474 u64 seq_retry = 0;
5475 bool ret = false;
5476
5477 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
5478 if (freed) {
5479 ret = true;
5480 goto out_dbg;
5481 }
5482 seq_retry = ext4_get_discard_pa_seq_sum();
5483 if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
5484 ac->ac_flags |= EXT4_MB_STRICT_CHECK;
5485 *seq = seq_retry;
5486 ret = true;
5487 }
5488
5489out_dbg:
5490 mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
5491 return ret;
5492}
5493
5494static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5495 struct ext4_allocation_request *ar, int *errp);
5496
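/*
 * Main entry point into mballoc to allocate blocks:
 * it tries to use preallocation first, then falls back
 * to usual allocation.
 */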
5502ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
5503 struct ext4_allocation_request *ar, int *errp)
5504{
5505 struct ext4_allocation_context *ac = NULL;
5506 struct ext4_sb_info *sbi;
5507 struct super_block *sb;
5508 ext4_fsblk_t block = 0;
5509 unsigned int inquota = 0;
5510 unsigned int reserv_clstrs = 0;
5511 u64 seq;
5512
5513 might_sleep();
5514 sb = ar->inode->i_sb;
5515 sbi = EXT4_SB(sb);
5516
5517 trace_ext4_request_blocks(ar);
5518 if (sbi->s_mount_state & EXT4_FC_REPLAY)
5519 return ext4_mb_new_blocks_simple(handle, ar, errp);
5520
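	/* Allow to use superuser reservation for quota file */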
5522 if (ext4_is_quota_file(ar->inode))
5523 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
5524
5525 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
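		/* Without delayed allocation we need to verify
		 * there is enough free blocks to do block allocation
		 * and verify allocation doesn't exceed the quota limits.
		 */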
5530 while (ar->len &&
5531 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
5532
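			/* let others free the space */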
5534 cond_resched();
5535 ar->len = ar->len >> 1;
5536 }
5537 if (!ar->len) {
5538 ext4_mb_show_pa(sb);
5539 *errp = -ENOSPC;
5540 return 0;
5541 }
5542 reserv_clstrs = ar->len;
5543 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
5544 dquot_alloc_block_nofail(ar->inode,
5545 EXT4_C2B(sbi, ar->len));
5546 } else {
5547 while (ar->len &&
5548 dquot_alloc_block(ar->inode,
5549 EXT4_C2B(sbi, ar->len))) {
5550
5551 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
5552 ar->len--;
5553 }
5554 }
5555 inquota = ar->len;
5556 if (ar->len == 0) {
5557 *errp = -EDQUOT;
5558 goto out;
5559 }
5560 }
5561
5562 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
5563 if (!ac) {
5564 ar->len = 0;
5565 *errp = -ENOMEM;
5566 goto out;
5567 }
5568
5569 *errp = ext4_mb_initialize_context(ac, ar);
5570 if (*errp) {
5571 ar->len = 0;
5572 goto out;
5573 }
5574
5575 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
5576 seq = this_cpu_read(discard_pa_seq);
5577 if (!ext4_mb_use_preallocated(ac)) {
5578 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
5579 ext4_mb_normalize_request(ac, ar);
5580
5581 *errp = ext4_mb_pa_alloc(ac);
5582 if (*errp)
5583 goto errout;
5584repeat:
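		/* allocate space in core */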
5586 *errp = ext4_mb_regular_allocator(ac);
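		/*
		 * pa allocated above is added to grp->bb_prealloc_list only
		 * when we were able to allocate some block i.e. when
		 * ac->ac_status == AC_STATUS_FOUND.
		 * An error from above means ac->ac_pa is still NULL, or we
		 * were able to allocate blocks but the pa wasn't added to
		 * grp->bb_prealloc_list; so we have to free it here.
		 */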
5594 if (*errp) {
5595 ext4_mb_pa_free(ac);
5596 ext4_discard_allocated_blocks(ac);
5597 goto errout;
5598 }
5599 if (ac->ac_status == AC_STATUS_FOUND &&
5600 ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
5601 ext4_mb_pa_free(ac);
5602 }
5603 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
5604 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
5605 if (*errp) {
5606 ext4_discard_allocated_blocks(ac);
5607 goto errout;
5608 } else {
5609 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
5610 ar->len = ac->ac_b_ex.fe_len;
5611 }
5612 } else {
5613 if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
5614 goto repeat;
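		/*
		 * If block allocation fails then the pa allocated above
		 * needs to be freed here itself.
		 */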
5619 ext4_mb_pa_free(ac);
5620 *errp = -ENOSPC;
5621 }
5622
5623errout:
5624 if (*errp) {
5625 ac->ac_b_ex.fe_len = 0;
5626 ar->len = 0;
5627 ext4_mb_show_ac(ac);
5628 }
5629 ext4_mb_release_context(ac);
5630out:
5631 if (ac)
5632 kmem_cache_free(ext4_ac_cachep, ac);
5633 if (inquota && ar->len < inquota)
5634 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
5635 if (!ar->len) {
5636 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
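			/* release all the reserved blocks if non delalloc */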
5638 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
5639 reserv_clstrs);
5640 }
5641
5642 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
5643
5644 return block;
5645}
5646
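/*
 * We can merge two free data extents only if the physical blocks
 * are contiguous, AND the extents were freed by the same transaction,
 * AND the blocks are associated with the same group.
 */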
5652static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
5653 struct ext4_free_data *entry,
5654 struct ext4_free_data *new_entry,
5655 struct rb_root *entry_rb_root)
5656{
5657 if ((entry->efd_tid != new_entry->efd_tid) ||
5658 (entry->efd_group != new_entry->efd_group))
5659 return;
5660 if (entry->efd_start_cluster + entry->efd_count ==
5661 new_entry->efd_start_cluster) {
5662 new_entry->efd_start_cluster = entry->efd_start_cluster;
5663 new_entry->efd_count += entry->efd_count;
5664 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
5665 entry->efd_start_cluster) {
5666 new_entry->efd_count += entry->efd_count;
5667 } else
5668 return;
5669 spin_lock(&sbi->s_md_lock);
5670 list_del(&entry->efd_list);
5671 spin_unlock(&sbi->s_md_lock);
5672 rb_erase(&entry->efd_node, entry_rb_root);
5673 kmem_cache_free(ext4_free_data_cachep, entry);
5674}
5675
5676static noinline_for_stack int
5677ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
5678 struct ext4_free_data *new_entry)
5679{
5680 ext4_group_t group = e4b->bd_group;
5681 ext4_grpblk_t cluster;
5682 ext4_grpblk_t clusters = new_entry->efd_count;
5683 struct ext4_free_data *entry;
5684 struct ext4_group_info *db = e4b->bd_info;
5685 struct super_block *sb = e4b->bd_sb;
5686 struct ext4_sb_info *sbi = EXT4_SB(sb);
5687 struct rb_node **n = &db->bb_free_root.rb_node, *node;
5688 struct rb_node *parent = NULL, *new_node;
5689
5690 BUG_ON(!ext4_handle_valid(handle));
5691 BUG_ON(e4b->bd_bitmap_page == NULL);
5692 BUG_ON(e4b->bd_buddy_page == NULL);
5693
5694 new_node = &new_entry->efd_node;
5695 cluster = new_entry->efd_start_cluster;
5696
5697 if (!*n) {
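		/*
		 * first free block extent. We need to protect buddy cache
		 * from being freed, otherwise we'll refresh it from
		 * on-disk bitmap and lose not-yet-available blocks.
		 */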
5703 get_page(e4b->bd_buddy_page);
5704 get_page(e4b->bd_bitmap_page);
5705 }
5706 while (*n) {
5707 parent = *n;
5708 entry = rb_entry(parent, struct ext4_free_data, efd_node);
5709 if (cluster < entry->efd_start_cluster)
5710 n = &(*n)->rb_left;
5711 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
5712 n = &(*n)->rb_right;
5713 else {
5714 ext4_grp_locked_error(sb, group, 0,
5715 ext4_group_first_block_no(sb, group) +
5716 EXT4_C2B(sbi, cluster),
5717 "Block already on to-be-freed list");
5718 kmem_cache_free(ext4_free_data_cachep, new_entry);
5719 return 0;
5720 }
5721 }
5722
5723 rb_link_node(new_node, parent, n);
5724 rb_insert_color(new_node, &db->bb_free_root);
5725
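	/* Now see whether the new extent can be merged to the left and right */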
5727 node = rb_prev(new_node);
5728 if (node) {
5729 entry = rb_entry(node, struct ext4_free_data, efd_node);
5730 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5731 &(db->bb_free_root));
5732 }
5733
5734 node = rb_next(new_node);
5735 if (node) {
5736 entry = rb_entry(node, struct ext4_free_data, efd_node);
5737 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5738 &(db->bb_free_root));
5739 }
5740
5741 spin_lock(&sbi->s_md_lock);
5742 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
5743 sbi->s_mb_free_pending += clusters;
5744 spin_unlock(&sbi->s_md_lock);
5745 return 0;
5746}
5747
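/*
 * Simple allocator for Ext4 fast commit replay path. It searches for blocks
 * linearly, starting at the goal, and skips blocks which are going to be in
 * use after fast commit replay.
 */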
5753static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5754 struct ext4_allocation_request *ar, int *errp)
5755{
5756 struct buffer_head *bitmap_bh;
5757 struct super_block *sb = ar->inode->i_sb;
5758 ext4_group_t group;
5759 ext4_grpblk_t blkoff;
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = max;
5761 ext4_fsblk_t goal, block;
5762 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
5763
5764 goal = ar->goal;
5765 if (goal < le32_to_cpu(es->s_first_data_block) ||
5766 goal >= ext4_blocks_count(es))
5767 goal = le32_to_cpu(es->s_first_data_block);
5768
5769 ar->len = 0;
5770 ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
5771 for (; group < ext4_get_groups_count(sb); group++) {
5772 bitmap_bh = ext4_read_block_bitmap(sb, group);
5773 if (IS_ERR(bitmap_bh)) {
5774 *errp = PTR_ERR(bitmap_bh);
5775 pr_warn("Failed to read block bitmap\n");
5776 return 0;
5777 }
5778
5779 ext4_get_group_no_and_offset(sb,
5780 max(ext4_group_first_block_no(sb, group), goal),
5781 NULL, &blkoff);
		/*
		 * The search limit is in bits, and the bitmap carries one
		 * bit per cluster, so scan up to EXT4_CLUSTERS_PER_GROUP(sb)
		 * rather than sb->s_blocksize (a byte count).
		 */
		i = mb_find_next_zero_bit(bitmap_bh->b_data, max, blkoff);
		brelse(bitmap_bh);
		if (i >= max)
			continue;
5787 if (ext4_fc_replay_check_excluded(sb,
5788 ext4_group_first_block_no(sb, group) + i))
5789 continue;
5790 break;
5791 }
5792
	/* ran out of groups or free bits: report ENOSPC to the caller */
	if (group >= ext4_get_groups_count(sb) || i >= max) {
		*errp = -ENOSPC;
		return 0;
	}
5795
	*errp = 0;
	block = ext4_group_first_block_no(sb, group) + i;
5797 ext4_mb_mark_bb(sb, block, 1, 1);
5798 ar->len = 1;
5799
5800 return block;
5801}
5802
5803static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
5804 unsigned long count)
5805{
5806 struct buffer_head *bitmap_bh;
5807 struct super_block *sb = inode->i_sb;
5808 struct ext4_group_desc *gdp;
5809 struct buffer_head *gdp_bh;
5810 ext4_group_t group;
5811 ext4_grpblk_t blkoff;
5812 int already_freed = 0, err, i;
5813
5814 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
5815 bitmap_bh = ext4_read_block_bitmap(sb, group);
5816 if (IS_ERR(bitmap_bh)) {
5817 err = PTR_ERR(bitmap_bh);
5818 pr_warn("Failed to read block bitmap\n");
5819 return;
5820 }
5821 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
	if (!gdp) {
		brelse(bitmap_bh);
		return;
	}
5824
5825 for (i = 0; i < count; i++) {
5826 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
5827 already_freed++;
5828 }
5829 mb_clear_bits(bitmap_bh->b_data, blkoff, count);
	err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
	if (err) {
		brelse(bitmap_bh);
		return;
	}
5833 ext4_free_group_clusters_set(
5834 sb, gdp, ext4_free_group_clusters(sb, gdp) +
5835 count - already_freed);
5836 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
5837 ext4_group_desc_csum_set(sb, group, gdp);
5838 ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
5839 sync_dirty_buffer(bitmap_bh);
5840 sync_dirty_buffer(gdp_bh);
5841 brelse(bitmap_bh);
5842}
5843
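/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode
 * @bh:		optional buffer of the block to be freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	flags used by ext4_free_blocks
 */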
5853void ext4_free_blocks(handle_t *handle, struct inode *inode,
5854 struct buffer_head *bh, ext4_fsblk_t block,
5855 unsigned long count, int flags)
5856{
5857 struct buffer_head *bitmap_bh = NULL;
5858 struct super_block *sb = inode->i_sb;
5859 struct ext4_group_desc *gdp;
5860 unsigned int overflow;
5861 ext4_grpblk_t bit;
5862 struct buffer_head *gd_bh;
5863 ext4_group_t block_group;
5864 struct ext4_sb_info *sbi;
5865 struct ext4_buddy e4b;
5866 unsigned int count_clusters;
5867 int err = 0;
5868 int ret;
5869
5870 sbi = EXT4_SB(sb);
5871
5872 if (sbi->s_mount_state & EXT4_FC_REPLAY) {
5873 ext4_free_blocks_simple(inode, block, count);
5874 return;
5875 }
5876
5877 might_sleep();
5878 if (bh) {
5879 if (block)
5880 BUG_ON(block != bh->b_blocknr);
5881 else
5882 block = bh->b_blocknr;
5883 }
5884
5885 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
5886 !ext4_inode_block_valid(inode, block, count)) {
5887 ext4_error(sb, "Freeing blocks not in datazone - "
5888 "block = %llu, count = %lu", block, count);
5889 goto error_return;
5890 }
5891
5892 ext4_debug("freeing block %llu\n", block);
5893 trace_ext4_free_blocks(inode, block, count, flags);
5894
5895 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
5896 BUG_ON(count > 1);
5897
5898 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
5899 inode, bh, block);
5900 }
5901
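	/*
	 * If the extent to be freed does not begin on a cluster
	 * boundary, we need to deal with partial clusters at the
	 * beginning and end of the extent.  Normally we will free
	 * blocks at the beginning or the end unless we are explicitly
	 * requested to avoid doing so.
	 */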
5909 overflow = EXT4_PBLK_COFF(sbi, block);
5910 if (overflow) {
5911 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
5912 overflow = sbi->s_cluster_ratio - overflow;
5913 block += overflow;
5914 if (count > overflow)
5915 count -= overflow;
5916 else
5917 return;
5918 } else {
5919 block -= overflow;
5920 count += overflow;
5921 }
5922 }
5923 overflow = EXT4_LBLK_COFF(sbi, count);
5924 if (overflow) {
5925 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
5926 if (count > overflow)
5927 count -= overflow;
5928 else
5929 return;
5930 } else
5931 count += sbi->s_cluster_ratio - overflow;
5932 }
5933
5934 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
5935 int i;
5936 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
5937
5938 for (i = 0; i < count; i++) {
5939 cond_resched();
5940 if (is_metadata)
5941 bh = sb_find_get_block(inode->i_sb, block + i);
5942 ext4_forget(handle, is_metadata, inode, bh, block + i);
5943 }
5944 }
5945
5946do_more:
5947 overflow = 0;
5948 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
5949
5950 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
5951 ext4_get_group_info(sb, block_group))))
5952 return;
5953
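	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */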
5958 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
5959 overflow = EXT4_C2B(sbi, bit) + count -
5960 EXT4_BLOCKS_PER_GROUP(sb);
5961 count -= overflow;
5962 }
5963 count_clusters = EXT4_NUM_B2C(sbi, count);
5964 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5965 if (IS_ERR(bitmap_bh)) {
5966 err = PTR_ERR(bitmap_bh);
5967 bitmap_bh = NULL;
5968 goto error_return;
5969 }
5970 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
5971 if (!gdp) {
5972 err = -EIO;
5973 goto error_return;
5974 }
5975
5976 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
5977 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
5978 in_range(block, ext4_inode_table(sb, gdp),
5979 sbi->s_itb_per_group) ||
5980 in_range(block + count - 1, ext4_inode_table(sb, gdp),
5981 sbi->s_itb_per_group)) {
5982
5983 ext4_error(sb, "Freeing blocks in system zone - "
5984 "Block = %llu, count = %lu", block, count);
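		/* err = 0. ext4_std_error should be a no op */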
5986 goto error_return;
5987 }
5988
5989 BUFFER_TRACE(bitmap_bh, "getting write access");
5990 err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
5991 EXT4_JTR_NONE);
5992 if (err)
5993 goto error_return;
5994
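	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */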
6000 BUFFER_TRACE(gd_bh, "get_write_access");
6001 err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
6002 if (err)
6003 goto error_return;
6004#ifdef AGGRESSIVE_CHECK
6005 {
6006 int i;
6007 for (i = 0; i < count_clusters; i++)
6008 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
6009 }
6010#endif
6011 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
6012
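	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */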
6014 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
6015 GFP_NOFS|__GFP_NOFAIL);
6016 if (err)
6017 goto error_return;
6018
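	/*
	 * We need to make sure we don't reuse the freed block until after the
	 * transaction is committed. We make an exception if the inode is to be
	 * written in writeback mode since writeback mode has weak data
	 * consistency guarantees.
	 */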
6025 if (ext4_handle_valid(handle) &&
6026 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
6027 !ext4_should_writeback_data(inode))) {
6028 struct ext4_free_data *new_entry;
6029
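		/*
		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
		 * to fail.
		 */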
6033 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
6034 GFP_NOFS|__GFP_NOFAIL);
6035 new_entry->efd_start_cluster = bit;
6036 new_entry->efd_group = block_group;
6037 new_entry->efd_count = count_clusters;
6038 new_entry->efd_tid = handle->h_transaction->t_tid;
6039
6040 ext4_lock_group(sb, block_group);
6041 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
6042 ext4_mb_free_metadata(handle, &e4b, new_entry);
6043 } else {
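		/*
		 * need to update group_info->bb_free and bitmap
		 * with group lock held. generate_buddy looks at
		 * them with group lock held to avoid BUG.
		 */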
6048 if (test_opt(sb, DISCARD)) {
6049 err = ext4_issue_discard(sb, block_group, bit, count,
6050 NULL);
6051 if (err && err != -EOPNOTSUPP)
6052 ext4_msg(sb, KERN_WARNING, "discard request in"
6053 " group:%d block:%d count:%lu failed"
6054 " with %d", block_group, bit, count,
6055 err);
6056 } else
6057 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
6058
6059 ext4_lock_group(sb, block_group);
6060 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
6061 mb_free_blocks(inode, &e4b, bit, count_clusters);
6062 }
6063
6064 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
6065 ext4_free_group_clusters_set(sb, gdp, ret);
6066 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
6067 ext4_group_desc_csum_set(sb, block_group, gdp);
6068 ext4_unlock_group(sb, block_group);
6069
6070 if (sbi->s_log_groups_per_flex) {
6071 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6072 atomic64_add(count_clusters,
6073 &sbi_array_rcu_deref(sbi, s_flex_groups,
6074 flex_group)->free_clusters);
6075 }
6076
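	/*
	 * on a bigalloc file system, defer the s_freeclusters_counter
	 * update to the caller (ext4_remove_space and friends) so they
	 * can determine if a cluster freed here should be rereserved
	 */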
6082 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
6083 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
6084 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
6085 percpu_counter_add(&sbi->s_freeclusters_counter,
6086 count_clusters);
6087 }
6088
6089 ext4_mb_unload_buddy(&e4b);
6090
6092 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6093 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6094
6096 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6097 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6098 if (!err)
6099 err = ret;
6100
6101 if (overflow && !err) {
6102 block += count;
6103 count = overflow;
6104 put_bh(bitmap_bh);
6105 goto do_more;
6106 }
6107error_return:
6108 brelse(bitmap_bh);
6109 ext4_std_error(sb, err);
6110 return;
6111}
6112
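/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	handle to this transaction
 * @sb:		super block
 * @block:	start physical block to add to the block group
 * @count:	number of blocks to add
 *
 * This marks the blocks as free in the bitmap and buddy.
 */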
6122int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
6123 ext4_fsblk_t block, unsigned long count)
6124{
6125 struct buffer_head *bitmap_bh = NULL;
6126 struct buffer_head *gd_bh;
6127 ext4_group_t block_group;
6128 ext4_grpblk_t bit;
6129 unsigned int i;
6130 struct ext4_group_desc *desc;
6131 struct ext4_sb_info *sbi = EXT4_SB(sb);
6132 struct ext4_buddy e4b;
6133 int err = 0, ret, free_clusters_count;
6134 ext4_grpblk_t clusters_freed;
6135 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
6136 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
6137 unsigned long cluster_count = last_cluster - first_cluster + 1;
6138
6139 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
6140
6141 if (count == 0)
6142 return 0;
6143
6144 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
6145
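	/*
	 * Check to see if we are adding blocks across a group
	 * boundary.
	 */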
6149 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
6150 ext4_warning(sb, "too many blocks added to group %u",
6151 block_group);
6152 err = -EINVAL;
6153 goto error_return;
6154 }
6155
6156 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
6157 if (IS_ERR(bitmap_bh)) {
6158 err = PTR_ERR(bitmap_bh);
6159 bitmap_bh = NULL;
6160 goto error_return;
6161 }
6162
6163 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
6164 if (!desc) {
6165 err = -EIO;
6166 goto error_return;
6167 }
6168
6169 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
6170 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
6171 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
6172 in_range(block + count - 1, ext4_inode_table(sb, desc),
6173 sbi->s_itb_per_group)) {
6174 ext4_error(sb, "Adding blocks in system zones - "
6175 "Block = %llu, count = %lu",
6176 block, count);
6177 err = -EINVAL;
6178 goto error_return;
6179 }
6180
6181 BUFFER_TRACE(bitmap_bh, "getting write access");
6182 err = ext4_journal_get_write_access(handle, sb, bitmap_bh,
6183 EXT4_JTR_NONE);
6184 if (err)
6185 goto error_return;
6186
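	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */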
6192 BUFFER_TRACE(gd_bh, "get_write_access");
6193 err = ext4_journal_get_write_access(handle, sb, gd_bh, EXT4_JTR_NONE);
6194 if (err)
6195 goto error_return;
6196
6197 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
6198 BUFFER_TRACE(bitmap_bh, "clear bit");
6199 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
6200 ext4_error(sb, "bit already cleared for block %llu",
6201 (ext4_fsblk_t)(block + i));
6202 BUFFER_TRACE(bitmap_bh, "bit already cleared");
6203 } else {
6204 clusters_freed++;
6205 }
6206 }
6207
6208 err = ext4_mb_load_buddy(sb, block_group, &e4b);
6209 if (err)
6210 goto error_return;
6211
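	/*
	 * need to update group_info->bb_free and bitmap
	 * with group lock held. generate_buddy looks at
	 * them with group lock held to avoid BUG.
	 */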
6217 ext4_lock_group(sb, block_group);
6218 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
6219 mb_free_blocks(NULL, &e4b, bit, cluster_count);
6220 free_clusters_count = clusters_freed +
6221 ext4_free_group_clusters(sb, desc);
6222 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
6223 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
6224 ext4_group_desc_csum_set(sb, block_group, desc);
6225 ext4_unlock_group(sb, block_group);
6226 percpu_counter_add(&sbi->s_freeclusters_counter,
6227 clusters_freed);
6228
6229 if (sbi->s_log_groups_per_flex) {
6230 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6231 atomic64_add(clusters_freed,
6232 &sbi_array_rcu_deref(sbi, s_flex_groups,
6233 flex_group)->free_clusters);
6234 }
6235
6236 ext4_mb_unload_buddy(&e4b);
6237
6239 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6240 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6241
6243 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6244 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6245 if (!err)
6246 err = ret;
6247
6248error_return:
6249 brelse(bitmap_bh);
6250 ext4_std_error(sb, err);
6251 return err;
6252}
6253
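/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the group. To assure that no
 * one will allocate those blocks, mark them as used in the buddy bitmap.
 * This must be called under group lock.
 */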
6265static int ext4_trim_extent(struct super_block *sb,
6266 int start, int count, struct ext4_buddy *e4b)
6267__releases(bitlock)
6268__acquires(bitlock)
6269{
6270 struct ext4_free_extent ex;
6271 ext4_group_t group = e4b->bd_group;
6272 int ret = 0;
6273
6274 trace_ext4_trim_extent(sb, group, start, count);
6275
6276 assert_spin_locked(ext4_group_lock_ptr(sb, group));
6277
6278 ex.fe_start = start;
6279 ex.fe_group = group;
6280 ex.fe_len = count;
6281
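	/*
	 * Mark blocks used, so no one can reuse them while
	 * being trimmed.
	 */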
6286 mb_mark_used(e4b, &ex);
6287 ext4_unlock_group(sb, group);
6288 ret = ext4_issue_discard(sb, group, start, count, NULL);
6289 ext4_lock_group(sb, group);
6290 mb_free_blocks(NULL, e4b, start, ex.fe_len);
6291 return ret;
6292}
6293
6294static int ext4_try_to_trim_range(struct super_block *sb,
6295 struct ext4_buddy *e4b, ext4_grpblk_t start,
6296 ext4_grpblk_t max, ext4_grpblk_t minblocks)
6297__acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
6298__releases(ext4_group_lock_ptr(sb, e4b->bd_group))
6299{
6300 ext4_grpblk_t next, count, free_count;
6301 void *bitmap;
6302 int ret = 0;
6303
6304 bitmap = e4b->bd_bitmap;
6305 start = (e4b->bd_info->bb_first_free > start) ?
6306 e4b->bd_info->bb_first_free : start;
6307 count = 0;
6308 free_count = 0;
6309
6310 while (start <= max) {
6311 start = mb_find_next_zero_bit(bitmap, max + 1, start);
6312 if (start > max)
6313 break;
6314 next = mb_find_next_bit(bitmap, max + 1, start);
6315
6316 if ((next - start) >= minblocks) {
6317 ret = ext4_trim_extent(sb, start, next - start, e4b);
6318 if (ret && ret != -EOPNOTSUPP)
6319 break;
6320 ret = 0;
6321 count += next - start;
6322 }
6323 free_count += next - start;
6324 start = next + 1;
6325
6326 if (fatal_signal_pending(current)) {
6327 count = -ERESTARTSYS;
6328 break;
6329 }
6330
6331 if (need_resched()) {
6332 ext4_unlock_group(sb, e4b->bd_group);
6333 cond_resched();
6334 ext4_lock_group(sb, e4b->bd_group);
6335 }
6336
6337 if ((e4b->bd_info->bb_free - free_count) < minblocks)
6338 break;
6339 }
6340
6341 return count;
6342}
6343
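/**
 * ext4_trim_all_free -- function to trim all free space in alloc. group
 * @sb:		super block for file system
 * @group:	group to be trimmed
 * @start:	first group block to examine
 * @max:	last group block to examine
 * @minblocks:	minimum extent block count
 *
 * ext4_trim_all_free walks through group's block bitmap searching for free
 * extents. When a free extent is found, it is marked as used in the group
 * buddy bitmap, a TRIM is issued on the extent, and the extent is freed
 * again in the group buddy bitmap.
 */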
6357static ext4_grpblk_t
6358ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
6359 ext4_grpblk_t start, ext4_grpblk_t max,
6360 ext4_grpblk_t minblocks)
6361{
6362 struct ext4_buddy e4b;
6363 int ret;
6364
6365 trace_ext4_trim_all_free(sb, group, start, max);
6366
6367 ret = ext4_mb_load_buddy(sb, group, &e4b);
6368 if (ret) {
6369 ext4_warning(sb, "Error %d loading buddy information for %u",
6370 ret, group);
6371 return ret;
6372 }
6373
6374 ext4_lock_group(sb, group);
6375
6376 if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
6377 minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
6378 ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
6379 if (ret >= 0)
6380 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
6381 } else {
6382 ret = 0;
6383 }
6384
6385 ext4_unlock_group(sb, group);
6386 ext4_mb_unload_buddy(&e4b);
6387
6388 ext4_debug("trimmed %d blocks in the group %d\n",
6389 ret, group);
6390
6391 return ret;
6392}
6393
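/**
 * ext4_trim_fs() -- trim ioctl handle function
 * @sb:		superblock for filesystem
 * @range:	fstrim_range structure
 *
 * start:	First Byte to trim
 * len:		number of Bytes to trim from start
 * minlen:	minimum extent length in Bytes
 *
 * ext4_trim_fs goes through all allocation groups containing Bytes to
 * trim and trims all free extents up to minlen. All extents needed to be
 * trimmed are sent to the block layer.
 */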
6406int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
6407{
6408 struct ext4_group_info *grp;
6409 ext4_group_t group, first_group, last_group;
6410 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
6411 uint64_t start, end, minlen, trimmed = 0;
6412 ext4_fsblk_t first_data_blk =
6413 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
6414 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
6415 int ret = 0;
6416
6417 start = range->start >> sb->s_blocksize_bits;
6418 end = start + (range->len >> sb->s_blocksize_bits) - 1;
6419 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
6420 range->minlen >> sb->s_blocksize_bits);
6421
6422 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
6423 start >= max_blks ||
6424 range->len < sb->s_blocksize)
6425 return -EINVAL;
6426 if (end >= max_blks)
6427 end = max_blks - 1;
6428 if (end <= first_data_blk)
6429 goto out;
6430 if (start < first_data_blk)
6431 start = first_data_blk;
6432
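	/* Determine first and last group to examine based on start and end */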
6434 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
6435 &first_group, &first_cluster);
6436 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
6437 &last_group, &last_cluster);
6438
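	/* end now represents the last cluster to discard in this group */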
6440 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6441
6442 for (group = first_group; group <= last_group; group++) {
6443 grp = ext4_get_group_info(sb, group);
6444
6445 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
6446 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
6447 if (ret)
6448 break;
6449 }
6450
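		/*
		 * For all the groups except the last one, last cluster will
		 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
		 * change it for the last group, note that last_cluster is
		 * already computed earlier by ext4_get_group_no_and_offset()
		 */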
6457 if (group == last_group)
6458 end = last_cluster;
6459
6460 if (grp->bb_free >= minlen) {
6461 cnt = ext4_trim_all_free(sb, group, first_cluster,
6462 end, minlen);
6463 if (cnt < 0) {
6464 ret = cnt;
6465 break;
6466 }
6467 trimmed += cnt;
6468 }
6469
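		/*
		 * For every group except the first one, we are sure
		 * that the first cluster to discard will be cluster #0.
		 */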
6474 first_cluster = 0;
6475 }
6476
6477 if (!ret)
6478 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
6479
6480out:
6481 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
6482 return ret;
6483}
6484
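/*
 * Iterate over the free extents of @group that overlap [start, end]
 * (in clusters) and report each one through @formatter; the group lock
 * is dropped around each formatter call.
 */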
6486int
6487ext4_mballoc_query_range(
6488 struct super_block *sb,
6489 ext4_group_t group,
6490 ext4_grpblk_t start,
6491 ext4_grpblk_t end,
6492 ext4_mballoc_query_range_fn formatter,
6493 void *priv)
6494{
6495 void *bitmap;
6496 ext4_grpblk_t next;
6497 struct ext4_buddy e4b;
6498 int error;
6499
6500 error = ext4_mb_load_buddy(sb, group, &e4b);
6501 if (error)
6502 return error;
6503 bitmap = e4b.bd_bitmap;
6504
6505 ext4_lock_group(sb, group);
6506
6507 start = (e4b.bd_info->bb_first_free > start) ?
6508 e4b.bd_info->bb_first_free : start;
6509 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
6510 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6511
6512 while (start <= end) {
6513 start = mb_find_next_zero_bit(bitmap, end + 1, start);
6514 if (start > end)
6515 break;
6516 next = mb_find_next_bit(bitmap, end + 1, start);
6517
6518 ext4_unlock_group(sb, group);
6519 error = formatter(sb, group, start, next - start, priv);
6520 if (error)
6521 goto out_unload;
6522 ext4_lock_group(sb, group);
6523
6524 start = next + 1;
6525 }
6526
6527 ext4_unlock_group(sb, group);
6528out_unload:
6529 ext4_mb_unload_buddy(&e4b);
6530
6531 return error;
6532}
6533