// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 */

/*
 * mballoc.c contains the multiblocks allocation routines
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>
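
/*
 * Overview: the multiblock allocator keeps, for every block group, a copy
 * of the on-disk block bitmap plus a "buddy" bitmap in the page cache of a
 * per-superblock inode (s_buddy_cache), together with per-group statistics
 * (free clusters, fragments, per-order free-extent counters) in
 * ext4_group_info.  An allocation request is normalized to a goal extent
 * and groups are then scanned under increasingly permissive criteria
 * (cr 0..3), from cheap power-of-two buddy lookups down to an exhaustive
 * bitmap walk.
 */
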
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;
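
/*
 * One groupinfo slab cache per supported filesystem block size (1k..128k);
 * each is created on demand when a filesystem of that block size is mounted.
 */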
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
			       ext4_group_t group, int cr);
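
/*
 * Per-cpu sequence counter, bumped in mb_free_blocks() and mb_mark_used().
 * Summing it over all cpus lets callers detect whether any buddy state
 * changed anywhere on the filesystem between two samples.
 */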
static DEFINE_PER_CPU(u64, discard_pa_seq);
static inline u64 ext4_get_discard_pa_seq_sum(void)
{
	int __cpu;
	u64 __seq = 0;

	for_each_possible_cpu(__cpu)
		__seq += per_cpu(discard_pa_seq, __cpu);
	return __seq;
}
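
/*
 * The ext4 bitop helpers want an unsigned-long-aligned base address; fold
 * any misalignment of @addr into the bit offset instead.
 */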
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on architecture like powerpc
	 * needs unsigned long aligned address
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}
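
/*
 * With DOUBLE_CHECK defined, mballoc keeps a private copy of each group's
 * block bitmap (bb_bitmap) and cross-checks every allocation and free
 * against it, complaining loudly on any mismatch.
 */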
#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
				   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

static void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	struct buffer_head *bh;

	grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
	if (!grp->bb_bitmap)
		return;

	bh = ext4_read_block_bitmap(sb, group);
	if (IS_ERR_OR_NULL(bh)) {
		kfree(grp->bb_bitmap);
		grp->bb_bitmap = NULL;
		return;
	}

	memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
	put_bh(bh);
}

static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	kfree(grp->bb_bitmap);
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}

static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
			struct ext4_group_info *grp, ext4_group_t group)
{
	return;
}

static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
{
	return;
}
#endif
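
/*
 * AGGRESSIVE_CHECK enables an expensive walk of the whole buddy structure
 * (run on every tenth call) that re-derives the per-order counters and
 * fragment count and asserts they match the cached values.
 */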
#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	if (e4b->bd_info->bb_check_counter++ % 10)
		return 0;

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 0 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
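
/*
 * Divide blocks started from @first with length @len into
 * smaller chunks with power of 2 blocks.
 * Clear the bits in bitmap which the blocks of the chunk(s) covered,
 * then increase bb_counters[] for corresponded chunk size.
 */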
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find how many blocks can be covered since this position */
		max = ffs(first | border) - 1;

		/* find how many blocks of power 2 we need to mark */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}

static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new,
			int (*cmp)(struct rb_node *, struct rb_node *))
{
	struct rb_node **iter = &root->rb_node, *parent = NULL;

	while (*iter) {
		parent = *iter;
		if (cmp(new, *iter) > 0)
			iter = &((*iter)->rb_left);
		else
			iter = &((*iter)->rb_right);
	}

	rb_link_node(new, parent, iter);
	rb_insert_color(new, root);
}

static int
ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2)
{
	struct ext4_group_info *grp1 = rb_entry(rb1,
						struct ext4_group_info,
						bb_avg_fragment_size_rb);
	struct ext4_group_info *grp2 = rb_entry(rb2,
						struct ext4_group_info,
						bb_avg_fragment_size_rb);
	int num_frags_1, num_frags_2;

	num_frags_1 = grp1->bb_fragments ?
		grp1->bb_free / grp1->bb_fragments : 0;
	num_frags_2 = grp2->bb_fragments ?
		grp2->bb_free / grp2->bb_fragments : 0;

	return (num_frags_2 - num_frags_1);
}
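
/*
 * Reinsert grpinfo into the avg_fragment_size tree with new average
 * fragment size.
 */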
static void
mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
		return;

	write_lock(&sbi->s_mb_rb_lock);
	if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) {
		rb_erase(&grp->bb_avg_fragment_size_rb,
				&sbi->s_mb_avg_fragment_size_root);
		RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb);
	}

	ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root,
		&grp->bb_avg_fragment_size_rb,
		ext4_mb_avg_fragment_size_cmp);
	write_unlock(&sbi->s_mb_rb_lock);
}
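
/*
 * Choose next group by traversing largest_free_order lists. Updates *new_cr
 * if cr level needs an update.
 */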
static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
			int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *iter, *grp;
	int i;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;

	if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR0_OPTIMIZED))
		atomic_inc(&sbi->s_bal_cr0_bad_suggestions);

	grp = NULL;
	for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
		if (list_empty(&sbi->s_mb_largest_free_orders[i]))
			continue;
		read_lock(&sbi->s_mb_largest_free_orders_locks[i]);
		if (list_empty(&sbi->s_mb_largest_free_orders[i])) {
			read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
			continue;
		}
		grp = NULL;
		list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
				    bb_largest_free_order_node) {
			if (sbi->s_mb_stats)
				atomic64_inc(&sbi->s_bal_cX_groups_considered[0]);
			if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) {
				grp = iter;
				break;
			}
		}
		read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
		if (grp)
			break;
	}

	if (!grp) {
		/* Increment cr and search again */
		*new_cr = 1;
	} else {
		*group = grp->bb_group;
		ac->ac_last_optimal_group = *group;
		ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
	}
}
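
/*
 * Choose next group by traversing average fragment size tree. Updates *new_cr
 * if cr level needs an update.
 */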
static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
		int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int avg_fragment_size, best_so_far;
	struct rb_node *node, *found;
	struct ext4_group_info *grp;

	/*
	 * If there is contention on the lock, instead of waiting for the lock
	 * to become available, just continue searching linearly. We'll resume
	 * our rb tree search later starting at ac->ac_last_optimal_group.
	 */
	if (!read_trylock(&sbi->s_mb_rb_lock)) {
		ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR;
		return;
	}

	if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
		if (sbi->s_mb_stats)
			atomic_inc(&sbi->s_bal_cr1_bad_suggestions);
		/* We have found something at CR 1 in the past */
		grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group);
		for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL;
		     found = rb_next(found)) {
			grp = rb_entry(found, struct ext4_group_info,
				       bb_avg_fragment_size_rb);
			if (sbi->s_mb_stats)
				atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
			if (likely(ext4_mb_good_group(ac, grp->bb_group, 1)))
				break;
		}
		goto done;
	}

	node = sbi->s_mb_avg_fragment_size_root.rb_node;
	best_so_far = 0;
	found = NULL;

	while (node) {
		grp = rb_entry(node, struct ext4_group_info,
			       bb_avg_fragment_size_rb);
		avg_fragment_size = 0;
		if (ext4_mb_good_group(ac, grp->bb_group, 1)) {
			avg_fragment_size = grp->bb_fragments ?
				grp->bb_free / grp->bb_fragments : 0;
			if (!best_so_far || avg_fragment_size < best_so_far) {
				best_so_far = avg_fragment_size;
				found = node;
			}
		}
		if (avg_fragment_size > ac->ac_g_ex.fe_len)
			node = node->rb_right;
		else
			node = node->rb_left;
	}

done:
	if (found) {
		grp = rb_entry(found, struct ext4_group_info,
			       bb_avg_fragment_size_rb);
		*group = grp->bb_group;
		ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
	} else {
		*new_cr = 2;
	}

	read_unlock(&sbi->s_mb_rb_lock);
	ac->ac_last_optimal_group = *group;
}

static inline int should_optimize_scan(struct ext4_allocation_context *ac)
{
	if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN)))
		return 0;
	if (ac->ac_criteria >= 2)
		return 0;
	/* the optimized scan only applies to extent-mapped files */
	if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
		return 0;
	return 1;
}
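
/*
 * Return next linear group for allocation. If linear traversal should not be
 * performed, this function just returns the same group
 */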
static int
next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
{
	if (!should_optimize_scan(ac))
		goto inc_and_return;

	if (ac->ac_groups_linear_remaining) {
		ac->ac_groups_linear_remaining--;
		goto inc_and_return;
	}

	if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) {
		ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR;
		goto inc_and_return;
	}

	return group;
inc_and_return:
	/*
	 * Artificially restricted ngroups for non-extent
	 * files makes group > ngroups possible on first loop.
	 */
	return group + 1 >= ngroups ? 0 : group + 1;
}
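
/*
 * ext4_mb_choose_next_group: choose next group for allocation.
 *
 * @ac        Allocation Context
 * @new_cr    This is an output parameter. If there is no good group available
 *            at the current CR level, this field is updated to indicate the
 *            new cr level that should be used.
 * @group     This is an input / output parameter. As an input it indicates
 *            the last group used for allocation. As output, this field
 *            indicates the next group that should be used.
 * @ngroups   Total number of groups
 */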
static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
		int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
	*new_cr = ac->ac_criteria;

	if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
		return;

	if (*new_cr == 0) {
		ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
	} else if (*new_cr == 1) {
		ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups);
	} else {
		/*
		 * TODO: For CR=2, we can arrange groups in an rb tree sorted by
		 * bb_free. But until that happens, we should never come here.
		 */
		WARN_ON(1);
	}
}
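
/*
 * Cache the order of the largest free extent we have available in this block
 * group.
 */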
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int i;

	if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
		write_lock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
		list_del_init(&grp->bb_largest_free_order_node);
		write_unlock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
	}
	grp->bb_largest_free_order = -1;

	for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
	if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
	    grp->bb_largest_free_order >= 0 && grp->bb_free) {
		write_lock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
		list_add_tail(&grp->bb_largest_free_order_node,
		      &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]);
		write_unlock(&sbi->s_mb_largest_free_orders_locks[
					      grp->bb_largest_free_order]);
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
			    void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* initialize buddy from bitmap which is aggregation
	 * of on-disk bitmap and preallocations */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "block bitmap and bg descriptor "
				      "inconsistent: %u vs %u free clusters",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, we consider group descriptor
		 * corrupt and update bb_free using bitmap value
		 */
		grp->bb_free = free;
		ext4_mark_group_bitmap_corrupted(sb, group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	atomic_inc(&sbi->s_mb_buddies_generated);
	atomic64_add(period, &sbi->s_mb_generation_time);
	mb_update_avg_fragment_size(sb, grp);
}
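
/*
 * The buddy information is attached to the buddy cache inode so that we can
 * access it through the page cache.  Each group contributes one block of
 * bitmap data and one block of buddy data, laid out consecutively:
 *
 *  {                        page                        }
 *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
 *
 * So each group takes up two blocks, and a page holding blocks_per_page
 * (PAGE_SIZE / blocksize) blocks covers blocks_per_page / 2 groups.
 *
 * Locking note: this routine takes the block group lock of all groups
 * for this page; do not hold this lock when calling this routine!
 */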
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = i_blocksize(inode);
	blocks_per_page = PAGE_SIZE / blocksize;

	mb_debug(sb, "init page %lu\n", page->index);

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps */
	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, gfp);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		/*
		 * If page is uptodate then we came here after online resize
		 * which added some new uninitialized group info structs, so
		 * we must skip all initialized uptodate buddies on the page,
		 * which may be currently in use by an allocating task.
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
		if (IS_ERR(bh[i])) {
			err = PTR_ERR(bh[i]);
			bh[i] = NULL;
			goto out;
		}
		mb_debug(sb, "read bitmap for group %u\n", group);
	}

	/* wait for I/O completion */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		int err2;

		if (!bh[i])
			continue;
		err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
		if (!err)
			err = err2;
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skip initialized uptodate buddy */
			continue;

		if (!buffer_verified(bh[group - first_group]))
			/* Skip faulty bitmaps */
			continue;
		err = 0;

		/*
		 * data carry information regarding this
		 * particular group in the format specified
		 * above
		 *
		 */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		/*
		 * We place the buddy block and bitmap block
		 * close together
		 */
		if ((first_block + i) & 1) {
			/* this is block of buddy */
			BUG_ON(incore == NULL);
			mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
			       (MB_NUM_ORDERS(sb)));
			/*
			 * incore got set to the group block bitmap below
			 */
			ext4_lock_group(sb, group);
			/* init the buddy */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* this is block of bitmap */
			BUG_ON(incore != NULL);
			mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			/* see comments in ext4_mb_put_pa() */
			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark all preallocated blks used in in-core bitmap */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* set incore so that the buddy information can be
			 * generated using this
			 */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
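
/*
 * Lock the buddy and bitmap pages. This makes sure a parallel init_group
 * on the same buddy cache pages is avoided.
 */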
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2) {
		/* buddy and bitmap are on the same page */
		return 0;
	}

	block++;
	pnum = block / blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}

static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page) {
		unlock_page(e4b->bd_bitmap_page);
		put_page(e4b->bd_bitmap_page);
	}
	if (e4b->bd_buddy_page) {
		unlock_page(e4b->bd_buddy_page);
		put_page(e4b->bd_buddy_page);
	}
}
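
/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the group lock
 * when calling this routine!
 */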
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{

	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	might_sleep();
	mb_debug(sb, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * This ensures that we don't reinit the buddy cache
	 * page which map to the group from which we are already
	 * allocating. If we are looking at the buddy cache we would
	 * have taken a reference using ext4_mb_load_buddy and that
	 * would have pinned buddy page to page cache.
	 * The call to ext4_mb_get_buddy_page_lock will mark the
	 * page accessed.
	 */
	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group
		 * return without doing anything
		 */
		goto err;
	}

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	if (e4b.bd_buddy_page == NULL) {
		/*
		 * If both the bitmap and buddy are in
		 * the same page we don't need to force
		 * init the buddy
		 */
		ret = 0;
		goto err;
	}
	/* init buddy cache */
	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}
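
/*
 * Locking note:  This routine may call ext4_mb_init_group(), which takes the
 * block group lock of all groups for this page; do not hold the group lock
 * when calling this routine!
 */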
static noinline_for_stack int
ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
		       struct ext4_buddy *e4b, gfp_t gfp)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	might_sleep();
	mb_debug(sb, "load group %u\n", group);

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = grp;
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/*
		 * we need full data about the group
		 * to make a good selection
		 */
		ret = ext4_mb_init_group(sb, group, gfp);
		if (ret)
			return ret;
	}

	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* we could use find_or_create_page(), but it locks page
	 * what we'd like to avoid in fast path ... */
	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the page reference and try
			 * to get the page with lock. If we
			 * are not uptodate that implies
			 * somebody just created the page but
			 * is yet to initialize the same. So
			 * wait for it to initialize.
			 */
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL, gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
							 gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);

	return 0;

err:
	if (page)
		put_page(page);
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}

static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
			      struct ext4_buddy *e4b)
{
	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
}

static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1, max;
	void *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	while (order <= e4b->bd_blkbits + 1) {
		bb = mb_find_buddy(e4b, order, &max);
		if (!mb_test_bit(block >> order, bb)) {
			/* this block is part of buddy of order 'order' */
			return order;
		}
		order++;
	}
	return 0;
}

static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}

/* clear bits in given range
 * will return first found zero bit if any, -1 otherwise
 */
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;
	int zero_bit = -1;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			if (*addr != (__u32)(-1) && zero_bit == -1)
				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
			*addr = 0;
			cur += 32;
			continue;
		}
		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
			zero_bit = cur;
		cur++;
	}

	return zero_bit;
}

void ext4_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: set whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}

static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	}
	else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}

static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
	int max;
	int order = 1;
	void *buddy = mb_find_buddy(e4b, order, &max);

	while (buddy) {
		void *buddy2;

		/* Bits in range [first; last] are known to be set since
		 * corresponding blocks were allocated. Bits in range
		 * (first; last) will stay set because they form buddies on
		 * upper layer. We just deal with borders if they don't
		 * align with upper layer and then go up.
		 * Releasing entire group is all about clearing
		 * single bit of higher order buddy.
		 */

		/* Example:
		 * ---------------------------------
		 * |   1   |   1   |   1   |   1   |
		 * ---------------------------------
		 * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
		 * ---------------------------------
		 *   0   1   2   3   4   5   6   7
		 *      \_____________________/
		 *
		 * Neither [1] nor [6] is aligned to above layer.
		 * Left neighbour [0] is free, so mark it busy,
		 * decrease bb_counters and extend range to
		 * [0; 6]
		 * Right neighbour [7] is busy. It can't be coalesced with
		 * [6], so mark [6] free, increase bb_counters and shrink
		 * range to [0; 5].
		 * Then shift range to [0; 2], go up and do the same.
		 */

		if (first & 1)
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
		if (!(last & 1))
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
		if (first > last)
			break;
		order++;

		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
			mb_clear_bits(buddy, first, last - first + 1);
			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
			break;
		}
		first >>= 1;
		last >>= 1;
		buddy = buddy2;
	}
}

static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int left_is_free = 0;
	int right_is_free = 0;
	int block;
	int last = first + count - 1;
	struct super_block *sb = e4b->bd_sb;

	if (WARN_ON(count == 0))
		return;
	BUG_ON(last >= (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	/* Don't bother if the block group is corrupt. */
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
		return;

	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	this_cpu_inc(discard_pa_seq);
	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* access memory sequentially: check left neighbour,
	 * clear range and then check right neighbour
	 */
	if (first != 0)
		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);

	if (unlikely(block != -1)) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		ext4_fsblk_t blocknr;

		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
		blocknr += EXT4_C2B(sbi, block);
		if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing already freed block (bit %u); block bitmap corrupt.",
					      block);
			ext4_mark_group_bitmap_corrupted(
				sb, e4b->bd_group,
				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		goto done;
	}

	/* let's maintain fragments counter */
	if (left_is_free && right_is_free)
		e4b->bd_info->bb_fragments--;
	else if (!left_is_free && !right_is_free)
		e4b->bd_info->bb_fragments++;

	/* buddy[0] == bd_bitmap is a special case, so handle
	 * it right away and let mb_buddy_mark_free stay free of
	 * zero order checks.
	 * Check if neighbours are to be coalesced,
	 * adjust bitmap bb_counters and borders appropriately.
	 */
	if (first & 1) {
		first += !left_is_free;
		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
	}
	if (!(last & 1)) {
		last -= !right_is_free;
		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
	}

	if (first <= last)
		mb_buddy_mark_free(e4b, first >> 1, last >> 1);

done:
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_update_avg_fragment_size(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}

static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find actual order */
	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* calc difference from given start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       mb_find_buddy(e4b, order, &max)) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
		/* Should never happen! (but apparently sometimes does?!?) */
		WARN_ON(1);
		ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
			   "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
			   block, order, needed, ex->fe_group, ex->fe_start,
			   ex->fe_len, ex->fe_logical);
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
	}
	return ex->fe_len;
}

static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	this_cpu_inc(discard_pa_seq);
	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* let's maintain fragments counter */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* let's maintain buddy itself */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk may be allocated at once! */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* store for history */
		if (ret == 0)
			ret = len | (ord << 16);

		/* we have to split large buddy */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info);
	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}
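
/*
 * Must be called under group lock!
 */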
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, thus we store actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * take the page reference. We want the page to be pinned
	 * so that we don't get a ext4_mb_init_cache_call for this
	 * group until we update the bitmap. That would mean we
	 * double allocate blocks. The reference is dropped
	 * in ext4_mb_release_context
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);
	/* store last allocated for subsequent stream allocation */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
	/*
	 * As we've just preallocated more space than
	 * user requested originally, we store allocated
	 * space in a special descriptor.
	 */
	if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
		ext4_mb_new_preallocation(ac);

}

static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;
	/*
	 * We don't want to scan for a whole year
	 */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/*
	 * Haven't found good chunk so far, let's continue
	 */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* recheck chunk's availability - we don't know
		 * when it was found (within this lock-unlock
		 * period or not) */
		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}
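
/*
 * The routine checks whether found extent is good enough. If it is,
 * then the extent gets marked used and flag is set to the context
 * to stop scanning. Otherwise, the extent is compared with the
 * previous found extent and if new one is better, then it's stored
 * in the context. Later, the best found extent will be used, if
 * mballoc can't find good enough extent.
 *
 * FIXME: real allocation policy is to be designed yet!
 */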
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If new found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than previous best one is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfy the request, but is
		 * smaller than previous one */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}

static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;
	if (grp->bb_free == 0)
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
		ext4_mb_unload_buddy(e4b);
		return 0;
	}

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);
	ex.fe_logical = 0xDEADFA11; /* debug value */

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, caller may want to merge even small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
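
/*
 * The routine scans buddy structures (not bitmap!) from given order
 * to max order and tries to find big enough chunk to satisfy the req
 */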
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i < MB_NUM_ORDERS(sb); i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		if (k >= max) {
			ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
				"%d free clusters of order %d. But found 0",
				grp->bb_counters[i], i);
			ext4_mark_group_bitmap_corrupted(ac->ac_sb,
					 e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			break;
		}
		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}
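
/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, caller must pass number of
 * free blocks in the group, so the routine can know upper limit.
 */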
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	if (WARN_ON(free <= 0))
		return;

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_CLUSTERS_PER_GROUP(sb), i);
		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
			/*
			 * IF we have corrupt bitmap, we won't find any
			 * free blocks even though group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But bitmap says 0",
					free);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			break;
		}

		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
		if (WARN_ON(ex.fe_len <= 0))
			break;
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			/*
			 * The number of free blocks differs. This mostly
			 * indicate that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}
		ex.fe_logical = 0xDEADC0DE; /* debug value */
		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}
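
/*
 * This is a special case for storages like raid5
 * we try to find stripe-aligned chunks for stripe-size-multiple requests
 */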
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ex.fe_logical = 0xDEADF00D; /* debug value */
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}
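
/*
 * This is also called BEFORE we load the buddy bitmap.
 * Returns either 1 or 0 indicating that the group is either suitable
 * for the allocation or not.
 */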
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	ext4_grpblk_t free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return false;

	free = grp->bb_free;
	if (free == 0)
		return false;

	fragments = grp->bb_fragments;
	if (fragments == 0)
		return false;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return false;

		if (free < ac->ac_g_ex.fe_len)
			return false;

		if (ac->ac_2order >= MB_NUM_ORDERS(ac->ac_sb))
			return true;

		if (grp->bb_largest_free_order < ac->ac_2order)
			return false;

		return true;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return true;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return true;
		break;
	case 3:
		return true;
	default:
		BUG();
	}

	return false;
}
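
/*
 * This could return negative error code if something goes wrong
 * during ext4_mb_init_group(). This should not be called with
 * ext4_lock_group() held.
 */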
static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
				     ext4_group_t group, int cr)
{
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
	ext4_grpblk_t free;
	int ret = 0;

	if (sbi->s_mb_stats)
		atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
	if (should_lock)
		ext4_lock_group(sb, group);
	free = grp->bb_free;
	if (free == 0)
		goto out;
	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
		goto out;
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		goto out;
	if (should_lock)
		ext4_unlock_group(sb, group);

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		struct ext4_group_desc *gdp =
			ext4_get_group_desc(sb, group, NULL);
		int ret;

		/* cr=0/1 is a very optimistic search to find large
		 * good chunks almost for free.  If buddy data is not
		 * ready, then this optimization makes no sense.  But
		 * we never skip the first block group in a flex_bg,
		 * since this gets used for metadata allocations, and
		 * we want to make sure we locate metadata blocks
		 * in the first block group in the flex_bg if possible.
		 */
		if (cr < 2 &&
		    (!sbi->s_log_groups_per_flex ||
		     ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
		    !(ext4_has_group_desc_csum(sb) &&
		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
			return 0;
		ret = ext4_mb_init_group(sb, group, GFP_NOFS);
		if (ret)
			return ret;
	}

	if (should_lock)
		ext4_lock_group(sb, group);
	ret = ext4_mb_good_group(ac, group, cr);
out:
	if (should_lock)
		ext4_unlock_group(sb, group);
	return ret;
}
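
/*
 * Start prefetching @nr block bitmaps starting at @group.
 * Return the next group that needs prefetching.
 */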
ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
			      unsigned int nr, int *cnt)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	struct buffer_head *bh;
	struct blk_plug plug;

	blk_start_plug(&plug);
	while (nr-- > 0) {
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
								  NULL);
		struct ext4_group_info *grp = ext4_get_group_info(sb, group);

		/*
		 * Prefetch block groups with free blocks; but don't
		 * bother if it is marked uninitialized on disk, since
		 * it won't require I/O to read.  Also only try to
		 * prefetch once, so we avoid getblk() call, which can
		 * be expensive.
		 */
		if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
		    EXT4_MB_GRP_NEED_INIT(grp) &&
		    ext4_free_group_clusters(sb, gdp) > 0 &&
		    !(ext4_has_group_desc_csum(sb) &&
		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
			bh = ext4_read_block_bitmap_nowait(sb, group, true);
			if (bh && !IS_ERR(bh)) {
				if (!buffer_uptodate(bh) && cnt)
					(*cnt)++;
				brelse(bh);
			}
		}
		if (++group >= ngroups)
			group = 0;
	}
	blk_finish_plug(&plug);
	return group;
}
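
/*
 * Prefetching reads the block bitmap into the buffer cache; but we
 * need to initialize the buddy cache as well.  Walking backwards
 * from @group, initialize the buddy data for up to @nr groups whose
 * bitmaps qualify, stopping on the first failure.
 */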
void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
			   unsigned int nr)
{
	struct ext4_group_desc *gdp;
	struct ext4_group_info *grp;

	while (nr-- > 0) {
		/* wrap around to the last group before reading anything */
		if (!group)
			group = ext4_get_groups_count(sb);
		group--;
		gdp = ext4_get_group_desc(sb, group, NULL);
		grp = ext4_get_group_info(sb, group);

		if (EXT4_MB_GRP_NEED_INIT(grp) &&
		    ext4_free_group_clusters(sb, gdp) > 0 &&
		    !(ext4_has_group_desc_csum(sb) &&
		      (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
			if (ext4_mb_init_group(sb, group, GFP_NOFS))
				break;
		}
	}
}
2612
2613static noinline_for_stack int
2614ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2615{
2616 ext4_group_t prefetch_grp = 0, ngroups, group, i;
2617 int cr = -1;
2618 int err = 0, first_err = 0;
2619 unsigned int nr = 0, prefetch_ios = 0;
2620 struct ext4_sb_info *sbi;
2621 struct super_block *sb;
2622 struct ext4_buddy e4b;
2623 int lost;
2624
2625 sb = ac->ac_sb;
2626 sbi = EXT4_SB(sb);
2627 ngroups = ext4_get_groups_count(sb);
2628
2629 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2630 ngroups = sbi->s_blockfile_groups;
2631
2632 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2633
2634
2635 err = ext4_mb_find_by_goal(ac, &e4b);
2636 if (err || ac->ac_status == AC_STATUS_FOUND)
2637 goto out;
2638
2639 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2640 goto out;
2641
2642
2643
2644
2645
2646
2647 i = fls(ac->ac_g_ex.fe_len);
2648 ac->ac_2order = 0;
2649
2650
2651
2652
2653
2654
2655
2656 if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) {
2657
2658
2659
2660 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2661 ac->ac_2order = array_index_nospec(i - 1,
2662 MB_NUM_ORDERS(sb));
2663 }
2664
2665
2666 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2667
2668 spin_lock(&sbi->s_md_lock);
2669 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2670 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2671 spin_unlock(&sbi->s_md_lock);
2672 }
2673
2674
2675 cr = ac->ac_2order ? 0 : 1;
2676
2677
2678
2679
2680repeat:
2681 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2682 ac->ac_criteria = cr;
2683
2684
2685
2686
2687 group = ac->ac_g_ex.fe_group;
2688 ac->ac_last_optimal_group = group;
2689 ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
2690 prefetch_grp = group;
2691
2692 for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
2693 i++) {
2694 int ret = 0, new_cr;
2695
2696 cond_resched();
2697
2698 ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
2699 if (new_cr != cr) {
2700 cr = new_cr;
2701 goto repeat;
2702 }
2703
2704
2705
2706
2707
2708
2709
2710 if ((prefetch_grp == group) &&
2711 (cr > 1 ||
2712 prefetch_ios < sbi->s_mb_prefetch_limit)) {
2713 unsigned int curr_ios = prefetch_ios;
2714
2715 nr = sbi->s_mb_prefetch;
2716 if (ext4_has_feature_flex_bg(sb)) {
2717 nr = 1 << sbi->s_log_groups_per_flex;
2718 nr -= group & (nr - 1);
2719 nr = min(nr, sbi->s_mb_prefetch);
2720 }
2721 prefetch_grp = ext4_mb_prefetch(sb, group,
2722 nr, &prefetch_ios);
2723 if (prefetch_ios == curr_ios)
2724 nr = 0;
2725 }
2726
2727
2728 ret = ext4_mb_good_group_nolock(ac, group, cr);
2729 if (ret <= 0) {
2730 if (!first_err)
2731 first_err = ret;
2732 continue;
2733 }
2734
2735 err = ext4_mb_load_buddy(sb, group, &e4b);
2736 if (err)
2737 goto out;
2738
2739 ext4_lock_group(sb, group);
2740
2741
2742
2743
2744
2745 ret = ext4_mb_good_group(ac, group, cr);
2746 if (ret == 0) {
2747 ext4_unlock_group(sb, group);
2748 ext4_mb_unload_buddy(&e4b);
2749 continue;
2750 }
2751
2752 ac->ac_groups_scanned++;
2753 if (cr == 0)
2754 ext4_mb_simple_scan_group(ac, &e4b);
2755 else if (cr == 1 && sbi->s_stripe &&
2756 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2757 ext4_mb_scan_aligned(ac, &e4b);
2758 else
2759 ext4_mb_complex_scan_group(ac, &e4b);
2760
2761 ext4_unlock_group(sb, group);
2762 ext4_mb_unload_buddy(&e4b);
2763
2764 if (ac->ac_status != AC_STATUS_CONTINUE)
2765 break;
2766 }
2767
2768 if (sbi->s_mb_stats && i == ngroups)
2769 atomic64_inc(&sbi->s_bal_cX_failed[cr]);
2770 }
2771
2772 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2773 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long.  Let's try to allocate
		 * the best chunk we've found so far.
		 */
2778 ext4_mb_try_best_found(ac, &e4b);
2779 if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone else has already allocated the extent we
			 * had our eye on.  The only thing left to do is
			 * take the first block(s) we can find.
			 */
2785 lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
2786 mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
2787 ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
2788 ac->ac_b_ex.fe_len, lost);
2789
2790 ac->ac_b_ex.fe_group = 0;
2791 ac->ac_b_ex.fe_start = 0;
2792 ac->ac_b_ex.fe_len = 0;
2793 ac->ac_status = AC_STATUS_CONTINUE;
2794 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2795 cr = 3;
2796 goto repeat;
2797 }
2798 }
2799
2800 if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND)
2801 atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]);
2802out:
2803 if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
2804 err = first_err;
2805
2806 mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
2807 ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
2808 ac->ac_flags, cr, err);
2809
2810 if (nr)
2811 ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
2812
2813 return err;
2814}
2815
2816static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2817{
2818 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2819 ext4_group_t group;
2820
2821 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2822 return NULL;
2823 group = *pos + 1;
2824 return (void *) ((unsigned long) group);
2825}
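/*
 * Note (editor's): the group number is stored as *pos + 1 so that group 0
 * does not encode to a NULL iterator value, which seq_file would take as
 * end-of-sequence; ext4_mb_seq_groups_show() undoes this with group--.
 */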
2826
2827static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2828{
2829 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2830 ext4_group_t group;
2831
2832 ++*pos;
2833 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2834 return NULL;
2835 group = *pos + 1;
2836 return (void *) ((unsigned long) group);
2837}
2838
2839static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2840{
2841 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2842 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2843 int i;
2844 int err, buddy_loaded = 0;
2845 struct ext4_buddy e4b;
2846 struct ext4_group_info *grinfo;
2847 unsigned char blocksize_bits = min_t(unsigned char,
2848 sb->s_blocksize_bits,
2849 EXT4_MAX_BLOCK_LOG_SIZE);
2850 struct sg {
2851 struct ext4_group_info info;
2852 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
2853 } sg;
2854
2855 group--;
2856 if (group == 0)
2857 seq_puts(seq, "#group: free frags first ["
2858 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2859 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2860
2861 i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2862 sizeof(struct ext4_group_info);
2863
2864 grinfo = ext4_get_group_info(sb, group);
2865
2866 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2867 err = ext4_mb_load_buddy(sb, group, &e4b);
2868 if (err) {
2869 seq_printf(seq, "#%-5u: I/O error\n", group);
2870 return 0;
2871 }
2872 buddy_loaded = 1;
2873 }
2874
2875 memcpy(&sg, ext4_get_group_info(sb, group), i);
2876
2877 if (buddy_loaded)
2878 ext4_mb_unload_buddy(&e4b);
2879
2880 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2881 sg.info.bb_fragments, sg.info.bb_first_free);
2882 for (i = 0; i <= 13; i++)
2883 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
2884 sg.info.bb_counters[i] : 0);
2885 seq_puts(seq, " ]\n");
2886
2887 return 0;
2888}
2889
2890static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2891{
2892}
2893
2894const struct seq_operations ext4_mb_seq_groups_ops = {
2895 .start = ext4_mb_seq_groups_start,
2896 .next = ext4_mb_seq_groups_next,
2897 .stop = ext4_mb_seq_groups_stop,
2898 .show = ext4_mb_seq_groups_show,
2899};
2900
2901int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
2902{
2903 struct super_block *sb = (struct super_block *)seq->private;
2904 struct ext4_sb_info *sbi = EXT4_SB(sb);
2905
2906 seq_puts(seq, "mballoc:\n");
2907 if (!sbi->s_mb_stats) {
2908 seq_puts(seq, "\tmb stats collection turned off.\n");
2909 seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n");
2910 return 0;
2911 }
2912 seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
2913 seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
2914
2915 seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned));
2916
2917 seq_puts(seq, "\tcr0_stats:\n");
2918 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0]));
2919 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2920 atomic64_read(&sbi->s_bal_cX_groups_considered[0]));
2921 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2922 atomic64_read(&sbi->s_bal_cX_failed[0]));
2923 seq_printf(seq, "\t\tbad_suggestions: %u\n",
2924 atomic_read(&sbi->s_bal_cr0_bad_suggestions));
2925
2926 seq_puts(seq, "\tcr1_stats:\n");
2927 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1]));
2928 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2929 atomic64_read(&sbi->s_bal_cX_groups_considered[1]));
2930 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2931 atomic64_read(&sbi->s_bal_cX_failed[1]));
2932 seq_printf(seq, "\t\tbad_suggestions: %u\n",
2933 atomic_read(&sbi->s_bal_cr1_bad_suggestions));
2934
2935 seq_puts(seq, "\tcr2_stats:\n");
2936 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2]));
2937 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2938 atomic64_read(&sbi->s_bal_cX_groups_considered[2]));
2939 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2940 atomic64_read(&sbi->s_bal_cX_failed[2]));
2941
2942 seq_puts(seq, "\tcr3_stats:\n");
2943 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3]));
2944 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2945 atomic64_read(&sbi->s_bal_cX_groups_considered[3]));
2946 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2947 atomic64_read(&sbi->s_bal_cX_failed[3]));
2948 seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned));
2949 seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
2950 seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
2951 seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
2952 seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
2953
2954 seq_printf(seq, "\tbuddies_generated: %u/%u\n",
2955 atomic_read(&sbi->s_mb_buddies_generated),
2956 ext4_get_groups_count(sb));
2957 seq_printf(seq, "\tbuddies_time_used: %llu\n",
2958 atomic64_read(&sbi->s_mb_generation_time));
2959 seq_printf(seq, "\tpreallocated: %u\n",
2960 atomic_read(&sbi->s_mb_preallocated));
2961 seq_printf(seq, "\tdiscarded: %u\n",
2962 atomic_read(&sbi->s_mb_discarded));
2963 return 0;
2964}
2965
2966static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
2967{
2968 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2969 unsigned long position;
2970
2971 read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
2972
2973 if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
2974 return NULL;
2975 position = *pos + 1;
2976 return (void *) ((unsigned long) position);
2977}
2978
2979static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
2980{
2981 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2982 unsigned long position;
2983
2984 ++*pos;
2985 if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
2986 return NULL;
2987 position = *pos + 1;
2988 return (void *) ((unsigned long) position);
2989}
2990
2991static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
2992{
2993 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2994 struct ext4_sb_info *sbi = EXT4_SB(sb);
2995 unsigned long position = ((unsigned long) v);
2996 struct ext4_group_info *grp;
2997 struct rb_node *n;
2998 unsigned int count, min, max;
2999
3000 position--;
3001 if (position >= MB_NUM_ORDERS(sb)) {
3002 seq_puts(seq, "fragment_size_tree:\n");
3003 n = rb_first(&sbi->s_mb_avg_fragment_size_root);
3004 if (!n) {
3005 seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 0\n");
3006 return 0;
3007 }
3008 grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
3009 min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
3010 count = 1;
3011 while (rb_next(n)) {
3012 count++;
3013 n = rb_next(n);
3014 }
3015 grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
3016 max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
3017
3018 seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n",
3019 min, max, count);
3020 return 0;
3021 }
3022
3023 if (position == 0) {
3024 seq_printf(seq, "optimize_scan: %d\n",
3025 test_opt2(sb, MB_OPTIMIZE_SCAN) ? 1 : 0);
3026 seq_puts(seq, "max_free_order_lists:\n");
3027 }
3028 count = 0;
3029 list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
3030 bb_largest_free_order_node)
3031 count++;
3032 seq_printf(seq, "\tlist_order_%u_groups: %u\n",
3033 (unsigned int)position, count);
3034
3035 return 0;
3036}
3037
3038static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
3039{
3040 struct super_block *sb = PDE_DATA(file_inode(seq->file));
3041
3042 read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
3043}
3044
3045const struct seq_operations ext4_mb_seq_structs_summary_ops = {
3046 .start = ext4_mb_seq_structs_summary_start,
3047 .next = ext4_mb_seq_structs_summary_next,
3048 .stop = ext4_mb_seq_structs_summary_stop,
3049 .show = ext4_mb_seq_structs_summary_show,
3050};
3051
3052static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
3053{
3054 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
3055 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
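	/*
	 * Editor's example: with 4KiB blocks, blocksize_bits == 12, so
	 * cache_index is 12 - EXT4_MIN_BLOCK_LOG_SIZE (10) == 2, i.e.
	 * the "ext4_groupinfo_4k" slab declared above.
	 */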
3056
3057 BUG_ON(!cachep);
3058 return cachep;
3059}
3060
/*
 * Allocate the top-level s_group_info array for the specified number of
 * groups.
 */
3065int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
3066{
3067 struct ext4_sb_info *sbi = EXT4_SB(sb);
3068 unsigned size;
3069 struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
3070
3071 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
3072 EXT4_DESC_PER_BLOCK_BITS(sb);
3073 if (size <= sbi->s_group_info_size)
3074 return 0;
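	/*
	 * Editor's example: with 4KiB blocks (128 descriptors per
	 * block) and 65536 groups, size above is 512 second-level
	 * table pointers; it is then rounded up to a power of two in
	 * bytes before being allocated below.
	 */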
3075
3076 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
3077 new_groupinfo = kvzalloc(size, GFP_KERNEL);
3078 if (!new_groupinfo) {
3079 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
3080 return -ENOMEM;
3081 }
3082 rcu_read_lock();
3083 old_groupinfo = rcu_dereference(sbi->s_group_info);
3084 if (old_groupinfo)
3085 memcpy(new_groupinfo, old_groupinfo,
3086 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
3087 rcu_read_unlock();
3088 rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
3089 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
3090 if (old_groupinfo)
3091 ext4_kvfree_array_rcu(old_groupinfo);
3092 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
3093 sbi->s_group_info_size);
3094 return 0;
3095}
3096
/* Create and initialize ext4_group_info data for the given group. */
3098int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
3099 struct ext4_group_desc *desc)
3100{
3101 int i;
3102 int metalen = 0;
3103 int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
3104 struct ext4_sb_info *sbi = EXT4_SB(sb);
3105 struct ext4_group_info **meta_group_info;
3106 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3107
	/*
	 * First check if this group is the first of a descriptor block:
	 * if so, we have to allocate a new table of pointers to
	 * ext4_group_info structures.
	 */
3113 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
3114 metalen = sizeof(*meta_group_info) <<
3115 EXT4_DESC_PER_BLOCK_BITS(sb);
3116 meta_group_info = kmalloc(metalen, GFP_NOFS);
3117 if (meta_group_info == NULL) {
3118 ext4_msg(sb, KERN_ERR, "can't allocate mem "
3119 "for a buddy group");
3120 goto exit_meta_group_info;
3121 }
3122 rcu_read_lock();
3123 rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
3124 rcu_read_unlock();
3125 }
3126
3127 meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
3128 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
3129
3130 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
3131 if (meta_group_info[i] == NULL) {
3132 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
3133 goto exit_group_info;
3134 }
3135 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
3136 &(meta_group_info[i]->bb_state));
3137
	/*
	 * Initialize bb_free so that uninitialized empty groups can be
	 * skipped without reading their bitmaps.
	 */
3142 if (ext4_has_group_desc_csum(sb) &&
3143 (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3144 meta_group_info[i]->bb_free =
3145 ext4_free_clusters_after_init(sb, group, desc);
3146 } else {
3147 meta_group_info[i]->bb_free =
3148 ext4_free_group_clusters(sb, desc);
3149 }
3150
3151 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
3152 init_rwsem(&meta_group_info[i]->alloc_sem);
3153 meta_group_info[i]->bb_free_root = RB_ROOT;
3154 INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
3155 RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb);
3156 meta_group_info[i]->bb_largest_free_order = -1;
3157 meta_group_info[i]->bb_group = group;
3158
3159 mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
3160 return 0;
3161
3162exit_group_info:
	/* If a meta_group_info table has been allocated, release it now. */
3164 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
3165 struct ext4_group_info ***group_info;
3166
3167 rcu_read_lock();
3168 group_info = rcu_dereference(sbi->s_group_info);
3169 kfree(group_info[idx]);
3170 group_info[idx] = NULL;
3171 rcu_read_unlock();
3172 }
3173exit_meta_group_info:
3174 return -ENOMEM;
3175}
3176
3177static int ext4_mb_init_backend(struct super_block *sb)
3178{
3179 ext4_group_t ngroups = ext4_get_groups_count(sb);
3180 ext4_group_t i;
3181 struct ext4_sb_info *sbi = EXT4_SB(sb);
3182 int err;
3183 struct ext4_group_desc *desc;
3184 struct ext4_group_info ***group_info;
3185 struct kmem_cache *cachep;
3186
3187 err = ext4_mb_alloc_groupinfo(sb, ngroups);
3188 if (err)
3189 return err;
3190
3191 sbi->s_buddy_cache = new_inode(sb);
3192 if (sbi->s_buddy_cache == NULL) {
3193 ext4_msg(sb, KERN_ERR, "can't get new inode");
3194 goto err_freesgi;
3195 }
3196
	/* The buddy cache inode is in-memory only and never written to
	 * disk; give it an inode number (EXT4_BAD_INO) that can never
	 * collide with a real on-disk inode. */
3200 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
3201 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
3202 for (i = 0; i < ngroups; i++) {
3203 cond_resched();
3204 desc = ext4_get_group_desc(sb, i, NULL);
3205 if (desc == NULL) {
3206 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
3207 goto err_freebuddy;
3208 }
3209 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
3210 goto err_freebuddy;
3211 }
3212
3213 if (ext4_has_feature_flex_bg(sb)) {
		/* A single flex group is supposed to be read by a single
		 * I/O.  s_mb_prefetch is an unsigned int, so a shift of
		 * 32 or more would overflow; reject such superblocks.
		 */
3218 if (sbi->s_es->s_log_groups_per_flex >= 32) {
3219 ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
3220 goto err_freebuddy;
3221 }
3222 sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
3223 BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
		sbi->s_mb_prefetch *= 8; /* at most 8 prefetch I/Os in flight */
3225 } else {
3226 sbi->s_mb_prefetch = 32;
3227 }
3228 if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
3229 sbi->s_mb_prefetch = ext4_get_groups_count(sb);
3230
	/*
	 * How many real I/Os to prefetch within a single allocation at
	 * cr=0.  Since cr=0 is a CPU-side optimization we should not
	 * load too many groups; at some point we should start using
	 * what we already have in memory.  With an average random
	 * access time of ~5ms, reading 200 groups would take a second,
	 * so cap the limit at 4 * s_mb_prefetch.
	 */
3237 sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
3238 if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
3239 sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
3240
3241 return 0;
3242
3243err_freebuddy:
3244 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3245 while (i-- > 0)
3246 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
3247 i = sbi->s_group_info_size;
3248 rcu_read_lock();
3249 group_info = rcu_dereference(sbi->s_group_info);
3250 while (i-- > 0)
3251 kfree(group_info[i]);
3252 rcu_read_unlock();
3253 iput(sbi->s_buddy_cache);
3254err_freesgi:
3255 rcu_read_lock();
3256 kvfree(rcu_dereference(sbi->s_group_info));
3257 rcu_read_unlock();
3258 return -ENOMEM;
3259}
3260
3261static void ext4_groupinfo_destroy_slabs(void)
3262{
3263 int i;
3264
3265 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
3266 kmem_cache_destroy(ext4_groupinfo_caches[i]);
3267 ext4_groupinfo_caches[i] = NULL;
3268 }
3269}
3270
3271static int ext4_groupinfo_create_slab(size_t size)
3272{
3273 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
3274 int slab_size;
3275 int blocksize_bits = order_base_2(size);
3276 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
3277 struct kmem_cache *cachep;
3278
3279 if (cache_index >= NR_GRPINFO_CACHES)
3280 return -EINVAL;
3281
3282 if (unlikely(cache_index < 0))
3283 cache_index = 0;
3284
3285 mutex_lock(&ext4_grpinfo_slab_create_mutex);
3286 if (ext4_groupinfo_caches[cache_index]) {
3287 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
3288 return 0;
3289 }
3290
3291 slab_size = offsetof(struct ext4_group_info,
3292 bb_counters[blocksize_bits + 2]);
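	/*
	 * bb_counters[] needs blocksize_bits + 2 entries: one for each
	 * order 0..blocksize_bits+1, matching MB_NUM_ORDERS(sb) and the
	 * s_mb_maxs[] setup in ext4_mb_init().
	 */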
3293
3294 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
3295 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
3296 NULL);
3297
3298 ext4_groupinfo_caches[cache_index] = cachep;
3299
3300 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
3301 if (!cachep) {
3302 printk(KERN_EMERG
3303 "EXT4-fs: no memory for groupinfo slab cache\n");
3304 return -ENOMEM;
3305 }
3306
3307 return 0;
3308}
3309
3310int ext4_mb_init(struct super_block *sb)
3311{
3312 struct ext4_sb_info *sbi = EXT4_SB(sb);
3313 unsigned i, j;
3314 unsigned offset, offset_incr;
3315 unsigned max;
3316 int ret;
3317
3318 i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_offsets);
3319
3320 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
3321 if (sbi->s_mb_offsets == NULL) {
3322 ret = -ENOMEM;
3323 goto out;
3324 }
3325
3326 i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_maxs);
3327 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
3328 if (sbi->s_mb_maxs == NULL) {
3329 ret = -ENOMEM;
3330 goto out;
3331 }
3332
3333 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
3334 if (ret < 0)
3335 goto out;
3336
	/* order 0 is the regular bitmap */
3338 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
3339 sbi->s_mb_offsets[0] = 0;
3340
3341 i = 1;
3342 offset = 0;
3343 offset_incr = 1 << (sb->s_blocksize_bits - 1);
3344 max = sb->s_blocksize << 2;
3345 do {
3346 sbi->s_mb_offsets[i] = offset;
3347 sbi->s_mb_maxs[i] = max;
3348 offset += offset_incr;
3349 offset_incr = offset_incr >> 1;
3350 max = max >> 1;
3351 i++;
3352 } while (i < MB_NUM_ORDERS(sb));
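	/*
	 * Editor's illustration for 4KiB blocks: order 0 uses the block
	 * bitmap itself (s_mb_maxs[0] = 32768 bits); within the buddy
	 * block, order 1 starts at byte offset 0 with 16384 bits, order
	 * 2 at 2048 with 8192 bits, order 3 at 3072, and so on -- each
	 * level half the previous, all fitting in one block.
	 */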
3353
3354 sbi->s_mb_avg_fragment_size_root = RB_ROOT;
3355 sbi->s_mb_largest_free_orders =
3356 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
3357 GFP_KERNEL);
3358 if (!sbi->s_mb_largest_free_orders) {
3359 ret = -ENOMEM;
3360 goto out;
3361 }
3362 sbi->s_mb_largest_free_orders_locks =
3363 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
3364 GFP_KERNEL);
3365 if (!sbi->s_mb_largest_free_orders_locks) {
3366 ret = -ENOMEM;
3367 goto out;
3368 }
3369 for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
3370 INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
3371 rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
3372 }
3373 rwlock_init(&sbi->s_mb_rb_lock);
3374
3375 spin_lock_init(&sbi->s_md_lock);
3376 sbi->s_mb_free_pending = 0;
3377 INIT_LIST_HEAD(&sbi->s_freed_data_list);
3378
3379 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
3380 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
3381 sbi->s_mb_stats = MB_DEFAULT_STATS;
3382 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
3383 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
3384 sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
3385
	/*
	 * The default group preallocation is 512 blocks, which for 4k
	 * block sizes translates to 2 megabytes.  However, for bigalloc
	 * file systems the preallocation is tracked in clusters, so the
	 * default is scaled down by the cluster ratio (s_cluster_bits)
	 * to keep the preallocation area at roughly the same byte size.
	 * A lower bound of 32 clusters is applied so that very large
	 * cluster sizes still leave a useful amount of per-group
	 * preallocation.  This can be tuned via
	 * /sys/fs/ext4/<partition>/mb_group_prealloc.
	 */
3397 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
3398 sbi->s_cluster_bits, 32);
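	/*
	 * Editor's example: with MB_DEFAULT_GROUP_PREALLOC == 512 and a
	 * bigalloc cluster ratio of 16 (s_cluster_bits == 4), this is
	 * max(512 >> 4, 32) = 32 clusters -- still 512 blocks, i.e.
	 * 2MiB at a 4KiB block size.
	 */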
3399
	/*
	 * If there is an s_stripe > 1, round s_mb_group_prealloc up to
	 * the lowest multiple of s_stripe that is not smaller than the
	 * value determined above.  We want the preallocation size to be
	 * an exact multiple of the RAID stripe size so that
	 * preallocations do not fragment the stripes.
	 */
3407 if (sbi->s_stripe > 1) {
3408 sbi->s_mb_group_prealloc = roundup(
3409 sbi->s_mb_group_prealloc, sbi->s_stripe);
3410 }
3411
3412 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
3413 if (sbi->s_locality_groups == NULL) {
3414 ret = -ENOMEM;
3415 goto out;
3416 }
3417 for_each_possible_cpu(i) {
3418 struct ext4_locality_group *lg;
3419 lg = per_cpu_ptr(sbi->s_locality_groups, i);
3420 mutex_init(&lg->lg_mutex);
3421 for (j = 0; j < PREALLOC_TB_SIZE; j++)
3422 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
3423 spin_lock_init(&lg->lg_prealloc_lock);
3424 }
3425
3426 if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
3427 sbi->s_mb_max_linear_groups = 0;
3428 else
3429 sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
3430
3431 ret = ext4_mb_init_backend(sb);
3432 if (ret != 0)
3433 goto out_free_locality_groups;
3434
3435 return 0;
3436
3437out_free_locality_groups:
3438 free_percpu(sbi->s_locality_groups);
3439 sbi->s_locality_groups = NULL;
3440out:
3441 kfree(sbi->s_mb_largest_free_orders);
3442 kfree(sbi->s_mb_largest_free_orders_locks);
3443 kfree(sbi->s_mb_offsets);
3444 sbi->s_mb_offsets = NULL;
3445 kfree(sbi->s_mb_maxs);
3446 sbi->s_mb_maxs = NULL;
3447 return ret;
3448}
3449
/* must be called with the ext4 group lock held */
3451static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
3452{
3453 struct ext4_prealloc_space *pa;
3454 struct list_head *cur, *tmp;
3455 int count = 0;
3456
3457 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
3458 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3459 list_del(&pa->pa_group_list);
3460 count++;
3461 kmem_cache_free(ext4_pspace_cachep, pa);
3462 }
3463 return count;
3464}
3465
3466int ext4_mb_release(struct super_block *sb)
3467{
3468 ext4_group_t ngroups = ext4_get_groups_count(sb);
3469 ext4_group_t i;
3470 int num_meta_group_infos;
3471 struct ext4_group_info *grinfo, ***group_info;
3472 struct ext4_sb_info *sbi = EXT4_SB(sb);
3473 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3474 int count;
3475
3476 if (sbi->s_group_info) {
3477 for (i = 0; i < ngroups; i++) {
3478 cond_resched();
3479 grinfo = ext4_get_group_info(sb, i);
3480 mb_group_bb_bitmap_free(grinfo);
3481 ext4_lock_group(sb, i);
3482 count = ext4_mb_cleanup_pa(grinfo);
3483 if (count)
3484 mb_debug(sb, "mballoc: %d PAs left\n",
3485 count);
3486 ext4_unlock_group(sb, i);
3487 kmem_cache_free(cachep, grinfo);
3488 }
3489 num_meta_group_infos = (ngroups +
3490 EXT4_DESC_PER_BLOCK(sb) - 1) >>
3491 EXT4_DESC_PER_BLOCK_BITS(sb);
3492 rcu_read_lock();
3493 group_info = rcu_dereference(sbi->s_group_info);
3494 for (i = 0; i < num_meta_group_infos; i++)
3495 kfree(group_info[i]);
3496 kvfree(group_info);
3497 rcu_read_unlock();
3498 }
3499 kfree(sbi->s_mb_largest_free_orders);
3500 kfree(sbi->s_mb_largest_free_orders_locks);
3501 kfree(sbi->s_mb_offsets);
3502 kfree(sbi->s_mb_maxs);
3503 iput(sbi->s_buddy_cache);
3504 if (sbi->s_mb_stats) {
3505 ext4_msg(sb, KERN_INFO,
3506 "mballoc: %u blocks %u reqs (%u success)",
3507 atomic_read(&sbi->s_bal_allocated),
3508 atomic_read(&sbi->s_bal_reqs),
3509 atomic_read(&sbi->s_bal_success));
3510 ext4_msg(sb, KERN_INFO,
3511 "mballoc: %u extents scanned, %u groups scanned, %u goal hits, "
3512 "%u 2^N hits, %u breaks, %u lost",
3513 atomic_read(&sbi->s_bal_ex_scanned),
3514 atomic_read(&sbi->s_bal_groups_scanned),
3515 atomic_read(&sbi->s_bal_goals),
3516 atomic_read(&sbi->s_bal_2orders),
3517 atomic_read(&sbi->s_bal_breaks),
3518 atomic_read(&sbi->s_mb_lost_chunks));
3519 ext4_msg(sb, KERN_INFO,
3520 "mballoc: %u generated and it took %llu",
3521 atomic_read(&sbi->s_mb_buddies_generated),
3522 atomic64_read(&sbi->s_mb_generation_time));
3523 ext4_msg(sb, KERN_INFO,
3524 "mballoc: %u preallocated, %u discarded",
3525 atomic_read(&sbi->s_mb_preallocated),
3526 atomic_read(&sbi->s_mb_discarded));
3527 }
3528
3529 free_percpu(sbi->s_locality_groups);
3530
3531 return 0;
3532}
3533
3534static inline int ext4_issue_discard(struct super_block *sb,
3535 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
3536 struct bio **biop)
3537{
3538 ext4_fsblk_t discard_block;
3539
3540 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
3541 ext4_group_first_block_no(sb, block_group));
3542 count = EXT4_C2B(EXT4_SB(sb), count);
3543 trace_ext4_discard_blocks(sb,
3544 (unsigned long long) discard_block, count);
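	/*
	 * The shifts below convert filesystem blocks to 512-byte
	 * sectors: with 4KiB blocks, s_blocksize_bits - 9 == 3, so one
	 * block spans 8 sectors.
	 */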
3545 if (biop) {
3546 return __blkdev_issue_discard(sb->s_bdev,
3547 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
3548 (sector_t)count << (sb->s_blocksize_bits - 9),
3549 GFP_NOFS, 0, biop);
3550 } else
3551 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
3552}
3553
3554static void ext4_free_data_in_buddy(struct super_block *sb,
3555 struct ext4_free_data *entry)
3556{
3557 struct ext4_buddy e4b;
3558 struct ext4_group_info *db;
3559 int err, count = 0, count2 = 0;
3560
	mb_debug(sb, "going to free %u blocks in group %u (0x%p):",
		 entry->efd_count, entry->efd_group, entry);
3563
3564 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
	/* the buddy pages were pinned when this entry was queued,
	 * so loading the buddy cannot fail here */
3566 BUG_ON(err != 0);
3567
3568 spin_lock(&EXT4_SB(sb)->s_md_lock);
3569 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
3570 spin_unlock(&EXT4_SB(sb)->s_md_lock);
3571
3572 db = e4b.bd_info;
3573
3574 count += entry->efd_count;
3575 count2++;
3576 ext4_lock_group(sb, entry->efd_group);
3577
3578 rb_erase(&entry->efd_node, &(db->bb_free_root));
3579 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
3580
	/*
	 * Clear the trimmed flag for the group so that the next
	 * ext4_trim_fs can trim it.  If the volume is mounted with
	 * -o discard, online discard is supported and the free blocks
	 * are trimmed online.
	 */
3587 if (!test_opt(sb, DISCARD))
3588 EXT4_MB_GRP_CLEAR_TRIMMED(db);
3589
3590 if (!db->bb_free_root.rb_node) {
		/* No more items in the per-group rb tree; balance the
		 * page refcounts taken in ext4_mb_free_metadata().
		 */
3594 put_page(e4b.bd_buddy_page);
3595 put_page(e4b.bd_bitmap_page);
3596 }
3597 ext4_unlock_group(sb, entry->efd_group);
3598 kmem_cache_free(ext4_free_data_cachep, entry);
3599 ext4_mb_unload_buddy(&e4b);
3600
3601 mb_debug(sb, "freed %d blocks in %d structures\n", count,
3602 count2);
3603}
3604
/*
 * This function is called by the jbd2 layer once the commit has
 * finished, so the blocks released in that transaction can now be freed.
 */
3609void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
3610{
3611 struct ext4_sb_info *sbi = EXT4_SB(sb);
3612 struct ext4_free_data *entry, *tmp;
3613 struct bio *discard_bio = NULL;
3614 struct list_head freed_data_list;
3615 struct list_head *cut_pos = NULL;
3616 int err;
3617
3618 INIT_LIST_HEAD(&freed_data_list);
3619
3620 spin_lock(&sbi->s_md_lock);
3621 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
3622 if (entry->efd_tid != commit_tid)
3623 break;
3624 cut_pos = &entry->efd_list;
3625 }
3626 if (cut_pos)
3627 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
3628 cut_pos);
3629 spin_unlock(&sbi->s_md_lock);
3630
3631 if (test_opt(sb, DISCARD)) {
3632 list_for_each_entry(entry, &freed_data_list, efd_list) {
3633 err = ext4_issue_discard(sb, entry->efd_group,
3634 entry->efd_start_cluster,
3635 entry->efd_count,
3636 &discard_bio);
3637 if (err && err != -EOPNOTSUPP) {
3638 ext4_msg(sb, KERN_WARNING, "discard request in"
3639 " group:%d block:%d count:%d failed"
3640 " with %d", entry->efd_group,
3641 entry->efd_start_cluster,
3642 entry->efd_count, err);
3643 } else if (err == -EOPNOTSUPP)
3644 break;
3645 }
3646
3647 if (discard_bio) {
3648 submit_bio_wait(discard_bio);
3649 bio_put(discard_bio);
3650 }
3651 }
3652
3653 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
3654 ext4_free_data_in_buddy(sb, entry);
3655}
3656
3657int __init ext4_init_mballoc(void)
3658{
3659 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
3660 SLAB_RECLAIM_ACCOUNT);
3661 if (ext4_pspace_cachep == NULL)
3662 goto out;
3663
3664 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
3665 SLAB_RECLAIM_ACCOUNT);
3666 if (ext4_ac_cachep == NULL)
3667 goto out_pa_free;
3668
3669 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
3670 SLAB_RECLAIM_ACCOUNT);
3671 if (ext4_free_data_cachep == NULL)
3672 goto out_ac_free;
3673
3674 return 0;
3675
3676out_ac_free:
3677 kmem_cache_destroy(ext4_ac_cachep);
3678out_pa_free:
3679 kmem_cache_destroy(ext4_pspace_cachep);
3680out:
3681 return -ENOMEM;
3682}
3683
3684void ext4_exit_mballoc(void)
3685{
	/*
	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
	 * before destroying the slab cache.
	 */
3690 rcu_barrier();
3691 kmem_cache_destroy(ext4_pspace_cachep);
3692 kmem_cache_destroy(ext4_ac_cachep);
3693 kmem_cache_destroy(ext4_free_data_cachep);
3694 ext4_groupinfo_destroy_slabs();
3695}
3696
3697
/*
 * Check quota and mark the chosen space (ac->ac_b_ex) non-free in the
 * bitmaps.  Returns 0 on success or an error code.
 */
3702static noinline_for_stack int
3703ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3704 handle_t *handle, unsigned int reserv_clstrs)
3705{
3706 struct buffer_head *bitmap_bh = NULL;
3707 struct ext4_group_desc *gdp;
3708 struct buffer_head *gdp_bh;
3709 struct ext4_sb_info *sbi;
3710 struct super_block *sb;
3711 ext4_fsblk_t block;
3712 int err, len;
3713
3714 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3715 BUG_ON(ac->ac_b_ex.fe_len <= 0);
3716
3717 sb = ac->ac_sb;
3718 sbi = EXT4_SB(sb);
3719
3720 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
3721 if (IS_ERR(bitmap_bh)) {
3722 err = PTR_ERR(bitmap_bh);
3723 bitmap_bh = NULL;
3724 goto out_err;
3725 }
3726
3727 BUFFER_TRACE(bitmap_bh, "getting write access");
3728 err = ext4_journal_get_write_access(handle, bitmap_bh);
3729 if (err)
3730 goto out_err;
3731
3732 err = -EIO;
3733 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
3734 if (!gdp)
3735 goto out_err;
3736
3737 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
3738 ext4_free_group_clusters(sb, gdp));
3739
3740 BUFFER_TRACE(gdp_bh, "get_write_access");
3741 err = ext4_journal_get_write_access(handle, gdp_bh);
3742 if (err)
3743 goto out_err;
3744
3745 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3746
3747 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3748 if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
3749 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
3750 "fs metadata", block, block+len);
		/*
		 * The file system was not mounted to panic on error;
		 * fix the bitmap and return EFSCORRUPTED.  Some of the
		 * blocks are leaked here.
		 */
3755 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3756 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3757 ac->ac_b_ex.fe_len);
3758 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3759 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3760 if (!err)
3761 err = -EFSCORRUPTED;
3762 goto out_err;
3763 }
3764
3765 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3766#ifdef AGGRESSIVE_CHECK
3767 {
3768 int i;
3769 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3770 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3771 bitmap_bh->b_data));
3772 }
3773 }
3774#endif
3775 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3776 ac->ac_b_ex.fe_len);
3777 if (ext4_has_group_desc_csum(sb) &&
3778 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3779 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3780 ext4_free_group_clusters_set(sb, gdp,
3781 ext4_free_clusters_after_init(sb,
3782 ac->ac_b_ex.fe_group, gdp));
3783 }
3784 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3785 ext4_free_group_clusters_set(sb, gdp, len);
3786 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3787 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3788
3789 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3790 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
	/*
	 * Now reduce the dirty cluster count as well; it should not go
	 * negative.
	 */
3794 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release the reserved clusters for non-delalloc allocations */
3796 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3797 reserv_clstrs);
3798
3799 if (sbi->s_log_groups_per_flex) {
3800 ext4_group_t flex_group = ext4_flex_group(sbi,
3801 ac->ac_b_ex.fe_group);
3802 atomic64_sub(ac->ac_b_ex.fe_len,
3803 &sbi_array_rcu_deref(sbi, s_flex_groups,
3804 flex_group)->free_clusters);
3805 }
3806
3807 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3808 if (err)
3809 goto out_err;
3810 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3811
3812out_err:
3813 brelse(bitmap_bh);
3814 return err;
3815}
3816
/*
 * Idempotent helper for the ext4 fast commit replay path: set the state
 * of blocks in bitmaps and update the counters.
 */
3821void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
3822 int len, int state)
3823{
3824 struct buffer_head *bitmap_bh = NULL;
3825 struct ext4_group_desc *gdp;
3826 struct buffer_head *gdp_bh;
3827 struct ext4_sb_info *sbi = EXT4_SB(sb);
3828 ext4_group_t group;
3829 ext4_grpblk_t blkoff;
3830 int i, clen, err;
3831 int already;
3832
3833 clen = EXT4_B2C(sbi, len);
3834
3835 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
3836 bitmap_bh = ext4_read_block_bitmap(sb, group);
3837 if (IS_ERR(bitmap_bh)) {
3838 err = PTR_ERR(bitmap_bh);
3839 bitmap_bh = NULL;
3840 goto out_err;
3841 }
3842
3843 err = -EIO;
3844 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
3845 if (!gdp)
3846 goto out_err;
3847
3848 ext4_lock_group(sb, group);
3849 already = 0;
3850 for (i = 0; i < clen; i++)
3851 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
3852 already++;
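	/*
	 * `already` counts clusters that were in the target state before
	 * this call, so the free-cluster adjustment below only accounts
	 * for the bits this call actually flipped -- that is what makes
	 * the helper idempotent for fast-commit replay.
	 */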
3853
3854 if (state)
3855 ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
3856 else
3857 mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
3858 if (ext4_has_group_desc_csum(sb) &&
3859 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3860 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3861 ext4_free_group_clusters_set(sb, gdp,
3862 ext4_free_clusters_after_init(sb,
3863 group, gdp));
3864 }
3865 if (state)
3866 clen = ext4_free_group_clusters(sb, gdp) - clen + already;
3867 else
3868 clen = ext4_free_group_clusters(sb, gdp) + clen - already;
3869
3870 ext4_free_group_clusters_set(sb, gdp, clen);
3871 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
3872 ext4_group_desc_csum_set(sb, group, gdp);
3873
3874 ext4_unlock_group(sb, group);
3875
3876 if (sbi->s_log_groups_per_flex) {
3877 ext4_group_t flex_group = ext4_flex_group(sbi, group);
3878
3879 atomic64_sub(len,
3880 &sbi_array_rcu_deref(sbi, s_flex_groups,
3881 flex_group)->free_clusters);
3882 }
3883
3884 err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
3885 if (err)
3886 goto out_err;
3887 sync_dirty_buffer(bitmap_bh);
3888 err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
3889 sync_dirty_buffer(gdp_bh);
3890
3891out_err:
3892 brelse(bitmap_bh);
3893}
3894
/*
 * Here we normalize a request for a locality group.  Group requests are
 * normalized to s_mb_group_prealloc, which follows s_stripe when a
 * stripe size was set via mount option; s_mb_group_prealloc can be
 * configured via /sys/fs/ext4/<partition>/mb_group_prealloc.
 *
 * XXX: should we try to preallocate more than the group has now?
 */
3904static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3905{
3906 struct super_block *sb = ac->ac_sb;
3907 struct ext4_locality_group *lg = ac->ac_lg;
3908
3909 BUG_ON(lg == NULL);
3910 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3911 mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
3912}
3913
/*
 * Normalization means improving the request in terms of size and
 * alignment.
 */
3918static noinline_for_stack void
3919ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3920 struct ext4_allocation_request *ar)
3921{
3922 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3923 int bsbits, max;
3924 ext4_lblk_t end;
3925 loff_t size, start_off;
3926 loff_t orig_size __maybe_unused;
3927 ext4_lblk_t start;
3928 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3929 struct ext4_prealloc_space *pa;
3930
	/* normalize only data requests; metadata requests
	 * do not need preallocation */
3933 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3934 return;
3935
	/* sometimes the caller wants exactly these blocks */
3937 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3938 return;
3939
	/* the caller may indicate that preallocation isn't
	 * required (it's a tail, for example) */
3942 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3943 return;
3944
3945 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3946 ext4_mb_normalize_group_request(ac);
3947 return ;
3948 }
3949
3950 bsbits = ac->ac_sb->s_blocksize_bits;
3951
	/* first, determine the file size as it will be once the
	 * current request has been allocated */
3954 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3955 size = size << bsbits;
3956 if (size < i_size_read(ac->ac_inode))
3957 size = i_size_read(ac->ac_inode);
3958 orig_size = size;
3959
	/* max size of free chunks */
3961 max = 2 << bsbits;
3962
3963#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3964 (req <= (size) || max <= (chunk_size))
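/*
 * NRL_CHECK_SIZE answers "is this preallocation bucket appropriate?":
 * true when the request already fits within `size`, or when the largest
 * possible free chunk (max) is no bigger than chunk_size anyway, so that
 * asking for a larger chunk would be pointless.
 */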
3965
	/* first, try to predict the final file size */
	/* XXX: should this table be tunable? */
3968 start_off = 0;
3969 if (size <= 16 * 1024) {
3970 size = 16 * 1024;
3971 } else if (size <= 32 * 1024) {
3972 size = 32 * 1024;
3973 } else if (size <= 64 * 1024) {
3974 size = 64 * 1024;
3975 } else if (size <= 128 * 1024) {
3976 size = 128 * 1024;
3977 } else if (size <= 256 * 1024) {
3978 size = 256 * 1024;
3979 } else if (size <= 512 * 1024) {
3980 size = 512 * 1024;
3981 } else if (size <= 1024 * 1024) {
3982 size = 1024 * 1024;
3983 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3984 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3985 (21 - bsbits)) << 21;
3986 size = 2 * 1024 * 1024;
3987 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3988 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3989 (22 - bsbits)) << 22;
3990 size = 4 * 1024 * 1024;
3991 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3992 (8<<20)>>bsbits, max, 8 * 1024)) {
3993 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3994 (23 - bsbits)) << 23;
3995 size = 8 * 1024 * 1024;
3996 } else {
3997 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3998 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3999 ac->ac_o_ex.fe_len) << bsbits;
4000 }
4001 size = size >> bsbits;
4002 start = start_off >> bsbits;
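	/*
	 * Editor's example: a write extending a small file to ~100KiB
	 * falls into the "size <= 128 * 1024" bucket above, so the goal
	 * is normalized to 128KiB from start_off 0; start_off only
	 * becomes non-zero in the multi-megabyte buckets and the final
	 * else branch, which also align the start.
	 */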
4003
	/* don't cover blocks that are already allocated in the selected range */
4005 if (ar->pleft && start <= ar->lleft) {
4006 size -= ar->lleft + 1 - start;
4007 start = ar->lleft + 1;
4008 }
4009 if (ar->pright && start + size - 1 >= ar->lright)
4010 size -= start + size - ar->lright;
4011
	/*
	 * Trim the allocation request for filesystems with artificially
	 * small groups.
	 */
4016 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
4017 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
4018
4019 end = start + size;
4020
	/* check that we don't cross already-preallocated blocks */
4022 rcu_read_lock();
4023 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4024 ext4_lblk_t pa_end;
4025
4026 if (pa->pa_deleted)
4027 continue;
4028 spin_lock(&pa->pa_lock);
4029 if (pa->pa_deleted) {
4030 spin_unlock(&pa->pa_lock);
4031 continue;
4032 }
4033
4034 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
4035 pa->pa_len);
4036
		/* a PA must never overlap the original request */
4038 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
4039 ac->ac_o_ex.fe_logical < pa->pa_lstart));
4040
		/* skip PAs this normalized request doesn't overlap with */
4042 if (pa->pa_lstart >= end || pa_end <= start) {
4043 spin_unlock(&pa->pa_lock);
4044 continue;
4045 }
4046 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
4047
		/* adjust start or end to be adjacent to this pa */
4049 if (pa_end <= ac->ac_o_ex.fe_logical) {
4050 BUG_ON(pa_end < start);
4051 start = pa_end;
4052 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
4053 BUG_ON(pa->pa_lstart > end);
4054 end = pa->pa_lstart;
4055 }
4056 spin_unlock(&pa->pa_lock);
4057 }
4058 rcu_read_unlock();
4059 size = end - start;
4060
	/* XXX: extra loop to verify we really don't overlap preallocations */
4062 rcu_read_lock();
4063 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4064 ext4_lblk_t pa_end;
4065
4066 spin_lock(&pa->pa_lock);
4067 if (pa->pa_deleted == 0) {
4068 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
4069 pa->pa_len);
4070 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
4071 }
4072 spin_unlock(&pa->pa_lock);
4073 }
4074 rcu_read_unlock();
4075
4076 if (start + size <= ac->ac_o_ex.fe_logical &&
4077 start > ac->ac_o_ex.fe_logical) {
4078 ext4_msg(ac->ac_sb, KERN_ERR,
4079 "start %lu, size %lu, fe_logical %lu",
4080 (unsigned long) start, (unsigned long) size,
4081 (unsigned long) ac->ac_o_ex.fe_logical);
4082 BUG();
4083 }
4084 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
4085
	/* now prepare the goal request */

	/* XXX: is it better to align blocks with respect to logical
	 * placement, or to satisfy a big request as is? */
4090 ac->ac_g_ex.fe_logical = start;
4091 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
4092
	/* define the goal start so that adjacent allocations can merge */
4094 if (ar->pright && (ar->lright == (start + size))) {
		/* merge to the right */
4096 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
4097 &ac->ac_f_ex.fe_group,
4098 &ac->ac_f_ex.fe_start);
4099 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
4100 }
4101 if (ar->pleft && (ar->lleft + 1 == start)) {
		/* merge to the left */
4103 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
4104 &ac->ac_f_ex.fe_group,
4105 &ac->ac_f_ex.fe_start);
4106 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
4107 }
4108
4109 mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
4110 orig_size, start);
4111}
4112
4113static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
4114{
4115 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4116
4117 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) {
4118 atomic_inc(&sbi->s_bal_reqs);
4119 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
4120 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
4121 atomic_inc(&sbi->s_bal_success);
4122 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
4123 atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned);
4124 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
4125 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
4126 atomic_inc(&sbi->s_bal_goals);
4127 if (ac->ac_found > sbi->s_mb_max_to_scan)
4128 atomic_inc(&sbi->s_bal_breaks);
4129 }
4130
4131 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
4132 trace_ext4_mballoc_alloc(ac);
4133 else
4134 trace_ext4_mballoc_prealloc(ac);
4135}
4136
/*
 * Called on failure; free up any blocks from the inode PA for this
 * context.  We don't need this for MB_GROUP_PA because we only change
 * pa_free in ext4_mb_release_context(), but on failure we've already
 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
 */
4143static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
4144{
4145 struct ext4_prealloc_space *pa = ac->ac_pa;
4146 struct ext4_buddy e4b;
4147 int err;
4148
4149 if (pa == NULL) {
4150 if (ac->ac_f_ex.fe_len == 0)
4151 return;
4152 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
4153 if (err) {
		/*
		 * This should never happen since we pin the
		 * pages in the ext4_allocation_context, so
		 * ext4_mb_load_buddy() should never fail.
		 */
4159 WARN(1, "mb_load_buddy failed (%d)", err);
4160 return;
4161 }
4162 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
4163 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
4164 ac->ac_f_ex.fe_len);
4165 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
4166 ext4_mb_unload_buddy(&e4b);
4167 return;
4168 }
4169 if (pa->pa_type == MB_INODE_PA)
4170 pa->pa_free += ac->ac_b_ex.fe_len;
4171}
4172
/*
 * use blocks preallocated for the inode
 */
4176static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
4177 struct ext4_prealloc_space *pa)
4178{
4179 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4180 ext4_fsblk_t start;
4181 ext4_fsblk_t end;
4182 int len;
4183
	/* found preallocated blocks, use them */
4185 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
4186 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
4187 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
4188 len = EXT4_NUM_B2C(sbi, end - start);
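	/*
	 * Editor's illustration: if the pa covers logical blocks
	 * 100..163 and the original request is 8 blocks at logical 120,
	 * start lands 20 blocks into the pa's physical range, and len
	 * is clamped both by the request length and by the end of the
	 * pa.
	 */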
4189 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
4190 &ac->ac_b_ex.fe_start);
4191 ac->ac_b_ex.fe_len = len;
4192 ac->ac_status = AC_STATUS_FOUND;
4193 ac->ac_pa = pa;
4194
4195 BUG_ON(start < pa->pa_pstart);
4196 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
4197 BUG_ON(pa->pa_free < len);
4198 pa->pa_free -= len;
4199
4200 mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
4201}
4202
/*
 * use blocks preallocated for the locality group
 */
4206static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
4207 struct ext4_prealloc_space *pa)
4208{
4209 unsigned int len = ac->ac_o_ex.fe_len;
4210
4211 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
4212 &ac->ac_b_ex.fe_group,
4213 &ac->ac_b_ex.fe_start);
4214 ac->ac_b_ex.fe_len = len;
4215 ac->ac_status = AC_STATUS_FOUND;
4216 ac->ac_pa = pa;
4217
	/* We don't correct pa_pstart or pa_len here to avoid a possible
	 * race while the group is being loaded concurrently; instead we
	 * correct the pa later, after the blocks have been marked in the
	 * on-disk bitmap -- see ext4_mb_release_context().
	 * Other CPUs are prevented from allocating from this pa by
	 * lg_mutex.
	 */
4224 mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
4225 pa->pa_lstart-len, len, pa);
4226}
4227
4228
/*
 * Return the preallocated space that has the minimal distance from the
 * goal block.  @cpa is the preallocated space with the currently known
 * minimal distance from the goal block.
 */
4234static struct ext4_prealloc_space *
4235ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
4236 struct ext4_prealloc_space *pa,
4237 struct ext4_prealloc_space *cpa)
4238{
4239 ext4_fsblk_t cur_distance, new_distance;
4240
4241 if (cpa == NULL) {
4242 atomic_inc(&pa->pa_count);
4243 return pa;
4244 }
4245 cur_distance = abs(goal_block - cpa->pa_pstart);
4246 new_distance = abs(goal_block - pa->pa_pstart);
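	/*
	 * E.g. (editor's): with a goal block of 1000, a current
	 * candidate at 900 (distance 100) loses to a pa at 1040
	 * (distance 40): we drop the reference on cpa and keep pa.
	 */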
4247
4248 if (cur_distance <= new_distance)
4249 return cpa;
4250
	/* drop the reference to the previously chosen pa */
4252 atomic_dec(&cpa->pa_count);
4253 atomic_inc(&pa->pa_count);
4254 return pa;
4255}
4256
/*
 * search the goal blocks in the preallocated space
 */
4260static noinline_for_stack bool
4261ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
4262{
4263 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4264 int order, i;
4265 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
4266 struct ext4_locality_group *lg;
4267 struct ext4_prealloc_space *pa, *cpa = NULL;
4268 ext4_fsblk_t goal_block;
4269
	/* only data can be preallocated */
4271 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4272 return false;
4273
	/* first, try per-file preallocation */
4275 rcu_read_lock();
4276 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4277
		/* all fields in this condition don't change,
		 * so we can skip locking for them */
4280 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
4281 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
4282 EXT4_C2B(sbi, pa->pa_len)))
4283 continue;
4284
		/* non-extent files can't have physical blocks past 2^32 */
4286 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
4287 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
4288 EXT4_MAX_BLOCK_FILE_PHYS))
4289 continue;
4290
		/* found preallocated blocks, use them */
4292 spin_lock(&pa->pa_lock);
4293 if (pa->pa_deleted == 0 && pa->pa_free) {
4294 atomic_inc(&pa->pa_count);
4295 ext4_mb_use_inode_pa(ac, pa);
4296 spin_unlock(&pa->pa_lock);
4297 ac->ac_criteria = 10;
4298 rcu_read_unlock();
4299 return true;
4300 }
4301 spin_unlock(&pa->pa_lock);
4302 }
4303 rcu_read_unlock();
4304
	/* can we use group allocation? */
4306 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
4307 return false;
4308
	/* the inode may have no locality group for some reason */
4310 lg = ac->ac_lg;
4311 if (lg == NULL)
4312 return false;
4313 order = fls(ac->ac_o_ex.fe_len) - 1;
4314 if (order > PREALLOC_TB_SIZE - 1)
		/* the hash table has at most PREALLOC_TB_SIZE buckets */
4316 order = PREALLOC_TB_SIZE - 1;
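	/*
	 * Editor's example: a 300-block request gives
	 * order = fls(300) - 1 = 8, so the loop below scans buckets
	 * 8..PREALLOC_TB_SIZE-1; PAs there are large enough that
	 * pa_free may satisfy the request, which is still checked
	 * explicitly.
	 */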
4317
4318 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
	/*
	 * search for the preallocated space that has the minimal
	 * distance from the goal block.
	 */
4323 for (i = order; i < PREALLOC_TB_SIZE; i++) {
4324 rcu_read_lock();
4325 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
4326 pa_inode_list) {
4327 spin_lock(&pa->pa_lock);
4328 if (pa->pa_deleted == 0 &&
4329 pa->pa_free >= ac->ac_o_ex.fe_len) {
4330
4331 cpa = ext4_mb_check_group_pa(goal_block,
4332 pa, cpa);
4333 }
4334 spin_unlock(&pa->pa_lock);
4335 }
4336 rcu_read_unlock();
4337 }
4338 if (cpa) {
4339 ext4_mb_use_group_pa(ac, cpa);
4340 ac->ac_criteria = 20;
4341 return true;
4342 }
4343 return false;
4344}
4345
/*
 * Goes through all blocks freed in the group but not yet committed and
 * marks them used in the in-core bitmap; the buddy must be generated
 * from this bitmap.
 * Must be called with the ext4 group lock held.
 */
4352static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
4353 ext4_group_t group)
4354{
4355 struct rb_node *n;
4356 struct ext4_group_info *grp;
4357 struct ext4_free_data *entry;
4358
4359 grp = ext4_get_group_info(sb, group);
4360 n = rb_first(&(grp->bb_free_root));
4361
4362 while (n) {
4363 entry = rb_entry(n, struct ext4_free_data, efd_node);
4364 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
4365 n = rb_next(n);
4366 }
4367 return;
4368}
4369
/*
 * Goes through all preallocations in this group and marks them used in
 * the in-core bitmap; the buddy must be generated from this bitmap.
 * Must be called with the ext4 group lock held.
 */
4375static noinline_for_stack
4376void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
4377 ext4_group_t group)
4378{
4379 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4380 struct ext4_prealloc_space *pa;
4381 struct list_head *cur;
4382 ext4_group_t groupnr;
4383 ext4_grpblk_t start;
4384 int preallocated = 0;
4385 int len;
4386
	/* Every form of preallocation discard first loads the group, so
	 * the only competing code is preallocation use.  We don't need
	 * any locking here.  Note that we do NOT ignore preallocations
	 * with pa_deleted set: otherwise we could leave used blocks
	 * available for allocation in the buddy while a concurrent
	 * ext4_mb_put_pa() is dropping the preallocation.
	 */
4395 list_for_each(cur, &grp->bb_prealloc_list) {
4396 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
4397 spin_lock(&pa->pa_lock);
4398 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4399 &groupnr, &start);
4400 len = pa->pa_len;
4401 spin_unlock(&pa->pa_lock);
4402 if (unlikely(len == 0))
4403 continue;
4404 BUG_ON(groupnr != group);
4405 ext4_set_bits(bitmap, start, len);
4406 preallocated += len;
4407 }
4408 mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
4409}
4410
4411static void ext4_mb_mark_pa_deleted(struct super_block *sb,
4412 struct ext4_prealloc_space *pa)
4413{
4414 struct ext4_inode_info *ei;
4415
4416 if (pa->pa_deleted) {
4417 ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
4418 pa->pa_type, pa->pa_pstart, pa->pa_lstart,
4419 pa->pa_len);
4420 return;
4421 }
4422
4423 pa->pa_deleted = 1;
4424
4425 if (pa->pa_type == MB_INODE_PA) {
4426 ei = EXT4_I(pa->pa_inode);
4427 atomic_dec(&ei->i_prealloc_active);
4428 }
4429}
4430
4431static void ext4_mb_pa_callback(struct rcu_head *head)
4432{
4433 struct ext4_prealloc_space *pa;
4434 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
4435
4436 BUG_ON(atomic_read(&pa->pa_count));
4437 BUG_ON(pa->pa_deleted == 0);
4438 kmem_cache_free(ext4_pspace_cachep, pa);
4439}
4440
/*
 * Drops a reference to the preallocated space descriptor, and releases
 * it if this was the last reference and the space is fully consumed.
 */
4445static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
4446 struct super_block *sb, struct ext4_prealloc_space *pa)
4447{
4448 ext4_group_t grp;
4449 ext4_fsblk_t grp_blk;
4450
	/* in this short window a concurrent discard can set pa_deleted */
4452 spin_lock(&pa->pa_lock);
4453 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
4454 spin_unlock(&pa->pa_lock);
4455 return;
4456 }
4457
4458 if (pa->pa_deleted == 1) {
4459 spin_unlock(&pa->pa_lock);
4460 return;
4461 }
4462
4463 ext4_mb_mark_pa_deleted(sb, pa);
4464 spin_unlock(&pa->pa_lock);
4465
4466 grp_blk = pa->pa_pstart;
	/*
	 * If doing group-based preallocation, pa_pstart may be in the
	 * next group when the pa is used up.
	 */
4471 if (pa->pa_type == MB_GROUP_PA)
4472 grp_blk--;
4473
4474 grp = ext4_get_group_number(sb, grp_blk);
4475
	/*
	 * possible race:
	 *
	 *  P1 (buddy init)                 P2 (regular allocation)
	 *                                  find block B in PA
	 *  copy on-disk bitmap to buddy
	 *                                  mark B in on-disk bitmap
	 *                                  drop PA from group
	 *  mark all PAs in buddy
	 *
	 * thus, P1 initializes the buddy with B available.  To prevent
	 * this we make "copy" and "mark all PAs" atomic and serialize
	 * "drop PA" against that pair.
	 */
4490 ext4_lock_group(sb, grp);
4491 list_del(&pa->pa_group_list);
4492 ext4_unlock_group(sb, grp);
4493
4494 spin_lock(pa->pa_obj_lock);
4495 list_del_rcu(&pa->pa_inode_list);
4496 spin_unlock(pa->pa_obj_lock);
4497
4498 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4499}
4500
/*
 * creates a new prealloc space for the given inode
 */
4504static noinline_for_stack void
4505ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
4506{
4507 struct super_block *sb = ac->ac_sb;
4508 struct ext4_sb_info *sbi = EXT4_SB(sb);
4509 struct ext4_prealloc_space *pa;
4510 struct ext4_group_info *grp;
4511 struct ext4_inode_info *ei;
4512
	/* preallocate only when the found space is larger than requested */
4514 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4515 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4516 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4517 BUG_ON(ac->ac_pa == NULL);
4518
4519 pa = ac->ac_pa;
4520
4521 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
4522 int winl;
4523 int wins;
4524 int win;
4525 int offs;
4526
		/* We can't allocate as much as the normalizer wants, so
		 * the found space must get a proper lstart to cover the
		 * original request. */
4530 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
4531 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
4532
		/* We're limited by the original request in that the
		 * logical block must be covered either way; winl is the
		 * window we can move our chunk within. */
4536 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
4537
		/* we should also cover the whole original request */
4539 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
4540
		/* the smallest one defines the real window */
4542 win = min(winl, wins);
4543
4544 offs = ac->ac_o_ex.fe_logical %
4545 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4546 if (offs && offs < win)
4547 win = offs;
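		/*
		 * Editor's illustration: goal at logical 0 for 16
		 * blocks, original request 4 blocks at logical 10, best
		 * extent 16 blocks: winl = 10, wins = 12, offs = 10, so
		 * win = 10 and fe_logical becomes 0, keeping the
		 * original request inside the preallocated range.
		 */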
4548
4549 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
4550 EXT4_NUM_B2C(sbi, win);
4551 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
4552 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
4553 }
4554
	/* preallocation can change ac_b_ex, so we store the actually
	 * allocated blocks for history */
4557 ac->ac_f_ex = ac->ac_b_ex;
4558
4559 pa->pa_lstart = ac->ac_b_ex.fe_logical;
4560 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4561 pa->pa_len = ac->ac_b_ex.fe_len;
4562 pa->pa_free = pa->pa_len;
4563 spin_lock_init(&pa->pa_lock);
4564 INIT_LIST_HEAD(&pa->pa_inode_list);
4565 INIT_LIST_HEAD(&pa->pa_group_list);
4566 pa->pa_deleted = 0;
4567 pa->pa_type = MB_INODE_PA;
4568
4569 mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4570 pa->pa_len, pa->pa_lstart);
4571 trace_ext4_mb_new_inode_pa(ac, pa);
4572
4573 ext4_mb_use_inode_pa(ac, pa);
4574 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
4575
4576 ei = EXT4_I(ac->ac_inode);
4577 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4578
4579 pa->pa_obj_lock = &ei->i_prealloc_lock;
4580 pa->pa_inode = ac->ac_inode;
4581
4582 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4583
4584 spin_lock(pa->pa_obj_lock);
4585 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
4586 spin_unlock(pa->pa_obj_lock);
4587 atomic_inc(&ei->i_prealloc_active);
4588}
4589
/*
 * creates a new prealloc space for the locality group the inode belongs to
 */
4593static noinline_for_stack void
4594ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
4595{
4596 struct super_block *sb = ac->ac_sb;
4597 struct ext4_locality_group *lg;
4598 struct ext4_prealloc_space *pa;
4599 struct ext4_group_info *grp;
4600
	/* preallocate only when the found space is larger than requested */
4602 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4603 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4604 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4605 BUG_ON(ac->ac_pa == NULL);
4606
4607 pa = ac->ac_pa;
4608
	/* preallocation can change ac_b_ex, so we store the actually
	 * allocated blocks for history */
4611 ac->ac_f_ex = ac->ac_b_ex;
4612
4613 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4614 pa->pa_lstart = pa->pa_pstart;
4615 pa->pa_len = ac->ac_b_ex.fe_len;
4616 pa->pa_free = pa->pa_len;
4617 spin_lock_init(&pa->pa_lock);
4618 INIT_LIST_HEAD(&pa->pa_inode_list);
4619 INIT_LIST_HEAD(&pa->pa_group_list);
4620 pa->pa_deleted = 0;
4621 pa->pa_type = MB_GROUP_PA;
4622
4623 mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4624 pa->pa_len, pa->pa_lstart);
4625 trace_ext4_mb_new_group_pa(ac, pa);
4626
4627 ext4_mb_use_group_pa(ac, pa);
4628 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
4629
4630 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4631 lg = ac->ac_lg;
4632 BUG_ON(lg == NULL);
4633
4634 pa->pa_obj_lock = &lg->lg_prealloc_lock;
4635 pa->pa_inode = NULL;
4636
4637 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4638
	/*
	 * We will add the new pa to the right bucket later, after
	 * updating pa_free in ext4_mb_release_context().
	 */
4643}
4644
4645static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
4646{
4647 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4648 ext4_mb_new_group_pa(ac);
4649 else
4650 ext4_mb_new_inode_pa(ac);
4651}
4652
/*
 * Finds all unused blocks in the on-disk bitmap and frees them in the
 * in-core bitmap and buddy.
 * @pa must be unlinked from the inode and group lists so that nobody
 * else can find or use it.
 * The caller MUST hold the group and inode locks.
 * TODO: optimize the case when there are no in-core structures yet.
 */
4661static noinline_for_stack int
4662ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
4663 struct ext4_prealloc_space *pa)
4664{
4665 struct super_block *sb = e4b->bd_sb;
4666 struct ext4_sb_info *sbi = EXT4_SB(sb);
4667 unsigned int end;
4668 unsigned int next;
4669 ext4_group_t group;
4670 ext4_grpblk_t bit;
4671 unsigned long long grp_blk_start;
4672 int free = 0;
4673
4674 BUG_ON(pa->pa_deleted == 0);
4675 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4676 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
4677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4678 end = bit + pa->pa_len;
4679
4680 while (bit < end) {
4681 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
4682 if (bit >= end)
4683 break;
4684 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
4685 mb_debug(sb, "free preallocated %u/%u in group %u\n",
4686 (unsigned) ext4_group_first_block_no(sb, group) + bit,
4687 (unsigned) next - bit, (unsigned) group);
4688 free += next - bit;
4689
4690 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
4691 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
4692 EXT4_C2B(sbi, bit)),
4693 next - bit);
4694 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
4695 bit = next + 1;
4696 }
4697 if (free != pa->pa_free) {
4698 ext4_msg(e4b->bd_sb, KERN_CRIT,
4699 "pa %p: logic %lu, phys. %lu, len %d",
4700 pa, (unsigned long) pa->pa_lstart,
4701 (unsigned long) pa->pa_pstart,
4702 pa->pa_len);
4703 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
4704 free, pa->pa_free);
		/*
		 * The pa is already deleted, so we use the value
		 * obtained from the bitmap and continue.
		 */
4709 }
4710 atomic_add(free, &sbi->s_mb_discarded);
4711
4712 return 0;
4713}
4714
4715static noinline_for_stack int
4716ext4_mb_release_group_pa(struct ext4_buddy *e4b,
4717 struct ext4_prealloc_space *pa)
4718{
4719 struct super_block *sb = e4b->bd_sb;
4720 ext4_group_t group;
4721 ext4_grpblk_t bit;
4722
4723 trace_ext4_mb_release_group_pa(sb, pa);
4724 BUG_ON(pa->pa_deleted == 0);
4725 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4726 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4727 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
4728 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
4729 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
4730
4731 return 0;
4732}
4733
/*
 * Releases all preallocations in the given group.
 *
 * Discard policy:
 * - when do we discard
 *   1) ENOSPC
 * - how many do we discard
 *   1) as many as requested
 */
4743static noinline_for_stack int
4744ext4_mb_discard_group_preallocations(struct super_block *sb,
4745 ext4_group_t group, int needed)
4746{
4747 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4748 struct buffer_head *bitmap_bh = NULL;
4749 struct ext4_prealloc_space *pa, *tmp;
4750 struct list_head list;
4751 struct ext4_buddy e4b;
4752 int err;
4753 int busy = 0;
4754 int free, free_total = 0;
4755
4756 mb_debug(sb, "discard preallocation for group %u\n", group);
4757 if (list_empty(&grp->bb_prealloc_list))
4758 goto out_dbg;
4759
4760 bitmap_bh = ext4_read_block_bitmap(sb, group);
4761 if (IS_ERR(bitmap_bh)) {
4762 err = PTR_ERR(bitmap_bh);
4763 ext4_error_err(sb, -err,
4764 "Error %d reading block bitmap for %u",
4765 err, group);
4766 goto out_dbg;
4767 }
4768
4769 err = ext4_mb_load_buddy(sb, group, &e4b);
4770 if (err) {
4771 ext4_warning(sb, "Error %d loading buddy information for %u",
4772 err, group);
4773 put_bh(bitmap_bh);
4774 goto out_dbg;
4775 }
4776
4777 if (needed == 0)
4778 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
4779
4780 INIT_LIST_HEAD(&list);
4781repeat:
4782 free = 0;
4783 ext4_lock_group(sb, group);
4784 list_for_each_entry_safe(pa, tmp,
4785 &grp->bb_prealloc_list, pa_group_list) {
4786 spin_lock(&pa->pa_lock);
4787 if (atomic_read(&pa->pa_count)) {
4788 spin_unlock(&pa->pa_lock);
4789 busy = 1;
4790 continue;
4791 }
4792 if (pa->pa_deleted) {
4793 spin_unlock(&pa->pa_lock);
4794 continue;
4795 }
4796
		/* seems this one can be freed; mark it deleted */
4798 ext4_mb_mark_pa_deleted(sb, pa);
4799
4800 if (!free)
4801 this_cpu_inc(discard_pa_seq);
4802
		/* we can trust pa_free: the pa is deleted and nobody can use it */
4804 free += pa->pa_free;
4805
4806 spin_unlock(&pa->pa_lock);
4807
4808 list_del(&pa->pa_group_list);
4809 list_add(&pa->u.pa_tmp_list, &list);
4810 }
4811
	/* now free all the selected PAs */
4813 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4814
		/* remove from the object (inode or locality group) */
4816 spin_lock(pa->pa_obj_lock);
4817 list_del_rcu(&pa->pa_inode_list);
4818 spin_unlock(pa->pa_obj_lock);
4819
4820 if (pa->pa_type == MB_GROUP_PA)
4821 ext4_mb_release_group_pa(&e4b, pa);
4822 else
4823 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4824
4825 list_del(&pa->u.pa_tmp_list);
4826 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4827 }
4828
4829 free_total += free;
4830
4831
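	/* if we still need more blocks and some PAs were busy, try again */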
4832 if (free_total < needed && busy) {
4833 ext4_unlock_group(sb, group);
4834 cond_resched();
4835 busy = 0;
4836 goto repeat;
4837 }
4838 ext4_unlock_group(sb, group);
4839 ext4_mb_unload_buddy(&e4b);
4840 put_bh(bitmap_bh);
4841out_dbg:
4842 mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
4843 free_total, group, grp->bb_free);
4844 return free_total;
4845}
4846
/*
 * releases all non-used preallocated blocks for given inode
 *
 * It's important to discard preallocations under i_data_sem.
 * We don't want another block to be served from the prealloc
 * space when we are discarding the inode prealloc space.
 *
 * FIXME!! Make sure it is valid at all the call sites
 */
4856void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
4857{
4858 struct ext4_inode_info *ei = EXT4_I(inode);
4859 struct super_block *sb = inode->i_sb;
4860 struct buffer_head *bitmap_bh = NULL;
4861 struct ext4_prealloc_space *pa, *tmp;
4862 ext4_group_t group = 0;
4863 struct list_head list;
4864 struct ext4_buddy e4b;
4865 int err;
4866
4867 if (!S_ISREG(inode->i_mode)) {
4868
4869 return;
4870 }
4871
4872 if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
4873 return;
4874
4875 mb_debug(sb, "discard preallocation for inode %lu\n",
4876 inode->i_ino);
4877 trace_ext4_discard_preallocations(inode,
4878 atomic_read(&ei->i_prealloc_active), needed);
4879
4880 INIT_LIST_HEAD(&list);
4881
4882 if (needed == 0)
4883 needed = UINT_MAX;
4884
4885repeat:
4886
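	/* first, collect all pa's in the inode */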
4887 spin_lock(&ei->i_prealloc_lock);
4888 while (!list_empty(&ei->i_prealloc_list) && needed) {
4889 pa = list_entry(ei->i_prealloc_list.prev,
4890 struct ext4_prealloc_space, pa_inode_list);
4891 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4892 spin_lock(&pa->pa_lock);
4893 if (atomic_read(&pa->pa_count)) {
4894
4895
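			/* this shouldn't happen often - nobody should
			 * use preallocation while we're discarding it */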
4896 spin_unlock(&pa->pa_lock);
4897 spin_unlock(&ei->i_prealloc_lock);
4898 ext4_msg(sb, KERN_ERR,
4899 "uh-oh! used pa while discarding");
4900 WARN_ON(1);
4901 schedule_timeout_uninterruptible(HZ);
4902 goto repeat;
4903
4904 }
4905 if (pa->pa_deleted == 0) {
4906 ext4_mb_mark_pa_deleted(sb, pa);
4907 spin_unlock(&pa->pa_lock);
4908 list_del_rcu(&pa->pa_inode_list);
4909 list_add(&pa->u.pa_tmp_list, &list);
4910 needed--;
4911 continue;
4912 }
4913
4914
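		/* someone is deleting pa right now */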
4915 spin_unlock(&pa->pa_lock);
4916 spin_unlock(&ei->i_prealloc_lock);
		/*
		 * we have to wait here because pa_deleted
		 * doesn't mean pa is already unlinked from
		 * the list. as we might be called from
		 * ->clear_inode() the inode will get freed
		 * and concurrent thread which is unlinking
		 * pa from inode's list may access already
		 * freed memory, bad-bad-bad
		 */

		/*
		 * XXX: if this happens too often, we can
		 * add a flag to force wait only in case
		 * of ->clear_inode(), but not in case of
		 * regular truncate
		 */
4930 schedule_timeout_uninterruptible(HZ);
4931 goto repeat;
4932 }
4933 spin_unlock(&ei->i_prealloc_lock);
4934
4935 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4936 BUG_ON(pa->pa_type != MB_INODE_PA);
4937 group = ext4_get_group_number(sb, pa->pa_pstart);
4938
4939 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4940 GFP_NOFS|__GFP_NOFAIL);
4941 if (err) {
4942 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
4943 err, group);
4944 continue;
4945 }
4946
4947 bitmap_bh = ext4_read_block_bitmap(sb, group);
4948 if (IS_ERR(bitmap_bh)) {
4949 err = PTR_ERR(bitmap_bh);
4950 ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
4951 err, group);
4952 ext4_mb_unload_buddy(&e4b);
4953 continue;
4954 }
4955
4956 ext4_lock_group(sb, group);
4957 list_del(&pa->pa_group_list);
4958 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4959 ext4_unlock_group(sb, group);
4960
4961 ext4_mb_unload_buddy(&e4b);
4962 put_bh(bitmap_bh);
4963
4964 list_del(&pa->u.pa_tmp_list);
4965 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4966 }
4967}
4968
4969static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
4970{
4971 struct ext4_prealloc_space *pa;
4972
4973 BUG_ON(ext4_pspace_cachep == NULL);
4974 pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
4975 if (!pa)
4976 return -ENOMEM;
4977 atomic_set(&pa->pa_count, 1);
4978 ac->ac_pa = pa;
4979 return 0;
4980}
4981
4982static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
4983{
4984 struct ext4_prealloc_space *pa = ac->ac_pa;
4985
4986 BUG_ON(!pa);
4987 ac->ac_pa = NULL;
4988 WARN_ON(!atomic_dec_and_test(&pa->pa_count));
4989 kmem_cache_free(ext4_pspace_cachep, pa);
4990}
4991
4992#ifdef CONFIG_EXT4_DEBUG
4993static inline void ext4_mb_show_pa(struct super_block *sb)
4994{
4995 ext4_group_t i, ngroups;
4996
4997 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
4998 return;
4999
5000 ngroups = ext4_get_groups_count(sb);
5001 mb_debug(sb, "groups: ");
5002 for (i = 0; i < ngroups; i++) {
5003 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
5004 struct ext4_prealloc_space *pa;
5005 ext4_grpblk_t start;
5006 struct list_head *cur;
5007 ext4_lock_group(sb, i);
5008 list_for_each(cur, &grp->bb_prealloc_list) {
5009 pa = list_entry(cur, struct ext4_prealloc_space,
5010 pa_group_list);
5011 spin_lock(&pa->pa_lock);
5012 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
5013 NULL, &start);
5014 spin_unlock(&pa->pa_lock);
5015 mb_debug(sb, "PA:%u:%d:%d\n", i, start,
5016 pa->pa_len);
5017 }
5018 ext4_unlock_group(sb, i);
5019 mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
5020 grp->bb_fragments);
5021 }
5022}
5023
5024static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5025{
5026 struct super_block *sb = ac->ac_sb;
5027
5028 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5029 return;
5030
5031 mb_debug(sb, "Can't allocate:"
5032 " Allocation context details:");
5033 mb_debug(sb, "status %u flags 0x%x",
5034 ac->ac_status, ac->ac_flags);
5035 mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
5036 "goal %lu/%lu/%lu@%lu, "
5037 "best %lu/%lu/%lu@%lu cr %d",
5038 (unsigned long)ac->ac_o_ex.fe_group,
5039 (unsigned long)ac->ac_o_ex.fe_start,
5040 (unsigned long)ac->ac_o_ex.fe_len,
5041 (unsigned long)ac->ac_o_ex.fe_logical,
5042 (unsigned long)ac->ac_g_ex.fe_group,
5043 (unsigned long)ac->ac_g_ex.fe_start,
5044 (unsigned long)ac->ac_g_ex.fe_len,
5045 (unsigned long)ac->ac_g_ex.fe_logical,
5046 (unsigned long)ac->ac_b_ex.fe_group,
5047 (unsigned long)ac->ac_b_ex.fe_start,
5048 (unsigned long)ac->ac_b_ex.fe_len,
5049 (unsigned long)ac->ac_b_ex.fe_logical,
5050 (int)ac->ac_criteria);
5051 mb_debug(sb, "%u found", ac->ac_found);
5052 ext4_mb_show_pa(sb);
5053}
5054#else
5055static inline void ext4_mb_show_pa(struct super_block *sb)
5056{
5057 return;
5058}
5059static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5060{
5061 ext4_mb_show_pa(ac->ac_sb);
5062 return;
5063}
5064#endif
5065
/*
 * We use locality group preallocation for small files. The size of the
 * file is determined by the current size or the resulting size after
 * allocation, whichever is larger.
 *
 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
 */
5073static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
5074{
5075 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5076 int bsbits = ac->ac_sb->s_blocksize_bits;
5077 loff_t size, isize;
5078
5079 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
5080 return;
5081
5082 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
5083 return;
5084
5085 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
5086 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
5087 >> bsbits;
5088
5089 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
5090 !inode_is_open_for_write(ac->ac_inode)) {
5091 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
5092 return;
5093 }
5094
5095 if (sbi->s_mb_group_prealloc <= 0) {
5096 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5097 return;
5098 }
5099
5100
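	/* don't use group allocation for large files */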
5101 size = max(size, isize);
5102 if (size > sbi->s_mb_stream_request) {
5103 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5104 return;
5105 }
5106
5107 BUG_ON(ac->ac_lg != NULL);

	/*
	 * locality group prealloc space is per cpu. The reason for having
	 * per cpu locality group is to reduce the contention between block
	 * requests from multiple CPUs.
	 */
5113 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
5114
5115
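	/* we're going to use group allocation */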
5116 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
5117
5118
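	/* serialize all allocations in the group */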
5119 mutex_lock(&ac->ac_lg->lg_mutex);
5120}
5121
5122static noinline_for_stack int
5123ext4_mb_initialize_context(struct ext4_allocation_context *ac,
5124 struct ext4_allocation_request *ar)
5125{
5126 struct super_block *sb = ar->inode->i_sb;
5127 struct ext4_sb_info *sbi = EXT4_SB(sb);
5128 struct ext4_super_block *es = sbi->s_es;
5129 ext4_group_t group;
5130 unsigned int len;
5131 ext4_fsblk_t goal;
5132 ext4_grpblk_t block;
5133
5134
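	/* we can't allocate > group size */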
5135 len = ar->len;
5136
5137
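	/* just a dirty hack to filter too big requests */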
5138 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
5139 len = EXT4_CLUSTERS_PER_GROUP(sb);
5140
5141
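	/* start searching from the goal */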
5142 goal = ar->goal;
5143 if (goal < le32_to_cpu(es->s_first_data_block) ||
5144 goal >= ext4_blocks_count(es))
5145 goal = le32_to_cpu(es->s_first_data_block);
5146 ext4_get_group_no_and_offset(sb, goal, &group, &block);
5147
5148
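	/* set up allocation goals */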
5149 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
5150 ac->ac_status = AC_STATUS_CONTINUE;
5151 ac->ac_sb = sb;
5152 ac->ac_inode = ar->inode;
5153 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
5154 ac->ac_o_ex.fe_group = group;
5155 ac->ac_o_ex.fe_start = block;
5156 ac->ac_o_ex.fe_len = len;
5157 ac->ac_g_ex = ac->ac_o_ex;
5158 ac->ac_flags = ar->flags;

	/* we have to define the context: we'll work with a file or
	 * a locality group. this is a policy, actually */
5162 ext4_mb_group_or_file(ac);
5163
5164 mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
5165 "left: %u/%u, right %u/%u to %swritable\n",
5166 (unsigned) ar->len, (unsigned) ar->logical,
5167 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
5168 (unsigned) ar->lleft, (unsigned) ar->pleft,
5169 (unsigned) ar->lright, (unsigned) ar->pright,
5170 inode_is_open_for_write(ar->inode) ? "" : "non-");
5171 return 0;
}
5174
5175static noinline_for_stack void
5176ext4_mb_discard_lg_preallocations(struct super_block *sb,
5177 struct ext4_locality_group *lg,
5178 int order, int total_entries)
5179{
5180 ext4_group_t group = 0;
5181 struct ext4_buddy e4b;
5182 struct list_head discard_list;
5183 struct ext4_prealloc_space *pa, *tmp;
5184
5185 mb_debug(sb, "discard locality group preallocation\n");
5186
5187 INIT_LIST_HEAD(&discard_list);
5188
5189 spin_lock(&lg->lg_prealloc_lock);
5190 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
5191 pa_inode_list,
5192 lockdep_is_held(&lg->lg_prealloc_lock)) {
5193 spin_lock(&pa->pa_lock);
5194 if (atomic_read(&pa->pa_count)) {
			/*
			 * This is the pa that we just used
			 * for block allocation. So don't
			 * free it.
			 */
5200 spin_unlock(&pa->pa_lock);
5201 continue;
5202 }
5203 if (pa->pa_deleted) {
5204 spin_unlock(&pa->pa_lock);
5205 continue;
5206 }
5207
5208 BUG_ON(pa->pa_type != MB_GROUP_PA);
5209
5210
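		/* seems this one can be freed ... */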
5211 ext4_mb_mark_pa_deleted(sb, pa);
5212 spin_unlock(&pa->pa_lock);
5213
5214 list_del_rcu(&pa->pa_inode_list);
5215 list_add(&pa->u.pa_tmp_list, &discard_list);
5216
5217 total_entries--;
5218 if (total_entries <= 5) {
			/*
			 * we want to keep only 5 entries
			 * allowing it to grow to 8. This
			 * makes sure we don't call discard
			 * soon for this list.
			 */
5225 break;
5226 }
5227 }
5228 spin_unlock(&lg->lg_prealloc_lock);
5229
5230 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
5231 int err;
5232
5233 group = ext4_get_group_number(sb, pa->pa_pstart);
5234 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
5235 GFP_NOFS|__GFP_NOFAIL);
5236 if (err) {
5237 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
5238 err, group);
5239 continue;
5240 }
5241 ext4_lock_group(sb, group);
5242 list_del(&pa->pa_group_list);
5243 ext4_mb_release_group_pa(&e4b, pa);
5244 ext4_unlock_group(sb, group);
5245
5246 ext4_mb_unload_buddy(&e4b);
5247 list_del(&pa->u.pa_tmp_list);
5248 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
5249 }
5250}
5251
/*
 * We have incremented pa_count. So it cannot be freed at this
 * point. Also we hold lg_mutex. So no parallel allocation is
 * possible from this lg. That means pa_free cannot be updated.
 *
 * A parallel ext4_mb_discard_group_preallocations is possible,
 * which can cause the lg_prealloc_list to be updated.
 */
5261static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
5262{
5263 int order, added = 0, lg_prealloc_count = 1;
5264 struct super_block *sb = ac->ac_sb;
5265 struct ext4_locality_group *lg = ac->ac_lg;
5266 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
5267
5268 order = fls(pa->pa_free) - 1;
5269 if (order > PREALLOC_TB_SIZE - 1)
		/* The max size of the hash table is PREALLOC_TB_SIZE */
5271 order = PREALLOC_TB_SIZE - 1;
5272
5273 spin_lock(&lg->lg_prealloc_lock);
5274 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
5275 pa_inode_list,
5276 lockdep_is_held(&lg->lg_prealloc_lock)) {
5277 spin_lock(&tmp_pa->pa_lock);
5278 if (tmp_pa->pa_deleted) {
5279 spin_unlock(&tmp_pa->pa_lock);
5280 continue;
5281 }
5282 if (!added && pa->pa_free < tmp_pa->pa_free) {
5283
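			/* Add to the tail of the previous entry */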
5284 list_add_tail_rcu(&pa->pa_inode_list,
5285 &tmp_pa->pa_inode_list);
5286 added = 1;
			/*
			 * we want to count the total
			 * number of entries in the list
			 */
5291 }
5292 spin_unlock(&tmp_pa->pa_lock);
5293 lg_prealloc_count++;
5294 }
5295 if (!added)
5296 list_add_tail_rcu(&pa->pa_inode_list,
5297 &lg->lg_prealloc_list[order]);
5298 spin_unlock(&lg->lg_prealloc_lock);
5299
5300
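	/* Now trim the list to be not more than 8 elements */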
5301 if (lg_prealloc_count > 8) {
5302 ext4_mb_discard_lg_preallocations(sb, lg,
5303 order, lg_prealloc_count);
5304 return;
5305 }
	return;
5307}
5308
/*
 * if per-inode prealloc list is too long, trim some PAs
 */
5312static void ext4_mb_trim_inode_pa(struct inode *inode)
5313{
5314 struct ext4_inode_info *ei = EXT4_I(inode);
5315 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5316 int count, delta;
5317
5318 count = atomic_read(&ei->i_prealloc_active);
5319 delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
5320 if (count > sbi->s_mb_max_inode_prealloc + delta) {
5321 count -= sbi->s_mb_max_inode_prealloc;
5322 ext4_discard_preallocations(inode, count);
5323 }
5324}
5325
/*
 * release all resources we used in allocation
 */
5329static int ext4_mb_release_context(struct ext4_allocation_context *ac)
5330{
5331 struct inode *inode = ac->ac_inode;
5332 struct ext4_inode_info *ei = EXT4_I(inode);
5333 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5334 struct ext4_prealloc_space *pa = ac->ac_pa;
5335 if (pa) {
5336 if (pa->pa_type == MB_GROUP_PA) {
5337
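			/* shrink the group pa window from the front by the
			 * blocks we just consumed */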
5338 spin_lock(&pa->pa_lock);
5339 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5340 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5341 pa->pa_free -= ac->ac_b_ex.fe_len;
5342 pa->pa_len -= ac->ac_b_ex.fe_len;
5343 spin_unlock(&pa->pa_lock);

			/*
			 * We want to add the pa to the right bucket.
			 * Remove it from the list and while adding
			 * make sure the list to which we are adding
			 * doesn't grow big.
			 */
5351 if (likely(pa->pa_free)) {
5352 spin_lock(pa->pa_obj_lock);
5353 list_del_rcu(&pa->pa_inode_list);
5354 spin_unlock(pa->pa_obj_lock);
5355 ext4_mb_add_n_trim(ac);
5356 }
5357 }
5358
5359 if (pa->pa_type == MB_INODE_PA) {
			/*
			 * treat the per-inode prealloc list as a LRU list,
			 * then try to trim the least recently used PA.
			 */
5364 spin_lock(pa->pa_obj_lock);
5365 list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
5366 spin_unlock(pa->pa_obj_lock);
5367 }
5368
5369 ext4_mb_put_pa(ac, ac->ac_sb, pa);
5370 }
5371 if (ac->ac_bitmap_page)
5372 put_page(ac->ac_bitmap_page);
5373 if (ac->ac_buddy_page)
5374 put_page(ac->ac_buddy_page);
5375 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
5376 mutex_unlock(&ac->ac_lg->lg_mutex);
5377 ext4_mb_collect_stats(ac);
5378 ext4_mb_trim_inode_pa(inode);
5379 return 0;
5380}
5381
5382static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
5383{
5384 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
5385 int ret;
5386 int freed = 0;
5387
5388 trace_ext4_mb_discard_preallocations(sb, needed);
5389 for (i = 0; i < ngroups && needed > 0; i++) {
5390 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
5391 freed += ret;
5392 needed -= ret;
5393 }
5394
5395 return freed;
5396}
5397
5398static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
5399 struct ext4_allocation_context *ac, u64 *seq)
5400{
5401 int freed;
5402 u64 seq_retry = 0;
5403 bool ret = false;
5404
5405 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
5406 if (freed) {
5407 ret = true;
5408 goto out_dbg;
5409 }
5410 seq_retry = ext4_get_discard_pa_seq_sum();
5411 if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
5412 ac->ac_flags |= EXT4_MB_STRICT_CHECK;
5413 *seq = seq_retry;
5414 ret = true;
5415 }
5416
5417out_dbg:
5418 mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
5419 return ret;
5420}
5421
5422static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5423 struct ext4_allocation_request *ar, int *errp);
5424
/*
 * Main entry point into mballoc to allocate blocks:
 * it tries to use preallocation first, then falls back
 * to the usual allocation path.
 */
5430ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
5431 struct ext4_allocation_request *ar, int *errp)
5432{
5433 struct ext4_allocation_context *ac = NULL;
5434 struct ext4_sb_info *sbi;
5435 struct super_block *sb;
5436 ext4_fsblk_t block = 0;
5437 unsigned int inquota = 0;
5438 unsigned int reserv_clstrs = 0;
5439 u64 seq;
5440
5441 might_sleep();
5442 sb = ar->inode->i_sb;
5443 sbi = EXT4_SB(sb);
5444
5445 trace_ext4_request_blocks(ar);
5446 if (sbi->s_mount_state & EXT4_FC_REPLAY)
5447 return ext4_mb_new_blocks_simple(handle, ar, errp);
5448
5449
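	/* Allow quota files to use the root reservation */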
5450 if (ext4_is_quota_file(ar->inode))
5451 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
5452
5453 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
		/*
		 * Without delayed allocation we need to verify
		 * there are enough free blocks to do block allocation
		 * and verify allocation doesn't exceed the quota limits.
		 */
5458 while (ar->len &&
5459 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
5460
5461
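			/* let others free the space */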
5462 cond_resched();
5463 ar->len = ar->len >> 1;
5464 }
5465 if (!ar->len) {
5466 ext4_mb_show_pa(sb);
5467 *errp = -ENOSPC;
5468 return 0;
5469 }
5470 reserv_clstrs = ar->len;
5471 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
5472 dquot_alloc_block_nofail(ar->inode,
5473 EXT4_C2B(sbi, ar->len));
5474 } else {
5475 while (ar->len &&
5476 dquot_alloc_block(ar->inode,
5477 EXT4_C2B(sbi, ar->len))) {
5478
5479 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
5480 ar->len--;
5481 }
5482 }
5483 inquota = ar->len;
5484 if (ar->len == 0) {
5485 *errp = -EDQUOT;
5486 goto out;
5487 }
5488 }
5489
5490 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
5491 if (!ac) {
5492 ar->len = 0;
5493 *errp = -ENOMEM;
5494 goto out;
5495 }
5496
5497 *errp = ext4_mb_initialize_context(ac, ar);
5498 if (*errp) {
5499 ar->len = 0;
5500 goto out;
5501 }
5502
5503 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
5504 seq = this_cpu_read(discard_pa_seq);
5505 if (!ext4_mb_use_preallocated(ac)) {
5506 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
5507 ext4_mb_normalize_request(ac, ar);
5508
5509 *errp = ext4_mb_pa_alloc(ac);
5510 if (*errp)
5511 goto errout;
5512repeat:
5513
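		/* allocate space in core */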
5514 *errp = ext4_mb_regular_allocator(ac);
		/*
		 * pa allocated above is added to grp->bb_prealloc_list only
		 * when we were able to allocate some block i.e. when
		 * ac->ac_status == AC_STATUS_FOUND.
		 * On error the pa was never published, so it can only be
		 * freed here with ext4_mb_pa_free().
		 */
5522 if (*errp) {
5523 ext4_mb_pa_free(ac);
5524 ext4_discard_allocated_blocks(ac);
5525 goto errout;
5526 }
5527 if (ac->ac_status == AC_STATUS_FOUND &&
5528 ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
5529 ext4_mb_pa_free(ac);
5530 }
5531 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
5532 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
5533 if (*errp) {
5534 ext4_discard_allocated_blocks(ac);
5535 goto errout;
5536 } else {
5537 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
5538 ar->len = ac->ac_b_ex.fe_len;
5539 }
5540 } else {
5541 if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
5542 goto repeat;
		/*
		 * If block allocation fails then the pa allocated above
		 * needs to be freed here itself.
		 */
5547 ext4_mb_pa_free(ac);
5548 *errp = -ENOSPC;
5549 }
5550
5551errout:
5552 if (*errp) {
5553 ac->ac_b_ex.fe_len = 0;
5554 ar->len = 0;
5555 ext4_mb_show_ac(ac);
5556 }
5557 ext4_mb_release_context(ac);
5558out:
5559 if (ac)
5560 kmem_cache_free(ext4_ac_cachep, ac);
5561 if (inquota && ar->len < inquota)
5562 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
5563 if (!ar->len) {
5564 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
			/* release all the reserved blocks if non delalloc */
5566 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
5567 reserv_clstrs);
5568 }
5569
5570 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
5571
5572 return block;
5573}
5574
/*
 * We can merge two free data extents only if the physical blocks
 * are contiguous, AND the extents were freed by the same transaction,
 * AND the blocks are associated with the same group.
 */
5580static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
5581 struct ext4_free_data *entry,
5582 struct ext4_free_data *new_entry,
5583 struct rb_root *entry_rb_root)
5584{
5585 if ((entry->efd_tid != new_entry->efd_tid) ||
5586 (entry->efd_group != new_entry->efd_group))
5587 return;
5588 if (entry->efd_start_cluster + entry->efd_count ==
5589 new_entry->efd_start_cluster) {
5590 new_entry->efd_start_cluster = entry->efd_start_cluster;
5591 new_entry->efd_count += entry->efd_count;
5592 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
5593 entry->efd_start_cluster) {
5594 new_entry->efd_count += entry->efd_count;
5595 } else
5596 return;
5597 spin_lock(&sbi->s_md_lock);
5598 list_del(&entry->efd_list);
5599 spin_unlock(&sbi->s_md_lock);
5600 rb_erase(&entry->efd_node, entry_rb_root);
5601 kmem_cache_free(ext4_free_data_cachep, entry);
5602}
5603
5604static noinline_for_stack int
5605ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
5606 struct ext4_free_data *new_entry)
5607{
5608 ext4_group_t group = e4b->bd_group;
5609 ext4_grpblk_t cluster;
5610 ext4_grpblk_t clusters = new_entry->efd_count;
5611 struct ext4_free_data *entry;
5612 struct ext4_group_info *db = e4b->bd_info;
5613 struct super_block *sb = e4b->bd_sb;
5614 struct ext4_sb_info *sbi = EXT4_SB(sb);
5615 struct rb_node **n = &db->bb_free_root.rb_node, *node;
5616 struct rb_node *parent = NULL, *new_node;
5617
5618 BUG_ON(!ext4_handle_valid(handle));
5619 BUG_ON(e4b->bd_bitmap_page == NULL);
5620 BUG_ON(e4b->bd_buddy_page == NULL);
5621
5622 new_node = &new_entry->efd_node;
5623 cluster = new_entry->efd_start_cluster;
5624
5625 if (!*n) {
		/*
		 * First free block extent. We need to protect the buddy
		 * cache from being freed, otherwise we'll refresh it from
		 * the on-disk bitmap and lose not-yet-available blocks.
		 */
5631 get_page(e4b->bd_buddy_page);
5632 get_page(e4b->bd_bitmap_page);
5633 }
5634 while (*n) {
5635 parent = *n;
5636 entry = rb_entry(parent, struct ext4_free_data, efd_node);
5637 if (cluster < entry->efd_start_cluster)
5638 n = &(*n)->rb_left;
5639 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
5640 n = &(*n)->rb_right;
5641 else {
5642 ext4_grp_locked_error(sb, group, 0,
5643 ext4_group_first_block_no(sb, group) +
5644 EXT4_C2B(sbi, cluster),
5645 "Block already on to-be-freed list");
5646 kmem_cache_free(ext4_free_data_cachep, new_entry);
5647 return 0;
5648 }
5649 }
5650
5651 rb_link_node(new_node, parent, n);
5652 rb_insert_color(new_node, &db->bb_free_root);
5653
5654
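	/* Now see whether the extent can be merged to the left and right */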
5655 node = rb_prev(new_node);
5656 if (node) {
5657 entry = rb_entry(node, struct ext4_free_data, efd_node);
5658 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5659 &(db->bb_free_root));
5660 }
5661
5662 node = rb_next(new_node);
5663 if (node) {
5664 entry = rb_entry(node, struct ext4_free_data, efd_node);
5665 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5666 &(db->bb_free_root));
5667 }
5668
5669 spin_lock(&sbi->s_md_lock);
5670 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
5671 sbi->s_mb_free_pending += clusters;
5672 spin_unlock(&sbi->s_md_lock);
5673 return 0;
5674}
5675
/*
 * Simple allocator for the ext4 fast commit replay path: it searches
 * the groups linearly, starting at the goal block, and skips any block
 * that is excluded by the fast commit replay state.
 */
5681static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5682 struct ext4_allocation_request *ar, int *errp)
5683{
5684 struct buffer_head *bitmap_bh;
5685 struct super_block *sb = ar->inode->i_sb;
5686 ext4_group_t group;
5687 ext4_grpblk_t blkoff;
5688 int i = sb->s_blocksize;
5689 ext4_fsblk_t goal, block;
5690 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
5691
5692 goal = ar->goal;
5693 if (goal < le32_to_cpu(es->s_first_data_block) ||
5694 goal >= ext4_blocks_count(es))
5695 goal = le32_to_cpu(es->s_first_data_block);
5696
5697 ar->len = 0;
5698 ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
5699 for (; group < ext4_get_groups_count(sb); group++) {
5700 bitmap_bh = ext4_read_block_bitmap(sb, group);
5701 if (IS_ERR(bitmap_bh)) {
5702 *errp = PTR_ERR(bitmap_bh);
5703 pr_warn("Failed to read block bitmap\n");
5704 return 0;
5705 }
5706
5707 ext4_get_group_no_and_offset(sb,
5708 max(ext4_group_first_block_no(sb, group), goal),
5709 NULL, &blkoff);
5710 i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
5711 blkoff);
5712 brelse(bitmap_bh);
5713 if (i >= sb->s_blocksize)
5714 continue;
5715 if (ext4_fc_replay_check_excluded(sb,
5716 ext4_group_first_block_no(sb, group) + i))
5717 continue;
5718 break;
5719 }
5720
5721 if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
5722 return 0;
5723
5724 block = ext4_group_first_block_no(sb, group) + i;
5725 ext4_mb_mark_bb(sb, block, 1, 1);
5726 ar->len = 1;
5727
5728 return block;
5729}
5730
5731static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
5732 unsigned long count)
5733{
5734 struct buffer_head *bitmap_bh;
5735 struct super_block *sb = inode->i_sb;
5736 struct ext4_group_desc *gdp;
5737 struct buffer_head *gdp_bh;
5738 ext4_group_t group;
5739 ext4_grpblk_t blkoff;
5740 int already_freed = 0, err, i;
5741
5742 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
5743 bitmap_bh = ext4_read_block_bitmap(sb, group);
5744 if (IS_ERR(bitmap_bh)) {
5745 err = PTR_ERR(bitmap_bh);
5746 pr_warn("Failed to read block bitmap\n");
5747 return;
5748 }
5749 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
	if (!gdp) {
		/* don't leak the bitmap buffer reference on error */
		brelse(bitmap_bh);
		return;
	}
5752
5753 for (i = 0; i < count; i++) {
5754 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
5755 already_freed++;
5756 }
5757 mb_clear_bits(bitmap_bh->b_data, blkoff, count);
5758 err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
	if (err) {
		brelse(bitmap_bh);
		return;
	}
5761 ext4_free_group_clusters_set(
5762 sb, gdp, ext4_free_group_clusters(sb, gdp) +
5763 count - already_freed);
5764 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
5765 ext4_group_desc_csum_set(sb, group, gdp);
5766 ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
5767 sync_dirty_buffer(bitmap_bh);
5768 sync_dirty_buffer(gdp_bh);
5769 brelse(bitmap_bh);
5770}
5771
/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode
 * @bh:		optional buffer of the block to be freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	flags used by ext4_free_blocks
 */
5781void ext4_free_blocks(handle_t *handle, struct inode *inode,
5782 struct buffer_head *bh, ext4_fsblk_t block,
5783 unsigned long count, int flags)
5784{
5785 struct buffer_head *bitmap_bh = NULL;
5786 struct super_block *sb = inode->i_sb;
5787 struct ext4_group_desc *gdp;
5788 unsigned int overflow;
5789 ext4_grpblk_t bit;
5790 struct buffer_head *gd_bh;
5791 ext4_group_t block_group;
5792 struct ext4_sb_info *sbi;
5793 struct ext4_buddy e4b;
5794 unsigned int count_clusters;
5795 int err = 0;
5796 int ret;
5797
5798 sbi = EXT4_SB(sb);
5799
5800 if (sbi->s_mount_state & EXT4_FC_REPLAY) {
5801 ext4_free_blocks_simple(inode, block, count);
5802 return;
5803 }
5804
5805 might_sleep();
5806 if (bh) {
5807 if (block)
5808 BUG_ON(block != bh->b_blocknr);
5809 else
5810 block = bh->b_blocknr;
5811 }
5812
5813 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
5814 !ext4_inode_block_valid(inode, block, count)) {
5815 ext4_error(sb, "Freeing blocks not in datazone - "
5816 "block = %llu, count = %lu", block, count);
5817 goto error_return;
5818 }
5819
5820 ext4_debug("freeing block %llu\n", block);
5821 trace_ext4_free_blocks(inode, block, count, flags);
5822
5823 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
5824 BUG_ON(count > 1);
5825
5826 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
5827 inode, bh, block);
5828 }
5829
	/*
	 * If the extent to be freed does not begin on a cluster
	 * boundary, we need to deal with partial clusters at the
	 * beginning and end of the extent.  Normally we will free
	 * blocks at the beginning or the end unless we are explicitly
	 * requested to avoid doing so.
	 */
5837 overflow = EXT4_PBLK_COFF(sbi, block);
5838 if (overflow) {
5839 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
5840 overflow = sbi->s_cluster_ratio - overflow;
5841 block += overflow;
5842 if (count > overflow)
5843 count -= overflow;
5844 else
5845 return;
5846 } else {
5847 block -= overflow;
5848 count += overflow;
5849 }
5850 }
5851 overflow = EXT4_LBLK_COFF(sbi, count);
5852 if (overflow) {
5853 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
5854 if (count > overflow)
5855 count -= overflow;
5856 else
5857 return;
5858 } else
5859 count += sbi->s_cluster_ratio - overflow;
5860 }
5861
5862 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
5863 int i;
5864 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
5865
5866 for (i = 0; i < count; i++) {
5867 cond_resched();
5868 if (is_metadata)
5869 bh = sb_find_get_block(inode->i_sb, block + i);
5870 ext4_forget(handle, is_metadata, inode, bh, block + i);
5871 }
5872 }
5873
5874do_more:
5875 overflow = 0;
5876 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
5877
5878 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
5879 ext4_get_group_info(sb, block_group))))
5880 return;
5881
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
5886 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
5887 overflow = EXT4_C2B(sbi, bit) + count -
5888 EXT4_BLOCKS_PER_GROUP(sb);
5889 count -= overflow;
5890 }
5891 count_clusters = EXT4_NUM_B2C(sbi, count);
5892 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5893 if (IS_ERR(bitmap_bh)) {
5894 err = PTR_ERR(bitmap_bh);
5895 bitmap_bh = NULL;
5896 goto error_return;
5897 }
5898 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
5899 if (!gdp) {
5900 err = -EIO;
5901 goto error_return;
5902 }
5903
5904 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
5905 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
5906 in_range(block, ext4_inode_table(sb, gdp),
5907 sbi->s_itb_per_group) ||
5908 in_range(block + count - 1, ext4_inode_table(sb, gdp),
5909 sbi->s_itb_per_group)) {
5910
5911 ext4_error(sb, "Freeing blocks in system zone - "
5912 "Block = %llu, count = %lu", block, count);
5913
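		/* err = 0. ext4_std_error should be a no op */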
5914 goto error_return;
5915 }
5916
5917 BUFFER_TRACE(bitmap_bh, "getting write access");
5918 err = ext4_journal_get_write_access(handle, bitmap_bh);
5919 if (err)
5920 goto error_return;
5921
	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently committing transaction is
	 * using it.
	 */
5927 BUFFER_TRACE(gd_bh, "get_write_access");
5928 err = ext4_journal_get_write_access(handle, gd_bh);
5929 if (err)
5930 goto error_return;
5931#ifdef AGGRESSIVE_CHECK
5932 {
5933 int i;
5934 for (i = 0; i < count_clusters; i++)
5935 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
5936 }
5937#endif
5938 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
5939
5940
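	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */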
5941 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
5942 GFP_NOFS|__GFP_NOFAIL);
5943 if (err)
5944 goto error_return;
5945
	/*
	 * We need to make sure we don't reuse the freed block until after the
	 * transaction is committed. We make an exception if the inode is to be
	 * written in writeback mode since writeback mode has weak data
	 * consistency guarantees.
	 */
5952 if (ext4_handle_valid(handle) &&
5953 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
5954 !ext4_should_writeback_data(inode))) {
5955 struct ext4_free_data *new_entry;
5956
		/*
		 * We use __GFP_NOFAIL because ext4_free_blocks() is not
		 * allowed to fail.
		 */
5960 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
5961 GFP_NOFS|__GFP_NOFAIL);
5962 new_entry->efd_start_cluster = bit;
5963 new_entry->efd_group = block_group;
5964 new_entry->efd_count = count_clusters;
5965 new_entry->efd_tid = handle->h_transaction->t_tid;
5966
5967 ext4_lock_group(sb, block_group);
5968 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
5969 ext4_mb_free_metadata(handle, &e4b, new_entry);
5970 } else {
		/*
		 * group_info->bb_free and the bitmap need to be updated
		 * with the group lock held: buddy generation looks at
		 * them under the same lock.
		 */
5975 if (test_opt(sb, DISCARD)) {
5976 err = ext4_issue_discard(sb, block_group, bit, count,
5977 NULL);
5978 if (err && err != -EOPNOTSUPP)
5979 ext4_msg(sb, KERN_WARNING, "discard request in"
5980 " group:%d block:%d count:%lu failed"
5981 " with %d", block_group, bit, count,
5982 err);
5983 } else
5984 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
5985
5986 ext4_lock_group(sb, block_group);
5987 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
5988 mb_free_blocks(inode, &e4b, bit, count_clusters);
5989 }
5990
5991 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
5992 ext4_free_group_clusters_set(sb, gdp, ret);
5993 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
5994 ext4_group_desc_csum_set(sb, block_group, gdp);
5995 ext4_unlock_group(sb, block_group);
5996
5997 if (sbi->s_log_groups_per_flex) {
5998 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
5999 atomic64_add(count_clusters,
6000 &sbi_array_rcu_deref(sbi, s_flex_groups,
6001 flex_group)->free_clusters);
6002 }
6003
	/*
	 * on a bigalloc file system, defer the s_freeclusters_counter
	 * update to the caller (ext4_remove_space and friends) so they
	 * can determine if a cluster freed here should be rereserved
	 */
6009 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
6010 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
6011 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
6012 percpu_counter_add(&sbi->s_freeclusters_counter,
6013 count_clusters);
6014 }
6015
6016 ext4_mb_unload_buddy(&e4b);
6017
6018
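	/* We dirtied the bitmap block */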
6019 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6020 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6021
6022
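	/* And the group descriptor block */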
6023 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6024 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6025 if (!err)
6026 err = ret;
6027
6028 if (overflow && !err) {
6029 block += count;
6030 count = overflow;
6031 put_bh(bitmap_bh);
6032 goto do_more;
6033 }
6034error_return:
6035 brelse(bitmap_bh);
6036 ext4_std_error(sb, err);
6037 return;
6038}
6039
/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	handle to this transaction
 * @sb:		super block
 * @block:	start physical block to add to the block group
 * @count:	number of blocks to add
 *
 * This marks the blocks as free in the bitmap and buddy.
 */
6049int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
6050 ext4_fsblk_t block, unsigned long count)
6051{
6052 struct buffer_head *bitmap_bh = NULL;
6053 struct buffer_head *gd_bh;
6054 ext4_group_t block_group;
6055 ext4_grpblk_t bit;
6056 unsigned int i;
6057 struct ext4_group_desc *desc;
6058 struct ext4_sb_info *sbi = EXT4_SB(sb);
6059 struct ext4_buddy e4b;
6060 int err = 0, ret, free_clusters_count;
6061 ext4_grpblk_t clusters_freed;
6062 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
6063 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
6064 unsigned long cluster_count = last_cluster - first_cluster + 1;
6065
6066 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
6067
6068 if (count == 0)
6069 return 0;
6070
6071 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
6072
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
6076 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
6077 ext4_warning(sb, "too many blocks added to group %u",
6078 block_group);
6079 err = -EINVAL;
6080 goto error_return;
6081 }
6082
6083 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
6084 if (IS_ERR(bitmap_bh)) {
6085 err = PTR_ERR(bitmap_bh);
6086 bitmap_bh = NULL;
6087 goto error_return;
6088 }
6089
6090 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
6091 if (!desc) {
6092 err = -EIO;
6093 goto error_return;
6094 }
6095
6096 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
6097 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
6098 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
6099 in_range(block + count - 1, ext4_inode_table(sb, desc),
6100 sbi->s_itb_per_group)) {
6101 ext4_error(sb, "Adding blocks in system zones - "
6102 "Block = %llu, count = %lu",
6103 block, count);
6104 err = -EINVAL;
6105 goto error_return;
6106 }
6107
6108 BUFFER_TRACE(bitmap_bh, "getting write access");
6109 err = ext4_journal_get_write_access(handle, bitmap_bh);
6110 if (err)
6111 goto error_return;
6112
	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently committing transaction is
	 * using it.
	 */
6118 BUFFER_TRACE(gd_bh, "get_write_access");
6119 err = ext4_journal_get_write_access(handle, gd_bh);
6120 if (err)
6121 goto error_return;
6122
6123 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
6124 BUFFER_TRACE(bitmap_bh, "clear bit");
6125 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
6126 ext4_error(sb, "bit already cleared for block %llu",
6127 (ext4_fsblk_t)(block + i));
6128 BUFFER_TRACE(bitmap_bh, "bit already cleared");
6129 } else {
6130 clusters_freed++;
6131 }
6132 }
6133
6134 err = ext4_mb_load_buddy(sb, block_group, &e4b);
6135 if (err)
6136 goto error_return;
6137
	/*
	 * group_info->bb_free and the bitmap need to be updated
	 * with the group lock held: buddy generation looks at
	 * them under the same lock.
	 */
6143 ext4_lock_group(sb, block_group);
6144 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
6145 mb_free_blocks(NULL, &e4b, bit, cluster_count);
6146 free_clusters_count = clusters_freed +
6147 ext4_free_group_clusters(sb, desc);
6148 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
6149 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
6150 ext4_group_desc_csum_set(sb, block_group, desc);
6151 ext4_unlock_group(sb, block_group);
6152 percpu_counter_add(&sbi->s_freeclusters_counter,
6153 clusters_freed);
6154
6155 if (sbi->s_log_groups_per_flex) {
6156 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6157 atomic64_add(clusters_freed,
6158 &sbi_array_rcu_deref(sbi, s_flex_groups,
6159 flex_group)->free_clusters);
6160 }
6161
6162 ext4_mb_unload_buddy(&e4b);
6163
6164
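	/* We dirtied the bitmap block */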
6165 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6166 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6167
6168
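	/* And the group descriptor block */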
6169 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6170 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6171 if (!err)
6172 err = ret;
6173
6174error_return:
6175 brelse(bitmap_bh);
6176 ext4_std_error(sb, err);
6177 return err;
6178}
6179
/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @group:	alloc. group we are working with
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the "group". To assure that no
 * one else uses those blocks, the caller has to hold the group lock.
 */
6192static int ext4_trim_extent(struct super_block *sb, int start, int count,
6193 ext4_group_t group, struct ext4_buddy *e4b)
6194__releases(bitlock)
6195__acquires(bitlock)
6196{
6197 struct ext4_free_extent ex;
6198 int ret = 0;
6199
6200 trace_ext4_trim_extent(sb, group, start, count);
6201
6202 assert_spin_locked(ext4_group_lock_ptr(sb, group));
6203
6204 ex.fe_start = start;
6205 ex.fe_group = group;
6206 ex.fe_len = count;
6207
	/*
	 * Mark blocks used, so no one can reuse them while
	 * being trimmed.
	 */
6212 mb_mark_used(e4b, &ex);
6213 ext4_unlock_group(sb, group);
6214 ret = ext4_issue_discard(sb, group, start, count, NULL);
6215 ext4_lock_group(sb, group);
6216 mb_free_blocks(NULL, e4b, start, ex.fe_len);
6217 return ret;
6218}
6219
/**
 * ext4_trim_all_free -- function to trim all free space in alloc. group
 * @sb:			super block for file system
 * @group:		group to be trimmed
 * @start:		first group block to examine
 * @max:		last group block to examine
 * @minblocks:		minimum extent block count
 *
 * ext4_trim_all_free walks through the group's block bitmap searching for
 * free extents. When a free extent is found, it is marked as used in the
 * group buddy bitmap, a TRIM is issued for it, and the extent is freed
 * again in the buddy bitmap. This is repeated until the whole group is
 * scanned, after which the group is marked as trimmed.
 */
6238static ext4_grpblk_t
6239ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
6240 ext4_grpblk_t start, ext4_grpblk_t max,
6241 ext4_grpblk_t minblocks)
6242{
6243 void *bitmap;
6244 ext4_grpblk_t next, count = 0, free_count = 0;
6245 struct ext4_buddy e4b;
6246 int ret = 0;
6247
6248 trace_ext4_trim_all_free(sb, group, start, max);
6249
6250 ret = ext4_mb_load_buddy(sb, group, &e4b);
6251 if (ret) {
6252 ext4_warning(sb, "Error %d loading buddy information for %u",
6253 ret, group);
6254 return ret;
6255 }
6256 bitmap = e4b.bd_bitmap;
6257
6258 ext4_lock_group(sb, group);
6259 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
6260 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
6261 goto out;
6262
6263 start = (e4b.bd_info->bb_first_free > start) ?
6264 e4b.bd_info->bb_first_free : start;
6265
6266 while (start <= max) {
6267 start = mb_find_next_zero_bit(bitmap, max + 1, start);
6268 if (start > max)
6269 break;
6270 next = mb_find_next_bit(bitmap, max + 1, start);
6271
6272 if ((next - start) >= minblocks) {
6273 ret = ext4_trim_extent(sb, start,
6274 next - start, group, &e4b);
6275 if (ret && ret != -EOPNOTSUPP)
6276 break;
6277 ret = 0;
6278 count += next - start;
6279 }
6280 free_count += next - start;
6281 start = next + 1;
6282
6283 if (fatal_signal_pending(current)) {
6284 count = -ERESTARTSYS;
6285 break;
6286 }
6287
6288 if (need_resched()) {
6289 ext4_unlock_group(sb, group);
6290 cond_resched();
6291 ext4_lock_group(sb, group);
6292 }
6293
6294 if ((e4b.bd_info->bb_free - free_count) < minblocks)
6295 break;
6296 }
6297
6298 if (!ret) {
6299 ret = count;
6300 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
6301 }
6302out:
6303 ext4_unlock_group(sb, group);
6304 ext4_mb_unload_buddy(&e4b);
6305
6306 ext4_debug("trimmed %d blocks in the group %d\n",
6307 count, group);
6308
6309 return ret;
6310}
6311
/**
 * ext4_trim_fs() -- trim ioctl handle function
 * @sb:			superblock for filesystem
 * @range:		fstrim_range structure
 *
 * start:	First Byte to trim
 * len:		number of Bytes to trim from start
 * minlen:	minimum extent length in Bytes
 *
 * ext4_trim_fs goes through all allocation groups containing Bytes from
 * start to start+len. For each such group the ext4_trim_all_free function
 * is invoked to trim all free space.
 */
6324int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
6325{
6326 struct ext4_group_info *grp;
6327 ext4_group_t group, first_group, last_group;
6328 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
6329 uint64_t start, end, minlen, trimmed = 0;
6330 ext4_fsblk_t first_data_blk =
6331 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
6332 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
6333 int ret = 0;
6334
6335 start = range->start >> sb->s_blocksize_bits;
6336 end = start + (range->len >> sb->s_blocksize_bits) - 1;
6337 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
6338 range->minlen >> sb->s_blocksize_bits);
6339
6340 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
6341 start >= max_blks ||
6342 range->len < sb->s_blocksize)
6343 return -EINVAL;
6344 if (end >= max_blks)
6345 end = max_blks - 1;
6346 if (end <= first_data_blk)
6347 goto out;
6348 if (start < first_data_blk)
6349 start = first_data_blk;
6350
6351
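	/* Determine first and last group to examine based on start and end */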
6352 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
6353 &first_group, &first_cluster);
6354 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
6355 &last_group, &last_cluster);
6356
6357
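	/* end now represents the last cluster to discard in this group */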
6358 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6359
6360 for (group = first_group; group <= last_group; group++) {
6361 grp = ext4_get_group_info(sb, group);
6362
6363 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
6364 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
6365 if (ret)
6366 break;
6367 }
6368
		/*
		 * For all the groups except the last one, the last cluster
		 * will always be EXT4_CLUSTERS_PER_GROUP(sb) - 1, so we only
		 * need to change it for the last group; note that
		 * last_cluster is already computed earlier by
		 * ext4_get_group_no_and_offset()
		 */
6375 if (group == last_group)
6376 end = last_cluster;
6377
6378 if (grp->bb_free >= minlen) {
6379 cnt = ext4_trim_all_free(sb, group, first_cluster,
6380 end, minlen);
6381 if (cnt < 0) {
6382 ret = cnt;
6383 break;
6384 }
6385 trimmed += cnt;
6386 }
6387
		/*
		 * For every group except the first one, we are sure
		 * that the first cluster to discard will be cluster #0.
		 */
6392 first_cluster = 0;
6393 }
6394
6395 if (!ret)
6396 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
6397
6398out:
6399 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
6400 return ret;
6401}
6402
6403
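/*
 * Iterate over the free extents of a group within [start, end] and
 * invoke "formatter" on each extent found.
 */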
6404int
6405ext4_mballoc_query_range(
6406 struct super_block *sb,
6407 ext4_group_t group,
6408 ext4_grpblk_t start,
6409 ext4_grpblk_t end,
6410 ext4_mballoc_query_range_fn formatter,
6411 void *priv)
6412{
6413 void *bitmap;
6414 ext4_grpblk_t next;
6415 struct ext4_buddy e4b;
6416 int error;
6417
6418 error = ext4_mb_load_buddy(sb, group, &e4b);
6419 if (error)
6420 return error;
6421 bitmap = e4b.bd_bitmap;
6422
6423 ext4_lock_group(sb, group);
6424
6425 start = (e4b.bd_info->bb_first_free > start) ?
6426 e4b.bd_info->bb_first_free : start;
6427 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
6428 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6429
6430 while (start <= end) {
6431 start = mb_find_next_zero_bit(bitmap, end + 1, start);
6432 if (start > end)
6433 break;
6434 next = mb_find_next_bit(bitmap, end + 1, start);
6435
6436 ext4_unlock_group(sb, group);
6437 error = formatter(sb, group, start, next - start, priv);
6438 if (error)
6439 goto out_unload;
6440 ext4_lock_group(sb, group);
6441
6442 start = next + 1;
6443 }
6444
6445 ext4_unlock_group(sb, group);
6446out_unload:
6447 ext4_mb_unload_buddy(&e4b);
6448
6449 return error;
6450}
6451