1
2
3
4
5
6
7
8
9
10
11
12#include "ext4_jbd2.h"
13#include "mballoc.h"
14#include <linux/log2.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/nospec.h>
18#include <linux/backing-dev.h>
19#include <trace/events/ext4.h>
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386static struct kmem_cache *ext4_pspace_cachep;
387static struct kmem_cache *ext4_ac_cachep;
388static struct kmem_cache *ext4_free_data_cachep;
389
390
391
392
393#define NR_GRPINFO_CACHES 8
394static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
395
396static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
397 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
398 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
399 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
400};
401
402static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
403 ext4_group_t group);
404static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
405 ext4_group_t group);
406static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
407
408static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
409 ext4_group_t group, int cr);
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429static DEFINE_PER_CPU(u64, discard_pa_seq);
430static inline u64 ext4_get_discard_pa_seq_sum(void)
431{
432 int __cpu;
433 u64 __seq = 0;
434
435 for_each_possible_cpu(__cpu)
436 __seq += per_cpu(discard_pa_seq, __cpu);
437 return __seq;
438}
439
440static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
441{
442#if BITS_PER_LONG == 64
443 *bit += ((unsigned long) addr & 7UL) << 3;
444 addr = (void *) ((unsigned long) addr & ~7UL);
445#elif BITS_PER_LONG == 32
446 *bit += ((unsigned long) addr & 3UL) << 3;
447 addr = (void *) ((unsigned long) addr & ~3UL);
448#else
449#error "how many bits you are?!"
450#endif
451 return addr;
452}
453
454static inline int mb_test_bit(int bit, void *addr)
455{
456
457
458
459
460 addr = mb_correct_addr_and_bit(&bit, addr);
461 return ext4_test_bit(bit, addr);
462}
463
464static inline void mb_set_bit(int bit, void *addr)
465{
466 addr = mb_correct_addr_and_bit(&bit, addr);
467 ext4_set_bit(bit, addr);
468}
469
470static inline void mb_clear_bit(int bit, void *addr)
471{
472 addr = mb_correct_addr_and_bit(&bit, addr);
473 ext4_clear_bit(bit, addr);
474}
475
476static inline int mb_test_and_clear_bit(int bit, void *addr)
477{
478 addr = mb_correct_addr_and_bit(&bit, addr);
479 return ext4_test_and_clear_bit(bit, addr);
480}
481
482static inline int mb_find_next_zero_bit(void *addr, int max, int start)
483{
484 int fix = 0, ret, tmpmax;
485 addr = mb_correct_addr_and_bit(&fix, addr);
486 tmpmax = max + fix;
487 start += fix;
488
489 ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
490 if (ret > max)
491 return max;
492 return ret;
493}
494
495static inline int mb_find_next_bit(void *addr, int max, int start)
496{
497 int fix = 0, ret, tmpmax;
498 addr = mb_correct_addr_and_bit(&fix, addr);
499 tmpmax = max + fix;
500 start += fix;
501
502 ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
503 if (ret > max)
504 return max;
505 return ret;
506}
507
508static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
509{
510 char *bb;
511
512 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
513 BUG_ON(max == NULL);
514
515 if (order > e4b->bd_blkbits + 1) {
516 *max = 0;
517 return NULL;
518 }
519
520
521 if (order == 0) {
522 *max = 1 << (e4b->bd_blkbits + 3);
523 return e4b->bd_bitmap;
524 }
525
526 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
527 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
528
529 return bb;
530}
531
532#ifdef DOUBLE_CHECK
533static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
534 int first, int count)
535{
536 int i;
537 struct super_block *sb = e4b->bd_sb;
538
539 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
540 return;
541 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
542 for (i = 0; i < count; i++) {
543 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
544 ext4_fsblk_t blocknr;
545
546 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
547 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
548 ext4_grp_locked_error(sb, e4b->bd_group,
549 inode ? inode->i_ino : 0,
550 blocknr,
551 "freeing block already freed "
552 "(bit %u)",
553 first + i);
554 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
555 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
556 }
557 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
558 }
559}
560
561static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
562{
563 int i;
564
565 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
566 return;
567 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
568 for (i = 0; i < count; i++) {
569 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
570 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
571 }
572}
573
574static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
575{
576 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
577 return;
578 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
579 unsigned char *b1, *b2;
580 int i;
581 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
582 b2 = (unsigned char *) bitmap;
583 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
584 if (b1[i] != b2[i]) {
585 ext4_msg(e4b->bd_sb, KERN_ERR,
586 "corruption in group %u "
587 "at byte %u(%u): %x in copy != %x "
588 "on disk/prealloc",
589 e4b->bd_group, i, i * 8, b1[i], b2[i]);
590 BUG();
591 }
592 }
593 }
594}
595
596static void mb_group_bb_bitmap_alloc(struct super_block *sb,
597 struct ext4_group_info *grp, ext4_group_t group)
598{
599 struct buffer_head *bh;
600
601 grp->bb_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
602 if (!grp->bb_bitmap)
603 return;
604
605 bh = ext4_read_block_bitmap(sb, group);
606 if (IS_ERR_OR_NULL(bh)) {
607 kfree(grp->bb_bitmap);
608 grp->bb_bitmap = NULL;
609 return;
610 }
611
612 memcpy(grp->bb_bitmap, bh->b_data, sb->s_blocksize);
613 put_bh(bh);
614}
615
616static void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
617{
618 kfree(grp->bb_bitmap);
619}
620
621#else
622static inline void mb_free_blocks_double(struct inode *inode,
623 struct ext4_buddy *e4b, int first, int count)
624{
625 return;
626}
627static inline void mb_mark_used_double(struct ext4_buddy *e4b,
628 int first, int count)
629{
630 return;
631}
632static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
633{
634 return;
635}
636
637static inline void mb_group_bb_bitmap_alloc(struct super_block *sb,
638 struct ext4_group_info *grp, ext4_group_t group)
639{
640 return;
641}
642
643static inline void mb_group_bb_bitmap_free(struct ext4_group_info *grp)
644{
645 return;
646}
647#endif
648
649#ifdef AGGRESSIVE_CHECK
650
651#define MB_CHECK_ASSERT(assert) \
652do { \
653 if (!(assert)) { \
654 printk(KERN_EMERG \
655 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
656 function, file, line, # assert); \
657 BUG(); \
658 } \
659} while (0)
660
661static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
662 const char *function, int line)
663{
664 struct super_block *sb = e4b->bd_sb;
665 int order = e4b->bd_blkbits + 1;
666 int max;
667 int max2;
668 int i;
669 int j;
670 int k;
671 int count;
672 struct ext4_group_info *grp;
673 int fragments = 0;
674 int fstart;
675 struct list_head *cur;
676 void *buddy;
677 void *buddy2;
678
679 if (e4b->bd_info->bb_check_counter++ % 10)
680 return 0;
681
682 while (order > 1) {
683 buddy = mb_find_buddy(e4b, order, &max);
684 MB_CHECK_ASSERT(buddy);
685 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
686 MB_CHECK_ASSERT(buddy2);
687 MB_CHECK_ASSERT(buddy != buddy2);
688 MB_CHECK_ASSERT(max * 2 == max2);
689
690 count = 0;
691 for (i = 0; i < max; i++) {
692
693 if (mb_test_bit(i, buddy)) {
694
695 if (!mb_test_bit(i << 1, buddy2)) {
696 MB_CHECK_ASSERT(
697 mb_test_bit((i<<1)+1, buddy2));
698 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
699 MB_CHECK_ASSERT(
700 mb_test_bit(i << 1, buddy2));
701 }
702 continue;
703 }
704
705
706 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
707 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
708
709 for (j = 0; j < (1 << order); j++) {
710 k = (i * (1 << order)) + j;
711 MB_CHECK_ASSERT(
712 !mb_test_bit(k, e4b->bd_bitmap));
713 }
714 count++;
715 }
716 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
717 order--;
718 }
719
720 fstart = -1;
721 buddy = mb_find_buddy(e4b, 0, &max);
722 for (i = 0; i < max; i++) {
723 if (!mb_test_bit(i, buddy)) {
724 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
725 if (fstart == -1) {
726 fragments++;
727 fstart = i;
728 }
729 continue;
730 }
731 fstart = -1;
732
733 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
734 buddy2 = mb_find_buddy(e4b, j, &max2);
735 k = i >> j;
736 MB_CHECK_ASSERT(k < max2);
737 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
738 }
739 }
740 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
741 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
742
743 grp = ext4_get_group_info(sb, e4b->bd_group);
744 list_for_each(cur, &grp->bb_prealloc_list) {
745 ext4_group_t groupnr;
746 struct ext4_prealloc_space *pa;
747 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
748 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
749 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
750 for (i = 0; i < pa->pa_len; i++)
751 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
752 }
753 return 0;
754}
755#undef MB_CHECK_ASSERT
756#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
757 __FILE__, __func__, __LINE__)
758#else
759#define mb_check_buddy(e4b)
760#endif
761
762
763
764
765
766
767
768static void ext4_mb_mark_free_simple(struct super_block *sb,
769 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
770 struct ext4_group_info *grp)
771{
772 struct ext4_sb_info *sbi = EXT4_SB(sb);
773 ext4_grpblk_t min;
774 ext4_grpblk_t max;
775 ext4_grpblk_t chunk;
776 unsigned int border;
777
778 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
779
780 border = 2 << sb->s_blocksize_bits;
781
782 while (len > 0) {
783
784 max = ffs(first | border) - 1;
785
786
787 min = fls(len) - 1;
788
789 if (max < min)
790 min = max;
791 chunk = 1 << min;
792
793
794 grp->bb_counters[min]++;
795 if (min > 0)
796 mb_clear_bit(first >> min,
797 buddy + sbi->s_mb_offsets[min]);
798
799 len -= chunk;
800 first += chunk;
801 }
802}
803
804static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new,
805 int (*cmp)(struct rb_node *, struct rb_node *))
806{
807 struct rb_node **iter = &root->rb_node, *parent = NULL;
808
809 while (*iter) {
810 parent = *iter;
811 if (cmp(new, *iter) > 0)
812 iter = &((*iter)->rb_left);
813 else
814 iter = &((*iter)->rb_right);
815 }
816
817 rb_link_node(new, parent, iter);
818 rb_insert_color(new, root);
819}
820
821static int
822ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2)
823{
824 struct ext4_group_info *grp1 = rb_entry(rb1,
825 struct ext4_group_info,
826 bb_avg_fragment_size_rb);
827 struct ext4_group_info *grp2 = rb_entry(rb2,
828 struct ext4_group_info,
829 bb_avg_fragment_size_rb);
830 int num_frags_1, num_frags_2;
831
832 num_frags_1 = grp1->bb_fragments ?
833 grp1->bb_free / grp1->bb_fragments : 0;
834 num_frags_2 = grp2->bb_fragments ?
835 grp2->bb_free / grp2->bb_fragments : 0;
836
837 return (num_frags_2 - num_frags_1);
838}
839
840
841
842
843
844static void
845mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
846{
847 struct ext4_sb_info *sbi = EXT4_SB(sb);
848
849 if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
850 return;
851
852 write_lock(&sbi->s_mb_rb_lock);
853 if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) {
854 rb_erase(&grp->bb_avg_fragment_size_rb,
855 &sbi->s_mb_avg_fragment_size_root);
856 RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb);
857 }
858
859 ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root,
860 &grp->bb_avg_fragment_size_rb,
861 ext4_mb_avg_fragment_size_cmp);
862 write_unlock(&sbi->s_mb_rb_lock);
863}
864
865
866
867
868
869static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
870 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
871{
872 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
873 struct ext4_group_info *iter, *grp;
874 int i;
875
876 if (ac->ac_status == AC_STATUS_FOUND)
877 return;
878
879 if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR0_OPTIMIZED))
880 atomic_inc(&sbi->s_bal_cr0_bad_suggestions);
881
882 grp = NULL;
883 for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
884 if (list_empty(&sbi->s_mb_largest_free_orders[i]))
885 continue;
886 read_lock(&sbi->s_mb_largest_free_orders_locks[i]);
887 if (list_empty(&sbi->s_mb_largest_free_orders[i])) {
888 read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
889 continue;
890 }
891 grp = NULL;
892 list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
893 bb_largest_free_order_node) {
894 if (sbi->s_mb_stats)
895 atomic64_inc(&sbi->s_bal_cX_groups_considered[0]);
896 if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) {
897 grp = iter;
898 break;
899 }
900 }
901 read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
902 if (grp)
903 break;
904 }
905
906 if (!grp) {
907
908 *new_cr = 1;
909 } else {
910 *group = grp->bb_group;
911 ac->ac_last_optimal_group = *group;
912 ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
913 }
914}
915
916
917
918
919
920
921
922static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
923 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
924{
925 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
926 int avg_fragment_size, best_so_far;
927 struct rb_node *node, *found;
928 struct ext4_group_info *grp;
929
930
931
932
933
934
935 if (!read_trylock(&sbi->s_mb_rb_lock)) {
936 ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR;
937 return;
938 }
939
940 if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) {
941 if (sbi->s_mb_stats)
942 atomic_inc(&sbi->s_bal_cr1_bad_suggestions);
943
944 grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group);
945 for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL;
946 found = rb_next(found)) {
947 grp = rb_entry(found, struct ext4_group_info,
948 bb_avg_fragment_size_rb);
949 if (sbi->s_mb_stats)
950 atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
951 if (likely(ext4_mb_good_group(ac, grp->bb_group, 1)))
952 break;
953 }
954 goto done;
955 }
956
957 node = sbi->s_mb_avg_fragment_size_root.rb_node;
958 best_so_far = 0;
959 found = NULL;
960
961 while (node) {
962 grp = rb_entry(node, struct ext4_group_info,
963 bb_avg_fragment_size_rb);
964 avg_fragment_size = 0;
965 if (ext4_mb_good_group(ac, grp->bb_group, 1)) {
966 avg_fragment_size = grp->bb_fragments ?
967 grp->bb_free / grp->bb_fragments : 0;
968 if (!best_so_far || avg_fragment_size < best_so_far) {
969 best_so_far = avg_fragment_size;
970 found = node;
971 }
972 }
973 if (avg_fragment_size > ac->ac_g_ex.fe_len)
974 node = node->rb_right;
975 else
976 node = node->rb_left;
977 }
978
979done:
980 if (found) {
981 grp = rb_entry(found, struct ext4_group_info,
982 bb_avg_fragment_size_rb);
983 *group = grp->bb_group;
984 ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
985 } else {
986 *new_cr = 2;
987 }
988
989 read_unlock(&sbi->s_mb_rb_lock);
990 ac->ac_last_optimal_group = *group;
991}
992
993static inline int should_optimize_scan(struct ext4_allocation_context *ac)
994{
995 if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN)))
996 return 0;
997 if (ac->ac_criteria >= 2)
998 return 0;
999 if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
1000 return 0;
1001 return 1;
1002}
1003
1004
1005
1006
1007
1008static int
1009next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
1010{
1011 if (!should_optimize_scan(ac))
1012 goto inc_and_return;
1013
1014 if (ac->ac_groups_linear_remaining) {
1015 ac->ac_groups_linear_remaining--;
1016 goto inc_and_return;
1017 }
1018
1019 if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) {
1020 ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR;
1021 goto inc_and_return;
1022 }
1023
1024 return group;
1025inc_and_return:
1026
1027
1028
1029
1030 return group + 1 >= ngroups ? 0 : group + 1;
1031}
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
1047 int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
1048{
1049 *new_cr = ac->ac_criteria;
1050
1051 if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
1052 return;
1053
1054 if (*new_cr == 0) {
1055 ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
1056 } else if (*new_cr == 1) {
1057 ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups);
1058 } else {
1059
1060
1061
1062
1063 WARN_ON(1);
1064 }
1065}
1066
1067
1068
1069
1070
1071static void
1072mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
1073{
1074 struct ext4_sb_info *sbi = EXT4_SB(sb);
1075 int i;
1076
1077 if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) {
1078 write_lock(&sbi->s_mb_largest_free_orders_locks[
1079 grp->bb_largest_free_order]);
1080 list_del_init(&grp->bb_largest_free_order_node);
1081 write_unlock(&sbi->s_mb_largest_free_orders_locks[
1082 grp->bb_largest_free_order]);
1083 }
1084 grp->bb_largest_free_order = -1;
1085
1086 for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) {
1087 if (grp->bb_counters[i] > 0) {
1088 grp->bb_largest_free_order = i;
1089 break;
1090 }
1091 }
1092 if (test_opt2(sb, MB_OPTIMIZE_SCAN) &&
1093 grp->bb_largest_free_order >= 0 && grp->bb_free) {
1094 write_lock(&sbi->s_mb_largest_free_orders_locks[
1095 grp->bb_largest_free_order]);
1096 list_add_tail(&grp->bb_largest_free_order_node,
1097 &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]);
1098 write_unlock(&sbi->s_mb_largest_free_orders_locks[
1099 grp->bb_largest_free_order]);
1100 }
1101}
1102
1103static noinline_for_stack
1104void ext4_mb_generate_buddy(struct super_block *sb,
1105 void *buddy, void *bitmap, ext4_group_t group)
1106{
1107 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1108 struct ext4_sb_info *sbi = EXT4_SB(sb);
1109 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
1110 ext4_grpblk_t i = 0;
1111 ext4_grpblk_t first;
1112 ext4_grpblk_t len;
1113 unsigned free = 0;
1114 unsigned fragments = 0;
1115 unsigned long long period = get_cycles();
1116
1117
1118
1119 i = mb_find_next_zero_bit(bitmap, max, 0);
1120 grp->bb_first_free = i;
1121 while (i < max) {
1122 fragments++;
1123 first = i;
1124 i = mb_find_next_bit(bitmap, max, i);
1125 len = i - first;
1126 free += len;
1127 if (len > 1)
1128 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
1129 else
1130 grp->bb_counters[0]++;
1131 if (i < max)
1132 i = mb_find_next_zero_bit(bitmap, max, i);
1133 }
1134 grp->bb_fragments = fragments;
1135
1136 if (free != grp->bb_free) {
1137 ext4_grp_locked_error(sb, group, 0, 0,
1138 "block bitmap and bg descriptor "
1139 "inconsistent: %u vs %u free clusters",
1140 free, grp->bb_free);
1141
1142
1143
1144
1145 grp->bb_free = free;
1146 ext4_mark_group_bitmap_corrupted(sb, group,
1147 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1148 }
1149 mb_set_largest_free_order(sb, grp);
1150
1151 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
1152
1153 period = get_cycles() - period;
1154 atomic_inc(&sbi->s_mb_buddies_generated);
1155 atomic64_add(period, &sbi->s_mb_generation_time);
1156 mb_update_avg_fragment_size(sb, grp);
1157}
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
1180{
1181 ext4_group_t ngroups;
1182 int blocksize;
1183 int blocks_per_page;
1184 int groups_per_page;
1185 int err = 0;
1186 int i;
1187 ext4_group_t first_group, group;
1188 int first_block;
1189 struct super_block *sb;
1190 struct buffer_head *bhs;
1191 struct buffer_head **bh = NULL;
1192 struct inode *inode;
1193 char *data;
1194 char *bitmap;
1195 struct ext4_group_info *grinfo;
1196
1197 inode = page->mapping->host;
1198 sb = inode->i_sb;
1199 ngroups = ext4_get_groups_count(sb);
1200 blocksize = i_blocksize(inode);
1201 blocks_per_page = PAGE_SIZE / blocksize;
1202
1203 mb_debug(sb, "init page %lu\n", page->index);
1204
1205 groups_per_page = blocks_per_page >> 1;
1206 if (groups_per_page == 0)
1207 groups_per_page = 1;
1208
1209
1210 if (groups_per_page > 1) {
1211 i = sizeof(struct buffer_head *) * groups_per_page;
1212 bh = kzalloc(i, gfp);
1213 if (bh == NULL) {
1214 err = -ENOMEM;
1215 goto out;
1216 }
1217 } else
1218 bh = &bhs;
1219
1220 first_group = page->index * blocks_per_page / 2;
1221
1222
1223 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
1224 if (group >= ngroups)
1225 break;
1226
1227 grinfo = ext4_get_group_info(sb, group);
1228
1229
1230
1231
1232
1233
1234 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
1235 bh[i] = NULL;
1236 continue;
1237 }
1238 bh[i] = ext4_read_block_bitmap_nowait(sb, group, false);
1239 if (IS_ERR(bh[i])) {
1240 err = PTR_ERR(bh[i]);
1241 bh[i] = NULL;
1242 goto out;
1243 }
1244 mb_debug(sb, "read bitmap for group %u\n", group);
1245 }
1246
1247
1248 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
1249 int err2;
1250
1251 if (!bh[i])
1252 continue;
1253 err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
1254 if (!err)
1255 err = err2;
1256 }
1257
1258 first_block = page->index * blocks_per_page;
1259 for (i = 0; i < blocks_per_page; i++) {
1260 group = (first_block + i) >> 1;
1261 if (group >= ngroups)
1262 break;
1263
1264 if (!bh[group - first_group])
1265
1266 continue;
1267
1268 if (!buffer_verified(bh[group - first_group]))
1269
1270 continue;
1271 err = 0;
1272
1273
1274
1275
1276
1277
1278
1279 data = page_address(page) + (i * blocksize);
1280 bitmap = bh[group - first_group]->b_data;
1281
1282
1283
1284
1285
1286 if ((first_block + i) & 1) {
1287
1288 BUG_ON(incore == NULL);
1289 mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
1290 group, page->index, i * blocksize);
1291 trace_ext4_mb_buddy_bitmap_load(sb, group);
1292 grinfo = ext4_get_group_info(sb, group);
1293 grinfo->bb_fragments = 0;
1294 memset(grinfo->bb_counters, 0,
1295 sizeof(*grinfo->bb_counters) *
1296 (MB_NUM_ORDERS(sb)));
1297
1298
1299
1300 ext4_lock_group(sb, group);
1301
1302 memset(data, 0xff, blocksize);
1303 ext4_mb_generate_buddy(sb, data, incore, group);
1304 ext4_unlock_group(sb, group);
1305 incore = NULL;
1306 } else {
1307
1308 BUG_ON(incore != NULL);
1309 mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
1310 group, page->index, i * blocksize);
1311 trace_ext4_mb_bitmap_load(sb, group);
1312
1313
1314 ext4_lock_group(sb, group);
1315 memcpy(data, bitmap, blocksize);
1316
1317
1318 ext4_mb_generate_from_pa(sb, data, group);
1319 ext4_mb_generate_from_freelist(sb, data, group);
1320 ext4_unlock_group(sb, group);
1321
1322
1323
1324
1325 incore = data;
1326 }
1327 }
1328 SetPageUptodate(page);
1329
1330out:
1331 if (bh) {
1332 for (i = 0; i < groups_per_page; i++)
1333 brelse(bh[i]);
1334 if (bh != &bhs)
1335 kfree(bh);
1336 }
1337 return err;
1338}
1339
1340
1341
1342
1343
1344
1345
1346static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
1347 ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
1348{
1349 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
1350 int block, pnum, poff;
1351 int blocks_per_page;
1352 struct page *page;
1353
1354 e4b->bd_buddy_page = NULL;
1355 e4b->bd_bitmap_page = NULL;
1356
1357 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1358
1359
1360
1361
1362
1363 block = group * 2;
1364 pnum = block / blocks_per_page;
1365 poff = block % blocks_per_page;
1366 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1367 if (!page)
1368 return -ENOMEM;
1369 BUG_ON(page->mapping != inode->i_mapping);
1370 e4b->bd_bitmap_page = page;
1371 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1372
1373 if (blocks_per_page >= 2) {
1374
1375 return 0;
1376 }
1377
1378 block++;
1379 pnum = block / blocks_per_page;
1380 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1381 if (!page)
1382 return -ENOMEM;
1383 BUG_ON(page->mapping != inode->i_mapping);
1384 e4b->bd_buddy_page = page;
1385 return 0;
1386}
1387
1388static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1389{
1390 if (e4b->bd_bitmap_page) {
1391 unlock_page(e4b->bd_bitmap_page);
1392 put_page(e4b->bd_bitmap_page);
1393 }
1394 if (e4b->bd_buddy_page) {
1395 unlock_page(e4b->bd_buddy_page);
1396 put_page(e4b->bd_buddy_page);
1397 }
1398}
1399
1400
1401
1402
1403
1404
1405static noinline_for_stack
1406int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
1407{
1408
1409 struct ext4_group_info *this_grp;
1410 struct ext4_buddy e4b;
1411 struct page *page;
1412 int ret = 0;
1413
1414 might_sleep();
1415 mb_debug(sb, "init group %u\n", group);
1416 this_grp = ext4_get_group_info(sb, group);
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
1427 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1428
1429
1430
1431
1432 goto err;
1433 }
1434
1435 page = e4b.bd_bitmap_page;
1436 ret = ext4_mb_init_cache(page, NULL, gfp);
1437 if (ret)
1438 goto err;
1439 if (!PageUptodate(page)) {
1440 ret = -EIO;
1441 goto err;
1442 }
1443
1444 if (e4b.bd_buddy_page == NULL) {
1445
1446
1447
1448
1449
1450 ret = 0;
1451 goto err;
1452 }
1453
1454 page = e4b.bd_buddy_page;
1455 ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
1456 if (ret)
1457 goto err;
1458 if (!PageUptodate(page)) {
1459 ret = -EIO;
1460 goto err;
1461 }
1462err:
1463 ext4_mb_put_buddy_page_lock(&e4b);
1464 return ret;
1465}
1466
1467
1468
1469
1470
1471
1472static noinline_for_stack int
1473ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
1474 struct ext4_buddy *e4b, gfp_t gfp)
1475{
1476 int blocks_per_page;
1477 int block;
1478 int pnum;
1479 int poff;
1480 struct page *page;
1481 int ret;
1482 struct ext4_group_info *grp;
1483 struct ext4_sb_info *sbi = EXT4_SB(sb);
1484 struct inode *inode = sbi->s_buddy_cache;
1485
1486 might_sleep();
1487 mb_debug(sb, "load group %u\n", group);
1488
1489 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1490 grp = ext4_get_group_info(sb, group);
1491
1492 e4b->bd_blkbits = sb->s_blocksize_bits;
1493 e4b->bd_info = grp;
1494 e4b->bd_sb = sb;
1495 e4b->bd_group = group;
1496 e4b->bd_buddy_page = NULL;
1497 e4b->bd_bitmap_page = NULL;
1498
1499 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1500
1501
1502
1503
1504 ret = ext4_mb_init_group(sb, group, gfp);
1505 if (ret)
1506 return ret;
1507 }
1508
1509
1510
1511
1512
1513
1514 block = group * 2;
1515 pnum = block / blocks_per_page;
1516 poff = block % blocks_per_page;
1517
1518
1519
1520 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1521 if (page == NULL || !PageUptodate(page)) {
1522 if (page)
1523
1524
1525
1526
1527
1528
1529
1530
1531 put_page(page);
1532 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1533 if (page) {
1534 BUG_ON(page->mapping != inode->i_mapping);
1535 if (!PageUptodate(page)) {
1536 ret = ext4_mb_init_cache(page, NULL, gfp);
1537 if (ret) {
1538 unlock_page(page);
1539 goto err;
1540 }
1541 mb_cmp_bitmaps(e4b, page_address(page) +
1542 (poff * sb->s_blocksize));
1543 }
1544 unlock_page(page);
1545 }
1546 }
1547 if (page == NULL) {
1548 ret = -ENOMEM;
1549 goto err;
1550 }
1551 if (!PageUptodate(page)) {
1552 ret = -EIO;
1553 goto err;
1554 }
1555
1556
1557 e4b->bd_bitmap_page = page;
1558 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1559
1560 block++;
1561 pnum = block / blocks_per_page;
1562 poff = block % blocks_per_page;
1563
1564 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1565 if (page == NULL || !PageUptodate(page)) {
1566 if (page)
1567 put_page(page);
1568 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1569 if (page) {
1570 BUG_ON(page->mapping != inode->i_mapping);
1571 if (!PageUptodate(page)) {
1572 ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
1573 gfp);
1574 if (ret) {
1575 unlock_page(page);
1576 goto err;
1577 }
1578 }
1579 unlock_page(page);
1580 }
1581 }
1582 if (page == NULL) {
1583 ret = -ENOMEM;
1584 goto err;
1585 }
1586 if (!PageUptodate(page)) {
1587 ret = -EIO;
1588 goto err;
1589 }
1590
1591
1592 e4b->bd_buddy_page = page;
1593 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1594
1595 return 0;
1596
1597err:
1598 if (page)
1599 put_page(page);
1600 if (e4b->bd_bitmap_page)
1601 put_page(e4b->bd_bitmap_page);
1602 if (e4b->bd_buddy_page)
1603 put_page(e4b->bd_buddy_page);
1604 e4b->bd_buddy = NULL;
1605 e4b->bd_bitmap = NULL;
1606 return ret;
1607}
1608
1609static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1610 struct ext4_buddy *e4b)
1611{
1612 return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
1613}
1614
1615static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1616{
1617 if (e4b->bd_bitmap_page)
1618 put_page(e4b->bd_bitmap_page);
1619 if (e4b->bd_buddy_page)
1620 put_page(e4b->bd_buddy_page);
1621}
1622
1623
1624static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1625{
1626 int order = 1, max;
1627 void *bb;
1628
1629 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1630 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1631
1632 while (order <= e4b->bd_blkbits + 1) {
1633 bb = mb_find_buddy(e4b, order, &max);
1634 if (!mb_test_bit(block >> order, bb)) {
1635
1636 return order;
1637 }
1638 order++;
1639 }
1640 return 0;
1641}
1642
1643static void mb_clear_bits(void *bm, int cur, int len)
1644{
1645 __u32 *addr;
1646
1647 len = cur + len;
1648 while (cur < len) {
1649 if ((cur & 31) == 0 && (len - cur) >= 32) {
1650
1651 addr = bm + (cur >> 3);
1652 *addr = 0;
1653 cur += 32;
1654 continue;
1655 }
1656 mb_clear_bit(cur, bm);
1657 cur++;
1658 }
1659}
1660
1661
1662
1663
1664static int mb_test_and_clear_bits(void *bm, int cur, int len)
1665{
1666 __u32 *addr;
1667 int zero_bit = -1;
1668
1669 len = cur + len;
1670 while (cur < len) {
1671 if ((cur & 31) == 0 && (len - cur) >= 32) {
1672
1673 addr = bm + (cur >> 3);
1674 if (*addr != (__u32)(-1) && zero_bit == -1)
1675 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1676 *addr = 0;
1677 cur += 32;
1678 continue;
1679 }
1680 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1681 zero_bit = cur;
1682 cur++;
1683 }
1684
1685 return zero_bit;
1686}
1687
1688void ext4_set_bits(void *bm, int cur, int len)
1689{
1690 __u32 *addr;
1691
1692 len = cur + len;
1693 while (cur < len) {
1694 if ((cur & 31) == 0 && (len - cur) >= 32) {
1695
1696 addr = bm + (cur >> 3);
1697 *addr = 0xffffffff;
1698 cur += 32;
1699 continue;
1700 }
1701 mb_set_bit(cur, bm);
1702 cur++;
1703 }
1704}
1705
1706static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1707{
1708 if (mb_test_bit(*bit + side, bitmap)) {
1709 mb_clear_bit(*bit, bitmap);
1710 (*bit) -= side;
1711 return 1;
1712 }
1713 else {
1714 (*bit) += side;
1715 mb_set_bit(*bit, bitmap);
1716 return -1;
1717 }
1718}
1719
1720static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1721{
1722 int max;
1723 int order = 1;
1724 void *buddy = mb_find_buddy(e4b, order, &max);
1725
1726 while (buddy) {
1727 void *buddy2;
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758 if (first & 1)
1759 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1760 if (!(last & 1))
1761 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1762 if (first > last)
1763 break;
1764 order++;
1765
1766 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1767 mb_clear_bits(buddy, first, last - first + 1);
1768 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1769 break;
1770 }
1771 first >>= 1;
1772 last >>= 1;
1773 buddy = buddy2;
1774 }
1775}
1776
1777static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1778 int first, int count)
1779{
1780 int left_is_free = 0;
1781 int right_is_free = 0;
1782 int block;
1783 int last = first + count - 1;
1784 struct super_block *sb = e4b->bd_sb;
1785
1786 if (WARN_ON(count == 0))
1787 return;
1788 BUG_ON(last >= (sb->s_blocksize << 3));
1789 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1790
1791 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1792 return;
1793
1794 mb_check_buddy(e4b);
1795 mb_free_blocks_double(inode, e4b, first, count);
1796
1797 this_cpu_inc(discard_pa_seq);
1798 e4b->bd_info->bb_free += count;
1799 if (first < e4b->bd_info->bb_first_free)
1800 e4b->bd_info->bb_first_free = first;
1801
1802
1803
1804
1805 if (first != 0)
1806 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1807 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1808 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1809 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1810
1811 if (unlikely(block != -1)) {
1812 struct ext4_sb_info *sbi = EXT4_SB(sb);
1813 ext4_fsblk_t blocknr;
1814
1815 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1816 blocknr += EXT4_C2B(sbi, block);
1817 if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
1818 ext4_grp_locked_error(sb, e4b->bd_group,
1819 inode ? inode->i_ino : 0,
1820 blocknr,
1821 "freeing already freed block (bit %u); block bitmap corrupt.",
1822 block);
1823 ext4_mark_group_bitmap_corrupted(
1824 sb, e4b->bd_group,
1825 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
1826 }
1827 goto done;
1828 }
1829
1830
1831 if (left_is_free && right_is_free)
1832 e4b->bd_info->bb_fragments--;
1833 else if (!left_is_free && !right_is_free)
1834 e4b->bd_info->bb_fragments++;
1835
1836
1837
1838
1839
1840
1841
1842 if (first & 1) {
1843 first += !left_is_free;
1844 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1845 }
1846 if (!(last & 1)) {
1847 last -= !right_is_free;
1848 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1849 }
1850
1851 if (first <= last)
1852 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1853
1854done:
1855 mb_set_largest_free_order(sb, e4b->bd_info);
1856 mb_update_avg_fragment_size(sb, e4b->bd_info);
1857 mb_check_buddy(e4b);
1858}
1859
1860static int mb_find_extent(struct ext4_buddy *e4b, int block,
1861 int needed, struct ext4_free_extent *ex)
1862{
1863 int next = block;
1864 int max, order;
1865 void *buddy;
1866
1867 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1868 BUG_ON(ex == NULL);
1869
1870 buddy = mb_find_buddy(e4b, 0, &max);
1871 BUG_ON(buddy == NULL);
1872 BUG_ON(block >= max);
1873 if (mb_test_bit(block, buddy)) {
1874 ex->fe_len = 0;
1875 ex->fe_start = 0;
1876 ex->fe_group = 0;
1877 return 0;
1878 }
1879
1880
1881 order = mb_find_order_for_block(e4b, block);
1882 block = block >> order;
1883
1884 ex->fe_len = 1 << order;
1885 ex->fe_start = block << order;
1886 ex->fe_group = e4b->bd_group;
1887
1888
1889 next = next - ex->fe_start;
1890 ex->fe_len -= next;
1891 ex->fe_start += next;
1892
1893 while (needed > ex->fe_len &&
1894 mb_find_buddy(e4b, order, &max)) {
1895
1896 if (block + 1 >= max)
1897 break;
1898
1899 next = (block + 1) * (1 << order);
1900 if (mb_test_bit(next, e4b->bd_bitmap))
1901 break;
1902
1903 order = mb_find_order_for_block(e4b, next);
1904
1905 block = next >> order;
1906 ex->fe_len += 1 << order;
1907 }
1908
1909 if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
1910
1911 WARN_ON(1);
1912 ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0,
1913 "corruption or bug in mb_find_extent "
1914 "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
1915 block, order, needed, ex->fe_group, ex->fe_start,
1916 ex->fe_len, ex->fe_logical);
1917 ex->fe_len = 0;
1918 ex->fe_start = 0;
1919 ex->fe_group = 0;
1920 }
1921 return ex->fe_len;
1922}
1923
1924static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1925{
1926 int ord;
1927 int mlen = 0;
1928 int max = 0;
1929 int cur;
1930 int start = ex->fe_start;
1931 int len = ex->fe_len;
1932 unsigned ret = 0;
1933 int len0 = len;
1934 void *buddy;
1935
1936 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1937 BUG_ON(e4b->bd_group != ex->fe_group);
1938 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1939 mb_check_buddy(e4b);
1940 mb_mark_used_double(e4b, start, len);
1941
1942 this_cpu_inc(discard_pa_seq);
1943 e4b->bd_info->bb_free -= len;
1944 if (e4b->bd_info->bb_first_free == start)
1945 e4b->bd_info->bb_first_free += len;
1946
1947
1948 if (start != 0)
1949 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1950 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1951 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1952 if (mlen && max)
1953 e4b->bd_info->bb_fragments++;
1954 else if (!mlen && !max)
1955 e4b->bd_info->bb_fragments--;
1956
1957
1958 while (len) {
1959 ord = mb_find_order_for_block(e4b, start);
1960
1961 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1962
1963 mlen = 1 << ord;
1964 buddy = mb_find_buddy(e4b, ord, &max);
1965 BUG_ON((start >> ord) >= max);
1966 mb_set_bit(start >> ord, buddy);
1967 e4b->bd_info->bb_counters[ord]--;
1968 start += mlen;
1969 len -= mlen;
1970 BUG_ON(len < 0);
1971 continue;
1972 }
1973
1974
1975 if (ret == 0)
1976 ret = len | (ord << 16);
1977
1978
1979 BUG_ON(ord <= 0);
1980 buddy = mb_find_buddy(e4b, ord, &max);
1981 mb_set_bit(start >> ord, buddy);
1982 e4b->bd_info->bb_counters[ord]--;
1983
1984 ord--;
1985 cur = (start >> ord) & ~1U;
1986 buddy = mb_find_buddy(e4b, ord, &max);
1987 mb_clear_bit(cur, buddy);
1988 mb_clear_bit(cur + 1, buddy);
1989 e4b->bd_info->bb_counters[ord]++;
1990 e4b->bd_info->bb_counters[ord]++;
1991 }
1992 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1993
1994 mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info);
1995 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1996 mb_check_buddy(e4b);
1997
1998 return ret;
1999}
2000
2001
2002
2003
2004static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
2005 struct ext4_buddy *e4b)
2006{
2007 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2008 int ret;
2009
2010 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
2011 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2012
2013 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
2014 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
2015 ret = mb_mark_used(e4b, &ac->ac_b_ex);
2016
2017
2018
2019 ac->ac_f_ex = ac->ac_b_ex;
2020
2021 ac->ac_status = AC_STATUS_FOUND;
2022 ac->ac_tail = ret & 0xffff;
2023 ac->ac_buddy = ret >> 16;
2024
2025
2026
2027
2028
2029
2030
2031
2032 ac->ac_bitmap_page = e4b->bd_bitmap_page;
2033 get_page(ac->ac_bitmap_page);
2034 ac->ac_buddy_page = e4b->bd_buddy_page;
2035 get_page(ac->ac_buddy_page);
2036
2037 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2038 spin_lock(&sbi->s_md_lock);
2039 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
2040 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
2041 spin_unlock(&sbi->s_md_lock);
2042 }
2043
2044
2045
2046
2047
2048 if (ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
2049 ext4_mb_new_preallocation(ac);
2050
2051}
2052
2053static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
2054 struct ext4_buddy *e4b,
2055 int finish_group)
2056{
2057 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2058 struct ext4_free_extent *bex = &ac->ac_b_ex;
2059 struct ext4_free_extent *gex = &ac->ac_g_ex;
2060 struct ext4_free_extent ex;
2061 int max;
2062
2063 if (ac->ac_status == AC_STATUS_FOUND)
2064 return;
2065
2066
2067
2068 if (ac->ac_found > sbi->s_mb_max_to_scan &&
2069 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2070 ac->ac_status = AC_STATUS_BREAK;
2071 return;
2072 }
2073
2074
2075
2076
2077 if (bex->fe_len < gex->fe_len)
2078 return;
2079
2080 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
2081 && bex->fe_group == e4b->bd_group) {
2082
2083
2084
2085 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
2086 if (max >= gex->fe_len) {
2087 ext4_mb_use_best_found(ac, e4b);
2088 return;
2089 }
2090 }
2091}
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
2104 struct ext4_free_extent *ex,
2105 struct ext4_buddy *e4b)
2106{
2107 struct ext4_free_extent *bex = &ac->ac_b_ex;
2108 struct ext4_free_extent *gex = &ac->ac_g_ex;
2109
2110 BUG_ON(ex->fe_len <= 0);
2111 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
2112 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
2113 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
2114
2115 ac->ac_found++;
2116
2117
2118
2119
2120 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2121 *bex = *ex;
2122 ext4_mb_use_best_found(ac, e4b);
2123 return;
2124 }
2125
2126
2127
2128
2129 if (ex->fe_len == gex->fe_len) {
2130 *bex = *ex;
2131 ext4_mb_use_best_found(ac, e4b);
2132 return;
2133 }
2134
2135
2136
2137
2138 if (bex->fe_len == 0) {
2139 *bex = *ex;
2140 return;
2141 }
2142
2143
2144
2145
2146 if (bex->fe_len < gex->fe_len) {
2147
2148
2149 if (ex->fe_len > bex->fe_len)
2150 *bex = *ex;
2151 } else if (ex->fe_len > gex->fe_len) {
2152
2153
2154
2155 if (ex->fe_len < bex->fe_len)
2156 *bex = *ex;
2157 }
2158
2159 ext4_mb_check_limits(ac, e4b, 0);
2160}
2161
2162static noinline_for_stack
2163int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
2164 struct ext4_buddy *e4b)
2165{
2166 struct ext4_free_extent ex = ac->ac_b_ex;
2167 ext4_group_t group = ex.fe_group;
2168 int max;
2169 int err;
2170
2171 BUG_ON(ex.fe_len <= 0);
2172 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
2173 if (err)
2174 return err;
2175
2176 ext4_lock_group(ac->ac_sb, group);
2177 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
2178
2179 if (max > 0) {
2180 ac->ac_b_ex = ex;
2181 ext4_mb_use_best_found(ac, e4b);
2182 }
2183
2184 ext4_unlock_group(ac->ac_sb, group);
2185 ext4_mb_unload_buddy(e4b);
2186
2187 return 0;
2188}
2189
2190static noinline_for_stack
2191int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
2192 struct ext4_buddy *e4b)
2193{
2194 ext4_group_t group = ac->ac_g_ex.fe_group;
2195 int max;
2196 int err;
2197 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2198 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2199 struct ext4_free_extent ex;
2200
2201 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
2202 return 0;
2203 if (grp->bb_free == 0)
2204 return 0;
2205
2206 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
2207 if (err)
2208 return err;
2209
2210 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
2211 ext4_mb_unload_buddy(e4b);
2212 return 0;
2213 }
2214
2215 ext4_lock_group(ac->ac_sb, group);
2216 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
2217 ac->ac_g_ex.fe_len, &ex);
2218 ex.fe_logical = 0xDEADFA11;
2219
2220 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
2221 ext4_fsblk_t start;
2222
2223 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
2224 ex.fe_start;
2225
2226 if (do_div(start, sbi->s_stripe) == 0) {
2227 ac->ac_found++;
2228 ac->ac_b_ex = ex;
2229 ext4_mb_use_best_found(ac, e4b);
2230 }
2231 } else if (max >= ac->ac_g_ex.fe_len) {
2232 BUG_ON(ex.fe_len <= 0);
2233 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
2234 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
2235 ac->ac_found++;
2236 ac->ac_b_ex = ex;
2237 ext4_mb_use_best_found(ac, e4b);
2238 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
2239
2240
2241 BUG_ON(ex.fe_len <= 0);
2242 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
2243 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
2244 ac->ac_found++;
2245 ac->ac_b_ex = ex;
2246 ext4_mb_use_best_found(ac, e4b);
2247 }
2248 ext4_unlock_group(ac->ac_sb, group);
2249 ext4_mb_unload_buddy(e4b);
2250
2251 return 0;
2252}
2253
2254
2255
2256
2257
2258static noinline_for_stack
2259void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
2260 struct ext4_buddy *e4b)
2261{
2262 struct super_block *sb = ac->ac_sb;
2263 struct ext4_group_info *grp = e4b->bd_info;
2264 void *buddy;
2265 int i;
2266 int k;
2267 int max;
2268
2269 BUG_ON(ac->ac_2order <= 0);
2270 for (i = ac->ac_2order; i < MB_NUM_ORDERS(sb); i++) {
2271 if (grp->bb_counters[i] == 0)
2272 continue;
2273
2274 buddy = mb_find_buddy(e4b, i, &max);
2275 BUG_ON(buddy == NULL);
2276
2277 k = mb_find_next_zero_bit(buddy, max, 0);
2278 if (k >= max) {
2279 ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
2280 "%d free clusters of order %d. But found 0",
2281 grp->bb_counters[i], i);
2282 ext4_mark_group_bitmap_corrupted(ac->ac_sb,
2283 e4b->bd_group,
2284 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
2285 break;
2286 }
2287 ac->ac_found++;
2288
2289 ac->ac_b_ex.fe_len = 1 << i;
2290 ac->ac_b_ex.fe_start = k << i;
2291 ac->ac_b_ex.fe_group = e4b->bd_group;
2292
2293 ext4_mb_use_best_found(ac, e4b);
2294
2295 BUG_ON(ac->ac_f_ex.fe_len != ac->ac_g_ex.fe_len);
2296
2297 if (EXT4_SB(sb)->s_mb_stats)
2298 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
2299
2300 break;
2301 }
2302}
2303
2304
2305
2306
2307
2308
2309static noinline_for_stack
2310void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
2311 struct ext4_buddy *e4b)
2312{
2313 struct super_block *sb = ac->ac_sb;
2314 void *bitmap = e4b->bd_bitmap;
2315 struct ext4_free_extent ex;
2316 int i;
2317 int free;
2318
2319 free = e4b->bd_info->bb_free;
2320 if (WARN_ON(free <= 0))
2321 return;
2322
2323 i = e4b->bd_info->bb_first_free;
2324
2325 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
2326 i = mb_find_next_zero_bit(bitmap,
2327 EXT4_CLUSTERS_PER_GROUP(sb), i);
2328 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
2329
2330
2331
2332
2333
2334 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
2335 "%d free clusters as per "
2336 "group info. But bitmap says 0",
2337 free);
2338 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
2339 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
2340 break;
2341 }
2342
2343 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
2344 if (WARN_ON(ex.fe_len <= 0))
2345 break;
2346 if (free < ex.fe_len) {
2347 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
2348 "%d free clusters as per "
2349 "group info. But got %d blocks",
2350 free, ex.fe_len);
2351 ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
2352 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
2353
2354
2355
2356
2357
2358 break;
2359 }
2360 ex.fe_logical = 0xDEADC0DE;
2361 ext4_mb_measure_extent(ac, &ex, e4b);
2362
2363 i += ex.fe_len;
2364 free -= ex.fe_len;
2365 }
2366
2367 ext4_mb_check_limits(ac, e4b, 1);
2368}
2369
2370
2371
2372
2373
2374static noinline_for_stack
2375void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
2376 struct ext4_buddy *e4b)
2377{
2378 struct super_block *sb = ac->ac_sb;
2379 struct ext4_sb_info *sbi = EXT4_SB(sb);
2380 void *bitmap = e4b->bd_bitmap;
2381 struct ext4_free_extent ex;
2382 ext4_fsblk_t first_group_block;
2383 ext4_fsblk_t a;
2384 ext4_grpblk_t i;
2385 int max;
2386
2387 BUG_ON(sbi->s_stripe == 0);
2388
2389
2390 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
2391
2392 a = first_group_block + sbi->s_stripe - 1;
2393 do_div(a, sbi->s_stripe);
2394 i = (a * sbi->s_stripe) - first_group_block;
2395
2396 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
2397 if (!mb_test_bit(i, bitmap)) {
2398 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
2399 if (max >= sbi->s_stripe) {
2400 ac->ac_found++;
2401 ex.fe_logical = 0xDEADF00D;
2402 ac->ac_b_ex = ex;
2403 ext4_mb_use_best_found(ac, e4b);
2404 break;
2405 }
2406 }
2407 i += sbi->s_stripe;
2408 }
2409}
2410
2411
2412
2413
2414
2415
2416static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
2417 ext4_group_t group, int cr)
2418{
2419 ext4_grpblk_t free, fragments;
2420 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
2421 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2422
2423 BUG_ON(cr < 0 || cr >= 4);
2424
2425 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2426 return false;
2427
2428 free = grp->bb_free;
2429 if (free == 0)
2430 return false;
2431
2432 fragments = grp->bb_fragments;
2433 if (fragments == 0)
2434 return false;
2435
2436 switch (cr) {
2437 case 0:
2438 BUG_ON(ac->ac_2order == 0);
2439
2440
2441 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
2442 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
2443 ((group % flex_size) == 0))
2444 return false;
2445
2446 if (free < ac->ac_g_ex.fe_len)
2447 return false;
2448
2449 if (ac->ac_2order >= MB_NUM_ORDERS(ac->ac_sb))
2450 return true;
2451
2452 if (grp->bb_largest_free_order < ac->ac_2order)
2453 return false;
2454
2455 return true;
2456 case 1:
2457 if ((free / fragments) >= ac->ac_g_ex.fe_len)
2458 return true;
2459 break;
2460 case 2:
2461 if (free >= ac->ac_g_ex.fe_len)
2462 return true;
2463 break;
2464 case 3:
2465 return true;
2466 default:
2467 BUG();
2468 }
2469
2470 return false;
2471}
2472
2473
2474
2475
2476
2477
2478static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
2479 ext4_group_t group, int cr)
2480{
2481 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2482 struct super_block *sb = ac->ac_sb;
2483 struct ext4_sb_info *sbi = EXT4_SB(sb);
2484 bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK;
2485 ext4_grpblk_t free;
2486 int ret = 0;
2487
2488 if (sbi->s_mb_stats)
2489 atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]);
2490 if (should_lock)
2491 ext4_lock_group(sb, group);
2492 free = grp->bb_free;
2493 if (free == 0)
2494 goto out;
2495 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
2496 goto out;
2497 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2498 goto out;
2499 if (should_lock)
2500 ext4_unlock_group(sb, group);
2501
2502
2503 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
2504 struct ext4_group_desc *gdp =
2505 ext4_get_group_desc(sb, group, NULL);
2506 int ret;
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516 if (cr < 2 &&
2517 (!sbi->s_log_groups_per_flex ||
2518 ((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
2519 !(ext4_has_group_desc_csum(sb) &&
2520 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))))
2521 return 0;
2522 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
2523 if (ret)
2524 return ret;
2525 }
2526
2527 if (should_lock)
2528 ext4_lock_group(sb, group);
2529 ret = ext4_mb_good_group(ac, group, cr);
2530out:
2531 if (should_lock)
2532 ext4_unlock_group(sb, group);
2533 return ret;
2534}
2535
2536
2537
2538
2539
2540ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group,
2541 unsigned int nr, int *cnt)
2542{
2543 ext4_group_t ngroups = ext4_get_groups_count(sb);
2544 struct buffer_head *bh;
2545 struct blk_plug plug;
2546
2547 blk_start_plug(&plug);
2548 while (nr-- > 0) {
2549 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
2550 NULL);
2551 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
2552
2553
2554
2555
2556
2557
2558
2559
2560 if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) &&
2561 EXT4_MB_GRP_NEED_INIT(grp) &&
2562 ext4_free_group_clusters(sb, gdp) > 0 &&
2563 !(ext4_has_group_desc_csum(sb) &&
2564 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
2565 bh = ext4_read_block_bitmap_nowait(sb, group, true);
2566 if (bh && !IS_ERR(bh)) {
2567 if (!buffer_uptodate(bh) && cnt)
2568 (*cnt)++;
2569 brelse(bh);
2570 }
2571 }
2572 if (++group >= ngroups)
2573 group = 0;
2574 }
2575 blk_finish_plug(&plug);
2576 return group;
2577}
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
2592 unsigned int nr)
2593{
2594 while (nr-- > 0) {
2595 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group,
2596 NULL);
2597 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
2598
2599 if (!group)
2600 group = ext4_get_groups_count(sb);
2601 group--;
2602 grp = ext4_get_group_info(sb, group);
2603
2604 if (EXT4_MB_GRP_NEED_INIT(grp) &&
2605 ext4_free_group_clusters(sb, gdp) > 0 &&
2606 !(ext4_has_group_desc_csum(sb) &&
2607 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) {
2608 if (ext4_mb_init_group(sb, group, GFP_NOFS))
2609 break;
2610 }
2611 }
2612}
2613
2614static noinline_for_stack int
2615ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2616{
2617 ext4_group_t prefetch_grp = 0, ngroups, group, i;
2618 int cr = -1;
2619 int err = 0, first_err = 0;
2620 unsigned int nr = 0, prefetch_ios = 0;
2621 struct ext4_sb_info *sbi;
2622 struct super_block *sb;
2623 struct ext4_buddy e4b;
2624 int lost;
2625
2626 sb = ac->ac_sb;
2627 sbi = EXT4_SB(sb);
2628 ngroups = ext4_get_groups_count(sb);
2629
2630 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2631 ngroups = sbi->s_blockfile_groups;
2632
2633 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2634
2635
2636 err = ext4_mb_find_by_goal(ac, &e4b);
2637 if (err || ac->ac_status == AC_STATUS_FOUND)
2638 goto out;
2639
2640 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2641 goto out;
2642
2643
2644
2645
2646
2647
2648 i = fls(ac->ac_g_ex.fe_len);
2649 ac->ac_2order = 0;
2650
2651
2652
2653
2654
2655
2656
2657 if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) {
2658
2659
2660
2661 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2662 ac->ac_2order = array_index_nospec(i - 1,
2663 MB_NUM_ORDERS(sb));
2664 }
2665
2666
2667 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2668
2669 spin_lock(&sbi->s_md_lock);
2670 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2671 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2672 spin_unlock(&sbi->s_md_lock);
2673 }
2674
2675
2676 cr = ac->ac_2order ? 0 : 1;
2677
2678
2679
2680
2681repeat:
2682 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2683 ac->ac_criteria = cr;
2684
2685
2686
2687
2688 group = ac->ac_g_ex.fe_group;
2689 ac->ac_last_optimal_group = group;
2690 ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
2691 prefetch_grp = group;
2692
2693 for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
2694 i++) {
2695 int ret = 0, new_cr;
2696
2697 cond_resched();
2698
2699 ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
2700 if (new_cr != cr) {
2701 cr = new_cr;
2702 goto repeat;
2703 }
2704
2705
2706
2707
2708
2709
2710
2711 if ((prefetch_grp == group) &&
2712 (cr > 1 ||
2713 prefetch_ios < sbi->s_mb_prefetch_limit)) {
2714 unsigned int curr_ios = prefetch_ios;
2715
2716 nr = sbi->s_mb_prefetch;
2717 if (ext4_has_feature_flex_bg(sb)) {
2718 nr = 1 << sbi->s_log_groups_per_flex;
2719 nr -= group & (nr - 1);
2720 nr = min(nr, sbi->s_mb_prefetch);
2721 }
2722 prefetch_grp = ext4_mb_prefetch(sb, group,
2723 nr, &prefetch_ios);
2724 if (prefetch_ios == curr_ios)
2725 nr = 0;
2726 }
2727
2728
2729 ret = ext4_mb_good_group_nolock(ac, group, cr);
2730 if (ret <= 0) {
2731 if (!first_err)
2732 first_err = ret;
2733 continue;
2734 }
2735
2736 err = ext4_mb_load_buddy(sb, group, &e4b);
2737 if (err)
2738 goto out;
2739
2740 ext4_lock_group(sb, group);
2741
2742
2743
2744
2745
2746 ret = ext4_mb_good_group(ac, group, cr);
2747 if (ret == 0) {
2748 ext4_unlock_group(sb, group);
2749 ext4_mb_unload_buddy(&e4b);
2750 continue;
2751 }
2752
2753 ac->ac_groups_scanned++;
2754 if (cr == 0)
2755 ext4_mb_simple_scan_group(ac, &e4b);
2756 else if (cr == 1 && sbi->s_stripe &&
2757 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2758 ext4_mb_scan_aligned(ac, &e4b);
2759 else
2760 ext4_mb_complex_scan_group(ac, &e4b);
2761
2762 ext4_unlock_group(sb, group);
2763 ext4_mb_unload_buddy(&e4b);
2764
2765 if (ac->ac_status != AC_STATUS_CONTINUE)
2766 break;
2767 }
2768
2769 if (sbi->s_mb_stats && i == ngroups)
2770 atomic64_inc(&sbi->s_bal_cX_failed[cr]);
2771 }
2772
2773 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2774 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2775
2776
2777
2778
2779 ext4_mb_try_best_found(ac, &e4b);
2780 if (ac->ac_status != AC_STATUS_FOUND) {
2781
2782
2783
2784
2785
2786 lost = atomic_inc_return(&sbi->s_mb_lost_chunks);
2787 mb_debug(sb, "lost chunk, group: %u, start: %d, len: %d, lost: %d\n",
2788 ac->ac_b_ex.fe_group, ac->ac_b_ex.fe_start,
2789 ac->ac_b_ex.fe_len, lost);
2790
2791 ac->ac_b_ex.fe_group = 0;
2792 ac->ac_b_ex.fe_start = 0;
2793 ac->ac_b_ex.fe_len = 0;
2794 ac->ac_status = AC_STATUS_CONTINUE;
2795 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2796 cr = 3;
2797 goto repeat;
2798 }
2799 }
2800
2801 if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND)
2802 atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]);
2803out:
2804 if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
2805 err = first_err;
2806
2807 mb_debug(sb, "Best len %d, origin len %d, ac_status %u, ac_flags 0x%x, cr %d ret %d\n",
2808 ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status,
2809 ac->ac_flags, cr, err);
2810
2811 if (nr)
2812 ext4_mb_prefetch_fini(sb, prefetch_grp, nr);
2813
2814 return err;
2815}
2816
2817static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2818{
2819 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2820 ext4_group_t group;
2821
2822 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2823 return NULL;
2824 group = *pos + 1;
2825 return (void *) ((unsigned long) group);
2826}
2827
2828static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2829{
2830 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2831 ext4_group_t group;
2832
2833 ++*pos;
2834 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2835 return NULL;
2836 group = *pos + 1;
2837 return (void *) ((unsigned long) group);
2838}
2839
2840static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2841{
2842 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2843 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2844 int i;
2845 int err, buddy_loaded = 0;
2846 struct ext4_buddy e4b;
2847 struct ext4_group_info *grinfo;
2848 unsigned char blocksize_bits = min_t(unsigned char,
2849 sb->s_blocksize_bits,
2850 EXT4_MAX_BLOCK_LOG_SIZE);
2851 struct sg {
2852 struct ext4_group_info info;
2853 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
2854 } sg;
2855
2856 group--;
2857 if (group == 0)
2858 seq_puts(seq, "#group: free frags first ["
2859 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2860 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2861
2862 i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2863 sizeof(struct ext4_group_info);
2864
2865 grinfo = ext4_get_group_info(sb, group);
2866
2867 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2868 err = ext4_mb_load_buddy(sb, group, &e4b);
2869 if (err) {
2870 seq_printf(seq, "#%-5u: I/O error\n", group);
2871 return 0;
2872 }
2873 buddy_loaded = 1;
2874 }
2875
2876 memcpy(&sg, ext4_get_group_info(sb, group), i);
2877
2878 if (buddy_loaded)
2879 ext4_mb_unload_buddy(&e4b);
2880
2881 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2882 sg.info.bb_fragments, sg.info.bb_first_free);
2883 for (i = 0; i <= 13; i++)
2884 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
2885 sg.info.bb_counters[i] : 0);
2886 seq_puts(seq, " ]\n");
2887
2888 return 0;
2889}
2890
2891static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2892{
2893}
2894
2895const struct seq_operations ext4_mb_seq_groups_ops = {
2896 .start = ext4_mb_seq_groups_start,
2897 .next = ext4_mb_seq_groups_next,
2898 .stop = ext4_mb_seq_groups_stop,
2899 .show = ext4_mb_seq_groups_show,
2900};
2901
2902int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
2903{
2904 struct super_block *sb = (struct super_block *)seq->private;
2905 struct ext4_sb_info *sbi = EXT4_SB(sb);
2906
2907 seq_puts(seq, "mballoc:\n");
2908 if (!sbi->s_mb_stats) {
2909 seq_puts(seq, "\tmb stats collection turned off.\n");
2910 seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n");
2911 return 0;
2912 }
2913 seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
2914 seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
2915
2916 seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned));
2917
2918 seq_puts(seq, "\tcr0_stats:\n");
2919 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0]));
2920 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2921 atomic64_read(&sbi->s_bal_cX_groups_considered[0]));
2922 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2923 atomic64_read(&sbi->s_bal_cX_failed[0]));
2924 seq_printf(seq, "\t\tbad_suggestions: %u\n",
2925 atomic_read(&sbi->s_bal_cr0_bad_suggestions));
2926
2927 seq_puts(seq, "\tcr1_stats:\n");
2928 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1]));
2929 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2930 atomic64_read(&sbi->s_bal_cX_groups_considered[1]));
2931 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2932 atomic64_read(&sbi->s_bal_cX_failed[1]));
2933 seq_printf(seq, "\t\tbad_suggestions: %u\n",
2934 atomic_read(&sbi->s_bal_cr1_bad_suggestions));
2935
2936 seq_puts(seq, "\tcr2_stats:\n");
2937 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2]));
2938 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2939 atomic64_read(&sbi->s_bal_cX_groups_considered[2]));
2940 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2941 atomic64_read(&sbi->s_bal_cX_failed[2]));
2942
2943 seq_puts(seq, "\tcr3_stats:\n");
2944 seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3]));
2945 seq_printf(seq, "\t\tgroups_considered: %llu\n",
2946 atomic64_read(&sbi->s_bal_cX_groups_considered[3]));
2947 seq_printf(seq, "\t\tuseless_loops: %llu\n",
2948 atomic64_read(&sbi->s_bal_cX_failed[3]));
2949 seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned));
2950 seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
2951 seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
2952 seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
2953 seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
2954
2955 seq_printf(seq, "\tbuddies_generated: %u/%u\n",
2956 atomic_read(&sbi->s_mb_buddies_generated),
2957 ext4_get_groups_count(sb));
2958 seq_printf(seq, "\tbuddies_time_used: %llu\n",
2959 atomic64_read(&sbi->s_mb_generation_time));
2960 seq_printf(seq, "\tpreallocated: %u\n",
2961 atomic_read(&sbi->s_mb_preallocated));
2962 seq_printf(seq, "\tdiscarded: %u\n",
2963 atomic_read(&sbi->s_mb_discarded));
2964 return 0;
2965}
2966
2967static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
2968{
2969 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2970 unsigned long position;
2971
2972 read_lock(&EXT4_SB(sb)->s_mb_rb_lock);
2973
2974 if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
2975 return NULL;
2976 position = *pos + 1;
2977 return (void *) ((unsigned long) position);
2978}
2979
2980static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos)
2981{
2982 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2983 unsigned long position;
2984
2985 ++*pos;
2986 if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1)
2987 return NULL;
2988 position = *pos + 1;
2989 return (void *) ((unsigned long) position);
2990}
2991
2992static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v)
2993{
2994 struct super_block *sb = PDE_DATA(file_inode(seq->file));
2995 struct ext4_sb_info *sbi = EXT4_SB(sb);
2996 unsigned long position = ((unsigned long) v);
2997 struct ext4_group_info *grp;
2998 struct rb_node *n;
2999 unsigned int count, min, max;
3000
3001 position--;
3002 if (position >= MB_NUM_ORDERS(sb)) {
3003 seq_puts(seq, "fragment_size_tree:\n");
3004 n = rb_first(&sbi->s_mb_avg_fragment_size_root);
3005 if (!n) {
3006 seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 0\n");
3007 return 0;
3008 }
3009 grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
3010 min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
3011 count = 1;
3012 while (rb_next(n)) {
3013 count++;
3014 n = rb_next(n);
3015 }
3016 grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb);
3017 max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0;
3018
3019 seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n",
3020 min, max, count);
3021 return 0;
3022 }
3023
3024 if (position == 0) {
3025 seq_printf(seq, "optimize_scan: %d\n",
3026 test_opt2(sb, MB_OPTIMIZE_SCAN) ? 1 : 0);
3027 seq_puts(seq, "max_free_order_lists:\n");
3028 }
3029 count = 0;
3030 list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position],
3031 bb_largest_free_order_node)
3032 count++;
3033 seq_printf(seq, "\tlist_order_%u_groups: %u\n",
3034 (unsigned int)position, count);
3035
3036 return 0;
3037}
3038
3039static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v)
3040{
3041 struct super_block *sb = PDE_DATA(file_inode(seq->file));
3042
3043 read_unlock(&EXT4_SB(sb)->s_mb_rb_lock);
3044}
3045
3046const struct seq_operations ext4_mb_seq_structs_summary_ops = {
3047 .start = ext4_mb_seq_structs_summary_start,
3048 .next = ext4_mb_seq_structs_summary_next,
3049 .stop = ext4_mb_seq_structs_summary_stop,
3050 .show = ext4_mb_seq_structs_summary_show,
3051};
3052
3053static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
3054{
3055 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
3056 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
3057
3058 BUG_ON(!cachep);
3059 return cachep;
3060}
3061
3062
3063
3064
3065
3066int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
3067{
3068 struct ext4_sb_info *sbi = EXT4_SB(sb);
3069 unsigned size;
3070 struct ext4_group_info ***old_groupinfo, ***new_groupinfo;
3071
3072 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
3073 EXT4_DESC_PER_BLOCK_BITS(sb);
3074 if (size <= sbi->s_group_info_size)
3075 return 0;
3076
3077 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
3078 new_groupinfo = kvzalloc(size, GFP_KERNEL);
3079 if (!new_groupinfo) {
3080 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
3081 return -ENOMEM;
3082 }
3083 rcu_read_lock();
3084 old_groupinfo = rcu_dereference(sbi->s_group_info);
3085 if (old_groupinfo)
3086 memcpy(new_groupinfo, old_groupinfo,
3087 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
3088 rcu_read_unlock();
3089 rcu_assign_pointer(sbi->s_group_info, new_groupinfo);
3090 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
3091 if (old_groupinfo)
3092 ext4_kvfree_array_rcu(old_groupinfo);
3093 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
3094 sbi->s_group_info_size);
3095 return 0;
3096}
3097
3098
3099int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
3100 struct ext4_group_desc *desc)
3101{
3102 int i;
3103 int metalen = 0;
3104 int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb);
3105 struct ext4_sb_info *sbi = EXT4_SB(sb);
3106 struct ext4_group_info **meta_group_info;
3107 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3108
3109
3110
3111
3112
3113
3114 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
3115 metalen = sizeof(*meta_group_info) <<
3116 EXT4_DESC_PER_BLOCK_BITS(sb);
3117 meta_group_info = kmalloc(metalen, GFP_NOFS);
3118 if (meta_group_info == NULL) {
3119 ext4_msg(sb, KERN_ERR, "can't allocate mem "
3120 "for a buddy group");
3121 goto exit_meta_group_info;
3122 }
3123 rcu_read_lock();
3124 rcu_dereference(sbi->s_group_info)[idx] = meta_group_info;
3125 rcu_read_unlock();
3126 }
3127
3128 meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx);
3129 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
3130
3131 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
3132 if (meta_group_info[i] == NULL) {
3133 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
3134 goto exit_group_info;
3135 }
3136 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
3137 &(meta_group_info[i]->bb_state));
3138
3139
3140
3141
3142
3143 if (ext4_has_group_desc_csum(sb) &&
3144 (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3145 meta_group_info[i]->bb_free =
3146 ext4_free_clusters_after_init(sb, group, desc);
3147 } else {
3148 meta_group_info[i]->bb_free =
3149 ext4_free_group_clusters(sb, desc);
3150 }
3151
3152 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
3153 init_rwsem(&meta_group_info[i]->alloc_sem);
3154 meta_group_info[i]->bb_free_root = RB_ROOT;
3155 INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node);
3156 RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb);
3157 meta_group_info[i]->bb_largest_free_order = -1;
3158 meta_group_info[i]->bb_group = group;
3159
3160 mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group);
3161 return 0;
3162
3163exit_group_info:
3164
3165 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
3166 struct ext4_group_info ***group_info;
3167
3168 rcu_read_lock();
3169 group_info = rcu_dereference(sbi->s_group_info);
3170 kfree(group_info[idx]);
3171 group_info[idx] = NULL;
3172 rcu_read_unlock();
3173 }
3174exit_meta_group_info:
3175 return -ENOMEM;
3176}
3177
3178static int ext4_mb_init_backend(struct super_block *sb)
3179{
3180 ext4_group_t ngroups = ext4_get_groups_count(sb);
3181 ext4_group_t i;
3182 struct ext4_sb_info *sbi = EXT4_SB(sb);
3183 int err;
3184 struct ext4_group_desc *desc;
3185 struct ext4_group_info ***group_info;
3186 struct kmem_cache *cachep;
3187
3188 err = ext4_mb_alloc_groupinfo(sb, ngroups);
3189 if (err)
3190 return err;
3191
3192 sbi->s_buddy_cache = new_inode(sb);
3193 if (sbi->s_buddy_cache == NULL) {
3194 ext4_msg(sb, KERN_ERR, "can't get new inode");
3195 goto err_freesgi;
3196 }
3197
3198
3199
3200
3201 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
3202 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
3203 for (i = 0; i < ngroups; i++) {
3204 cond_resched();
3205 desc = ext4_get_group_desc(sb, i, NULL);
3206 if (desc == NULL) {
3207 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
3208 goto err_freebuddy;
3209 }
3210 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
3211 goto err_freebuddy;
3212 }
3213
3214 if (ext4_has_feature_flex_bg(sb)) {
3215
3216
3217
3218
3219 if (sbi->s_es->s_log_groups_per_flex >= 32) {
3220 ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
3221 goto err_freebuddy;
3222 }
3223 sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
3224 BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
3225 sbi->s_mb_prefetch *= 8;
3226 } else {
3227 sbi->s_mb_prefetch = 32;
3228 }
3229 if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
3230 sbi->s_mb_prefetch = ext4_get_groups_count(sb);
3231
3232
3233
3234
3235
3236
3237
3238 sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4;
3239 if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb))
3240 sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb);
3241
3242 return 0;
3243
3244err_freebuddy:
3245 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3246 while (i-- > 0)
3247 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
3248 i = sbi->s_group_info_size;
3249 rcu_read_lock();
3250 group_info = rcu_dereference(sbi->s_group_info);
3251 while (i-- > 0)
3252 kfree(group_info[i]);
3253 rcu_read_unlock();
3254 iput(sbi->s_buddy_cache);
3255err_freesgi:
3256 rcu_read_lock();
3257 kvfree(rcu_dereference(sbi->s_group_info));
3258 rcu_read_unlock();
3259 return -ENOMEM;
3260}
3261
3262static void ext4_groupinfo_destroy_slabs(void)
3263{
3264 int i;
3265
3266 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
3267 kmem_cache_destroy(ext4_groupinfo_caches[i]);
3268 ext4_groupinfo_caches[i] = NULL;
3269 }
3270}
3271
3272static int ext4_groupinfo_create_slab(size_t size)
3273{
3274 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
3275 int slab_size;
3276 int blocksize_bits = order_base_2(size);
3277 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
3278 struct kmem_cache *cachep;
3279
3280 if (cache_index >= NR_GRPINFO_CACHES)
3281 return -EINVAL;
3282
3283 if (unlikely(cache_index < 0))
3284 cache_index = 0;
3285
3286 mutex_lock(&ext4_grpinfo_slab_create_mutex);
3287 if (ext4_groupinfo_caches[cache_index]) {
3288 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
3289 return 0;
3290 }
3291
3292 slab_size = offsetof(struct ext4_group_info,
3293 bb_counters[blocksize_bits + 2]);
3294
3295 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
3296 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
3297 NULL);
3298
3299 ext4_groupinfo_caches[cache_index] = cachep;
3300
3301 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
3302 if (!cachep) {
3303 printk(KERN_EMERG
3304 "EXT4-fs: no memory for groupinfo slab cache\n");
3305 return -ENOMEM;
3306 }
3307
3308 return 0;
3309}
3310
3311int ext4_mb_init(struct super_block *sb)
3312{
3313 struct ext4_sb_info *sbi = EXT4_SB(sb);
3314 unsigned i, j;
3315 unsigned offset, offset_incr;
3316 unsigned max;
3317 int ret;
3318
3319 i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_offsets);
3320
3321 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
3322 if (sbi->s_mb_offsets == NULL) {
3323 ret = -ENOMEM;
3324 goto out;
3325 }
3326
3327 i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_maxs);
3328 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
3329 if (sbi->s_mb_maxs == NULL) {
3330 ret = -ENOMEM;
3331 goto out;
3332 }
3333
3334 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
3335 if (ret < 0)
3336 goto out;
3337
3338
3339 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
3340 sbi->s_mb_offsets[0] = 0;
3341
3342 i = 1;
3343 offset = 0;
3344 offset_incr = 1 << (sb->s_blocksize_bits - 1);
3345 max = sb->s_blocksize << 2;
3346 do {
3347 sbi->s_mb_offsets[i] = offset;
3348 sbi->s_mb_maxs[i] = max;
3349 offset += offset_incr;
3350 offset_incr = offset_incr >> 1;
3351 max = max >> 1;
3352 i++;
3353 } while (i < MB_NUM_ORDERS(sb));
3354
3355 sbi->s_mb_avg_fragment_size_root = RB_ROOT;
3356 sbi->s_mb_largest_free_orders =
3357 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head),
3358 GFP_KERNEL);
3359 if (!sbi->s_mb_largest_free_orders) {
3360 ret = -ENOMEM;
3361 goto out;
3362 }
3363 sbi->s_mb_largest_free_orders_locks =
3364 kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t),
3365 GFP_KERNEL);
3366 if (!sbi->s_mb_largest_free_orders_locks) {
3367 ret = -ENOMEM;
3368 goto out;
3369 }
3370 for (i = 0; i < MB_NUM_ORDERS(sb); i++) {
3371 INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]);
3372 rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]);
3373 }
3374 rwlock_init(&sbi->s_mb_rb_lock);
3375
3376 spin_lock_init(&sbi->s_md_lock);
3377 sbi->s_mb_free_pending = 0;
3378 INIT_LIST_HEAD(&sbi->s_freed_data_list);
3379
3380 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
3381 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
3382 sbi->s_mb_stats = MB_DEFAULT_STATS;
3383 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
3384 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
3385 sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
3399 sbi->s_cluster_bits, 32);
3400
3401
3402
3403
3404
3405
3406
3407
3408 if (sbi->s_stripe > 1) {
3409 sbi->s_mb_group_prealloc = roundup(
3410 sbi->s_mb_group_prealloc, sbi->s_stripe);
3411 }
3412
3413 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
3414 if (sbi->s_locality_groups == NULL) {
3415 ret = -ENOMEM;
3416 goto out;
3417 }
3418 for_each_possible_cpu(i) {
3419 struct ext4_locality_group *lg;
3420 lg = per_cpu_ptr(sbi->s_locality_groups, i);
3421 mutex_init(&lg->lg_mutex);
3422 for (j = 0; j < PREALLOC_TB_SIZE; j++)
3423 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
3424 spin_lock_init(&lg->lg_prealloc_lock);
3425 }
3426
3427 if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev)))
3428 sbi->s_mb_max_linear_groups = 0;
3429 else
3430 sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT;
3431
3432 ret = ext4_mb_init_backend(sb);
3433 if (ret != 0)
3434 goto out_free_locality_groups;
3435
3436 return 0;
3437
3438out_free_locality_groups:
3439 free_percpu(sbi->s_locality_groups);
3440 sbi->s_locality_groups = NULL;
3441out:
3442 kfree(sbi->s_mb_largest_free_orders);
3443 kfree(sbi->s_mb_largest_free_orders_locks);
3444 kfree(sbi->s_mb_offsets);
3445 sbi->s_mb_offsets = NULL;
3446 kfree(sbi->s_mb_maxs);
3447 sbi->s_mb_maxs = NULL;
3448 return ret;
3449}
3450
3451
3452static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
3453{
3454 struct ext4_prealloc_space *pa;
3455 struct list_head *cur, *tmp;
3456 int count = 0;
3457
3458 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
3459 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3460 list_del(&pa->pa_group_list);
3461 count++;
3462 kmem_cache_free(ext4_pspace_cachep, pa);
3463 }
3464 return count;
3465}
3466
3467int ext4_mb_release(struct super_block *sb)
3468{
3469 ext4_group_t ngroups = ext4_get_groups_count(sb);
3470 ext4_group_t i;
3471 int num_meta_group_infos;
3472 struct ext4_group_info *grinfo, ***group_info;
3473 struct ext4_sb_info *sbi = EXT4_SB(sb);
3474 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
3475 int count;
3476
3477 if (sbi->s_group_info) {
3478 for (i = 0; i < ngroups; i++) {
3479 cond_resched();
3480 grinfo = ext4_get_group_info(sb, i);
3481 mb_group_bb_bitmap_free(grinfo);
3482 ext4_lock_group(sb, i);
3483 count = ext4_mb_cleanup_pa(grinfo);
3484 if (count)
3485 mb_debug(sb, "mballoc: %d PAs left\n",
3486 count);
3487 ext4_unlock_group(sb, i);
3488 kmem_cache_free(cachep, grinfo);
3489 }
3490 num_meta_group_infos = (ngroups +
3491 EXT4_DESC_PER_BLOCK(sb) - 1) >>
3492 EXT4_DESC_PER_BLOCK_BITS(sb);
3493 rcu_read_lock();
3494 group_info = rcu_dereference(sbi->s_group_info);
3495 for (i = 0; i < num_meta_group_infos; i++)
3496 kfree(group_info[i]);
3497 kvfree(group_info);
3498 rcu_read_unlock();
3499 }
3500 kfree(sbi->s_mb_largest_free_orders);
3501 kfree(sbi->s_mb_largest_free_orders_locks);
3502 kfree(sbi->s_mb_offsets);
3503 kfree(sbi->s_mb_maxs);
3504 iput(sbi->s_buddy_cache);
3505 if (sbi->s_mb_stats) {
3506 ext4_msg(sb, KERN_INFO,
3507 "mballoc: %u blocks %u reqs (%u success)",
3508 atomic_read(&sbi->s_bal_allocated),
3509 atomic_read(&sbi->s_bal_reqs),
3510 atomic_read(&sbi->s_bal_success));
3511 ext4_msg(sb, KERN_INFO,
3512 "mballoc: %u extents scanned, %u groups scanned, %u goal hits, "
3513 "%u 2^N hits, %u breaks, %u lost",
3514 atomic_read(&sbi->s_bal_ex_scanned),
3515 atomic_read(&sbi->s_bal_groups_scanned),
3516 atomic_read(&sbi->s_bal_goals),
3517 atomic_read(&sbi->s_bal_2orders),
3518 atomic_read(&sbi->s_bal_breaks),
3519 atomic_read(&sbi->s_mb_lost_chunks));
3520 ext4_msg(sb, KERN_INFO,
3521 "mballoc: %u generated and it took %llu",
3522 atomic_read(&sbi->s_mb_buddies_generated),
3523 atomic64_read(&sbi->s_mb_generation_time));
3524 ext4_msg(sb, KERN_INFO,
3525 "mballoc: %u preallocated, %u discarded",
3526 atomic_read(&sbi->s_mb_preallocated),
3527 atomic_read(&sbi->s_mb_discarded));
3528 }
3529
3530 free_percpu(sbi->s_locality_groups);
3531
3532 return 0;
3533}
3534
3535static inline int ext4_issue_discard(struct super_block *sb,
3536 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
3537 struct bio **biop)
3538{
3539 ext4_fsblk_t discard_block;
3540
3541 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
3542 ext4_group_first_block_no(sb, block_group));
3543 count = EXT4_C2B(EXT4_SB(sb), count);
3544 trace_ext4_discard_blocks(sb,
3545 (unsigned long long) discard_block, count);
3546 if (biop) {
3547 return __blkdev_issue_discard(sb->s_bdev,
3548 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
3549 (sector_t)count << (sb->s_blocksize_bits - 9),
3550 GFP_NOFS, 0, biop);
3551 } else
3552 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
3553}
3554
3555static void ext4_free_data_in_buddy(struct super_block *sb,
3556 struct ext4_free_data *entry)
3557{
3558 struct ext4_buddy e4b;
3559 struct ext4_group_info *db;
3560 int err, count = 0, count2 = 0;
3561
3562 mb_debug(sb, "gonna free %u blocks in group %u (0x%p):",
3563 entry->efd_count, entry->efd_group, entry);
3564
3565 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
3566
3567 BUG_ON(err != 0);
3568
3569 spin_lock(&EXT4_SB(sb)->s_md_lock);
3570 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
3571 spin_unlock(&EXT4_SB(sb)->s_md_lock);
3572
3573 db = e4b.bd_info;
3574
3575 count += entry->efd_count;
3576 count2++;
3577 ext4_lock_group(sb, entry->efd_group);
3578
3579 rb_erase(&entry->efd_node, &(db->bb_free_root));
3580 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
3581
3582
3583
3584
3585
3586
3587
3588 if (!test_opt(sb, DISCARD))
3589 EXT4_MB_GRP_CLEAR_TRIMMED(db);
3590
3591 if (!db->bb_free_root.rb_node) {
3592
3593
3594
3595 put_page(e4b.bd_buddy_page);
3596 put_page(e4b.bd_bitmap_page);
3597 }
3598 ext4_unlock_group(sb, entry->efd_group);
3599 kmem_cache_free(ext4_free_data_cachep, entry);
3600 ext4_mb_unload_buddy(&e4b);
3601
3602 mb_debug(sb, "freed %d blocks in %d structures\n", count,
3603 count2);
3604}
3605
3606
3607
3608
3609
3610void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
3611{
3612 struct ext4_sb_info *sbi = EXT4_SB(sb);
3613 struct ext4_free_data *entry, *tmp;
3614 struct bio *discard_bio = NULL;
3615 struct list_head freed_data_list;
3616 struct list_head *cut_pos = NULL;
3617 int err;
3618
3619 INIT_LIST_HEAD(&freed_data_list);
3620
3621 spin_lock(&sbi->s_md_lock);
3622 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
3623 if (entry->efd_tid != commit_tid)
3624 break;
3625 cut_pos = &entry->efd_list;
3626 }
3627 if (cut_pos)
3628 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
3629 cut_pos);
3630 spin_unlock(&sbi->s_md_lock);
3631
3632 if (test_opt(sb, DISCARD)) {
3633 list_for_each_entry(entry, &freed_data_list, efd_list) {
3634 err = ext4_issue_discard(sb, entry->efd_group,
3635 entry->efd_start_cluster,
3636 entry->efd_count,
3637 &discard_bio);
3638 if (err && err != -EOPNOTSUPP) {
3639 ext4_msg(sb, KERN_WARNING, "discard request in"
3640 " group:%d block:%d count:%d failed"
3641 " with %d", entry->efd_group,
3642 entry->efd_start_cluster,
3643 entry->efd_count, err);
3644 } else if (err == -EOPNOTSUPP)
3645 break;
3646 }
3647
3648 if (discard_bio) {
3649 submit_bio_wait(discard_bio);
3650 bio_put(discard_bio);
3651 }
3652 }
3653
3654 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
3655 ext4_free_data_in_buddy(sb, entry);
3656}
3657
3658int __init ext4_init_mballoc(void)
3659{
3660 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
3661 SLAB_RECLAIM_ACCOUNT);
3662 if (ext4_pspace_cachep == NULL)
3663 goto out;
3664
3665 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
3666 SLAB_RECLAIM_ACCOUNT);
3667 if (ext4_ac_cachep == NULL)
3668 goto out_pa_free;
3669
3670 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
3671 SLAB_RECLAIM_ACCOUNT);
3672 if (ext4_free_data_cachep == NULL)
3673 goto out_ac_free;
3674
3675 return 0;
3676
3677out_ac_free:
3678 kmem_cache_destroy(ext4_ac_cachep);
3679out_pa_free:
3680 kmem_cache_destroy(ext4_pspace_cachep);
3681out:
3682 return -ENOMEM;
3683}
3684
3685void ext4_exit_mballoc(void)
3686{
3687
3688
3689
3690
3691 rcu_barrier();
3692 kmem_cache_destroy(ext4_pspace_cachep);
3693 kmem_cache_destroy(ext4_ac_cachep);
3694 kmem_cache_destroy(ext4_free_data_cachep);
3695 ext4_groupinfo_destroy_slabs();
3696}
3697
3698
3699
3700
3701
3702
3703static noinline_for_stack int
3704ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3705 handle_t *handle, unsigned int reserv_clstrs)
3706{
3707 struct buffer_head *bitmap_bh = NULL;
3708 struct ext4_group_desc *gdp;
3709 struct buffer_head *gdp_bh;
3710 struct ext4_sb_info *sbi;
3711 struct super_block *sb;
3712 ext4_fsblk_t block;
3713 int err, len;
3714
3715 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3716 BUG_ON(ac->ac_b_ex.fe_len <= 0);
3717
3718 sb = ac->ac_sb;
3719 sbi = EXT4_SB(sb);
3720
3721 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
3722 if (IS_ERR(bitmap_bh)) {
3723 err = PTR_ERR(bitmap_bh);
3724 bitmap_bh = NULL;
3725 goto out_err;
3726 }
3727
3728 BUFFER_TRACE(bitmap_bh, "getting write access");
3729 err = ext4_journal_get_write_access(handle, bitmap_bh);
3730 if (err)
3731 goto out_err;
3732
3733 err = -EIO;
3734 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
3735 if (!gdp)
3736 goto out_err;
3737
3738 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
3739 ext4_free_group_clusters(sb, gdp));
3740
3741 BUFFER_TRACE(gdp_bh, "get_write_access");
3742 err = ext4_journal_get_write_access(handle, gdp_bh);
3743 if (err)
3744 goto out_err;
3745
3746 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3747
3748 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3749 if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
3750 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
3751 "fs metadata", block, block+len);
3752
3753
3754
3755
3756 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3757 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3758 ac->ac_b_ex.fe_len);
3759 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3760 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3761 if (!err)
3762 err = -EFSCORRUPTED;
3763 goto out_err;
3764 }
3765
3766 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3767#ifdef AGGRESSIVE_CHECK
3768 {
3769 int i;
3770 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3771 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3772 bitmap_bh->b_data));
3773 }
3774 }
3775#endif
3776 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3777 ac->ac_b_ex.fe_len);
3778 if (ext4_has_group_desc_csum(sb) &&
3779 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3780 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3781 ext4_free_group_clusters_set(sb, gdp,
3782 ext4_free_clusters_after_init(sb,
3783 ac->ac_b_ex.fe_group, gdp));
3784 }
3785 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3786 ext4_free_group_clusters_set(sb, gdp, len);
3787 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3788 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3789
3790 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3791 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
3792
3793
3794
3795 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
3796
3797 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3798 reserv_clstrs);
3799
3800 if (sbi->s_log_groups_per_flex) {
3801 ext4_group_t flex_group = ext4_flex_group(sbi,
3802 ac->ac_b_ex.fe_group);
3803 atomic64_sub(ac->ac_b_ex.fe_len,
3804 &sbi_array_rcu_deref(sbi, s_flex_groups,
3805 flex_group)->free_clusters);
3806 }
3807
3808 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3809 if (err)
3810 goto out_err;
3811 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3812
3813out_err:
3814 brelse(bitmap_bh);
3815 return err;
3816}
3817
3818
3819
3820
3821
3822void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
3823 int len, int state)
3824{
3825 struct buffer_head *bitmap_bh = NULL;
3826 struct ext4_group_desc *gdp;
3827 struct buffer_head *gdp_bh;
3828 struct ext4_sb_info *sbi = EXT4_SB(sb);
3829 ext4_group_t group;
3830 ext4_grpblk_t blkoff;
3831 int i, clen, err;
3832 int already;
3833
3834 clen = EXT4_B2C(sbi, len);
3835
3836 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
3837 bitmap_bh = ext4_read_block_bitmap(sb, group);
3838 if (IS_ERR(bitmap_bh)) {
3839 err = PTR_ERR(bitmap_bh);
3840 bitmap_bh = NULL;
3841 goto out_err;
3842 }
3843
3844 err = -EIO;
3845 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
3846 if (!gdp)
3847 goto out_err;
3848
3849 ext4_lock_group(sb, group);
3850 already = 0;
3851 for (i = 0; i < clen; i++)
3852 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
3853 already++;
3854
3855 if (state)
3856 ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
3857 else
3858 mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
3859 if (ext4_has_group_desc_csum(sb) &&
3860 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3861 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3862 ext4_free_group_clusters_set(sb, gdp,
3863 ext4_free_clusters_after_init(sb,
3864 group, gdp));
3865 }
3866 if (state)
3867 clen = ext4_free_group_clusters(sb, gdp) - clen + already;
3868 else
3869 clen = ext4_free_group_clusters(sb, gdp) + clen - already;
3870
3871 ext4_free_group_clusters_set(sb, gdp, clen);
3872 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
3873 ext4_group_desc_csum_set(sb, group, gdp);
3874
3875 ext4_unlock_group(sb, group);
3876
3877 if (sbi->s_log_groups_per_flex) {
3878 ext4_group_t flex_group = ext4_flex_group(sbi, group);
3879
3880 atomic64_sub(len,
3881 &sbi_array_rcu_deref(sbi, s_flex_groups,
3882 flex_group)->free_clusters);
3883 }
3884
3885 err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
3886 if (err)
3887 goto out_err;
3888 sync_dirty_buffer(bitmap_bh);
3889 err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
3890 sync_dirty_buffer(gdp_bh);
3891
3892out_err:
3893 brelse(bitmap_bh);
3894}
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3906{
3907 struct super_block *sb = ac->ac_sb;
3908 struct ext4_locality_group *lg = ac->ac_lg;
3909
3910 BUG_ON(lg == NULL);
3911 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3912 mb_debug(sb, "goal %u blocks for locality group\n", ac->ac_g_ex.fe_len);
3913}
3914
3915
3916
3917
3918
3919static noinline_for_stack void
3920ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3921 struct ext4_allocation_request *ar)
3922{
3923 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3924 int bsbits, max;
3925 ext4_lblk_t end;
3926 loff_t size, start_off;
3927 loff_t orig_size __maybe_unused;
3928 ext4_lblk_t start;
3929 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3930 struct ext4_prealloc_space *pa;
3931
3932
3933
3934 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3935 return;
3936
3937
3938 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3939 return;
3940
3941
3942
3943 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3944 return;
3945
3946 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3947 ext4_mb_normalize_group_request(ac);
3948 return ;
3949 }
3950
3951 bsbits = ac->ac_sb->s_blocksize_bits;
3952
3953
3954
3955 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3956 size = size << bsbits;
3957 if (size < i_size_read(ac->ac_inode))
3958 size = i_size_read(ac->ac_inode);
3959 orig_size = size;
3960
3961
3962 max = 2 << bsbits;
3963
3964#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3965 (req <= (size) || max <= (chunk_size))
3966
3967
3968
3969 start_off = 0;
3970 if (size <= 16 * 1024) {
3971 size = 16 * 1024;
3972 } else if (size <= 32 * 1024) {
3973 size = 32 * 1024;
3974 } else if (size <= 64 * 1024) {
3975 size = 64 * 1024;
3976 } else if (size <= 128 * 1024) {
3977 size = 128 * 1024;
3978 } else if (size <= 256 * 1024) {
3979 size = 256 * 1024;
3980 } else if (size <= 512 * 1024) {
3981 size = 512 * 1024;
3982 } else if (size <= 1024 * 1024) {
3983 size = 1024 * 1024;
3984 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3985 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3986 (21 - bsbits)) << 21;
3987 size = 2 * 1024 * 1024;
3988 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3989 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3990 (22 - bsbits)) << 22;
3991 size = 4 * 1024 * 1024;
3992 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3993 (8<<20)>>bsbits, max, 8 * 1024)) {
3994 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3995 (23 - bsbits)) << 23;
3996 size = 8 * 1024 * 1024;
3997 } else {
3998 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3999 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
4000 ac->ac_o_ex.fe_len) << bsbits;
4001 }
4002 size = size >> bsbits;
4003 start = start_off >> bsbits;
4004
4005
4006 if (ar->pleft && start <= ar->lleft) {
4007 size -= ar->lleft + 1 - start;
4008 start = ar->lleft + 1;
4009 }
4010 if (ar->pright && start + size - 1 >= ar->lright)
4011 size -= start + size - ar->lright;
4012
4013
4014
4015
4016
4017 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
4018 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
4019
4020 end = start + size;
4021
4022
4023 rcu_read_lock();
4024 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4025 ext4_lblk_t pa_end;
4026
4027 if (pa->pa_deleted)
4028 continue;
4029 spin_lock(&pa->pa_lock);
4030 if (pa->pa_deleted) {
4031 spin_unlock(&pa->pa_lock);
4032 continue;
4033 }
4034
4035 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
4036 pa->pa_len);
4037
4038
4039 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
4040 ac->ac_o_ex.fe_logical < pa->pa_lstart));
4041
4042
4043 if (pa->pa_lstart >= end || pa_end <= start) {
4044 spin_unlock(&pa->pa_lock);
4045 continue;
4046 }
4047 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
4048
4049
4050 if (pa_end <= ac->ac_o_ex.fe_logical) {
4051 BUG_ON(pa_end < start);
4052 start = pa_end;
4053 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
4054 BUG_ON(pa->pa_lstart > end);
4055 end = pa->pa_lstart;
4056 }
4057 spin_unlock(&pa->pa_lock);
4058 }
4059 rcu_read_unlock();
4060 size = end - start;
4061
4062
4063 rcu_read_lock();
4064 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4065 ext4_lblk_t pa_end;
4066
4067 spin_lock(&pa->pa_lock);
4068 if (pa->pa_deleted == 0) {
4069 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
4070 pa->pa_len);
4071 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
4072 }
4073 spin_unlock(&pa->pa_lock);
4074 }
4075 rcu_read_unlock();
4076
4077 if (start + size <= ac->ac_o_ex.fe_logical &&
4078 start > ac->ac_o_ex.fe_logical) {
4079 ext4_msg(ac->ac_sb, KERN_ERR,
4080 "start %lu, size %lu, fe_logical %lu",
4081 (unsigned long) start, (unsigned long) size,
4082 (unsigned long) ac->ac_o_ex.fe_logical);
4083 BUG();
4084 }
4085 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
4086
4087
4088
4089
4090
4091 ac->ac_g_ex.fe_logical = start;
4092 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
4093
4094
4095 if (ar->pright && (ar->lright == (start + size))) {
4096
4097 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
4098 &ac->ac_f_ex.fe_group,
4099 &ac->ac_f_ex.fe_start);
4100 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
4101 }
4102 if (ar->pleft && (ar->lleft + 1 == start)) {
4103
4104 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
4105 &ac->ac_f_ex.fe_group,
4106 &ac->ac_f_ex.fe_start);
4107 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
4108 }
4109
4110 mb_debug(ac->ac_sb, "goal: %lld(was %lld) blocks at %u\n", size,
4111 orig_size, start);
4112}
4113
4114static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
4115{
4116 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4117
4118 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) {
4119 atomic_inc(&sbi->s_bal_reqs);
4120 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
4121 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
4122 atomic_inc(&sbi->s_bal_success);
4123 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
4124 atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned);
4125 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
4126 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
4127 atomic_inc(&sbi->s_bal_goals);
4128 if (ac->ac_found > sbi->s_mb_max_to_scan)
4129 atomic_inc(&sbi->s_bal_breaks);
4130 }
4131
4132 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
4133 trace_ext4_mballoc_alloc(ac);
4134 else
4135 trace_ext4_mballoc_prealloc(ac);
4136}
4137
4138
4139
4140
4141
4142
4143
4144static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
4145{
4146 struct ext4_prealloc_space *pa = ac->ac_pa;
4147 struct ext4_buddy e4b;
4148 int err;
4149
4150 if (pa == NULL) {
4151 if (ac->ac_f_ex.fe_len == 0)
4152 return;
4153 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
4154 if (err) {
4155
4156
4157
4158
4159
4160 WARN(1, "mb_load_buddy failed (%d)", err);
4161 return;
4162 }
4163 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
4164 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
4165 ac->ac_f_ex.fe_len);
4166 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
4167 ext4_mb_unload_buddy(&e4b);
4168 return;
4169 }
4170 if (pa->pa_type == MB_INODE_PA)
4171 pa->pa_free += ac->ac_b_ex.fe_len;
4172}
4173
4174
4175
4176
4177static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
4178 struct ext4_prealloc_space *pa)
4179{
4180 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4181 ext4_fsblk_t start;
4182 ext4_fsblk_t end;
4183 int len;
4184
4185
4186 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
4187 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
4188 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
4189 len = EXT4_NUM_B2C(sbi, end - start);
4190 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
4191 &ac->ac_b_ex.fe_start);
4192 ac->ac_b_ex.fe_len = len;
4193 ac->ac_status = AC_STATUS_FOUND;
4194 ac->ac_pa = pa;
4195
4196 BUG_ON(start < pa->pa_pstart);
4197 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
4198 BUG_ON(pa->pa_free < len);
4199 pa->pa_free -= len;
4200
4201 mb_debug(ac->ac_sb, "use %llu/%d from inode pa %p\n", start, len, pa);
4202}
4203
4204
4205
4206
4207static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
4208 struct ext4_prealloc_space *pa)
4209{
4210 unsigned int len = ac->ac_o_ex.fe_len;
4211
4212 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
4213 &ac->ac_b_ex.fe_group,
4214 &ac->ac_b_ex.fe_start);
4215 ac->ac_b_ex.fe_len = len;
4216 ac->ac_status = AC_STATUS_FOUND;
4217 ac->ac_pa = pa;
4218
4219
4220
4221
4222
4223
4224
4225 mb_debug(ac->ac_sb, "use %u/%u from group pa %p\n",
4226 pa->pa_lstart-len, len, pa);
4227}
4228
4229
4230
4231
4232
4233
4234
4235static struct ext4_prealloc_space *
4236ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
4237 struct ext4_prealloc_space *pa,
4238 struct ext4_prealloc_space *cpa)
4239{
4240 ext4_fsblk_t cur_distance, new_distance;
4241
4242 if (cpa == NULL) {
4243 atomic_inc(&pa->pa_count);
4244 return pa;
4245 }
4246 cur_distance = abs(goal_block - cpa->pa_pstart);
4247 new_distance = abs(goal_block - pa->pa_pstart);
4248
4249 if (cur_distance <= new_distance)
4250 return cpa;
4251
4252
4253 atomic_dec(&cpa->pa_count);
4254 atomic_inc(&pa->pa_count);
4255 return pa;
4256}
4257
4258
4259
4260
4261static noinline_for_stack bool
4262ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
4263{
4264 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4265 int order, i;
4266 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
4267 struct ext4_locality_group *lg;
4268 struct ext4_prealloc_space *pa, *cpa = NULL;
4269 ext4_fsblk_t goal_block;
4270
4271
4272 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4273 return false;
4274
4275
4276 rcu_read_lock();
4277 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
4278
4279
4280
4281 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
4282 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
4283 EXT4_C2B(sbi, pa->pa_len)))
4284 continue;
4285
4286
4287 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
4288 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
4289 EXT4_MAX_BLOCK_FILE_PHYS))
4290 continue;
4291
4292
4293 spin_lock(&pa->pa_lock);
4294 if (pa->pa_deleted == 0 && pa->pa_free) {
4295 atomic_inc(&pa->pa_count);
4296 ext4_mb_use_inode_pa(ac, pa);
4297 spin_unlock(&pa->pa_lock);
4298 ac->ac_criteria = 10;
4299 rcu_read_unlock();
4300 return true;
4301 }
4302 spin_unlock(&pa->pa_lock);
4303 }
4304 rcu_read_unlock();
4305
4306
4307 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
4308 return false;
4309
4310
4311 lg = ac->ac_lg;
4312 if (lg == NULL)
4313 return false;
4314 order = fls(ac->ac_o_ex.fe_len) - 1;
4315 if (order > PREALLOC_TB_SIZE - 1)
4316
4317 order = PREALLOC_TB_SIZE - 1;
4318
4319 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
4320
4321
4322
4323
4324 for (i = order; i < PREALLOC_TB_SIZE; i++) {
4325 rcu_read_lock();
4326 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
4327 pa_inode_list) {
4328 spin_lock(&pa->pa_lock);
4329 if (pa->pa_deleted == 0 &&
4330 pa->pa_free >= ac->ac_o_ex.fe_len) {
4331
4332 cpa = ext4_mb_check_group_pa(goal_block,
4333 pa, cpa);
4334 }
4335 spin_unlock(&pa->pa_lock);
4336 }
4337 rcu_read_unlock();
4338 }
4339 if (cpa) {
4340 ext4_mb_use_group_pa(ac, cpa);
4341 ac->ac_criteria = 20;
4342 return true;
4343 }
4344 return false;
4345}
4346
4347
4348
4349
4350
4351
4352
4353static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
4354 ext4_group_t group)
4355{
4356 struct rb_node *n;
4357 struct ext4_group_info *grp;
4358 struct ext4_free_data *entry;
4359
4360 grp = ext4_get_group_info(sb, group);
4361 n = rb_first(&(grp->bb_free_root));
4362
4363 while (n) {
4364 entry = rb_entry(n, struct ext4_free_data, efd_node);
4365 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
4366 n = rb_next(n);
4367 }
4368 return;
4369}
4370
4371
4372
4373
4374
4375
4376static noinline_for_stack
4377void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
4378 ext4_group_t group)
4379{
4380 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4381 struct ext4_prealloc_space *pa;
4382 struct list_head *cur;
4383 ext4_group_t groupnr;
4384 ext4_grpblk_t start;
4385 int preallocated = 0;
4386 int len;
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396 list_for_each(cur, &grp->bb_prealloc_list) {
4397 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
4398 spin_lock(&pa->pa_lock);
4399 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4400 &groupnr, &start);
4401 len = pa->pa_len;
4402 spin_unlock(&pa->pa_lock);
4403 if (unlikely(len == 0))
4404 continue;
4405 BUG_ON(groupnr != group);
4406 ext4_set_bits(bitmap, start, len);
4407 preallocated += len;
4408 }
4409 mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
4410}
4411
4412static void ext4_mb_mark_pa_deleted(struct super_block *sb,
4413 struct ext4_prealloc_space *pa)
4414{
4415 struct ext4_inode_info *ei;
4416
4417 if (pa->pa_deleted) {
4418 ext4_warning(sb, "deleted pa, type:%d, pblk:%llu, lblk:%u, len:%d\n",
4419 pa->pa_type, pa->pa_pstart, pa->pa_lstart,
4420 pa->pa_len);
4421 return;
4422 }
4423
4424 pa->pa_deleted = 1;
4425
4426 if (pa->pa_type == MB_INODE_PA) {
4427 ei = EXT4_I(pa->pa_inode);
4428 atomic_dec(&ei->i_prealloc_active);
4429 }
4430}
4431
4432static void ext4_mb_pa_callback(struct rcu_head *head)
4433{
4434 struct ext4_prealloc_space *pa;
4435 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
4436
4437 BUG_ON(atomic_read(&pa->pa_count));
4438 BUG_ON(pa->pa_deleted == 0);
4439 kmem_cache_free(ext4_pspace_cachep, pa);
4440}
4441
4442
4443
4444
4445
4446static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
4447 struct super_block *sb, struct ext4_prealloc_space *pa)
4448{
4449 ext4_group_t grp;
4450 ext4_fsblk_t grp_blk;
4451
4452
4453 spin_lock(&pa->pa_lock);
4454 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
4455 spin_unlock(&pa->pa_lock);
4456 return;
4457 }
4458
4459 if (pa->pa_deleted == 1) {
4460 spin_unlock(&pa->pa_lock);
4461 return;
4462 }
4463
4464 ext4_mb_mark_pa_deleted(sb, pa);
4465 spin_unlock(&pa->pa_lock);
4466
4467 grp_blk = pa->pa_pstart;
4468
4469
4470
4471
4472 if (pa->pa_type == MB_GROUP_PA)
4473 grp_blk--;
4474
4475 grp = ext4_get_group_number(sb, grp_blk);
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491 ext4_lock_group(sb, grp);
4492 list_del(&pa->pa_group_list);
4493 ext4_unlock_group(sb, grp);
4494
4495 spin_lock(pa->pa_obj_lock);
4496 list_del_rcu(&pa->pa_inode_list);
4497 spin_unlock(pa->pa_obj_lock);
4498
4499 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4500}
4501
4502
4503
4504
4505static noinline_for_stack void
4506ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
4507{
4508 struct super_block *sb = ac->ac_sb;
4509 struct ext4_sb_info *sbi = EXT4_SB(sb);
4510 struct ext4_prealloc_space *pa;
4511 struct ext4_group_info *grp;
4512 struct ext4_inode_info *ei;
4513
4514
4515 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4516 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4517 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4518 BUG_ON(ac->ac_pa == NULL);
4519
4520 pa = ac->ac_pa;
4521
4522 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
4523 int winl;
4524 int wins;
4525 int win;
4526 int offs;
4527
4528
4529
4530
4531 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
4532 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
4533
4534
4535
4536
4537 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
4538
4539
4540 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
4541
4542
4543 win = min(winl, wins);
4544
4545 offs = ac->ac_o_ex.fe_logical %
4546 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4547 if (offs && offs < win)
4548 win = offs;
4549
4550 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
4551 EXT4_NUM_B2C(sbi, win);
4552 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
4553 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
4554 }
4555
4556
4557
4558 ac->ac_f_ex = ac->ac_b_ex;
4559
4560 pa->pa_lstart = ac->ac_b_ex.fe_logical;
4561 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4562 pa->pa_len = ac->ac_b_ex.fe_len;
4563 pa->pa_free = pa->pa_len;
4564 spin_lock_init(&pa->pa_lock);
4565 INIT_LIST_HEAD(&pa->pa_inode_list);
4566 INIT_LIST_HEAD(&pa->pa_group_list);
4567 pa->pa_deleted = 0;
4568 pa->pa_type = MB_INODE_PA;
4569
4570 mb_debug(sb, "new inode pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4571 pa->pa_len, pa->pa_lstart);
4572 trace_ext4_mb_new_inode_pa(ac, pa);
4573
4574 ext4_mb_use_inode_pa(ac, pa);
4575 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
4576
4577 ei = EXT4_I(ac->ac_inode);
4578 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4579
4580 pa->pa_obj_lock = &ei->i_prealloc_lock;
4581 pa->pa_inode = ac->ac_inode;
4582
4583 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4584
4585 spin_lock(pa->pa_obj_lock);
4586 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
4587 spin_unlock(pa->pa_obj_lock);
4588 atomic_inc(&ei->i_prealloc_active);
4589}
4590
4591
4592
4593
4594static noinline_for_stack void
4595ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
4596{
4597 struct super_block *sb = ac->ac_sb;
4598 struct ext4_locality_group *lg;
4599 struct ext4_prealloc_space *pa;
4600 struct ext4_group_info *grp;
4601
4602
4603 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
4604 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
4605 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
4606 BUG_ON(ac->ac_pa == NULL);
4607
4608 pa = ac->ac_pa;
4609
4610
4611
4612 ac->ac_f_ex = ac->ac_b_ex;
4613
4614 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4615 pa->pa_lstart = pa->pa_pstart;
4616 pa->pa_len = ac->ac_b_ex.fe_len;
4617 pa->pa_free = pa->pa_len;
4618 spin_lock_init(&pa->pa_lock);
4619 INIT_LIST_HEAD(&pa->pa_inode_list);
4620 INIT_LIST_HEAD(&pa->pa_group_list);
4621 pa->pa_deleted = 0;
4622 pa->pa_type = MB_GROUP_PA;
4623
4624 mb_debug(sb, "new group pa %p: %llu/%d for %u\n", pa, pa->pa_pstart,
4625 pa->pa_len, pa->pa_lstart);
4626 trace_ext4_mb_new_group_pa(ac, pa);
4627
4628 ext4_mb_use_group_pa(ac, pa);
4629 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
4630
4631 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
4632 lg = ac->ac_lg;
4633 BUG_ON(lg == NULL);
4634
4635 pa->pa_obj_lock = &lg->lg_prealloc_lock;
4636 pa->pa_inode = NULL;
4637
4638 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
4639
4640
4641
4642
4643
4644}
4645
4646static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
4647{
4648 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4649 ext4_mb_new_group_pa(ac);
4650 else
4651 ext4_mb_new_inode_pa(ac);
4652}
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662static noinline_for_stack int
4663ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
4664 struct ext4_prealloc_space *pa)
4665{
4666 struct super_block *sb = e4b->bd_sb;
4667 struct ext4_sb_info *sbi = EXT4_SB(sb);
4668 unsigned int end;
4669 unsigned int next;
4670 ext4_group_t group;
4671 ext4_grpblk_t bit;
4672 unsigned long long grp_blk_start;
4673 int free = 0;
4674
4675 BUG_ON(pa->pa_deleted == 0);
4676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4677 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
4678 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4679 end = bit + pa->pa_len;
4680
4681 while (bit < end) {
4682 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
4683 if (bit >= end)
4684 break;
4685 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
4686 mb_debug(sb, "free preallocated %u/%u in group %u\n",
4687 (unsigned) ext4_group_first_block_no(sb, group) + bit,
4688 (unsigned) next - bit, (unsigned) group);
4689 free += next - bit;
4690
4691 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
4692 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
4693 EXT4_C2B(sbi, bit)),
4694 next - bit);
4695 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
4696 bit = next + 1;
4697 }
4698 if (free != pa->pa_free) {
4699 ext4_msg(e4b->bd_sb, KERN_CRIT,
4700 "pa %p: logic %lu, phys. %lu, len %d",
4701 pa, (unsigned long) pa->pa_lstart,
4702 (unsigned long) pa->pa_pstart,
4703 pa->pa_len);
4704 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
4705 free, pa->pa_free);
4706
4707
4708
4709
4710 }
4711 atomic_add(free, &sbi->s_mb_discarded);
4712
4713 return 0;
4714}
4715
4716static noinline_for_stack int
4717ext4_mb_release_group_pa(struct ext4_buddy *e4b,
4718 struct ext4_prealloc_space *pa)
4719{
4720 struct super_block *sb = e4b->bd_sb;
4721 ext4_group_t group;
4722 ext4_grpblk_t bit;
4723
4724 trace_ext4_mb_release_group_pa(sb, pa);
4725 BUG_ON(pa->pa_deleted == 0);
4726 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
4727 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
4728 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
4729 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
4730 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
4731
4732 return 0;
4733}
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744static noinline_for_stack int
4745ext4_mb_discard_group_preallocations(struct super_block *sb,
4746 ext4_group_t group, int needed)
4747{
4748 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
4749 struct buffer_head *bitmap_bh = NULL;
4750 struct ext4_prealloc_space *pa, *tmp;
4751 struct list_head list;
4752 struct ext4_buddy e4b;
4753 int err;
4754 int busy = 0;
4755 int free, free_total = 0;
4756
4757 mb_debug(sb, "discard preallocation for group %u\n", group);
4758 if (list_empty(&grp->bb_prealloc_list))
4759 goto out_dbg;
4760
4761 bitmap_bh = ext4_read_block_bitmap(sb, group);
4762 if (IS_ERR(bitmap_bh)) {
4763 err = PTR_ERR(bitmap_bh);
4764 ext4_error_err(sb, -err,
4765 "Error %d reading block bitmap for %u",
4766 err, group);
4767 goto out_dbg;
4768 }
4769
4770 err = ext4_mb_load_buddy(sb, group, &e4b);
4771 if (err) {
4772 ext4_warning(sb, "Error %d loading buddy information for %u",
4773 err, group);
4774 put_bh(bitmap_bh);
4775 goto out_dbg;
4776 }
4777
4778 if (needed == 0)
4779 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
4780
4781 INIT_LIST_HEAD(&list);
4782repeat:
4783 free = 0;
4784 ext4_lock_group(sb, group);
4785 list_for_each_entry_safe(pa, tmp,
4786 &grp->bb_prealloc_list, pa_group_list) {
4787 spin_lock(&pa->pa_lock);
4788 if (atomic_read(&pa->pa_count)) {
4789 spin_unlock(&pa->pa_lock);
4790 busy = 1;
4791 continue;
4792 }
4793 if (pa->pa_deleted) {
4794 spin_unlock(&pa->pa_lock);
4795 continue;
4796 }
4797
4798
4799 ext4_mb_mark_pa_deleted(sb, pa);
4800
4801 if (!free)
4802 this_cpu_inc(discard_pa_seq);
4803
4804
4805 free += pa->pa_free;
4806
4807 spin_unlock(&pa->pa_lock);
4808
4809 list_del(&pa->pa_group_list);
4810 list_add(&pa->u.pa_tmp_list, &list);
4811 }
4812
4813
4814 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4815
4816
4817 spin_lock(pa->pa_obj_lock);
4818 list_del_rcu(&pa->pa_inode_list);
4819 spin_unlock(pa->pa_obj_lock);
4820
4821 if (pa->pa_type == MB_GROUP_PA)
4822 ext4_mb_release_group_pa(&e4b, pa);
4823 else
4824 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4825
4826 list_del(&pa->u.pa_tmp_list);
4827 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4828 }
4829
4830 free_total += free;
4831
4832
4833 if (free_total < needed && busy) {
4834 ext4_unlock_group(sb, group);
4835 cond_resched();
4836 busy = 0;
4837 goto repeat;
4838 }
4839 ext4_unlock_group(sb, group);
4840 ext4_mb_unload_buddy(&e4b);
4841 put_bh(bitmap_bh);
4842out_dbg:
4843 mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
4844 free_total, group, grp->bb_free);
4845 return free_total;
4846}
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
4858{
4859 struct ext4_inode_info *ei = EXT4_I(inode);
4860 struct super_block *sb = inode->i_sb;
4861 struct buffer_head *bitmap_bh = NULL;
4862 struct ext4_prealloc_space *pa, *tmp;
4863 ext4_group_t group = 0;
4864 struct list_head list;
4865 struct ext4_buddy e4b;
4866 int err;
4867
4868 if (!S_ISREG(inode->i_mode)) {
4869
4870 return;
4871 }
4872
4873 if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
4874 return;
4875
4876 mb_debug(sb, "discard preallocation for inode %lu\n",
4877 inode->i_ino);
4878 trace_ext4_discard_preallocations(inode,
4879 atomic_read(&ei->i_prealloc_active), needed);
4880
4881 INIT_LIST_HEAD(&list);
4882
4883 if (needed == 0)
4884 needed = UINT_MAX;
4885
4886repeat:
4887
4888 spin_lock(&ei->i_prealloc_lock);
4889 while (!list_empty(&ei->i_prealloc_list) && needed) {
4890 pa = list_entry(ei->i_prealloc_list.prev,
4891 struct ext4_prealloc_space, pa_inode_list);
4892 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4893 spin_lock(&pa->pa_lock);
4894 if (atomic_read(&pa->pa_count)) {
4895
4896
4897 spin_unlock(&pa->pa_lock);
4898 spin_unlock(&ei->i_prealloc_lock);
4899 ext4_msg(sb, KERN_ERR,
4900 "uh-oh! used pa while discarding");
4901 WARN_ON(1);
4902 schedule_timeout_uninterruptible(HZ);
4903 goto repeat;
4904
4905 }
4906 if (pa->pa_deleted == 0) {
4907 ext4_mb_mark_pa_deleted(sb, pa);
4908 spin_unlock(&pa->pa_lock);
4909 list_del_rcu(&pa->pa_inode_list);
4910 list_add(&pa->u.pa_tmp_list, &list);
4911 needed--;
4912 continue;
4913 }
4914
4915
4916 spin_unlock(&pa->pa_lock);
4917 spin_unlock(&ei->i_prealloc_lock);
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931 schedule_timeout_uninterruptible(HZ);
4932 goto repeat;
4933 }
4934 spin_unlock(&ei->i_prealloc_lock);
4935
4936 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4937 BUG_ON(pa->pa_type != MB_INODE_PA);
4938 group = ext4_get_group_number(sb, pa->pa_pstart);
4939
4940 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4941 GFP_NOFS|__GFP_NOFAIL);
4942 if (err) {
4943 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
4944 err, group);
4945 continue;
4946 }
4947
4948 bitmap_bh = ext4_read_block_bitmap(sb, group);
4949 if (IS_ERR(bitmap_bh)) {
4950 err = PTR_ERR(bitmap_bh);
4951 ext4_error_err(sb, -err, "Error %d reading block bitmap for %u",
4952 err, group);
4953 ext4_mb_unload_buddy(&e4b);
4954 continue;
4955 }
4956
4957 ext4_lock_group(sb, group);
4958 list_del(&pa->pa_group_list);
4959 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4960 ext4_unlock_group(sb, group);
4961
4962 ext4_mb_unload_buddy(&e4b);
4963 put_bh(bitmap_bh);
4964
4965 list_del(&pa->u.pa_tmp_list);
4966 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4967 }
4968}
4969
4970static int ext4_mb_pa_alloc(struct ext4_allocation_context *ac)
4971{
4972 struct ext4_prealloc_space *pa;
4973
4974 BUG_ON(ext4_pspace_cachep == NULL);
4975 pa = kmem_cache_zalloc(ext4_pspace_cachep, GFP_NOFS);
4976 if (!pa)
4977 return -ENOMEM;
4978 atomic_set(&pa->pa_count, 1);
4979 ac->ac_pa = pa;
4980 return 0;
4981}
4982
4983static void ext4_mb_pa_free(struct ext4_allocation_context *ac)
4984{
4985 struct ext4_prealloc_space *pa = ac->ac_pa;
4986
4987 BUG_ON(!pa);
4988 ac->ac_pa = NULL;
4989 WARN_ON(!atomic_dec_and_test(&pa->pa_count));
4990 kmem_cache_free(ext4_pspace_cachep, pa);
4991}
4992
4993#ifdef CONFIG_EXT4_DEBUG
4994static inline void ext4_mb_show_pa(struct super_block *sb)
4995{
4996 ext4_group_t i, ngroups;
4997
4998 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
4999 return;
5000
5001 ngroups = ext4_get_groups_count(sb);
5002 mb_debug(sb, "groups: ");
5003 for (i = 0; i < ngroups; i++) {
5004 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
5005 struct ext4_prealloc_space *pa;
5006 ext4_grpblk_t start;
5007 struct list_head *cur;
5008 ext4_lock_group(sb, i);
5009 list_for_each(cur, &grp->bb_prealloc_list) {
5010 pa = list_entry(cur, struct ext4_prealloc_space,
5011 pa_group_list);
5012 spin_lock(&pa->pa_lock);
5013 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
5014 NULL, &start);
5015 spin_unlock(&pa->pa_lock);
5016 mb_debug(sb, "PA:%u:%d:%d\n", i, start,
5017 pa->pa_len);
5018 }
5019 ext4_unlock_group(sb, i);
5020 mb_debug(sb, "%u: %d/%d\n", i, grp->bb_free,
5021 grp->bb_fragments);
5022 }
5023}
5024
5025static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5026{
5027 struct super_block *sb = ac->ac_sb;
5028
5029 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
5030 return;
5031
5032 mb_debug(sb, "Can't allocate:"
5033 " Allocation context details:");
5034 mb_debug(sb, "status %u flags 0x%x",
5035 ac->ac_status, ac->ac_flags);
5036 mb_debug(sb, "orig %lu/%lu/%lu@%lu, "
5037 "goal %lu/%lu/%lu@%lu, "
5038 "best %lu/%lu/%lu@%lu cr %d",
5039 (unsigned long)ac->ac_o_ex.fe_group,
5040 (unsigned long)ac->ac_o_ex.fe_start,
5041 (unsigned long)ac->ac_o_ex.fe_len,
5042 (unsigned long)ac->ac_o_ex.fe_logical,
5043 (unsigned long)ac->ac_g_ex.fe_group,
5044 (unsigned long)ac->ac_g_ex.fe_start,
5045 (unsigned long)ac->ac_g_ex.fe_len,
5046 (unsigned long)ac->ac_g_ex.fe_logical,
5047 (unsigned long)ac->ac_b_ex.fe_group,
5048 (unsigned long)ac->ac_b_ex.fe_start,
5049 (unsigned long)ac->ac_b_ex.fe_len,
5050 (unsigned long)ac->ac_b_ex.fe_logical,
5051 (int)ac->ac_criteria);
5052 mb_debug(sb, "%u found", ac->ac_found);
5053 ext4_mb_show_pa(sb);
5054}
5055#else
5056static inline void ext4_mb_show_pa(struct super_block *sb)
5057{
5058 return;
5059}
5060static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
5061{
5062 ext4_mb_show_pa(ac->ac_sb);
5063 return;
5064}
5065#endif
5066
5067
5068
5069
5070
5071
5072
5073
5074static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
5075{
5076 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5077 int bsbits = ac->ac_sb->s_blocksize_bits;
5078 loff_t size, isize;
5079
5080 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
5081 return;
5082
5083 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
5084 return;
5085
5086 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
5087 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
5088 >> bsbits;
5089
5090 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
5091 !inode_is_open_for_write(ac->ac_inode)) {
5092 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
5093 return;
5094 }
5095
5096 if (sbi->s_mb_group_prealloc <= 0) {
5097 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5098 return;
5099 }
5100
5101
5102 size = max(size, isize);
5103 if (size > sbi->s_mb_stream_request) {
5104 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
5105 return;
5106 }
5107
5108 BUG_ON(ac->ac_lg != NULL);
5109
5110
5111
5112
5113
5114 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
5115
5116
5117 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
5118
5119
5120 mutex_lock(&ac->ac_lg->lg_mutex);
5121}
5122
5123static noinline_for_stack int
5124ext4_mb_initialize_context(struct ext4_allocation_context *ac,
5125 struct ext4_allocation_request *ar)
5126{
5127 struct super_block *sb = ar->inode->i_sb;
5128 struct ext4_sb_info *sbi = EXT4_SB(sb);
5129 struct ext4_super_block *es = sbi->s_es;
5130 ext4_group_t group;
5131 unsigned int len;
5132 ext4_fsblk_t goal;
5133 ext4_grpblk_t block;
5134
5135
5136 len = ar->len;
5137
5138
5139 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
5140 len = EXT4_CLUSTERS_PER_GROUP(sb);
5141
5142
5143 goal = ar->goal;
5144 if (goal < le32_to_cpu(es->s_first_data_block) ||
5145 goal >= ext4_blocks_count(es))
5146 goal = le32_to_cpu(es->s_first_data_block);
5147 ext4_get_group_no_and_offset(sb, goal, &group, &block);
5148
5149
5150 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
5151 ac->ac_status = AC_STATUS_CONTINUE;
5152 ac->ac_sb = sb;
5153 ac->ac_inode = ar->inode;
5154 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
5155 ac->ac_o_ex.fe_group = group;
5156 ac->ac_o_ex.fe_start = block;
5157 ac->ac_o_ex.fe_len = len;
5158 ac->ac_g_ex = ac->ac_o_ex;
5159 ac->ac_flags = ar->flags;
5160
5161
5162
5163 ext4_mb_group_or_file(ac);
5164
5165 mb_debug(sb, "init ac: %u blocks @ %u, goal %u, flags 0x%x, 2^%d, "
5166 "left: %u/%u, right %u/%u to %swritable\n",
5167 (unsigned) ar->len, (unsigned) ar->logical,
5168 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
5169 (unsigned) ar->lleft, (unsigned) ar->pleft,
5170 (unsigned) ar->lright, (unsigned) ar->pright,
5171 inode_is_open_for_write(ar->inode) ? "" : "non-");
5172 return 0;
5173
5174}
5175
5176static noinline_for_stack void
5177ext4_mb_discard_lg_preallocations(struct super_block *sb,
5178 struct ext4_locality_group *lg,
5179 int order, int total_entries)
5180{
5181 ext4_group_t group = 0;
5182 struct ext4_buddy e4b;
5183 struct list_head discard_list;
5184 struct ext4_prealloc_space *pa, *tmp;
5185
5186 mb_debug(sb, "discard locality group preallocation\n");
5187
5188 INIT_LIST_HEAD(&discard_list);
5189
5190 spin_lock(&lg->lg_prealloc_lock);
5191 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
5192 pa_inode_list,
5193 lockdep_is_held(&lg->lg_prealloc_lock)) {
5194 spin_lock(&pa->pa_lock);
5195 if (atomic_read(&pa->pa_count)) {
5196
5197
5198
5199
5200
5201 spin_unlock(&pa->pa_lock);
5202 continue;
5203 }
5204 if (pa->pa_deleted) {
5205 spin_unlock(&pa->pa_lock);
5206 continue;
5207 }
5208
5209 BUG_ON(pa->pa_type != MB_GROUP_PA);
5210
5211
5212 ext4_mb_mark_pa_deleted(sb, pa);
5213 spin_unlock(&pa->pa_lock);
5214
5215 list_del_rcu(&pa->pa_inode_list);
5216 list_add(&pa->u.pa_tmp_list, &discard_list);
5217
5218 total_entries--;
5219 if (total_entries <= 5) {
5220
5221
5222
5223
5224
5225
5226 break;
5227 }
5228 }
5229 spin_unlock(&lg->lg_prealloc_lock);
5230
5231 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
5232 int err;
5233
5234 group = ext4_get_group_number(sb, pa->pa_pstart);
5235 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
5236 GFP_NOFS|__GFP_NOFAIL);
5237 if (err) {
5238 ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
5239 err, group);
5240 continue;
5241 }
5242 ext4_lock_group(sb, group);
5243 list_del(&pa->pa_group_list);
5244 ext4_mb_release_group_pa(&e4b, pa);
5245 ext4_unlock_group(sb, group);
5246
5247 ext4_mb_unload_buddy(&e4b);
5248 list_del(&pa->u.pa_tmp_list);
5249 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
5250 }
5251}
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
5263{
5264 int order, added = 0, lg_prealloc_count = 1;
5265 struct super_block *sb = ac->ac_sb;
5266 struct ext4_locality_group *lg = ac->ac_lg;
5267 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
5268
5269 order = fls(pa->pa_free) - 1;
5270 if (order > PREALLOC_TB_SIZE - 1)
5271
5272 order = PREALLOC_TB_SIZE - 1;
5273
5274 spin_lock(&lg->lg_prealloc_lock);
5275 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
5276 pa_inode_list,
5277 lockdep_is_held(&lg->lg_prealloc_lock)) {
5278 spin_lock(&tmp_pa->pa_lock);
5279 if (tmp_pa->pa_deleted) {
5280 spin_unlock(&tmp_pa->pa_lock);
5281 continue;
5282 }
5283 if (!added && pa->pa_free < tmp_pa->pa_free) {
5284
5285 list_add_tail_rcu(&pa->pa_inode_list,
5286 &tmp_pa->pa_inode_list);
5287 added = 1;
5288
5289
5290
5291
5292 }
5293 spin_unlock(&tmp_pa->pa_lock);
5294 lg_prealloc_count++;
5295 }
5296 if (!added)
5297 list_add_tail_rcu(&pa->pa_inode_list,
5298 &lg->lg_prealloc_list[order]);
5299 spin_unlock(&lg->lg_prealloc_lock);
5300
5301
5302 if (lg_prealloc_count > 8) {
5303 ext4_mb_discard_lg_preallocations(sb, lg,
5304 order, lg_prealloc_count);
5305 return;
5306 }
5307 return ;
5308}
5309
5310
5311
5312
5313static void ext4_mb_trim_inode_pa(struct inode *inode)
5314{
5315 struct ext4_inode_info *ei = EXT4_I(inode);
5316 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5317 int count, delta;
5318
5319 count = atomic_read(&ei->i_prealloc_active);
5320 delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
5321 if (count > sbi->s_mb_max_inode_prealloc + delta) {
5322 count -= sbi->s_mb_max_inode_prealloc;
5323 ext4_discard_preallocations(inode, count);
5324 }
5325}
5326
5327
5328
5329
5330static int ext4_mb_release_context(struct ext4_allocation_context *ac)
5331{
5332 struct inode *inode = ac->ac_inode;
5333 struct ext4_inode_info *ei = EXT4_I(inode);
5334 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
5335 struct ext4_prealloc_space *pa = ac->ac_pa;
5336 if (pa) {
5337 if (pa->pa_type == MB_GROUP_PA) {
5338
5339 spin_lock(&pa->pa_lock);
5340 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5341 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
5342 pa->pa_free -= ac->ac_b_ex.fe_len;
5343 pa->pa_len -= ac->ac_b_ex.fe_len;
5344 spin_unlock(&pa->pa_lock);
5345
5346
5347
5348
5349
5350
5351
5352 if (likely(pa->pa_free)) {
5353 spin_lock(pa->pa_obj_lock);
5354 list_del_rcu(&pa->pa_inode_list);
5355 spin_unlock(pa->pa_obj_lock);
5356 ext4_mb_add_n_trim(ac);
5357 }
5358 }
5359
5360 if (pa->pa_type == MB_INODE_PA) {
5361
5362
5363
5364
5365 spin_lock(pa->pa_obj_lock);
5366 list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
5367 spin_unlock(pa->pa_obj_lock);
5368 }
5369
5370 ext4_mb_put_pa(ac, ac->ac_sb, pa);
5371 }
5372 if (ac->ac_bitmap_page)
5373 put_page(ac->ac_bitmap_page);
5374 if (ac->ac_buddy_page)
5375 put_page(ac->ac_buddy_page);
5376 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
5377 mutex_unlock(&ac->ac_lg->lg_mutex);
5378 ext4_mb_collect_stats(ac);
5379 ext4_mb_trim_inode_pa(inode);
5380 return 0;
5381}
5382
5383static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
5384{
5385 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
5386 int ret;
5387 int freed = 0;
5388
5389 trace_ext4_mb_discard_preallocations(sb, needed);
5390 for (i = 0; i < ngroups && needed > 0; i++) {
5391 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
5392 freed += ret;
5393 needed -= ret;
5394 }
5395
5396 return freed;
5397}
5398
5399static bool ext4_mb_discard_preallocations_should_retry(struct super_block *sb,
5400 struct ext4_allocation_context *ac, u64 *seq)
5401{
5402 int freed;
5403 u64 seq_retry = 0;
5404 bool ret = false;
5405
5406 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
5407 if (freed) {
5408 ret = true;
5409 goto out_dbg;
5410 }
5411 seq_retry = ext4_get_discard_pa_seq_sum();
5412 if (!(ac->ac_flags & EXT4_MB_STRICT_CHECK) || seq_retry != *seq) {
5413 ac->ac_flags |= EXT4_MB_STRICT_CHECK;
5414 *seq = seq_retry;
5415 ret = true;
5416 }
5417
5418out_dbg:
5419 mb_debug(sb, "freed %d, retry ? %s\n", freed, ret ? "yes" : "no");
5420 return ret;
5421}
5422
5423static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5424 struct ext4_allocation_request *ar, int *errp);
5425
5426
5427
5428
5429
5430
5431ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
5432 struct ext4_allocation_request *ar, int *errp)
5433{
5434 struct ext4_allocation_context *ac = NULL;
5435 struct ext4_sb_info *sbi;
5436 struct super_block *sb;
5437 ext4_fsblk_t block = 0;
5438 unsigned int inquota = 0;
5439 unsigned int reserv_clstrs = 0;
5440 u64 seq;
5441
5442 might_sleep();
5443 sb = ar->inode->i_sb;
5444 sbi = EXT4_SB(sb);
5445
5446 trace_ext4_request_blocks(ar);
5447 if (sbi->s_mount_state & EXT4_FC_REPLAY)
5448 return ext4_mb_new_blocks_simple(handle, ar, errp);
5449
5450
5451 if (ext4_is_quota_file(ar->inode))
5452 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
5453
5454 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
5455
5456
5457
5458
5459 while (ar->len &&
5460 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
5461
5462
5463 cond_resched();
5464 ar->len = ar->len >> 1;
5465 }
5466 if (!ar->len) {
5467 ext4_mb_show_pa(sb);
5468 *errp = -ENOSPC;
5469 return 0;
5470 }
5471 reserv_clstrs = ar->len;
5472 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
5473 dquot_alloc_block_nofail(ar->inode,
5474 EXT4_C2B(sbi, ar->len));
5475 } else {
5476 while (ar->len &&
5477 dquot_alloc_block(ar->inode,
5478 EXT4_C2B(sbi, ar->len))) {
5479
5480 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
5481 ar->len--;
5482 }
5483 }
5484 inquota = ar->len;
5485 if (ar->len == 0) {
5486 *errp = -EDQUOT;
5487 goto out;
5488 }
5489 }
5490
5491 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
5492 if (!ac) {
5493 ar->len = 0;
5494 *errp = -ENOMEM;
5495 goto out;
5496 }
5497
5498 *errp = ext4_mb_initialize_context(ac, ar);
5499 if (*errp) {
5500 ar->len = 0;
5501 goto out;
5502 }
5503
5504 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
5505 seq = this_cpu_read(discard_pa_seq);
5506 if (!ext4_mb_use_preallocated(ac)) {
5507 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
5508 ext4_mb_normalize_request(ac, ar);
5509
5510 *errp = ext4_mb_pa_alloc(ac);
5511 if (*errp)
5512 goto errout;
5513repeat:
5514
5515 *errp = ext4_mb_regular_allocator(ac);
5516
5517
5518
5519
5520
5521
5522
5523 if (*errp) {
5524 ext4_mb_pa_free(ac);
5525 ext4_discard_allocated_blocks(ac);
5526 goto errout;
5527 }
5528 if (ac->ac_status == AC_STATUS_FOUND &&
5529 ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len)
5530 ext4_mb_pa_free(ac);
5531 }
5532 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
5533 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
5534 if (*errp) {
5535 ext4_discard_allocated_blocks(ac);
5536 goto errout;
5537 } else {
5538 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
5539 ar->len = ac->ac_b_ex.fe_len;
5540 }
5541 } else {
5542 if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
5543 goto repeat;
5544
5545
5546
5547
5548 ext4_mb_pa_free(ac);
5549 *errp = -ENOSPC;
5550 }
5551
5552errout:
5553 if (*errp) {
5554 ac->ac_b_ex.fe_len = 0;
5555 ar->len = 0;
5556 ext4_mb_show_ac(ac);
5557 }
5558 ext4_mb_release_context(ac);
5559out:
5560 if (ac)
5561 kmem_cache_free(ext4_ac_cachep, ac);
5562 if (inquota && ar->len < inquota)
5563 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
5564 if (!ar->len) {
5565 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
5566
5567 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
5568 reserv_clstrs);
5569 }
5570
5571 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
5572
5573 return block;
5574}
5575
5576
5577
5578
5579
5580
5581static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
5582 struct ext4_free_data *entry,
5583 struct ext4_free_data *new_entry,
5584 struct rb_root *entry_rb_root)
5585{
5586 if ((entry->efd_tid != new_entry->efd_tid) ||
5587 (entry->efd_group != new_entry->efd_group))
5588 return;
5589 if (entry->efd_start_cluster + entry->efd_count ==
5590 new_entry->efd_start_cluster) {
5591 new_entry->efd_start_cluster = entry->efd_start_cluster;
5592 new_entry->efd_count += entry->efd_count;
5593 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
5594 entry->efd_start_cluster) {
5595 new_entry->efd_count += entry->efd_count;
5596 } else
5597 return;
5598 spin_lock(&sbi->s_md_lock);
5599 list_del(&entry->efd_list);
5600 spin_unlock(&sbi->s_md_lock);
5601 rb_erase(&entry->efd_node, entry_rb_root);
5602 kmem_cache_free(ext4_free_data_cachep, entry);
5603}
5604
5605static noinline_for_stack int
5606ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
5607 struct ext4_free_data *new_entry)
5608{
5609 ext4_group_t group = e4b->bd_group;
5610 ext4_grpblk_t cluster;
5611 ext4_grpblk_t clusters = new_entry->efd_count;
5612 struct ext4_free_data *entry;
5613 struct ext4_group_info *db = e4b->bd_info;
5614 struct super_block *sb = e4b->bd_sb;
5615 struct ext4_sb_info *sbi = EXT4_SB(sb);
5616 struct rb_node **n = &db->bb_free_root.rb_node, *node;
5617 struct rb_node *parent = NULL, *new_node;
5618
5619 BUG_ON(!ext4_handle_valid(handle));
5620 BUG_ON(e4b->bd_bitmap_page == NULL);
5621 BUG_ON(e4b->bd_buddy_page == NULL);
5622
5623 new_node = &new_entry->efd_node;
5624 cluster = new_entry->efd_start_cluster;
5625
5626 if (!*n) {
5627
5628
5629
5630
5631
5632 get_page(e4b->bd_buddy_page);
5633 get_page(e4b->bd_bitmap_page);
5634 }
5635 while (*n) {
5636 parent = *n;
5637 entry = rb_entry(parent, struct ext4_free_data, efd_node);
5638 if (cluster < entry->efd_start_cluster)
5639 n = &(*n)->rb_left;
5640 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
5641 n = &(*n)->rb_right;
5642 else {
5643 ext4_grp_locked_error(sb, group, 0,
5644 ext4_group_first_block_no(sb, group) +
5645 EXT4_C2B(sbi, cluster),
5646 "Block already on to-be-freed list");
5647 kmem_cache_free(ext4_free_data_cachep, new_entry);
5648 return 0;
5649 }
5650 }
5651
5652 rb_link_node(new_node, parent, n);
5653 rb_insert_color(new_node, &db->bb_free_root);
5654
5655
5656 node = rb_prev(new_node);
5657 if (node) {
5658 entry = rb_entry(node, struct ext4_free_data, efd_node);
5659 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5660 &(db->bb_free_root));
5661 }
5662
5663 node = rb_next(new_node);
5664 if (node) {
5665 entry = rb_entry(node, struct ext4_free_data, efd_node);
5666 ext4_try_merge_freed_extent(sbi, entry, new_entry,
5667 &(db->bb_free_root));
5668 }
5669
5670 spin_lock(&sbi->s_md_lock);
5671 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
5672 sbi->s_mb_free_pending += clusters;
5673 spin_unlock(&sbi->s_md_lock);
5674 return 0;
5675}
5676
5677
5678
5679
5680
5681
5682static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
5683 struct ext4_allocation_request *ar, int *errp)
5684{
5685 struct buffer_head *bitmap_bh;
5686 struct super_block *sb = ar->inode->i_sb;
5687 ext4_group_t group;
5688 ext4_grpblk_t blkoff;
5689 int i = sb->s_blocksize;
5690 ext4_fsblk_t goal, block;
5691 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
5692
5693 goal = ar->goal;
5694 if (goal < le32_to_cpu(es->s_first_data_block) ||
5695 goal >= ext4_blocks_count(es))
5696 goal = le32_to_cpu(es->s_first_data_block);
5697
5698 ar->len = 0;
5699 ext4_get_group_no_and_offset(sb, goal, &group, &blkoff);
5700 for (; group < ext4_get_groups_count(sb); group++) {
5701 bitmap_bh = ext4_read_block_bitmap(sb, group);
5702 if (IS_ERR(bitmap_bh)) {
5703 *errp = PTR_ERR(bitmap_bh);
5704 pr_warn("Failed to read block bitmap\n");
5705 return 0;
5706 }
5707
5708 ext4_get_group_no_and_offset(sb,
5709 max(ext4_group_first_block_no(sb, group), goal),
5710 NULL, &blkoff);
5711 i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
5712 blkoff);
5713 brelse(bitmap_bh);
5714 if (i >= sb->s_blocksize)
5715 continue;
5716 if (ext4_fc_replay_check_excluded(sb,
5717 ext4_group_first_block_no(sb, group) + i))
5718 continue;
5719 break;
5720 }
5721
5722 if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
5723 return 0;
5724
5725 block = ext4_group_first_block_no(sb, group) + i;
5726 ext4_mb_mark_bb(sb, block, 1, 1);
5727 ar->len = 1;
5728
5729 return block;
5730}
5731
5732static void ext4_free_blocks_simple(struct inode *inode, ext4_fsblk_t block,
5733 unsigned long count)
5734{
5735 struct buffer_head *bitmap_bh;
5736 struct super_block *sb = inode->i_sb;
5737 struct ext4_group_desc *gdp;
5738 struct buffer_head *gdp_bh;
5739 ext4_group_t group;
5740 ext4_grpblk_t blkoff;
5741 int already_freed = 0, err, i;
5742
5743 ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
5744 bitmap_bh = ext4_read_block_bitmap(sb, group);
5745 if (IS_ERR(bitmap_bh)) {
5746 err = PTR_ERR(bitmap_bh);
5747 pr_warn("Failed to read block bitmap\n");
5748 return;
5749 }
5750 gdp = ext4_get_group_desc(sb, group, &gdp_bh);
5751 if (!gdp)
5752 return;
5753
5754 for (i = 0; i < count; i++) {
5755 if (!mb_test_bit(blkoff + i, bitmap_bh->b_data))
5756 already_freed++;
5757 }
5758 mb_clear_bits(bitmap_bh->b_data, blkoff, count);
5759 err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
5760 if (err)
5761 return;
5762 ext4_free_group_clusters_set(
5763 sb, gdp, ext4_free_group_clusters(sb, gdp) +
5764 count - already_freed);
5765 ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
5766 ext4_group_desc_csum_set(sb, group, gdp);
5767 ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
5768 sync_dirty_buffer(bitmap_bh);
5769 sync_dirty_buffer(gdp_bh);
5770 brelse(bitmap_bh);
5771}
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782void ext4_free_blocks(handle_t *handle, struct inode *inode,
5783 struct buffer_head *bh, ext4_fsblk_t block,
5784 unsigned long count, int flags)
5785{
5786 struct buffer_head *bitmap_bh = NULL;
5787 struct super_block *sb = inode->i_sb;
5788 struct ext4_group_desc *gdp;
5789 unsigned int overflow;
5790 ext4_grpblk_t bit;
5791 struct buffer_head *gd_bh;
5792 ext4_group_t block_group;
5793 struct ext4_sb_info *sbi;
5794 struct ext4_buddy e4b;
5795 unsigned int count_clusters;
5796 int err = 0;
5797 int ret;
5798
5799 sbi = EXT4_SB(sb);
5800
5801 if (sbi->s_mount_state & EXT4_FC_REPLAY) {
5802 ext4_free_blocks_simple(inode, block, count);
5803 return;
5804 }
5805
5806 might_sleep();
5807 if (bh) {
5808 if (block)
5809 BUG_ON(block != bh->b_blocknr);
5810 else
5811 block = bh->b_blocknr;
5812 }
5813
5814 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
5815 !ext4_inode_block_valid(inode, block, count)) {
5816 ext4_error(sb, "Freeing blocks not in datazone - "
5817 "block = %llu, count = %lu", block, count);
5818 goto error_return;
5819 }
5820
5821 ext4_debug("freeing block %llu\n", block);
5822 trace_ext4_free_blocks(inode, block, count, flags);
5823
5824 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
5825 BUG_ON(count > 1);
5826
5827 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
5828 inode, bh, block);
5829 }
5830
5831
5832
5833
5834
5835
5836
5837
5838 overflow = EXT4_PBLK_COFF(sbi, block);
5839 if (overflow) {
5840 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
5841 overflow = sbi->s_cluster_ratio - overflow;
5842 block += overflow;
5843 if (count > overflow)
5844 count -= overflow;
5845 else
5846 return;
5847 } else {
5848 block -= overflow;
5849 count += overflow;
5850 }
5851 }
5852 overflow = EXT4_LBLK_COFF(sbi, count);
5853 if (overflow) {
5854 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
5855 if (count > overflow)
5856 count -= overflow;
5857 else
5858 return;
5859 } else
5860 count += sbi->s_cluster_ratio - overflow;
5861 }
5862
5863 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
5864 int i;
5865 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
5866
5867 for (i = 0; i < count; i++) {
5868 cond_resched();
5869 if (is_metadata)
5870 bh = sb_find_get_block(inode->i_sb, block + i);
5871 ext4_forget(handle, is_metadata, inode, bh, block + i);
5872 }
5873 }
5874
5875do_more:
5876 overflow = 0;
5877 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
5878
5879 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
5880 ext4_get_group_info(sb, block_group))))
5881 return;
5882
5883
5884
5885
5886
5887 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
5888 overflow = EXT4_C2B(sbi, bit) + count -
5889 EXT4_BLOCKS_PER_GROUP(sb);
5890 count -= overflow;
5891 }
5892 count_clusters = EXT4_NUM_B2C(sbi, count);
5893 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5894 if (IS_ERR(bitmap_bh)) {
5895 err = PTR_ERR(bitmap_bh);
5896 bitmap_bh = NULL;
5897 goto error_return;
5898 }
5899 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
5900 if (!gdp) {
5901 err = -EIO;
5902 goto error_return;
5903 }
5904
5905 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
5906 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
5907 in_range(block, ext4_inode_table(sb, gdp),
5908 sbi->s_itb_per_group) ||
5909 in_range(block + count - 1, ext4_inode_table(sb, gdp),
5910 sbi->s_itb_per_group)) {
5911
5912 ext4_error(sb, "Freeing blocks in system zone - "
5913 "Block = %llu, count = %lu", block, count);
5914
5915 goto error_return;
5916 }
5917
5918 BUFFER_TRACE(bitmap_bh, "getting write access");
5919 err = ext4_journal_get_write_access(handle, bitmap_bh);
5920 if (err)
5921 goto error_return;
5922
5923
5924
5925
5926
5927
5928 BUFFER_TRACE(gd_bh, "get_write_access");
5929 err = ext4_journal_get_write_access(handle, gd_bh);
5930 if (err)
5931 goto error_return;
5932#ifdef AGGRESSIVE_CHECK
5933 {
5934 int i;
5935 for (i = 0; i < count_clusters; i++)
5936 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
5937 }
5938#endif
5939 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
5940
5941
5942 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
5943 GFP_NOFS|__GFP_NOFAIL);
5944 if (err)
5945 goto error_return;
5946
5947
5948
5949
5950
5951
5952
5953 if (ext4_handle_valid(handle) &&
5954 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
5955 !ext4_should_writeback_data(inode))) {
5956 struct ext4_free_data *new_entry;
5957
5958
5959
5960
5961 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
5962 GFP_NOFS|__GFP_NOFAIL);
5963 new_entry->efd_start_cluster = bit;
5964 new_entry->efd_group = block_group;
5965 new_entry->efd_count = count_clusters;
5966 new_entry->efd_tid = handle->h_transaction->t_tid;
5967
5968 ext4_lock_group(sb, block_group);
5969 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
5970 ext4_mb_free_metadata(handle, &e4b, new_entry);
5971 } else {
5972
5973
5974
5975
5976 if (test_opt(sb, DISCARD)) {
5977 err = ext4_issue_discard(sb, block_group, bit, count,
5978 NULL);
5979 if (err && err != -EOPNOTSUPP)
5980 ext4_msg(sb, KERN_WARNING, "discard request in"
5981 " group:%d block:%d count:%lu failed"
5982 " with %d", block_group, bit, count,
5983 err);
5984 } else
5985 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
5986
5987 ext4_lock_group(sb, block_group);
5988 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
5989 mb_free_blocks(inode, &e4b, bit, count_clusters);
5990 }
5991
5992 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
5993 ext4_free_group_clusters_set(sb, gdp, ret);
5994 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
5995 ext4_group_desc_csum_set(sb, block_group, gdp);
5996 ext4_unlock_group(sb, block_group);
5997
5998 if (sbi->s_log_groups_per_flex) {
5999 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6000 atomic64_add(count_clusters,
6001 &sbi_array_rcu_deref(sbi, s_flex_groups,
6002 flex_group)->free_clusters);
6003 }
6004
6005
6006
6007
6008
6009
6010 if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
6011 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
6012 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
6013 percpu_counter_add(&sbi->s_freeclusters_counter,
6014 count_clusters);
6015 }
6016
6017 ext4_mb_unload_buddy(&e4b);
6018
6019
6020 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6021 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6022
6023
6024 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6025 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6026 if (!err)
6027 err = ret;
6028
6029 if (overflow && !err) {
6030 block += count;
6031 count = overflow;
6032 put_bh(bitmap_bh);
6033 goto do_more;
6034 }
6035error_return:
6036 brelse(bitmap_bh);
6037 ext4_std_error(sb, err);
6038 return;
6039}
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
6051 ext4_fsblk_t block, unsigned long count)
6052{
6053 struct buffer_head *bitmap_bh = NULL;
6054 struct buffer_head *gd_bh;
6055 ext4_group_t block_group;
6056 ext4_grpblk_t bit;
6057 unsigned int i;
6058 struct ext4_group_desc *desc;
6059 struct ext4_sb_info *sbi = EXT4_SB(sb);
6060 struct ext4_buddy e4b;
6061 int err = 0, ret, free_clusters_count;
6062 ext4_grpblk_t clusters_freed;
6063 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
6064 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
6065 unsigned long cluster_count = last_cluster - first_cluster + 1;
6066
6067 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
6068
6069 if (count == 0)
6070 return 0;
6071
6072 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
6073
6074
6075
6076
6077 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
6078 ext4_warning(sb, "too many blocks added to group %u",
6079 block_group);
6080 err = -EINVAL;
6081 goto error_return;
6082 }
6083
6084 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
6085 if (IS_ERR(bitmap_bh)) {
6086 err = PTR_ERR(bitmap_bh);
6087 bitmap_bh = NULL;
6088 goto error_return;
6089 }
6090
6091 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
6092 if (!desc) {
6093 err = -EIO;
6094 goto error_return;
6095 }
6096
6097 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
6098 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
6099 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
6100 in_range(block + count - 1, ext4_inode_table(sb, desc),
6101 sbi->s_itb_per_group)) {
6102 ext4_error(sb, "Adding blocks in system zones - "
6103 "Block = %llu, count = %lu",
6104 block, count);
6105 err = -EINVAL;
6106 goto error_return;
6107 }
6108
6109 BUFFER_TRACE(bitmap_bh, "getting write access");
6110 err = ext4_journal_get_write_access(handle, bitmap_bh);
6111 if (err)
6112 goto error_return;
6113
6114
6115
6116
6117
6118
6119 BUFFER_TRACE(gd_bh, "get_write_access");
6120 err = ext4_journal_get_write_access(handle, gd_bh);
6121 if (err)
6122 goto error_return;
6123
6124 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
6125 BUFFER_TRACE(bitmap_bh, "clear bit");
6126 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
6127 ext4_error(sb, "bit already cleared for block %llu",
6128 (ext4_fsblk_t)(block + i));
6129 BUFFER_TRACE(bitmap_bh, "bit already cleared");
6130 } else {
6131 clusters_freed++;
6132 }
6133 }
6134
6135 err = ext4_mb_load_buddy(sb, block_group, &e4b);
6136 if (err)
6137 goto error_return;
6138
6139
6140
6141
6142
6143
6144 ext4_lock_group(sb, block_group);
6145 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
6146 mb_free_blocks(NULL, &e4b, bit, cluster_count);
6147 free_clusters_count = clusters_freed +
6148 ext4_free_group_clusters(sb, desc);
6149 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
6150 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
6151 ext4_group_desc_csum_set(sb, block_group, desc);
6152 ext4_unlock_group(sb, block_group);
6153 percpu_counter_add(&sbi->s_freeclusters_counter,
6154 clusters_freed);
6155
6156 if (sbi->s_log_groups_per_flex) {
6157 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
6158 atomic64_add(clusters_freed,
6159 &sbi_array_rcu_deref(sbi, s_flex_groups,
6160 flex_group)->free_clusters);
6161 }
6162
6163 ext4_mb_unload_buddy(&e4b);
6164
6165
6166 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
6167 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
6168
6169
6170 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
6171 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
6172 if (!err)
6173 err = ret;
6174
6175error_return:
6176 brelse(bitmap_bh);
6177 ext4_std_error(sb, err);
6178 return err;
6179}
6180
6181
6182
6183
6184
6185
6186
6187
6188
6189
6190
6191
6192
6193static int ext4_trim_extent(struct super_block *sb, int start, int count,
6194 ext4_group_t group, struct ext4_buddy *e4b)
6195__releases(bitlock)
6196__acquires(bitlock)
6197{
6198 struct ext4_free_extent ex;
6199 int ret = 0;
6200
6201 trace_ext4_trim_extent(sb, group, start, count);
6202
6203 assert_spin_locked(ext4_group_lock_ptr(sb, group));
6204
6205 ex.fe_start = start;
6206 ex.fe_group = group;
6207 ex.fe_len = count;
6208
6209
6210
6211
6212
6213 mb_mark_used(e4b, &ex);
6214 ext4_unlock_group(sb, group);
6215 ret = ext4_issue_discard(sb, group, start, count, NULL);
6216 ext4_lock_group(sb, group);
6217 mb_free_blocks(NULL, e4b, start, ex.fe_len);
6218 return ret;
6219}
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239static ext4_grpblk_t
6240ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
6241 ext4_grpblk_t start, ext4_grpblk_t max,
6242 ext4_grpblk_t minblocks)
6243{
6244 void *bitmap;
6245 ext4_grpblk_t next, count = 0, free_count = 0;
6246 struct ext4_buddy e4b;
6247 int ret = 0;
6248
6249 trace_ext4_trim_all_free(sb, group, start, max);
6250
6251 ret = ext4_mb_load_buddy(sb, group, &e4b);
6252 if (ret) {
6253 ext4_warning(sb, "Error %d loading buddy information for %u",
6254 ret, group);
6255 return ret;
6256 }
6257 bitmap = e4b.bd_bitmap;
6258
6259 ext4_lock_group(sb, group);
6260 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
6261 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
6262 goto out;
6263
6264 start = (e4b.bd_info->bb_first_free > start) ?
6265 e4b.bd_info->bb_first_free : start;
6266
6267 while (start <= max) {
6268 start = mb_find_next_zero_bit(bitmap, max + 1, start);
6269 if (start > max)
6270 break;
6271 next = mb_find_next_bit(bitmap, max + 1, start);
6272
6273 if ((next - start) >= minblocks) {
6274 ret = ext4_trim_extent(sb, start,
6275 next - start, group, &e4b);
6276 if (ret && ret != -EOPNOTSUPP)
6277 break;
6278 ret = 0;
6279 count += next - start;
6280 }
6281 free_count += next - start;
6282 start = next + 1;
6283
6284 if (fatal_signal_pending(current)) {
6285 count = -ERESTARTSYS;
6286 break;
6287 }
6288
6289 if (need_resched()) {
6290 ext4_unlock_group(sb, group);
6291 cond_resched();
6292 ext4_lock_group(sb, group);
6293 }
6294
6295 if ((e4b.bd_info->bb_free - free_count) < minblocks)
6296 break;
6297 }
6298
6299 if (!ret) {
6300 ret = count;
6301 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
6302 }
6303out:
6304 ext4_unlock_group(sb, group);
6305 ext4_mb_unload_buddy(&e4b);
6306
6307 ext4_debug("trimmed %d blocks in the group %d\n",
6308 count, group);
6309
6310 return ret;
6311}
6312
6313
6314
6315
6316
6317
6318
6319
6320
6321
6322
6323
6324
6325int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
6326{
6327 struct ext4_group_info *grp;
6328 ext4_group_t group, first_group, last_group;
6329 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
6330 uint64_t start, end, minlen, trimmed = 0;
6331 ext4_fsblk_t first_data_blk =
6332 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
6333 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
6334 int ret = 0;
6335
6336 start = range->start >> sb->s_blocksize_bits;
6337 end = start + (range->len >> sb->s_blocksize_bits) - 1;
6338 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
6339 range->minlen >> sb->s_blocksize_bits);
6340
6341 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
6342 start >= max_blks ||
6343 range->len < sb->s_blocksize)
6344 return -EINVAL;
6345 if (end >= max_blks)
6346 end = max_blks - 1;
6347 if (end <= first_data_blk)
6348 goto out;
6349 if (start < first_data_blk)
6350 start = first_data_blk;
6351
6352
6353 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
6354 &first_group, &first_cluster);
6355 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
6356 &last_group, &last_cluster);
6357
6358
6359 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6360
6361 for (group = first_group; group <= last_group; group++) {
6362 grp = ext4_get_group_info(sb, group);
6363
6364 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
6365 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
6366 if (ret)
6367 break;
6368 }
6369
6370
6371
6372
6373
6374
6375
6376 if (group == last_group)
6377 end = last_cluster;
6378
6379 if (grp->bb_free >= minlen) {
6380 cnt = ext4_trim_all_free(sb, group, first_cluster,
6381 end, minlen);
6382 if (cnt < 0) {
6383 ret = cnt;
6384 break;
6385 }
6386 trimmed += cnt;
6387 }
6388
6389
6390
6391
6392
6393 first_cluster = 0;
6394 }
6395
6396 if (!ret)
6397 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
6398
6399out:
6400 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
6401 return ret;
6402}
6403
6404
6405int
6406ext4_mballoc_query_range(
6407 struct super_block *sb,
6408 ext4_group_t group,
6409 ext4_grpblk_t start,
6410 ext4_grpblk_t end,
6411 ext4_mballoc_query_range_fn formatter,
6412 void *priv)
6413{
6414 void *bitmap;
6415 ext4_grpblk_t next;
6416 struct ext4_buddy e4b;
6417 int error;
6418
6419 error = ext4_mb_load_buddy(sb, group, &e4b);
6420 if (error)
6421 return error;
6422 bitmap = e4b.bd_bitmap;
6423
6424 ext4_lock_group(sb, group);
6425
6426 start = (e4b.bd_info->bb_first_free > start) ?
6427 e4b.bd_info->bb_first_free : start;
6428 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
6429 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
6430
6431 while (start <= end) {
6432 start = mb_find_next_zero_bit(bitmap, end + 1, start);
6433 if (start > end)
6434 break;
6435 next = mb_find_next_bit(bitmap, end + 1, start);
6436
6437 ext4_unlock_group(sb, group);
6438 error = formatter(sb, group, start, next - start, priv);
6439 if (error)
6440 goto out_unload;
6441 ext4_lock_group(sb, group);
6442
6443 start = next + 1;
6444 }
6445
6446 ext4_unlock_group(sb, group);
6447out_unload:
6448 ext4_mb_unload_buddy(&e4b);
6449
6450 return error;
6451}
6452