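/*
 * mballoc.c -- the ext4 multi-block allocation routines
 */
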
#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>

#ifdef CONFIG_EXT4_DEBUG
ushort ext4_mballoc_debug __read_mostly;

module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
#endif
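/*
 * For each block group mballoc keeps an in-core copy of the on-disk
 * block bitmap together with "buddy" bitmaps that describe free
 * extents at every power-of-two order.  Both are cached in pages of
 * the s_buddy_cache inode: even-numbered blocks hold bitmaps and the
 * following odd-numbered blocks hold the matching buddy data.
 */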
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/*
 * We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each unique s_blocksize_bits.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);

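/*
 * The mb_* bit helpers align the buffer address down to an unsigned
 * long boundary and fold the dropped low bytes into the bit offset
 * before calling the underlying ext4 bitops.
 */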
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on some architectures (e.g. powerpc) needs the
	 * corrected, long-aligned address computed above.
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}

#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* at most one of the two halves may be free */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* a free chunk here must have both halves busy below */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
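
/*
 * Divide blocks started from @first with length @len into
 * smaller chunks with power of 2 blocks.
 * Clear the bits in bitmap which the blocks of the chunk(s) covered,
 * then increase bb_counters[] for corresponded chunk size.
 */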
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned int border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find how many blocks can be covered since this position */
		max = ffs(first | border) - 1;

		/* find how many blocks of power 2 we need to mark */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}

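/*
 * Cache the order of the largest free extent we have available in this
 * block group.
 */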
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	int i;
	int bits;

	grp->bb_largest_free_order = -1; /* uninit */

	bits = sb->s_blocksize_bits + 1;
	for (i = bits; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* initialize buddy from bitmap which is aggregation
	 * of on-disk bitmap and preallocations */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "block bitmap and bg descriptor "
				      "inconsistent: %u vs %u free clusters",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, we consider group descriptor
		 * corrupt and update bb_free using bitmap value
		 */
		grp->bb_free = free;
		ext4_mark_group_bitmap_corrupted(sb, group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	spin_lock(&sbi->s_bal_lock);
	sbi->s_mb_buddies_generated++;
	sbi->s_mb_generation_time += period;
	spin_unlock(&sbi->s_bal_lock);
}

static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
	int count;
	int order = 1;
	void *buddy;

	while ((buddy = mb_find_buddy(e4b, order++, &count))) {
		ext4_set_bits(buddy, 0, count);
	}
	e4b->bd_info->bb_fragments = 0;
	memset(e4b->bd_info->bb_counters, 0,
		sizeof(*e4b->bd_info->bb_counters) *
		(e4b->bd_sb->s_blocksize_bits + 2));

	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
		e4b->bd_bitmap, e4b->bd_group);
}
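
/*
 * The buddy information is attached to the buddy cache inode so that
 * it can be accessed through the page cache.  The bitmap and buddy
 * data of a group live in consecutive blocks: group N's bitmap is in
 * block 2*N and its buddy data in block 2*N + 1, so one page may hold
 * the bitmap/buddy blocks of several groups when the block size is
 * smaller than the page size.  This routine initializes one such page:
 * for an even block it copies the on-disk bitmap (plus preallocations
 * and the free list), for an odd block it generates the buddy data
 * from the bitmap just placed in @incore.
 */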
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = i_blocksize(inode);
	blocks_per_page = PAGE_SIZE / blocksize;

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps */
	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, gfp);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		/*
		 * If page is uptodate then we came here after online resize
		 * which added some new uninitialized group info structs, so
		 * we must skip all initialized uptodate buddies on the page,
		 * which may be currently in use by an allocating task.
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		bh[i] = ext4_read_block_bitmap_nowait(sb, group);
		if (IS_ERR(bh[i])) {
			err = PTR_ERR(bh[i]);
			bh[i] = NULL;
			goto out;
		}
		mb_debug(1, "read bitmap for group %u\n", group);
	}

	/* wait for I/O completion */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		int err2;

		if (!bh[i])
			continue;
		err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
		if (!err)
			err = err2;
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skip initialized uptodate buddy */
			continue;

		if (!buffer_verified(bh[group - first_group]))
			/* Skip faulty bitmaps */
			continue;
		err = 0;

		/*
		 * data carry information regarding this
		 * particular group in the format specified
		 * above
		 */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		/*
		 * We place the buddy block and bitmap block
		 * close together
		 */
		if ((first_block + i) & 1) {
			/* this is block of buddy */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			/*
			 * incore got set to the group block bitmap below
			 */
			ext4_lock_group(sb, group);
			/* init the buddy */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* this is block of bitmap */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark all preallocated blks used in in-core bitmap */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* set incore so that the buddy information can be
			 * generated using this
			 */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
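
/*
 * Lock the buddy and bitmap pages.  This makes sure that parallel
 * init_group on the same buddy page cannot run while we hold the page
 * lock.  On success the locked pages are returned in the e4b struct;
 * if buddy and bitmap share a page, e4b->bd_buddy_page is left NULL
 * and only the locked bitmap page is returned.
 */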
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2) {
		/* buddy and bitmap are on the same page */
		return 0;
	}

	block++;
	pnum = block / blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}

static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page) {
		unlock_page(e4b->bd_bitmap_page);
		put_page(e4b->bd_bitmap_page);
	}
	if (e4b->bd_buddy_page) {
		unlock_page(e4b->bd_buddy_page);
		put_page(e4b->bd_buddy_page);
	}
}
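
/*
 * Locking note: This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the BG lock
 * when calling this routine!
 */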
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{

	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	might_sleep();
	mb_debug(1, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * This ensures that we don't reinit the buddy cache
	 * page which map to the group from which we are already
	 * allocating. If we are looking at the buddy cache we would
	 * have taken a reference using ext4_mb_load_buddy and that
	 * would have pinned the buddy page to the page cache.
	 * The call to ext4_mb_get_buddy_page_lock will mark the
	 * page accessed.
	 */
	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group
		 * return without doing anything
		 */
		goto err;
	}

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	if (e4b.bd_buddy_page == NULL) {
		/*
		 * If both the bitmap and buddy are in
		 * the same page we don't need to force
		 * init the buddy
		 */
		ret = 0;
		goto err;
	}
	/* init buddy cache */
	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}
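
/*
 * Locking note: This routine may call ext4_mb_init_group(), which takes
 * the block group lock of all groups for this page; do not hold the BG
 * lock when calling this routine!
 */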
static noinline_for_stack int
ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
		       struct ext4_buddy *e4b, gfp_t gfp)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	might_sleep();
	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = grp;
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/*
		 * we need full data about the group
		 * to make a good selection
		 */
		ret = ext4_mb_init_group(sb, group, gfp);
		if (ret)
			return ret;
	}

	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* we could use find_or_create_page(), but it locks page
	 * what we'd like to avoid in fast path ... */
	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the page reference and try
			 * to get the page with lock. If we
			 * are not uptodate that implies
			 * somebody just created the page but
			 * is yet to initialize the same. So
			 * wait for it to initialize.
			 */
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL, gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
							 gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	if (page)
		put_page(page);
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}

static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
			      struct ext4_buddy *e4b)
{
	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
}
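
/*
 * Return the first buddy order (>= 1) at which the chunk containing
 * @block is marked free, or 0 if the block is not covered by a free
 * buddy chunk at any order.
 */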
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1;
	int bb_incr = 1 << (e4b->bd_blkbits - 1);
	void *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	bb = e4b->bd_buddy;
	while (order <= e4b->bd_blkbits + 1) {
		block = block >> 1;
		if (!mb_test_bit(block, bb)) {
			/* this block is part of buddy of order 'order' */
			return order;
		}
		bb += bb_incr;
		bb_incr >>= 1;
		order++;
	}
	return 0;
}

static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}

/* clear bits in given range
 * will return first found zero bit if any, -1 otherwise
 */
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;
	int zero_bit = -1;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			if (*addr != (__u32)(-1) && zero_bit == -1)
				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
			*addr = 0;
			cur += 32;
			continue;
		}
		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
			zero_bit = cur;
		cur++;
	}

	return zero_bit;
}

void ext4_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: set whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}
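
/*
 * Helper for mb_buddy_mark_free(): resolve one border of the
 * [first, last] range at the current buddy order.  If the chunk just
 * outside the border is busy, the border chunk becomes a free chunk of
 * this order (+1); if it is free, the two merge into a chunk of the
 * next order (-1).  The return value is the net change to
 * bb_counters[] for this order.
 */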
static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	}
	else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}

static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
	int max;
	int order = 1;
	void *buddy = mb_find_buddy(e4b, order, &max);

	while (buddy) {
		void *buddy2;

		/*
		 * Walk up the buddy orders.  At each order an odd 'first'
		 * or an even 'last' is a border that cannot merge with its
		 * buddy inside the range, so it is resolved here via
		 * mb_buddy_adjust_border(); the remaining aligned middle
		 * of the range is carried to the next order until it is
		 * empty or can be cleared whole.
		 */
		if (first & 1)
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
		if (!(last & 1))
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
		if (first > last)
			break;
		order++;

		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
			mb_clear_bits(buddy, first, last - first + 1);
			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
			break;
		}
		first >>= 1;
		last >>= 1;
		buddy = buddy2;
	}
}

static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int left_is_free = 0;
	int right_is_free = 0;
	int block;
	int last = first + count - 1;
	struct super_block *sb = e4b->bd_sb;

	if (WARN_ON(count == 0))
		return;
	BUG_ON(last >= (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	/* Don't bother if the block group is corrupt. */
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
		return;

	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* access memory sequentially: check left neighbour,
	 * clear range and then check right neighbour
	 */
	if (first != 0)
		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);

	if (unlikely(block != -1)) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		ext4_fsblk_t blocknr;

		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
		blocknr += EXT4_C2B(sbi, block);
		ext4_grp_locked_error(sb, e4b->bd_group,
				      inode ? inode->i_ino : 0,
				      blocknr,
				      "freeing already freed block "
				      "(bit %u); block bitmap corrupt.",
				      block);
		ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
		mb_regenerate_buddy(e4b);
		goto done;
	}

	/* let's maintain fragments counter */
	if (left_is_free && right_is_free)
		e4b->bd_info->bb_fragments--;
	else if (!left_is_free && !right_is_free)
		e4b->bd_info->bb_fragments++;

	/* buddy[0] == bd_bitmap is a special case: if the freed range
	 * starts on an odd bit or ends on an even bit, the unmergeable
	 * border block(s) are accounted in the order-0 counters here;
	 * mb_buddy_mark_free() then handles the aligned remainder at
	 * higher orders.
	 */
	if (first & 1) {
		first += !left_is_free;
		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
	}
	if (!(last & 1)) {
		last -= !right_is_free;
		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
	}

	if (first <= last)
		mb_buddy_mark_free(e4b, first >> 1, last >> 1);

done:
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}

static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find actual order */
	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* calc difference from given start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       mb_find_buddy(e4b, order, &max)) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
		/* Should never happen! (but apparently sometimes does?!?) */
		WARN_ON(1);
		ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
			   "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
			   block, order, needed, ex->fe_group, ex->fe_start,
			   ex->fe_len, ex->fe_logical);
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
	}
	return ex->fe_len;
}

static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* let's maintain fragments counter */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* let's maintain buddy itself */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk may be allocated at once! */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* store for history */
		if (ret == 0)
			ret = len | (ord << 16);

		/* we have to split large buddy */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}

/*
 * Must be called under group lock!
 */
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, thus we store actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * take the page reference. We want the page to be pinned
	 * so that we don't get a ext4_mb_init_cache call for this
	 * group until we update the bitmap. That would mean we
	 * double allocate blocks. The reference is dropped
	 * in ext4_mb_release_context
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);
	/* store last allocated for subsequent stream allocation */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}

/*
 * regular allocator, for general purposes allocation
 */

static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;
	/*
	 * We don't want to scan for a whole year
	 */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/*
	 * Haven't found good chunk so far, let's continue
	 */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* recheck chunk's availability - we don't know
		 * when it was found (within this lock-unlock
		 * period or not) */
		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}
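
/*
 * The routine checks whether found extent is good enough. If it is,
 * then the extent gets marked used and flag is set to the context
 * to stop scanning. Otherwise, the extent is compared with the
 * previous found extent and if new one is better, then it's stored
 * in the context. Later, the best found extent will be used, if
 * mballoc can't find good enough extent.
 */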
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If new found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than previous best one is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfies the request, but is
		 * smaller than previous one */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}

static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;
	if (grp->bb_free == 0)
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
		ext4_mb_unload_buddy(e4b);
		return 0;
	}

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);
	ex.fe_logical = 0xDEADFA11; /* debug value */

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, caller may want to merge even small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
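
/*
 * The routine scans buddy structures (not bitmap!) from given order
 * to max order and tries to find big enough chunk to satisfy the req
 */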
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		BUG_ON(k >= max);

		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}
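
/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, caller must pass number of
 * free blocks in the group, so the routine can know upper limit.
 */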
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_CLUSTERS_PER_GROUP(sb), i);
		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
			/*
			 * If we have a corrupt bitmap, we won't find any
			 * free blocks even though group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But bitmap says 0",
					free);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			break;
		}

		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			/*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}
		ex.fe_logical = 0xDEADC0DE; /* debug value */
		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}
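
/*
 * This is a special case for storages like raid5
 * we try to find stripe-aligned chunks for stripe-size-multiple requests
 */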
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ex.fe_logical = 0xDEADF00D; /* debug value */
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}
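
/*
 * This is called BEFORE we load the buddy bitmap.
 * Returns either 1 or 0 indicating that the group is either suitable
 * for the allocation or not. In addition it can also return negative
 * error code when something goes wrong.
 */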
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	unsigned free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	free = grp->bb_free;
	if (free == 0)
		return 0;
	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
		return 0;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return 0;

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
		if (ret)
			return ret;
	}

	fragments = grp->bb_fragments;
	if (fragments == 0)
		return 0;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return 0;

		if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
		    (free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;

		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		return 1;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 3:
		return 1;
	default:
		BUG();
	}

	return 0;
}

static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0, first_err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the fe_len is a power of 2
	 * if ac->ac_2order is set we also set criteria to 0 so that we
	 * try exact allocation using buddy.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the request
	 * is greater than equal to the sbi_s_mb_order2_reqs
	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
	 * We also support searching for power-of-two requests only for
	 * requests upto maximum buddy size we have constructed.
	 */
	if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
		/*
		 * This should tell if fe_len is exactly power of 2
		 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = array_index_nospec(i - 1,
							   sb->s_blocksize_bits + 2);
	}

	/* if stream allocation is enabled, use global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group start
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			int ret = 0;
			cond_resched();
			/*
			 * Artificially restricted ngroups for non-extent
			 * files makes group > ngroups possible on first loop.
			 */
			if (group >= ngroups)
				group = 0;

			/* This now checks without needing the buddy page */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				if (!first_err)
					first_err = ret;
				continue;
			}

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				if (!first_err)
					first_err = ret;
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long. Let's try to allocate
		 * the best chunk we've found so far
		 */
		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take first
			 * found block(s)
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
		err = first_err;
	return err;
}

static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	ext4_group_t group;

	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	ext4_group_t group;

	++*pos;
	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = PDE_DATA(file_inode(seq->file));
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err, buddy_loaded = 0;
	struct ext4_buddy e4b;
	struct ext4_group_info *grinfo;
	unsigned char blocksize_bits = min_t(unsigned char,
					     sb->s_blocksize_bits,
					     EXT4_MAX_BLOCK_LOG_SIZE);
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
	} sg;

	group--;
	if (group == 0)
		seq_puts(seq, "#group: free  frags first ["
			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");

	i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);

	grinfo = ext4_get_group_info(sb, group);
	/* Load the group info in memory only if not initialized. */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			seq_printf(seq, "#%-5u: I/O error\n", group);
			return 0;
		}
		buddy_loaded = 1;
	}

	memcpy(&sg, ext4_get_group_info(sb, group), i);

	if (buddy_loaded)
		ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_printf(seq, " ]\n");

	return 0;
}

static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}

const struct seq_operations ext4_mb_seq_groups_ops = {
	.start  = ext4_mb_seq_groups_start,
	.next   = ext4_mb_seq_groups_next,
	.stop   = ext4_mb_seq_groups_stop,
	.show   = ext4_mb_seq_groups_show,
};

static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];

	BUG_ON(!cachep);
	return cachep;
}
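
/*
 * Allocate the top-level s_group_info array for the specified number
 * of groups
 */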
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned size;
	struct ext4_group_info ***new_groupinfo;

	size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
		EXT4_DESC_PER_BLOCK_BITS(sb);
	if (size <= sbi->s_group_info_size)
		return 0;

	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
	new_groupinfo = kvzalloc(size, GFP_KERNEL);
	if (!new_groupinfo) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
		return -ENOMEM;
	}
	if (sbi->s_group_info) {
		memcpy(new_groupinfo, sbi->s_group_info,
		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
		kvfree(sbi->s_group_info);
	}
	sbi->s_group_info = new_groupinfo;
	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
	ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
		   sbi->s_group_info_size);
	return 0;
}

/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
			  struct ext4_group_desc *desc)
{
	int i;
	int metalen = 0;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info **meta_group_info;
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	/*
	 * First check if this group is the first of a reserved block.
	 * If it's true, we have to allocate a new table of pointers
	 * to ext4_group_info structures
	 */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		metalen = sizeof(*meta_group_info) <<
			EXT4_DESC_PER_BLOCK_BITS(sb);
		meta_group_info = kmalloc(metalen, GFP_NOFS);
		if (meta_group_info == NULL) {
			ext4_msg(sb, KERN_ERR, "can't allocate mem "
				 "for a buddy group");
			goto exit_meta_group_info;
		}
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
			meta_group_info;
	}

	meta_group_info =
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
	if (meta_group_info[i] == NULL) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
		goto exit_group_info;
	}
	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
		&(meta_group_info[i]->bb_state));

	/*
	 * initialize bb_free to be able to skip
	 * empty groups without initialization
	 */
	if (ext4_has_group_desc_csum(sb) &&
	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
		meta_group_info[i]->bb_free =
			ext4_free_clusters_after_init(sb, group, desc);
	} else {
		meta_group_info[i]->bb_free =
			ext4_free_group_clusters(sb, desc);
	}

	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */

#ifdef DOUBLE_CHECK
	{
		struct buffer_head *bh;
		meta_group_info[i]->bb_bitmap =
			kmalloc(sb->s_blocksize, GFP_NOFS);
		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
		bh = ext4_read_block_bitmap(sb, group);
		BUG_ON(IS_ERR_OR_NULL(bh));
		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
			sb->s_blocksize);
		put_bh(bh);
	}
#endif

	return 0;

exit_group_info:
	/* If a meta_group_info table has been allocated, release it now */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
	}
exit_meta_group_info:
	return -ENOMEM;
}

static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;
	struct ext4_group_desc *desc;
	struct kmem_cache *cachep;

	err = ext4_mb_alloc_groupinfo(sb, ngroups);
	if (err)
		return err;

	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		ext4_msg(sb, KERN_ERR, "can't get new inode");
		goto err_freesgi;
	}
	/* To avoid potentially colliding with a valid on-disk inode number,
	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
	 * not in the inode hash, so it should never be found by iget(), but
	 * this will avoid confusion if it ever shows up during debugging. */
	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		cond_resched();
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = sbi->s_group_info_size;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	kvfree(sbi->s_group_info);
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	ext4_groupinfo_caches[cache_index] = cachep;

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG
		       "EXT4-fs: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	return 0;
}

int ext4_mb_init(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset, offset_incr;
	unsigned max;
	int ret;

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	i = 1;
	offset = 0;
	offset_incr = 1 << (sb->s_blocksize_bits - 1);
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += offset_incr;
		offset_incr = offset_incr >> 1;
		max = max >> 1;
		i++;
	} while (i <= sb->s_blocksize_bits + 1);

	spin_lock_init(&sbi->s_md_lock);
	spin_lock_init(&sbi->s_bal_lock);
	sbi->s_mb_free_pending = 0;
	INIT_LIST_HEAD(&sbi->s_freed_data_list);

	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;

	/*
	 * The default group preallocation is 512, which for 4k block
	 * sizes translates to 2 megabytes.  However for bigalloc file
	 * systems, this is probably too big (i.e, if the cluster size
	 * is 1 megabyte, then group preallocation size becomes half a
	 * gigabyte!).  As a default, we will keep a two megabyte
	 * group prealloc size for cluster sizes up to 64k, and after
	 * that, we will force a minimum group preallocation size of
	 * 32 clusters.  This translates to 8 megs when the cluster
	 * size is 256k, and 32 megs when the cluster size is 1 meg,
	 * which seems reasonable as a default.
	 */
	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
				       sbi->s_cluster_bits, 32);
	/*
	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
	 * to the lowest multiple of s_stripe which is bigger than
	 * the s_mb_group_prealloc as determined above. We want
	 * the preallocation size to be an exact multiple of the
	 * RAID stripe size so that preallocations don't fragment
	 * the stripes.
	 */
	if (sbi->s_stripe > 1) {
		sbi->s_mb_group_prealloc = roundup(
			sbi->s_mb_group_prealloc, sbi->s_stripe);
	}

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	/* init file for buddy data */
	ret = ext4_mb_init_backend(sb);
	if (ret != 0)
		goto out_free_locality_groups;

	return 0;

out_free_locality_groups:
	free_percpu(sbi->s_locality_groups);
	sbi->s_locality_groups = NULL;
out:
	kfree(sbi->s_mb_offsets);
	sbi->s_mb_offsets = NULL;
	kfree(sbi->s_mb_maxs);
	sbi->s_mb_maxs = NULL;
	return ret;
}

/* needs to be called with the ext4 group lock held */
static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
{
	struct ext4_prealloc_space *pa;
	struct list_head *cur, *tmp;
	int count = 0;

	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		list_del(&pa->pa_group_list);
		count++;
		kmem_cache_free(ext4_pspace_cachep, pa);
	}
	if (count)
		mb_debug(1, "mballoc: %u PAs left\n", count);

}
2697
2698int ext4_mb_release(struct super_block *sb)
2699{
2700 ext4_group_t ngroups = ext4_get_groups_count(sb);
2701 ext4_group_t i;
2702 int num_meta_group_infos;
2703 struct ext4_group_info *grinfo;
2704 struct ext4_sb_info *sbi = EXT4_SB(sb);
2705 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2706
2707 if (sbi->s_group_info) {
2708 for (i = 0; i < ngroups; i++) {
2709 cond_resched();
2710 grinfo = ext4_get_group_info(sb, i);
2711#ifdef DOUBLE_CHECK
2712 kfree(grinfo->bb_bitmap);
2713#endif
2714 ext4_lock_group(sb, i);
2715 ext4_mb_cleanup_pa(grinfo);
2716 ext4_unlock_group(sb, i);
2717 kmem_cache_free(cachep, grinfo);
2718 }
2719 num_meta_group_infos = (ngroups +
2720 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2721 EXT4_DESC_PER_BLOCK_BITS(sb);
2722 for (i = 0; i < num_meta_group_infos; i++)
2723 kfree(sbi->s_group_info[i]);
2724 kvfree(sbi->s_group_info);
2725 }
2726 kfree(sbi->s_mb_offsets);
2727 kfree(sbi->s_mb_maxs);
2728 iput(sbi->s_buddy_cache);
2729 if (sbi->s_mb_stats) {
2730 ext4_msg(sb, KERN_INFO,
2731 "mballoc: %u blocks %u reqs (%u success)",
2732 atomic_read(&sbi->s_bal_allocated),
2733 atomic_read(&sbi->s_bal_reqs),
2734 atomic_read(&sbi->s_bal_success));
2735 ext4_msg(sb, KERN_INFO,
2736 "mballoc: %u extents scanned, %u goal hits, "
2737 "%u 2^N hits, %u breaks, %u lost",
2738 atomic_read(&sbi->s_bal_ex_scanned),
2739 atomic_read(&sbi->s_bal_goals),
2740 atomic_read(&sbi->s_bal_2orders),
2741 atomic_read(&sbi->s_bal_breaks),
2742 atomic_read(&sbi->s_mb_lost_chunks));
2743 ext4_msg(sb, KERN_INFO,
2744 "mballoc: %lu generated and it took %Lu",
2745 sbi->s_mb_buddies_generated,
2746 sbi->s_mb_generation_time);
2747 ext4_msg(sb, KERN_INFO,
2748 "mballoc: %u preallocated, %u discarded",
2749 atomic_read(&sbi->s_mb_preallocated),
2750 atomic_read(&sbi->s_mb_discarded));
2751 }
2752
2753 free_percpu(sbi->s_locality_groups);
2754
2755 return 0;
2756}
2757
2758static inline int ext4_issue_discard(struct super_block *sb,
2759 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
2760 struct bio **biop)
2761{
2762 ext4_fsblk_t discard_block;
2763
2764 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2765 ext4_group_first_block_no(sb, block_group));
2766 count = EXT4_C2B(EXT4_SB(sb), count);
2767 trace_ext4_discard_blocks(sb,
2768 (unsigned long long) discard_block, count);
2769 if (biop) {
2770 return __blkdev_issue_discard(sb->s_bdev,
2771 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
2772 (sector_t)count << (sb->s_blocksize_bits - 9),
2773 GFP_NOFS, 0, biop);
2774 } else
2775 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2776}
2777
2778static void ext4_free_data_in_buddy(struct super_block *sb,
2779 struct ext4_free_data *entry)
2780{
2781 struct ext4_buddy e4b;
2782 struct ext4_group_info *db;
2783 int err, count = 0, count2 = 0;
2784
	mb_debug(1, "going to free %u blocks in group %u (0x%p):",
2786 entry->efd_count, entry->efd_group, entry);
2787
2788 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2789
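	/* the buddy pages are pinned in memory, so loading cannot fail here */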
2790 BUG_ON(err != 0);
2791
2792 spin_lock(&EXT4_SB(sb)->s_md_lock);
2793 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
2794 spin_unlock(&EXT4_SB(sb)->s_md_lock);
2795
2796 db = e4b.bd_info;
2797
2798 count += entry->efd_count;
2799 count2++;
2800 ext4_lock_group(sb, entry->efd_group);
2801
2802 rb_erase(&entry->efd_node, &(db->bb_free_root));
2803 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2804
	/*
	 * Clear the trimmed flag for the group so that the next
	 * ext4_trim_fs() can trim it.  If the volume is mounted with
	 * -o discard, the freed ranges are discarded online instead.
	 */
2811 if (!test_opt(sb, DISCARD))
2812 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2813
2814 if (!db->bb_free_root.rb_node) {
		/*
		 * No freed extents left for this group: drop the page
		 * references taken in ext4_mb_free_metadata().
		 */
2818 put_page(e4b.bd_buddy_page);
2819 put_page(e4b.bd_bitmap_page);
2820 }
2821 ext4_unlock_group(sb, entry->efd_group);
2822 kmem_cache_free(ext4_free_data_cachep, entry);
2823 ext4_mb_unload_buddy(&e4b);
2824
2825 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2826}
2827
/*
 * Called from the jbd2 commit path once transaction @commit_tid has
 * committed: blocks freed in that transaction can now be discarded
 * (when requested) and returned to the buddy.
 */
2832void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
2833{
2834 struct ext4_sb_info *sbi = EXT4_SB(sb);
2835 struct ext4_free_data *entry, *tmp;
2836 struct bio *discard_bio = NULL;
2837 struct list_head freed_data_list;
2838 struct list_head *cut_pos = NULL;
2839 int err;
2840
2841 INIT_LIST_HEAD(&freed_data_list);
2842
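	/* splice this commit's entries off the global freed-data list */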
2843 spin_lock(&sbi->s_md_lock);
2844 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
2845 if (entry->efd_tid != commit_tid)
2846 break;
2847 cut_pos = &entry->efd_list;
2848 }
2849 if (cut_pos)
2850 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
2851 cut_pos);
2852 spin_unlock(&sbi->s_md_lock);
2853
2854 if (test_opt(sb, DISCARD)) {
2855 list_for_each_entry(entry, &freed_data_list, efd_list) {
2856 err = ext4_issue_discard(sb, entry->efd_group,
2857 entry->efd_start_cluster,
2858 entry->efd_count,
2859 &discard_bio);
2860 if (err && err != -EOPNOTSUPP) {
2861 ext4_msg(sb, KERN_WARNING, "discard request in"
2862 " group:%d block:%d count:%d failed"
2863 " with %d", entry->efd_group,
2864 entry->efd_start_cluster,
2865 entry->efd_count, err);
2866 } else if (err == -EOPNOTSUPP)
2867 break;
2868 }
2869
2870 if (discard_bio) {
2871 submit_bio_wait(discard_bio);
2872 bio_put(discard_bio);
2873 }
2874 }
2875
2876 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
2877 ext4_free_data_in_buddy(sb, entry);
2878}
2879
2880int __init ext4_init_mballoc(void)
2881{
2882 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2883 SLAB_RECLAIM_ACCOUNT);
2884 if (ext4_pspace_cachep == NULL)
2885 return -ENOMEM;
2886
2887 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2888 SLAB_RECLAIM_ACCOUNT);
2889 if (ext4_ac_cachep == NULL) {
2890 kmem_cache_destroy(ext4_pspace_cachep);
2891 return -ENOMEM;
2892 }
2893
2894 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2895 SLAB_RECLAIM_ACCOUNT);
2896 if (ext4_free_data_cachep == NULL) {
2897 kmem_cache_destroy(ext4_pspace_cachep);
2898 kmem_cache_destroy(ext4_ac_cachep);
2899 return -ENOMEM;
2900 }
2901 return 0;
2902}
2903
2904void ext4_exit_mballoc(void)
2905{
	/*
	 * Wait for pending call_rcu() callbacks that free preallocation
	 * descriptors to finish before destroying the slab caches.
	 */
2910 rcu_barrier();
2911 kmem_cache_destroy(ext4_pspace_cachep);
2912 kmem_cache_destroy(ext4_ac_cachep);
2913 kmem_cache_destroy(ext4_free_data_cachep);
2914 ext4_groupinfo_destroy_slabs();
2915}
2916
/*
 * Mark the blocks chosen in ac->ac_b_ex as used in the on-disk bitmap
 * and update the group descriptor and free-cluster counters to match.
 * Returns 0 on success or a negative error code.
 */
2922static noinline_for_stack int
2923ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2924 handle_t *handle, unsigned int reserv_clstrs)
2925{
2926 struct buffer_head *bitmap_bh = NULL;
2927 struct ext4_group_desc *gdp;
2928 struct buffer_head *gdp_bh;
2929 struct ext4_sb_info *sbi;
2930 struct super_block *sb;
2931 ext4_fsblk_t block;
2932 int err, len;
2933
2934 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2935 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2936
2937 sb = ac->ac_sb;
2938 sbi = EXT4_SB(sb);
2939
2940 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2941 if (IS_ERR(bitmap_bh)) {
2942 err = PTR_ERR(bitmap_bh);
2943 bitmap_bh = NULL;
2944 goto out_err;
2945 }
2946
2947 BUFFER_TRACE(bitmap_bh, "getting write access");
2948 err = ext4_journal_get_write_access(handle, bitmap_bh);
2949 if (err)
2950 goto out_err;
2951
2952 err = -EIO;
2953 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2954 if (!gdp)
2955 goto out_err;
2956
2957 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2958 ext4_free_group_clusters(sb, gdp));
2959
2960 BUFFER_TRACE(gdp_bh, "get_write_access");
2961 err = ext4_journal_get_write_access(handle, gdp_bh);
2962 if (err)
2963 goto out_err;
2964
2965 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2966
2967 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2968 if (!ext4_data_block_valid(sbi, block, len)) {
2969 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2970 "fs metadata", block, block+len);
		/*
		 * The file system is not mounted to panic on error, so
		 * fix up the bitmap (leaking the blocks) and return
		 * -EFSCORRUPTED.
		 */
2975 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2976 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2977 ac->ac_b_ex.fe_len);
2978 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2979 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2980 if (!err)
2981 err = -EFSCORRUPTED;
2982 goto out_err;
2983 }
2984
2985 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2986#ifdef AGGRESSIVE_CHECK
2987 {
2988 int i;
2989 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2990 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2991 bitmap_bh->b_data));
2992 }
2993 }
2994#endif
2995 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2996 ac->ac_b_ex.fe_len);
2997 if (ext4_has_group_desc_csum(sb) &&
2998 (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
2999 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3000 ext4_free_group_clusters_set(sb, gdp,
3001 ext4_free_clusters_after_init(sb,
3002 ac->ac_b_ex.fe_group, gdp));
3003 }
3004 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3005 ext4_free_group_clusters_set(sb, gdp, len);
3006 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3007 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3008
3009 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3010 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);

	/*
	 * now reduce the dirty cluster count as well; it should never
	 * go negative
	 */
3014 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release all the reserved clusters for non-delalloc */
3016 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3017 reserv_clstrs);
3018
3019 if (sbi->s_log_groups_per_flex) {
3020 ext4_group_t flex_group = ext4_flex_group(sbi,
3021 ac->ac_b_ex.fe_group);
3022 atomic64_sub(ac->ac_b_ex.fe_len,
3023 &sbi->s_flex_groups[flex_group].free_clusters);
3024 }
3025
3026 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3027 if (err)
3028 goto out_err;
3029 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3030
3031out_err:
3032 brelse(bitmap_bh);
3033 return err;
3034}
3035
/*
 * Normalize a request for the locality group: the goal length is
 * simply the configured group preallocation size, tunable via
 * /sys/fs/ext4/<partition>/mb_group_prealloc.
 */
3045static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3046{
3047 struct super_block *sb = ac->ac_sb;
3048 struct ext4_locality_group *lg = ac->ac_lg;
3049
3050 BUG_ON(lg == NULL);
3051 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3052 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3053 current->pid, ac->ac_g_ex.fe_len);
3054}
3055
/*
 * Normalization means improving the request in terms of size and
 * alignment, based on a prediction of how large the file will grow.
 */
3060static noinline_for_stack void
3061ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3062 struct ext4_allocation_request *ar)
3063{
3064 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3065 int bsbits, max;
3066 ext4_lblk_t end;
3067 loff_t size, start_off;
3068 loff_t orig_size __maybe_unused;
3069 ext4_lblk_t start;
3070 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3071 struct ext4_prealloc_space *pa;
3072
	/* normalize only data requests; metadata requests
	 * do not need preallocation */
3075 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3076 return;
3077
	/* sometimes the caller wants exactly these blocks */
3079 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3080 return;
3081
	/* the caller may have asked for no preallocation
	 * (a file tail, for example) */
3084 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3085 return;
3086
3087 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3088 ext4_mb_normalize_group_request(ac);
		return;
3090 }
3091
3092 bsbits = ac->ac_sb->s_blocksize_bits;
3093
	/* first, determine the actual end of the file */
3096 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3097 size = size << bsbits;
3098 if (size < i_size_read(ac->ac_inode))
3099 size = i_size_read(ac->ac_inode);
3100 orig_size = size;
3101
	/* max size of a free chunk */
3103 max = 2 << bsbits;
3104
3105#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3106 (req <= (size) || max <= (chunk_size))
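/* (a request falls into a size bucket when it fits under @size, or --
 * presumably to bound the table for small block sizes -- when @max
 * does not exceed the bucket's @chunk_size) */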
3107
	/* first, try to predict the final file size */
3110 start_off = 0;
3111 if (size <= 16 * 1024) {
3112 size = 16 * 1024;
3113 } else if (size <= 32 * 1024) {
3114 size = 32 * 1024;
3115 } else if (size <= 64 * 1024) {
3116 size = 64 * 1024;
3117 } else if (size <= 128 * 1024) {
3118 size = 128 * 1024;
3119 } else if (size <= 256 * 1024) {
3120 size = 256 * 1024;
3121 } else if (size <= 512 * 1024) {
3122 size = 512 * 1024;
3123 } else if (size <= 1024 * 1024) {
3124 size = 1024 * 1024;
3125 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3126 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3127 (21 - bsbits)) << 21;
3128 size = 2 * 1024 * 1024;
3129 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3130 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3131 (22 - bsbits)) << 22;
3132 size = 4 * 1024 * 1024;
3133 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3134 (8<<20)>>bsbits, max, 8 * 1024)) {
3135 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3136 (23 - bsbits)) << 23;
3137 size = 8 * 1024 * 1024;
3138 } else {
3139 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3140 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3141 ac->ac_o_ex.fe_len) << bsbits;
3142 }
3143 size = size >> bsbits;
3144 start = start_off >> bsbits;
3145
	/* don't cover blocks already allocated on either side */
3147 if (ar->pleft && start <= ar->lleft) {
3148 size -= ar->lleft + 1 - start;
3149 start = ar->lleft + 1;
3150 }
3151 if (ar->pright && start + size - 1 >= ar->lright)
3152 size -= start + size - ar->lright;
3153
	/*
	 * Trim the request so it fits within a single group; some file
	 * systems have artificially small groups.
	 */
3158 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3159 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3160
3161 end = start + size;
3162
	/* check that we don't cross this inode's existing preallocations */
3164 rcu_read_lock();
3165 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3166 ext4_lblk_t pa_end;
3167
3168 if (pa->pa_deleted)
3169 continue;
3170 spin_lock(&pa->pa_lock);
3171 if (pa->pa_deleted) {
3172 spin_unlock(&pa->pa_lock);
3173 continue;
3174 }
3175
3176 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3177 pa->pa_len);
3178
		/* the requested block must lie outside any existing PA */
3180 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3181 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3182
3183
3184 if (pa->pa_lstart >= end || pa_end <= start) {
3185 spin_unlock(&pa->pa_lock);
3186 continue;
3187 }
3188 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3189
		/* shrink the window so it does not overlap this PA */
3191 if (pa_end <= ac->ac_o_ex.fe_logical) {
3192 BUG_ON(pa_end < start);
3193 start = pa_end;
3194 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3195 BUG_ON(pa->pa_lstart > end);
3196 end = pa->pa_lstart;
3197 }
3198 spin_unlock(&pa->pa_lock);
3199 }
3200 rcu_read_unlock();
3201 size = end - start;
3202
	/* XXX: second loop to verify we really avoid all preallocations */
3204 rcu_read_lock();
3205 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3206 ext4_lblk_t pa_end;
3207
3208 spin_lock(&pa->pa_lock);
3209 if (pa->pa_deleted == 0) {
3210 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3211 pa->pa_len);
3212 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3213 }
3214 spin_unlock(&pa->pa_lock);
3215 }
3216 rcu_read_unlock();
3217
3218 if (start + size <= ac->ac_o_ex.fe_logical &&
3219 start > ac->ac_o_ex.fe_logical) {
3220 ext4_msg(ac->ac_sb, KERN_ERR,
3221 "start %lu, size %lu, fe_logical %lu",
3222 (unsigned long) start, (unsigned long) size,
3223 (unsigned long) ac->ac_o_ex.fe_logical);
3224 BUG();
3225 }
3226 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3227
	/* now prepare the goal request */
3232 ac->ac_g_ex.fe_logical = start;
3233 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3234
3235
3236 if (ar->pright && (ar->lright == (start + size))) {
		/* merge with the extent on the right */
3238 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3239 &ac->ac_f_ex.fe_group,
3240 &ac->ac_f_ex.fe_start);
3241 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3242 }
3243 if (ar->pleft && (ar->lleft + 1 == start)) {
		/* merge with the extent on the left */
3245 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3246 &ac->ac_f_ex.fe_group,
3247 &ac->ac_f_ex.fe_start);
3248 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3249 }
3250
3251 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3252 (unsigned) orig_size, (unsigned) start);
3253}
3254
3255static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3256{
3257 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3258
3259 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3260 atomic_inc(&sbi->s_bal_reqs);
3261 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3262 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3263 atomic_inc(&sbi->s_bal_success);
3264 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3265 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3266 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3267 atomic_inc(&sbi->s_bal_goals);
3268 if (ac->ac_found > sbi->s_mb_max_to_scan)
3269 atomic_inc(&sbi->s_bal_breaks);
3270 }
3271
3272 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3273 trace_ext4_mballoc_alloc(ac);
3274 else
3275 trace_ext4_mballoc_prealloc(ac);
3276}
3277
/*
 * Called on allocation failure: undo the in-core use of the blocks.
 * For an inode PA the blocks are returned to the PA; with no PA the
 * freshly allocated extent is released back to the buddy.
 */
3284static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3285{
3286 struct ext4_prealloc_space *pa = ac->ac_pa;
3287 struct ext4_buddy e4b;
3288 int err;
3289
3290 if (pa == NULL) {
3291 if (ac->ac_f_ex.fe_len == 0)
3292 return;
3293 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3294 if (err) {
			/*
			 * This should never happen: the pages are
			 * pinned via the allocation context, so
			 * ext4_mb_load_buddy() is not expected to fail.
			 */
3300 WARN(1, "mb_load_buddy failed (%d)", err);
3301 return;
3302 }
3303 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3304 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3305 ac->ac_f_ex.fe_len);
3306 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3307 ext4_mb_unload_buddy(&e4b);
3308 return;
3309 }
3310 if (pa->pa_type == MB_INODE_PA)
3311 pa->pa_free += ac->ac_b_ex.fe_len;
3312}
3313
/*
 * use blocks preallocated to the inode
 */
3317static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3318 struct ext4_prealloc_space *pa)
3319{
3320 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3321 ext4_fsblk_t start;
3322 ext4_fsblk_t end;
3323 int len;
3324
	/* carve the request out of the preallocated extent */
3326 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3327 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3328 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3329 len = EXT4_NUM_B2C(sbi, end - start);
3330 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3331 &ac->ac_b_ex.fe_start);
3332 ac->ac_b_ex.fe_len = len;
3333 ac->ac_status = AC_STATUS_FOUND;
3334 ac->ac_pa = pa;
3335
3336 BUG_ON(start < pa->pa_pstart);
3337 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3338 BUG_ON(pa->pa_free < len);
3339 pa->pa_free -= len;
3340
3341 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3342}
3343
/*
 * use blocks preallocated to the locality group
 */
3347static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3348 struct ext4_prealloc_space *pa)
3349{
3350 unsigned int len = ac->ac_o_ex.fe_len;
3351
3352 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3353 &ac->ac_b_ex.fe_group,
3354 &ac->ac_b_ex.fe_start);
3355 ac->ac_b_ex.fe_len = len;
3356 ac->ac_status = AC_STATUS_FOUND;
3357 ac->ac_pa = pa;
3358
	/*
	 * pa_pstart and pa_len are left unchanged here to avoid racing
	 * with a concurrent buddy load; they are corrected later, after
	 * the blocks are marked in the on-disk bitmap, in
	 * ext4_mb_release_context().  Other CPUs are kept out of this
	 * PA by lg_mutex.
	 */
3365 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3366}
3367
/*
 * Pick between @pa and the current best @cpa: return whichever
 * preallocation is closer to @goal_block, adjusting refcounts.
 */
3374static struct ext4_prealloc_space *
3375ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3376 struct ext4_prealloc_space *pa,
3377 struct ext4_prealloc_space *cpa)
3378{
3379 ext4_fsblk_t cur_distance, new_distance;
3380
3381 if (cpa == NULL) {
3382 atomic_inc(&pa->pa_count);
3383 return pa;
3384 }
3385 cur_distance = abs(goal_block - cpa->pa_pstart);
3386 new_distance = abs(goal_block - pa->pa_pstart);
3387
3388 if (cur_distance <= new_distance)
3389 return cpa;
3390
	/* drop the reference to the previously chosen PA */
3392 atomic_dec(&cpa->pa_count);
3393 atomic_inc(&pa->pa_count);
3394 return pa;
3395}
3396
/*
 * search for preallocated space that covers the goal block
 */
3400static noinline_for_stack int
3401ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3402{
3403 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3404 int order, i;
3405 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3406 struct ext4_locality_group *lg;
3407 struct ext4_prealloc_space *pa, *cpa = NULL;
3408 ext4_fsblk_t goal_block;
3409
	/* only data requests may use preallocated space */
3411 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3412 return 0;
3413
	/* first, try per-inode preallocation */
3415 rcu_read_lock();
3416 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		/* pa_lstart and pa_len do not change for a given PA, so
		 * this check can be done without taking pa_lock */
3420 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3421 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3422 EXT4_C2B(sbi, pa->pa_len)))
3423 continue;
3424
		/* non-extent files can't have physical blocks past 2^32 */
3426 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3427 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3428 EXT4_MAX_BLOCK_FILE_PHYS))
3429 continue;
3430
		/* found preallocated blocks, use them if still usable */
3432 spin_lock(&pa->pa_lock);
3433 if (pa->pa_deleted == 0 && pa->pa_free) {
3434 atomic_inc(&pa->pa_count);
3435 ext4_mb_use_inode_pa(ac, pa);
3436 spin_unlock(&pa->pa_lock);
3437 ac->ac_criteria = 10;
3438 rcu_read_unlock();
3439 return 1;
3440 }
3441 spin_unlock(&pa->pa_lock);
3442 }
3443 rcu_read_unlock();
3444
	/* can we use group allocation? */
3446 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3447 return 0;
3448
	/* the inode may have no locality group for some reason */
3450 lg = ac->ac_lg;
3451 if (lg == NULL)
3452 return 0;
3453 order = fls(ac->ac_o_ex.fe_len) - 1;
3454 if (order > PREALLOC_TB_SIZE - 1)
		/* the prealloc hash table has only PREALLOC_TB_SIZE buckets */
3456 order = PREALLOC_TB_SIZE - 1;
3457
3458 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3459
	/*
	 * search for the preallocated space with the minimal distance
	 * from the goal block
	 */
3463 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3464 rcu_read_lock();
3465 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3466 pa_inode_list) {
3467 spin_lock(&pa->pa_lock);
3468 if (pa->pa_deleted == 0 &&
3469 pa->pa_free >= ac->ac_o_ex.fe_len) {
3470
3471 cpa = ext4_mb_check_group_pa(goal_block,
3472 pa, cpa);
3473 }
3474 spin_unlock(&pa->pa_lock);
3475 }
3476 rcu_read_unlock();
3477 }
3478 if (cpa) {
3479 ext4_mb_use_group_pa(ac, cpa);
3480 ac->ac_criteria = 20;
3481 return 1;
3482 }
3483 return 0;
3484}
3485
/*
 * Walk the extents freed but not yet committed for this group and mark
 * them as used in the in-core bitmap, so the buddy is generated from a
 * bitmap that excludes them.  Must be called with the group lock held.
 */
3492static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3493 ext4_group_t group)
3494{
3495 struct rb_node *n;
3496 struct ext4_group_info *grp;
3497 struct ext4_free_data *entry;
3498
3499 grp = ext4_get_group_info(sb, group);
3500 n = rb_first(&(grp->bb_free_root));
3501
3502 while (n) {
3503 entry = rb_entry(n, struct ext4_free_data, efd_node);
3504 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3505 n = rb_next(n);
3506 }
3507 return;
3508}
3509
/*
 * Walk all preallocations in the group and mark them used in the
 * in-core bitmap; the buddy must be generated from this bitmap.
 * Must be called with the group lock held.
 */
3515static noinline_for_stack
3516void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3517 ext4_group_t group)
3518{
3519 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3520 struct ext4_prealloc_space *pa;
3521 struct list_head *cur;
3522 ext4_group_t groupnr;
3523 ext4_grpblk_t start;
3524 int preallocated = 0;
3525 int len;
3526
	/*
	 * All forms of preallocation discard load the group first, so
	 * the only competing code is preallocation use; no extra
	 * locking is needed here.  Note that PAs with pa_deleted set
	 * are NOT skipped: otherwise blocks still referenced by a
	 * dying PA could be handed out from the buddy.
	 */
3535 list_for_each(cur, &grp->bb_prealloc_list) {
3536 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3537 spin_lock(&pa->pa_lock);
3538 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3539 &groupnr, &start);
3540 len = pa->pa_len;
3541 spin_unlock(&pa->pa_lock);
3542 if (unlikely(len == 0))
3543 continue;
3544 BUG_ON(groupnr != group);
3545 ext4_set_bits(bitmap, start, len);
3546 preallocated += len;
3547 }
3548 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3549}
3550
3551static void ext4_mb_pa_callback(struct rcu_head *head)
3552{
3553 struct ext4_prealloc_space *pa;
3554 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3555
3556 BUG_ON(atomic_read(&pa->pa_count));
3557 BUG_ON(pa->pa_deleted == 0);
3558 kmem_cache_free(ext4_pspace_cachep, pa);
3559}
3560
/*
 * Drop a reference to the preallocated space descriptor and free it
 * if this was the last reference and the space is fully consumed.
 */
3565static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3566 struct super_block *sb, struct ext4_prealloc_space *pa)
3567{
3568 ext4_group_t grp;
3569 ext4_fsblk_t grp_blk;
3570
	/* in this short window a concurrent discard can set pa_deleted */
3572 spin_lock(&pa->pa_lock);
3573 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3574 spin_unlock(&pa->pa_lock);
3575 return;
3576 }
3577
3578 if (pa->pa_deleted == 1) {
3579 spin_unlock(&pa->pa_lock);
3580 return;
3581 }
3582
3583 pa->pa_deleted = 1;
3584 spin_unlock(&pa->pa_lock);
3585
3586 grp_blk = pa->pa_pstart;
3587
	/*
	 * If doing group-based preallocation, pa_pstart may be in the
	 * next group when the PA has been fully used up; step back one
	 * block to find the group the PA belongs to.
	 */
3591 if (pa->pa_type == MB_GROUP_PA)
3592 grp_blk--;
3593
3594 grp = ext4_get_group_number(sb, grp_blk);
3595
	/*
	 * possible race:
	 *
	 *  P1 (buddy init)			P2 (regular allocation)
	 *					find block B in PA
	 *  copy on-disk bitmap to buddy
	 *					mark B in on-disk bitmap
	 *					drop PA from group
	 *  mark all PA blocks in in-core bitmap
	 *
	 * thus, P1 initializes the buddy with B available; to prevent
	 * this we make the "copy" and "mark all PAs" steps atomic under
	 * the group lock and serialize "drop PA" against that pair
	 */
3610 ext4_lock_group(sb, grp);
3611 list_del(&pa->pa_group_list);
3612 ext4_unlock_group(sb, grp);
3613
3614 spin_lock(pa->pa_obj_lock);
3615 list_del_rcu(&pa->pa_inode_list);
3616 spin_unlock(pa->pa_obj_lock);
3617
3618 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3619}
3620
/*
 * creates a new preallocated space for the given inode
 */
3624static noinline_for_stack int
3625ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3626{
3627 struct super_block *sb = ac->ac_sb;
3628 struct ext4_sb_info *sbi = EXT4_SB(sb);
3629 struct ext4_prealloc_space *pa;
3630 struct ext4_group_info *grp;
3631 struct ext4_inode_info *ei;
3632
	/* preallocate only when the found space is larger than requested */
3634 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3635 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3636 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3637
3638 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3639 if (pa == NULL)
3640 return -ENOMEM;
3641
3642 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3643 int winl;
3644 int wins;
3645 int win;
3646 int offs;
3647
		/* we can't allocate as much as the normalizer wants, so
		 * the found chunk must get a proper lstart to cover the
		 * original request */
3651 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3652 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3653
		/* we're limited in that the chunk must still cover the
		 * original logical block; winl is how far the chunk may
		 * slide toward it */
3657 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3658
		/* the whole original request must be covered as well */
3660 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3661
		/* the smaller one defines the real window */
3663 win = min(winl, wins);
3664
3665 offs = ac->ac_o_ex.fe_logical %
3666 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3667 if (offs && offs < win)
3668 win = offs;
3669
3670 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3671 EXT4_NUM_B2C(sbi, win);
3672 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3673 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3674 }
3675
	/* preallocation may change ac_b_ex, so store the blocks that
	 * were actually allocated, for the statistics */
3678 ac->ac_f_ex = ac->ac_b_ex;
3679
3680 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3681 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3682 pa->pa_len = ac->ac_b_ex.fe_len;
3683 pa->pa_free = pa->pa_len;
3684 atomic_set(&pa->pa_count, 1);
3685 spin_lock_init(&pa->pa_lock);
3686 INIT_LIST_HEAD(&pa->pa_inode_list);
3687 INIT_LIST_HEAD(&pa->pa_group_list);
3688 pa->pa_deleted = 0;
3689 pa->pa_type = MB_INODE_PA;
3690
3691 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3692 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3693 trace_ext4_mb_new_inode_pa(ac, pa);
3694
3695 ext4_mb_use_inode_pa(ac, pa);
3696 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3697
3698 ei = EXT4_I(ac->ac_inode);
3699 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3700
3701 pa->pa_obj_lock = &ei->i_prealloc_lock;
3702 pa->pa_inode = ac->ac_inode;
3703
3704 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3705 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3706 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3707
3708 spin_lock(pa->pa_obj_lock);
3709 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3710 spin_unlock(pa->pa_obj_lock);
3711
3712 return 0;
3713}
3714
/*
 * creates a new preallocated space for the locality group
 */
3718static noinline_for_stack int
3719ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3720{
3721 struct super_block *sb = ac->ac_sb;
3722 struct ext4_locality_group *lg;
3723 struct ext4_prealloc_space *pa;
3724 struct ext4_group_info *grp;
3725
	/* preallocate only when the found space is larger than requested */
3727 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3728 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3729 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3730
3731 BUG_ON(ext4_pspace_cachep == NULL);
3732 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3733 if (pa == NULL)
3734 return -ENOMEM;
3735
	/* preallocation may change ac_b_ex, so store the blocks that
	 * were actually allocated, for the statistics */
3738 ac->ac_f_ex = ac->ac_b_ex;
3739
3740 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3741 pa->pa_lstart = pa->pa_pstart;
3742 pa->pa_len = ac->ac_b_ex.fe_len;
3743 pa->pa_free = pa->pa_len;
3744 atomic_set(&pa->pa_count, 1);
3745 spin_lock_init(&pa->pa_lock);
3746 INIT_LIST_HEAD(&pa->pa_inode_list);
3747 INIT_LIST_HEAD(&pa->pa_group_list);
3748 pa->pa_deleted = 0;
3749 pa->pa_type = MB_GROUP_PA;
3750
3751 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3752 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3753 trace_ext4_mb_new_group_pa(ac, pa);
3754
3755 ext4_mb_use_group_pa(ac, pa);
3756 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3757
3758 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3759 lg = ac->ac_lg;
3760 BUG_ON(lg == NULL);
3761
3762 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3763 pa->pa_inode = NULL;
3764
3765 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3766 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3767 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3768
	/*
	 * The new PA is added to its lg_prealloc_list bucket later,
	 * once pa_free has been updated in ext4_mb_release_context().
	 */
3773 return 0;
3774}
3775
3776static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3777{
3778 int err;
3779
3780 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3781 err = ext4_mb_new_group_pa(ac);
3782 else
3783 err = ext4_mb_new_inode_pa(ac);
3784 return err;
3785}
3786
/*
 * Find all unused blocks of an inode PA in the on-disk bitmap and free
 * them in the in-core bitmap and buddy.
 * @pa must already be unlinked from the inode and group lists so that
 * nobody else can find or use it; the caller must hold the group lock.
 */
3795static noinline_for_stack int
3796ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3797 struct ext4_prealloc_space *pa)
3798{
3799 struct super_block *sb = e4b->bd_sb;
3800 struct ext4_sb_info *sbi = EXT4_SB(sb);
3801 unsigned int end;
3802 unsigned int next;
3803 ext4_group_t group;
3804 ext4_grpblk_t bit;
3805 unsigned long long grp_blk_start;
3806 int free = 0;
3807
3808 BUG_ON(pa->pa_deleted == 0);
3809 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3810 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3811 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3812 end = bit + pa->pa_len;
3813
3814 while (bit < end) {
3815 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3816 if (bit >= end)
3817 break;
3818 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3819 mb_debug(1, " free preallocated %u/%u in group %u\n",
3820 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3821 (unsigned) next - bit, (unsigned) group);
3822 free += next - bit;
3823
3824 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3825 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3826 EXT4_C2B(sbi, bit)),
3827 next - bit);
3828 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3829 bit = next + 1;
3830 }
3831 if (free != pa->pa_free) {
3832 ext4_msg(e4b->bd_sb, KERN_CRIT,
3833 "pa %p: logic %lu, phys. %lu, len %lu",
3834 pa, (unsigned long) pa->pa_lstart,
3835 (unsigned long) pa->pa_pstart,
3836 (unsigned long) pa->pa_len);
3837 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3838 free, pa->pa_free);
		/*
		 * The PA is already deleted, so just trust the count
		 * derived from the bitmap and carry on.
		 */
3843 }
3844 atomic_add(free, &sbi->s_mb_discarded);
3845
3846 return 0;
3847}
3848
3849static noinline_for_stack int
3850ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3851 struct ext4_prealloc_space *pa)
3852{
3853 struct super_block *sb = e4b->bd_sb;
3854 ext4_group_t group;
3855 ext4_grpblk_t bit;
3856
3857 trace_ext4_mb_release_group_pa(sb, pa);
3858 BUG_ON(pa->pa_deleted == 0);
3859 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3860 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3861 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3862 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3863 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3864
3865 return 0;
3866}
3867
/*
 * releases all preallocations in the given group
 *
 * Discard policy: this is called on ENOSPC, and tries to free at
 * least @needed clusters from the group.
 */
3877static noinline_for_stack int
3878ext4_mb_discard_group_preallocations(struct super_block *sb,
3879 ext4_group_t group, int needed)
3880{
3881 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3882 struct buffer_head *bitmap_bh = NULL;
3883 struct ext4_prealloc_space *pa, *tmp;
3884 struct list_head list;
3885 struct ext4_buddy e4b;
3886 int err;
3887 int busy = 0;
3888 int free = 0;
3889
3890 mb_debug(1, "discard preallocation for group %u\n", group);
3891
3892 if (list_empty(&grp->bb_prealloc_list))
3893 return 0;
3894
3895 bitmap_bh = ext4_read_block_bitmap(sb, group);
3896 if (IS_ERR(bitmap_bh)) {
3897 err = PTR_ERR(bitmap_bh);
3898 ext4_error(sb, "Error %d reading block bitmap for %u",
3899 err, group);
3900 return 0;
3901 }
3902
3903 err = ext4_mb_load_buddy(sb, group, &e4b);
3904 if (err) {
3905 ext4_warning(sb, "Error %d loading buddy information for %u",
3906 err, group);
3907 put_bh(bitmap_bh);
3908 return 0;
3909 }
3910
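	/* needed == 0 means "release everything in the group" */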
3911 if (needed == 0)
3912 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3913
3914 INIT_LIST_HEAD(&list);
3915repeat:
3916 ext4_lock_group(sb, group);
3917 list_for_each_entry_safe(pa, tmp,
3918 &grp->bb_prealloc_list, pa_group_list) {
3919 spin_lock(&pa->pa_lock);
3920 if (atomic_read(&pa->pa_count)) {
3921 spin_unlock(&pa->pa_lock);
3922 busy = 1;
3923 continue;
3924 }
3925 if (pa->pa_deleted) {
3926 spin_unlock(&pa->pa_lock);
3927 continue;
3928 }
3929
		/* this one can be freed: mark it deleted */
3931 pa->pa_deleted = 1;
3932
		/* pa_free is stable now that the PA is marked deleted */
3934 free += pa->pa_free;
3935
3936 spin_unlock(&pa->pa_lock);
3937
3938 list_del(&pa->pa_group_list);
3939 list_add(&pa->u.pa_tmp_list, &list);
3940 }
3941
	/* some PAs were busy: if we still need more space, retry */
3943 if (free < needed && busy) {
3944 busy = 0;
3945 ext4_unlock_group(sb, group);
3946 cond_resched();
3947 goto repeat;
3948 }
3949
	/* found anything to free? */
3951 if (list_empty(&list)) {
3952 BUG_ON(free != 0);
3953 goto out;
3954 }
3955
	/* now release all the selected PAs */
3957 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3958
		/* remove from the object (inode or locality group) */
3960 spin_lock(pa->pa_obj_lock);
3961 list_del_rcu(&pa->pa_inode_list);
3962 spin_unlock(pa->pa_obj_lock);
3963
3964 if (pa->pa_type == MB_GROUP_PA)
3965 ext4_mb_release_group_pa(&e4b, pa);
3966 else
3967 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3968
3969 list_del(&pa->u.pa_tmp_list);
3970 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3971 }
3972
3973out:
3974 ext4_unlock_group(sb, group);
3975 ext4_mb_unload_buddy(&e4b);
3976 put_bh(bitmap_bh);
3977 return free;
3978}
3979
/*
 * Release all unused preallocated blocks for the given inode.
 *
 * It is important to discard preallocations under i_data_sem: we do
 * not want another thread to be served from the preallocated space
 * while it is being discarded.
 */
3989void ext4_discard_preallocations(struct inode *inode)
3990{
3991 struct ext4_inode_info *ei = EXT4_I(inode);
3992 struct super_block *sb = inode->i_sb;
3993 struct buffer_head *bitmap_bh = NULL;
3994 struct ext4_prealloc_space *pa, *tmp;
3995 ext4_group_t group = 0;
3996 struct list_head list;
3997 struct ext4_buddy e4b;
3998 int err;
3999
4000 if (!S_ISREG(inode->i_mode)) {
		/* only regular files carry inode preallocations */
4002 return;
4003 }
4004
4005 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
4006 trace_ext4_discard_preallocations(inode);
4007
4008 INIT_LIST_HEAD(&list);
4009
4010repeat:
	/* first, collect all PAs for the inode */
4012 spin_lock(&ei->i_prealloc_lock);
4013 while (!list_empty(&ei->i_prealloc_list)) {
4014 pa = list_entry(ei->i_prealloc_list.next,
4015 struct ext4_prealloc_space, pa_inode_list);
4016 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4017 spin_lock(&pa->pa_lock);
4018 if (atomic_read(&pa->pa_count)) {
			/* this shouldn't happen often: nobody should be
			 * using preallocation while we discard it */
4021 spin_unlock(&pa->pa_lock);
4022 spin_unlock(&ei->i_prealloc_lock);
4023 ext4_msg(sb, KERN_ERR,
4024 "uh-oh! used pa while discarding");
4025 WARN_ON(1);
4026 schedule_timeout_uninterruptible(HZ);
4027 goto repeat;
4028
4029 }
4030 if (pa->pa_deleted == 0) {
4031 pa->pa_deleted = 1;
4032 spin_unlock(&pa->pa_lock);
4033 list_del_rcu(&pa->pa_inode_list);
4034 list_add(&pa->u.pa_tmp_list, &list);
4035 continue;
4036 }
4037
		/* someone else is deleting this PA right now */
4039 spin_unlock(&pa->pa_lock);
4040 spin_unlock(&ei->i_prealloc_lock);
4041
		/* We have to wait here: pa_deleted does not mean the PA
		 * is already unlinked from the list.  Since we may be
		 * called from ->clear_inode(), the inode could be freed
		 * while a concurrent thread is still unlinking the PA
		 * from the inode's list, leaving that thread touching
		 * freed memory. */
4054 schedule_timeout_uninterruptible(HZ);
4055 goto repeat;
4056 }
4057 spin_unlock(&ei->i_prealloc_lock);
4058
4059 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4060 BUG_ON(pa->pa_type != MB_INODE_PA);
4061 group = ext4_get_group_number(sb, pa->pa_pstart);
4062
4063 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4064 GFP_NOFS|__GFP_NOFAIL);
4065 if (err) {
4066 ext4_error(sb, "Error %d loading buddy information for %u",
4067 err, group);
4068 continue;
4069 }
4070
4071 bitmap_bh = ext4_read_block_bitmap(sb, group);
4072 if (IS_ERR(bitmap_bh)) {
4073 err = PTR_ERR(bitmap_bh);
4074 ext4_error(sb, "Error %d reading block bitmap for %u",
4075 err, group);
4076 ext4_mb_unload_buddy(&e4b);
4077 continue;
4078 }
4079
4080 ext4_lock_group(sb, group);
4081 list_del(&pa->pa_group_list);
4082 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4083 ext4_unlock_group(sb, group);
4084
4085 ext4_mb_unload_buddy(&e4b);
4086 put_bh(bitmap_bh);
4087
4088 list_del(&pa->u.pa_tmp_list);
4089 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4090 }
4091}
4092
4093#ifdef CONFIG_EXT4_DEBUG
4094static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4095{
4096 struct super_block *sb = ac->ac_sb;
4097 ext4_group_t ngroups, i;
4098
4099 if (!ext4_mballoc_debug ||
4100 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4101 return;
4102
4103 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4104 " Allocation context details:");
4105 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4106 ac->ac_status, ac->ac_flags);
4107 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4108 "goal %lu/%lu/%lu@%lu, "
4109 "best %lu/%lu/%lu@%lu cr %d",
4110 (unsigned long)ac->ac_o_ex.fe_group,
4111 (unsigned long)ac->ac_o_ex.fe_start,
4112 (unsigned long)ac->ac_o_ex.fe_len,
4113 (unsigned long)ac->ac_o_ex.fe_logical,
4114 (unsigned long)ac->ac_g_ex.fe_group,
4115 (unsigned long)ac->ac_g_ex.fe_start,
4116 (unsigned long)ac->ac_g_ex.fe_len,
4117 (unsigned long)ac->ac_g_ex.fe_logical,
4118 (unsigned long)ac->ac_b_ex.fe_group,
4119 (unsigned long)ac->ac_b_ex.fe_start,
4120 (unsigned long)ac->ac_b_ex.fe_len,
4121 (unsigned long)ac->ac_b_ex.fe_logical,
4122 (int)ac->ac_criteria);
4123 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4124 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4125 ngroups = ext4_get_groups_count(sb);
4126 for (i = 0; i < ngroups; i++) {
4127 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4128 struct ext4_prealloc_space *pa;
4129 ext4_grpblk_t start;
4130 struct list_head *cur;
4131 ext4_lock_group(sb, i);
4132 list_for_each(cur, &grp->bb_prealloc_list) {
4133 pa = list_entry(cur, struct ext4_prealloc_space,
4134 pa_group_list);
4135 spin_lock(&pa->pa_lock);
4136 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4137 NULL, &start);
4138 spin_unlock(&pa->pa_lock);
4139 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4140 start, pa->pa_len);
4141 }
4142 ext4_unlock_group(sb, i);
4143
4144 if (grp->bb_free == 0)
4145 continue;
4146 printk(KERN_ERR "%u: %d/%d \n",
4147 i, grp->bb_free, grp->bb_fragments);
4148 }
4149 printk(KERN_ERR "\n");
4150}
4151#else
4152static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4153{
4154 return;
4155}
4156#endif
4157
/*
 * Decide between locality-group and per-inode preallocation.  Small
 * files (current size or resulting size, whichever is larger) use the
 * locality group; the threshold is tunable via
 * /sys/fs/ext4/<partition>/mb_stream_req.
 */
4165static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4166{
4167 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4168 int bsbits = ac->ac_sb->s_blocksize_bits;
4169 loff_t size, isize;
4170
4171 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4172 return;
4173
4174 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4175 return;
4176
4177 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4178 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4179 >> bsbits;
4180
4181 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
4182 !inode_is_open_for_write(ac->ac_inode)) {
4183 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4184 return;
4185 }
4186
4187 if (sbi->s_mb_group_prealloc <= 0) {
4188 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4189 return;
4190 }
4191
	/* don't use group allocation for large files */
4193 size = max(size, isize);
4194 if (size > sbi->s_mb_stream_request) {
4195 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4196 return;
4197 }
4198
4199 BUG_ON(ac->ac_lg != NULL);

	/*
	 * Locality group preallocation space is per-CPU, to reduce
	 * contention between block requests from multiple CPUs.
	 */
4205 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
4206
	/* we're going to use group allocation */
4208 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4209
	/* serialize all allocations in the group */
4211 mutex_lock(&ac->ac_lg->lg_mutex);
4212}
4213
4214static noinline_for_stack int
4215ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4216 struct ext4_allocation_request *ar)
4217{
4218 struct super_block *sb = ar->inode->i_sb;
4219 struct ext4_sb_info *sbi = EXT4_SB(sb);
4220 struct ext4_super_block *es = sbi->s_es;
4221 ext4_group_t group;
4222 unsigned int len;
4223 ext4_fsblk_t goal;
4224 ext4_grpblk_t block;
4225
	/* we can't allocate more than one group's worth of clusters */
4227 len = ar->len;
4228
	/* just a dirty hack to filter out oversized requests */
4230 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4231 len = EXT4_CLUSTERS_PER_GROUP(sb);
4232
	/* start searching from the goal */
4234 goal = ar->goal;
4235 if (goal < le32_to_cpu(es->s_first_data_block) ||
4236 goal >= ext4_blocks_count(es))
4237 goal = le32_to_cpu(es->s_first_data_block);
4238 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4239
	/* set up allocation goals */
4241 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4242 ac->ac_status = AC_STATUS_CONTINUE;
4243 ac->ac_sb = sb;
4244 ac->ac_inode = ar->inode;
4245 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4246 ac->ac_o_ex.fe_group = group;
4247 ac->ac_o_ex.fe_start = block;
4248 ac->ac_o_ex.fe_len = len;
4249 ac->ac_g_ex = ac->ac_o_ex;
4250 ac->ac_flags = ar->flags;
4251
	/* define the allocation context: we'll work with a file or a
	 * locality group; this is a policy decision */
4254 ext4_mb_group_or_file(ac);
4255
4256 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4257 "left: %u/%u, right %u/%u to %swritable\n",
4258 (unsigned) ar->len, (unsigned) ar->logical,
4259 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4260 (unsigned) ar->lleft, (unsigned) ar->pleft,
4261 (unsigned) ar->lright, (unsigned) ar->pright,
4262 inode_is_open_for_write(ar->inode) ? "" : "non-");
4263 return 0;
4264
4265}
4266
4267static noinline_for_stack void
4268ext4_mb_discard_lg_preallocations(struct super_block *sb,
4269 struct ext4_locality_group *lg,
4270 int order, int total_entries)
4271{
4272 ext4_group_t group = 0;
4273 struct ext4_buddy e4b;
4274 struct list_head discard_list;
4275 struct ext4_prealloc_space *pa, *tmp;
4276
4277 mb_debug(1, "discard locality group preallocation\n");
4278
4279 INIT_LIST_HEAD(&discard_list);
4280
4281 spin_lock(&lg->lg_prealloc_lock);
4282 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4283 pa_inode_list) {
4284 spin_lock(&pa->pa_lock);
4285 if (atomic_read(&pa->pa_count)) {
			/*
			 * This PA was just used for the current
			 * allocation, so don't free it.
			 */
4291 spin_unlock(&pa->pa_lock);
4292 continue;
4293 }
4294 if (pa->pa_deleted) {
4295 spin_unlock(&pa->pa_lock);
4296 continue;
4297 }
4298
4299 BUG_ON(pa->pa_type != MB_GROUP_PA);
4300
		/* this one can be freed: mark it deleted */
4302 pa->pa_deleted = 1;
4303 spin_unlock(&pa->pa_lock);
4304
4305 list_del_rcu(&pa->pa_inode_list);
4306 list_add(&pa->u.pa_tmp_list, &discard_list);
4307
4308 total_entries--;
4309 if (total_entries <= 5) {
			/*
			 * Only five entries are kept on the list; once
			 * the rest have been collected for discarding,
			 * stop scanning.
			 */
4316 break;
4317 }
4318 }
4319 spin_unlock(&lg->lg_prealloc_lock);
4320
4321 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4322 int err;
4323
4324 group = ext4_get_group_number(sb, pa->pa_pstart);
4325 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4326 GFP_NOFS|__GFP_NOFAIL);
4327 if (err) {
4328 ext4_error(sb, "Error %d loading buddy information for %u",
4329 err, group);
4330 continue;
4331 }
4332 ext4_lock_group(sb, group);
4333 list_del(&pa->pa_group_list);
4334 ext4_mb_release_group_pa(&e4b, pa);
4335 ext4_unlock_group(sb, group);
4336
4337 ext4_mb_unload_buddy(&e4b);
4338 list_del(&pa->u.pa_tmp_list);
4339 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4340 }
4341}
4342
/*
 * Put the locality-group PA back on the correct lg_prealloc_list
 * bucket (sorted by pa_free) and trim the list when it grows too
 * long.  pa_count has been incremented and lg_mutex is held, so no
 * parallel allocation can change pa_free; only a concurrent
 * ext4_mb_discard_group_preallocations() may still reshape the list.
 */
4352static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4353{
4354 int order, added = 0, lg_prealloc_count = 1;
4355 struct super_block *sb = ac->ac_sb;
4356 struct ext4_locality_group *lg = ac->ac_lg;
4357 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4358
4359 order = fls(pa->pa_free) - 1;
4360 if (order > PREALLOC_TB_SIZE - 1)
4361
4362 order = PREALLOC_TB_SIZE - 1;
4363
4364 spin_lock(&lg->lg_prealloc_lock);
4365 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4366 pa_inode_list) {
4367 spin_lock(&tmp_pa->pa_lock);
4368 if (tmp_pa->pa_deleted) {
4369 spin_unlock(&tmp_pa->pa_lock);
4370 continue;
4371 }
4372 if (!added && pa->pa_free < tmp_pa->pa_free) {
			/* insert before the first PA with more free space */
4374 list_add_tail_rcu(&pa->pa_inode_list,
4375 &tmp_pa->pa_inode_list);
4376 added = 1;
			/*
			 * keep scanning so lg_prealloc_count ends up as
			 * the total length of the list
			 */
4381 }
4382 spin_unlock(&tmp_pa->pa_lock);
4383 lg_prealloc_count++;
4384 }
4385 if (!added)
4386 list_add_tail_rcu(&pa->pa_inode_list,
4387 &lg->lg_prealloc_list[order]);
4388 spin_unlock(&lg->lg_prealloc_lock);
4389
	/* now trim the list if it has grown past 8 entries */
4391 if (lg_prealloc_count > 8) {
4392 ext4_mb_discard_lg_preallocations(sb, lg,
4393 order, lg_prealloc_count);
4394 return;
4395 }
	return;
4397}
4398
/*
 * release all resources used by the allocation context
 */
4402static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4403{
4404 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4405 struct ext4_prealloc_space *pa = ac->ac_pa;
4406 if (pa) {
4407 if (pa->pa_type == MB_GROUP_PA) {
			/* see the comment in ext4_mb_use_group_pa() */
4409 spin_lock(&pa->pa_lock);
4410 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4411 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4412 pa->pa_free -= ac->ac_b_ex.fe_len;
4413 pa->pa_len -= ac->ac_b_ex.fe_len;
4414 spin_unlock(&pa->pa_lock);
4415 }
4416 }
4417 if (pa) {
		/*
		 * A group PA with space left is moved to the bucket
		 * matching its new pa_free, keeping the list from
		 * growing too big.
		 */
4424 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4425 spin_lock(pa->pa_obj_lock);
4426 list_del_rcu(&pa->pa_inode_list);
4427 spin_unlock(pa->pa_obj_lock);
4428 ext4_mb_add_n_trim(ac);
4429 }
4430 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4431 }
4432 if (ac->ac_bitmap_page)
4433 put_page(ac->ac_bitmap_page);
4434 if (ac->ac_buddy_page)
4435 put_page(ac->ac_buddy_page);
4436 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4437 mutex_unlock(&ac->ac_lg->lg_mutex);
4438 ext4_mb_collect_stats(ac);
4439 return 0;
4440}
4441
4442static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4443{
4444 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4445 int ret;
4446 int freed = 0;
4447
4448 trace_ext4_mb_discard_preallocations(sb, needed);
4449 for (i = 0; i < ngroups && needed > 0; i++) {
4450 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4451 freed += ret;
4452 needed -= ret;
4453 }
4454
4455 return freed;
4456}
4457
/*
 * Main entry point into mballoc to allocate blocks: tries to use
 * preallocated space first, then falls back to a regular group scan.
 */
4463ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4464 struct ext4_allocation_request *ar, int *errp)
4465{
4466 int freed;
4467 struct ext4_allocation_context *ac = NULL;
4468 struct ext4_sb_info *sbi;
4469 struct super_block *sb;
4470 ext4_fsblk_t block = 0;
4471 unsigned int inquota = 0;
4472 unsigned int reserv_clstrs = 0;
4473
4474 might_sleep();
4475 sb = ar->inode->i_sb;
4476 sbi = EXT4_SB(sb);
4477
4478 trace_ext4_request_blocks(ar);
4479
	/* allow the quota file to use the root reservation */
4481 if (ext4_is_quota_file(ar->inode))
4482 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4483
4484 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
		/*
		 * Without delayed allocation we need to verify there is
		 * enough free space for this allocation and that it
		 * does not exceed the quota limits.
		 */
4489 while (ar->len &&
4490 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4491
			/* let others free some space */
4493 cond_resched();
4494 ar->len = ar->len >> 1;
4495 }
4496 if (!ar->len) {
4497 *errp = -ENOSPC;
4498 return 0;
4499 }
4500 reserv_clstrs = ar->len;
4501 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4502 dquot_alloc_block_nofail(ar->inode,
4503 EXT4_C2B(sbi, ar->len));
4504 } else {
4505 while (ar->len &&
4506 dquot_alloc_block(ar->inode,
4507 EXT4_C2B(sbi, ar->len))) {
				/* quota is short: shrink the request */
4509 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4510 ar->len--;
4511 }
4512 }
4513 inquota = ar->len;
4514 if (ar->len == 0) {
4515 *errp = -EDQUOT;
4516 goto out;
4517 }
4518 }
4519
4520 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4521 if (!ac) {
4522 ar->len = 0;
4523 *errp = -ENOMEM;
4524 goto out;
4525 }
4526
4527 *errp = ext4_mb_initialize_context(ac, ar);
4528 if (*errp) {
4529 ar->len = 0;
4530 goto out;
4531 }
4532
4533 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4534 if (!ext4_mb_use_preallocated(ac)) {
4535 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4536 ext4_mb_normalize_request(ac, ar);
4537repeat:
		/* allocate space in core */
4539 *errp = ext4_mb_regular_allocator(ac);
4540 if (*errp)
4541 goto discard_and_exit;

		/* if ext4_mb_regular_allocator() found more space than
		 * the user requested originally, store the extra space
		 * in a preallocation descriptor */
4546 if (ac->ac_status == AC_STATUS_FOUND &&
4547 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4548 *errp = ext4_mb_new_preallocation(ac);
4549 if (*errp) {
4550 discard_and_exit:
4551 ext4_discard_allocated_blocks(ac);
4552 goto errout;
4553 }
4554 }
4555 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4556 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4557 if (*errp) {
4558 ext4_discard_allocated_blocks(ac);
4559 goto errout;
4560 } else {
4561 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4562 ar->len = ac->ac_b_ex.fe_len;
4563 }
4564 } else {
4565 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4566 if (freed)
4567 goto repeat;
4568 *errp = -ENOSPC;
4569 }
4570
4571errout:
4572 if (*errp) {
4573 ac->ac_b_ex.fe_len = 0;
4574 ar->len = 0;
4575 ext4_mb_show_ac(ac);
4576 }
4577 ext4_mb_release_context(ac);
4578out:
4579 if (ac)
4580 kmem_cache_free(ext4_ac_cachep, ac);
4581 if (inquota && ar->len < inquota)
4582 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4583 if (!ar->len) {
4584 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
			/* release all the reserved clusters for non-delalloc */
4586 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4587 reserv_clstrs);
4588 }
4589
4590 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4591
4592 return block;
4593}
4594
/*
 * Two freed extents can be merged only when they were freed in the
 * same transaction, belong to the same group, and are physically
 * contiguous.
 */
4600static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
4601 struct ext4_free_data *entry,
4602 struct ext4_free_data *new_entry,
4603 struct rb_root *entry_rb_root)
4604{
4605 if ((entry->efd_tid != new_entry->efd_tid) ||
4606 (entry->efd_group != new_entry->efd_group))
4607 return;
4608 if (entry->efd_start_cluster + entry->efd_count ==
4609 new_entry->efd_start_cluster) {
4610 new_entry->efd_start_cluster = entry->efd_start_cluster;
4611 new_entry->efd_count += entry->efd_count;
4612 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
4613 entry->efd_start_cluster) {
4614 new_entry->efd_count += entry->efd_count;
4615 } else
4616 return;
4617 spin_lock(&sbi->s_md_lock);
4618 list_del(&entry->efd_list);
4619 spin_unlock(&sbi->s_md_lock);
4620 rb_erase(&entry->efd_node, entry_rb_root);
4621 kmem_cache_free(ext4_free_data_cachep, entry);
4622}
4623
4624static noinline_for_stack int
4625ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4626 struct ext4_free_data *new_entry)
4627{
4628 ext4_group_t group = e4b->bd_group;
4629 ext4_grpblk_t cluster;
4630 ext4_grpblk_t clusters = new_entry->efd_count;
4631 struct ext4_free_data *entry;
4632 struct ext4_group_info *db = e4b->bd_info;
4633 struct super_block *sb = e4b->bd_sb;
4634 struct ext4_sb_info *sbi = EXT4_SB(sb);
4635 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4636 struct rb_node *parent = NULL, *new_node;
4637
4638 BUG_ON(!ext4_handle_valid(handle));
4639 BUG_ON(e4b->bd_bitmap_page == NULL);
4640 BUG_ON(e4b->bd_buddy_page == NULL);
4641
4642 new_node = &new_entry->efd_node;
4643 cluster = new_entry->efd_start_cluster;
4644
4645 if (!*n) {
		/*
		 * First freed extent for this group: pin the bitmap and
		 * buddy pages so the in-core data stays around until
		 * the extents are returned to the buddy at commit time
		 * (see ext4_free_data_in_buddy()).
		 */
4651 get_page(e4b->bd_buddy_page);
4652 get_page(e4b->bd_bitmap_page);
4653 }
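	/* find the insertion point, catching double frees along the way */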
4654 while (*n) {
4655 parent = *n;
4656 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4657 if (cluster < entry->efd_start_cluster)
4658 n = &(*n)->rb_left;
4659 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4660 n = &(*n)->rb_right;
4661 else {
4662 ext4_grp_locked_error(sb, group, 0,
4663 ext4_group_first_block_no(sb, group) +
4664 EXT4_C2B(sbi, cluster),
4665 "Block already on to-be-freed list");
4666 return 0;
4667 }
4668 }
4669
4670 rb_link_node(new_node, parent, n);
4671 rb_insert_color(new_node, &db->bb_free_root);
4672
	/* try to merge with physically adjacent freed extents */
4674 node = rb_prev(new_node);
4675 if (node) {
4676 entry = rb_entry(node, struct ext4_free_data, efd_node);
4677 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4678 &(db->bb_free_root));
4679 }
4680
4681 node = rb_next(new_node);
4682 if (node) {
4683 entry = rb_entry(node, struct ext4_free_data, efd_node);
4684 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4685 &(db->bb_free_root));
4686 }
4687
4688 spin_lock(&sbi->s_md_lock);
4689 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
4690 sbi->s_mb_free_pending += clusters;
4691 spin_unlock(&sbi->s_md_lock);
4692 return 0;
4693}
4694
/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode
 * @bh:		optional buffer of the block to be freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	flags used by ext4_free_blocks
 */
4704void ext4_free_blocks(handle_t *handle, struct inode *inode,
4705 struct buffer_head *bh, ext4_fsblk_t block,
4706 unsigned long count, int flags)
4707{
4708 struct buffer_head *bitmap_bh = NULL;
4709 struct super_block *sb = inode->i_sb;
4710 struct ext4_group_desc *gdp;
4711 unsigned int overflow;
4712 ext4_grpblk_t bit;
4713 struct buffer_head *gd_bh;
4714 ext4_group_t block_group;
4715 struct ext4_sb_info *sbi;
4716 struct ext4_buddy e4b;
4717 unsigned int count_clusters;
4718 int err = 0;
4719 int ret;
4720
4721 might_sleep();
4722 if (bh) {
4723 if (block)
4724 BUG_ON(block != bh->b_blocknr);
4725 else
4726 block = bh->b_blocknr;
4727 }
4728
4729 sbi = EXT4_SB(sb);
4730 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4731 !ext4_data_block_valid(sbi, block, count)) {
4732 ext4_error(sb, "Freeing blocks not in datazone - "
4733 "block = %llu, count = %lu", block, count);
4734 goto error_return;
4735 }
4736
4737 ext4_debug("freeing block %llu\n", block);
4738 trace_ext4_free_blocks(inode, block, count, flags);
4739
4740 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4741 BUG_ON(count > 1);
4742
4743 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4744 inode, bh, block);
4745 }
4746
	/*
	 * If the extent to be freed does not begin on a cluster
	 * boundary, we need to deal with partial clusters at the
	 * beginning and end of the extent.  Normally the partial
	 * clusters are freed too, unless explicitly asked not to.
	 */
4754 overflow = EXT4_PBLK_COFF(sbi, block);
4755 if (overflow) {
4756 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4757 overflow = sbi->s_cluster_ratio - overflow;
4758 block += overflow;
4759 if (count > overflow)
4760 count -= overflow;
4761 else
4762 return;
4763 } else {
4764 block -= overflow;
4765 count += overflow;
4766 }
4767 }
4768 overflow = EXT4_LBLK_COFF(sbi, count);
4769 if (overflow) {
4770 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4771 if (count > overflow)
4772 count -= overflow;
4773 else
4774 return;
4775 } else
4776 count += sbi->s_cluster_ratio - overflow;
4777 }
4778
4779 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4780 int i;
4781 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
4782
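		/* have the journal forget each block's buffer */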
4783 for (i = 0; i < count; i++) {
4784 cond_resched();
4785 if (is_metadata)
4786 bh = sb_find_get_block(inode->i_sb, block + i);
4787 ext4_forget(handle, is_metadata, inode, bh, block + i);
4788 }
4789 }
4790
4791do_more:
4792 overflow = 0;
4793 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4794
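	/* nothing to do if the group's bitmap is already marked corrupt */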
4795 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4796 ext4_get_group_info(sb, block_group))))
4797 return;
4798
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
	 */
4803 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4804 overflow = EXT4_C2B(sbi, bit) + count -
4805 EXT4_BLOCKS_PER_GROUP(sb);
4806 count -= overflow;
4807 }
4808 count_clusters = EXT4_NUM_B2C(sbi, count);
4809 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4810 if (IS_ERR(bitmap_bh)) {
4811 err = PTR_ERR(bitmap_bh);
4812 bitmap_bh = NULL;
4813 goto error_return;
4814 }
4815 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4816 if (!gdp) {
4817 err = -EIO;
4818 goto error_return;
4819 }
4820
4821 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4822 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4823 in_range(block, ext4_inode_table(sb, gdp),
4824 sbi->s_itb_per_group) ||
4825 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4826 sbi->s_itb_per_group)) {
4827
4828 ext4_error(sb, "Freeing blocks in system zone - "
4829 "Block = %llu, count = %lu", block, count);
		/* err is still 0 here, so ext4_std_error() below is a no-op */
4831 goto error_return;
4832 }
4833
4834 BUFFER_TRACE(bitmap_bh, "getting write access");
4835 err = ext4_journal_get_write_access(handle, bitmap_bh);
4836 if (err)
4837 goto error_return;
4838
	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently committing transaction is
	 * using it.
	 */
4844 BUFFER_TRACE(gd_bh, "get_write_access");
4845 err = ext4_journal_get_write_access(handle, gd_bh);
4846 if (err)
4847 goto error_return;
4848#ifdef AGGRESSIVE_CHECK
4849 {
4850 int i;
4851 for (i = 0; i < count_clusters; i++)
4852 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4853 }
4854#endif
4855 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4856
	/* __GFP_NOFAIL: retry until the buddy can be loaded */
4858 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
4859 GFP_NOFS|__GFP_NOFAIL);
4860 if (err)
4861 goto error_return;
4862
	/*
	 * We need to make sure we don't reuse the freed block until
	 * after the transaction is committed, so the extent is parked
	 * on the transaction's freed-data list.  An exception is made
	 * for data in writeback mode, which has weaker consistency
	 * guarantees.
	 */
4869 if (ext4_handle_valid(handle) &&
4870 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
4871 !ext4_should_writeback_data(inode))) {
4872 struct ext4_free_data *new_entry;
4873

		/*
		 * __GFP_NOFAIL is used here because ext4_free_blocks()
		 * is not allowed to fail.
		 */
4877 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4878 GFP_NOFS|__GFP_NOFAIL);
4879 new_entry->efd_start_cluster = bit;
4880 new_entry->efd_group = block_group;
4881 new_entry->efd_count = count_clusters;
4882 new_entry->efd_tid = handle->h_transaction->t_tid;
4883
4884 ext4_lock_group(sb, block_group);
4885 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4886 ext4_mb_free_metadata(handle, &e4b, new_entry);
4887 } else {
		/* bb_free and the bitmap must be updated with the group
		 * lock held, since buddy generation inspects them under
		 * the same lock */
4892 if (test_opt(sb, DISCARD)) {
4893 err = ext4_issue_discard(sb, block_group, bit, count,
4894 NULL);
4895 if (err && err != -EOPNOTSUPP)
4896 ext4_msg(sb, KERN_WARNING, "discard request in"
4897 " group:%d block:%d count:%lu failed"
4898 " with %d", block_group, bit, count,
4899 err);
4900 } else
4901 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4902
4903 ext4_lock_group(sb, block_group);
4904 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4905 mb_free_blocks(inode, &e4b, bit, count_clusters);
4906 }

	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
	ext4_free_group_clusters_set(sb, gdp, ret);
	ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
	ext4_group_desc_csum_set(sb, block_group, gdp);
	ext4_unlock_group(sb, block_group);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic64_add(count_clusters,
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	/*
	 * on a bigalloc file system, defer the s_freeclusters_counter
	 * update to the caller (ext4_remove_space and friends) so they
	 * can determine if a cluster freed here should be rereserved
	 */
	if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
		if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
			dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
		percpu_counter_add(&sbi->s_freeclusters_counter,
				   count_clusters);
	}

	ext4_mb_unload_buddy(&e4b);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

	if (overflow && !err) {
		block += count;
		count = overflow;
		put_bh(bitmap_bh);
		goto do_more;
	}
error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return;
}
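
/*
 * Example (editor's sketch, not part of the original source): a typical
 * metadata-freeing caller, modelled on the truncate paths, passes FORGET
 * so buffer heads for the freed blocks are dropped and, being metadata,
 * revoked in the journal:
 *
 *	ext4_free_blocks(handle, inode, NULL, block, 1,
 *			 EXT4_FREE_BLOCKS_METADATA |
 *			 EXT4_FREE_BLOCKS_FORGET);
 */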

/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	handle to this transaction
 * @sb:		super block
 * @block:	start physical block to add to the block group
 * @count:	number of blocks to add
 *
 * This marks the blocks as free in the bitmap and buddy.
 */
int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
			 ext4_fsblk_t block, unsigned long count)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	ext4_grpblk_t bit;
	unsigned int i;
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_buddy e4b;
	int err = 0, ret, free_clusters_count;
	ext4_grpblk_t clusters_freed;
	ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
	ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
	unsigned long cluster_count = last_cluster - first_cluster + 1;

	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);

	if (count == 0)
		return 0;

	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);

	/*
	 * Check to see if we are adding blocks across a group
	 * boundary.
	 */
	if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
		ext4_warning(sb, "too many blocks added to group %u",
			     block_group);
		err = -EINVAL;
		goto error_return;
	}

	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (IS_ERR(bitmap_bh)) {
		err = PTR_ERR(bitmap_bh);
		bitmap_bh = NULL;
		goto error_return;
	}

	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!desc) {
		err = -EIO;
		goto error_return;
	}

	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, desc),
		     sbi->s_itb_per_group)) {
		ext4_error(sb, "Adding blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		err = -EINVAL;
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

	for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
			ext4_error(sb, "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
			clusters_freed++;
		}
	}

	err = ext4_mb_load_buddy(sb, block_group, &e4b);
	if (err)
		goto error_return;

	/*
	 * Need to update group_info->bb_free and the bitmap with the
	 * group lock held; the buddy generation code reads them under
	 * the same lock.
	 */
	ext4_lock_group(sb, block_group);
	mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
	mb_free_blocks(NULL, &e4b, bit, cluster_count);
	free_clusters_count = clusters_freed +
			      ext4_free_group_clusters(sb, desc);
	ext4_free_group_clusters_set(sb, desc, free_clusters_count);
	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
	ext4_group_desc_csum_set(sb, block_group, desc);
	ext4_unlock_group(sb, block_group);
	percpu_counter_add(&sbi->s_freeclusters_counter,
			   clusters_freed);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic64_add(clusters_freed,
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	ext4_mb_unload_buddy(&e4b);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return err;
}
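
/*
 * Example (editor's sketch): the online-resize path returns the blocks
 * gained by extending the last group roughly like this (simplified from
 * the group-extend code in resize.c):
 *
 *	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
 */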

/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @group:	alloc. group we are working with
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the "group". To assure that no
 * one will allocate those blocks, mark them as used in the buddy bitmap.
 * This must be called under the group lock.
 */
static int ext4_trim_extent(struct super_block *sb, int start, int count,
			    ext4_group_t group, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
	struct ext4_free_extent ex;
	int ret = 0;

	trace_ext4_trim_extent(sb, group, start, count);

	assert_spin_locked(ext4_group_lock_ptr(sb, group));

	ex.fe_start = start;
	ex.fe_group = group;
	ex.fe_len = count;

	/*
	 * Mark blocks used, so no one can reuse them while
	 * being trimmed.
	 */
	mb_mark_used(e4b, &ex);
	ext4_unlock_group(sb, group);
	ret = ext4_issue_discard(sb, group, start, count, NULL);
	ext4_lock_group(sb, group);
	mb_free_blocks(NULL, e4b, start, ex.fe_len);
	return ret;
}

/**
 * ext4_trim_all_free -- function to trim all free space in alloc. group
 * @sb:			super block for file system
 * @group:		group to be trimmed
 * @start:		first group block to examine
 * @max:		last group block to examine
 * @minblocks:		minimum extent block count
 *
 * ext4_trim_all_free walks through group's block bitmap searching for free
 * extents. When a free extent is found, it is marked as used in the group
 * buddy bitmap so that it cannot be allocated meanwhile, a TRIM command is
 * issued for the extent, and the extent is then released back into the
 * group buddy bitmap. This is repeated until the whole group is scanned.
 *
 * Returns the number of clusters trimmed, or a negative error code.
 */
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		   ext4_grpblk_t start, ext4_grpblk_t max,
		   ext4_grpblk_t minblocks)
{
	void *bitmap;
	ext4_grpblk_t next, count = 0, free_count = 0;
	struct ext4_buddy e4b;
	int ret = 0;

	trace_ext4_trim_all_free(sb, group, start, max);

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_warning(sb, "Error %d loading buddy information for %u",
			     ret, group);
		return ret;
	}
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);
	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
		goto out;

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;

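	/*
	 * Walk the bitmap: each run of zero bits is a free extent.  Trim
	 * runs of at least minblocks clusters, and stop early once the
	 * remaining free space in the group cannot contain another
	 * extent that long.
	 */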
	while (start <= max) {
		start = mb_find_next_zero_bit(bitmap, max + 1, start);
		if (start > max)
			break;
		next = mb_find_next_bit(bitmap, max + 1, start);

		if ((next - start) >= minblocks) {
			ret = ext4_trim_extent(sb, start,
					       next - start, group, &e4b);
			if (ret && ret != -EOPNOTSUPP)
				break;
			ret = 0;
			count += next - start;
		}
		free_count += next - start;
		start = next + 1;

		if (fatal_signal_pending(current)) {
			count = -ERESTARTSYS;
			break;
		}

		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

		if ((e4b.bd_info->bb_free - free_count) < minblocks)
			break;
	}

	if (!ret) {
		ret = count;
		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
	}
out:
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n",
		count, group);

	return ret;
}

/**
 * ext4_trim_fs() -- trim ioctl handler function
 * @sb:			superblock for filesystem
 * @range:		fstrim_range structure
 *
 * start:	First Byte to trim
 * len:		number of Bytes to trim from start
 * minlen:	minimum extent length in Bytes
 *
 * ext4_trim_fs goes through all allocation groups containing Bytes to
 * trim and calls ext4_trim_all_free() on each of them.
 */
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	struct ext4_group_info *grp;
	ext4_group_t group, first_group, last_group;
	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
	uint64_t start, end, minlen, trimmed = 0;
	ext4_fsblk_t first_data_blk =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
	int ret = 0;

	start = range->start >> sb->s_blocksize_bits;
	end = start + (range->len >> sb->s_blocksize_bits) - 1;
	minlen = EXT4_NUM_B2C(EXT4_SB(sb),
			      range->minlen >> sb->s_blocksize_bits);

	if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
	    start >= max_blks ||
	    range->len < sb->s_blocksize)
		return -EINVAL;
	if (end >= max_blks)
		end = max_blks - 1;
	if (end <= first_data_blk)
		goto out;
	if (start < first_data_blk)
		start = first_data_blk;

	/* determine first and last group to examine based on start and end */
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
				     &first_group, &first_cluster);
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
				     &last_group, &last_cluster);

	/* end now represents the last cluster to discard in this group */
	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	for (group = first_group; group <= last_group; group++) {
		grp = ext4_get_group_info(sb, group);
		/* We only do this if the grp has never been initialized */
		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
			if (ret)
				break;
		}

		/*
		 * For all the groups except the last one, last cluster will
		 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
		 * change it for the last group, note that last_cluster is
		 * already computed earlier by ext4_get_group_no_and_offset()
		 */
		if (group == last_group)
			end = last_cluster;

		if (grp->bb_free >= minlen) {
			cnt = ext4_trim_all_free(sb, group, first_cluster,
						 end, minlen);
			if (cnt < 0) {
				ret = cnt;
				break;
			}
			trimmed += cnt;
		}

		/*
		 * For every group except the first one, we are sure
		 * that the first cluster to discard will be cluster #0.
		 */
		first_cluster = 0;
	}

	if (!ret)
		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);

out:
	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
	return ret;
}
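
/*
 * Example (editor's sketch): the FITRIM ioctl handler drives this
 * function roughly as follows (simplified; the real handler also checks
 * CAP_SYS_ADMIN, discard support and the discard granularity):
 *
 *	struct fstrim_range range;
 *
 *	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
 *			   sizeof(range)))
 *		return -EFAULT;
 *	ret = ext4_trim_fs(sb, &range);
 *	if (ret < 0)
 *		return ret;
 *	if (copy_to_user((struct fstrim_range __user *)arg, &range,
 *			 sizeof(range)))
 *		return -EFAULT;
 */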
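/*
 * Iterate over all free extents of @group that lie in [@start, @end],
 * calling @formatter on each one with the group unlocked.  Used by the
 * GETFSMAP code to report free space.
 */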
int
ext4_mballoc_query_range(
	struct super_block		*sb,
	ext4_group_t			group,
	ext4_grpblk_t			start,
	ext4_grpblk_t			end,
	ext4_mballoc_query_range_fn	formatter,
	void				*priv)
{
	void *bitmap;
	ext4_grpblk_t next;
	struct ext4_buddy e4b;
	int error;

	error = ext4_mb_load_buddy(sb, group, &e4b);
	if (error)
		return error;
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;
	if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
		end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	while (start <= end) {
		start = mb_find_next_zero_bit(bitmap, end + 1, start);
		if (start > end)
			break;
		next = mb_find_next_bit(bitmap, end + 1, start);

		ext4_unlock_group(sb, group);
		error = formatter(sb, group, start, next - start, priv);
		if (error)
			goto out_unload;
		ext4_lock_group(sb, group);

		start = next + 1;
	}

	ext4_unlock_group(sb, group);
out_unload:
	ext4_mb_unload_buddy(&e4b);

	return error;
}
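
/*
 * Example (editor's sketch): a minimal formatter that just sums up the
 * free clusters it is shown; "sum_free_fn" and "total" are hypothetical
 * names, not part of ext4:
 *
 *	static int sum_free_fn(struct super_block *sb, ext4_group_t group,
 *			       ext4_grpblk_t start, ext4_grpblk_t len,
 *			       void *priv)
 *	{
 *		*(ext4_grpblk_t *)priv += len;
 *		return 0;
 *	}
 *
 *	ext4_grpblk_t total = 0;
 *	int err = ext4_mballoc_query_range(sb, group, 0,
 *			EXT4_CLUSTERS_PER_GROUP(sb) - 1,
 *			sum_free_fn, &total);
 */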