1
2
3
4
5
6
7
8
9
10
11
12#include "ext4_jbd2.h"
13#include "mballoc.h"
14#include <linux/log2.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/backing-dev.h>
18#include <trace/events/ext4.h>
19
20#ifdef CONFIG_EXT4_DEBUG
21ushort ext4_mballoc_debug __read_mostly;
22
23module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
24MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
25#endif
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338static struct kmem_cache *ext4_pspace_cachep;
339static struct kmem_cache *ext4_ac_cachep;
340static struct kmem_cache *ext4_free_data_cachep;
341
342
343
344
345#define NR_GRPINFO_CACHES 8
346static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
347
348static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
349 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
350 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
351 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
352};
353
354static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
355 ext4_group_t group);
356static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
357 ext4_group_t group);
358
359static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
360{
361#if BITS_PER_LONG == 64
362 *bit += ((unsigned long) addr & 7UL) << 3;
363 addr = (void *) ((unsigned long) addr & ~7UL);
364#elif BITS_PER_LONG == 32
365 *bit += ((unsigned long) addr & 3UL) << 3;
366 addr = (void *) ((unsigned long) addr & ~3UL);
367#else
368#error "how many bits you are?!"
369#endif
370 return addr;
371}
372
373static inline int mb_test_bit(int bit, void *addr)
374{
375
376
377
378
379 addr = mb_correct_addr_and_bit(&bit, addr);
380 return ext4_test_bit(bit, addr);
381}
382
383static inline void mb_set_bit(int bit, void *addr)
384{
385 addr = mb_correct_addr_and_bit(&bit, addr);
386 ext4_set_bit(bit, addr);
387}
388
389static inline void mb_clear_bit(int bit, void *addr)
390{
391 addr = mb_correct_addr_and_bit(&bit, addr);
392 ext4_clear_bit(bit, addr);
393}
394
395static inline int mb_test_and_clear_bit(int bit, void *addr)
396{
397 addr = mb_correct_addr_and_bit(&bit, addr);
398 return ext4_test_and_clear_bit(bit, addr);
399}
400
401static inline int mb_find_next_zero_bit(void *addr, int max, int start)
402{
403 int fix = 0, ret, tmpmax;
404 addr = mb_correct_addr_and_bit(&fix, addr);
405 tmpmax = max + fix;
406 start += fix;
407
408 ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
409 if (ret > max)
410 return max;
411 return ret;
412}
413
414static inline int mb_find_next_bit(void *addr, int max, int start)
415{
416 int fix = 0, ret, tmpmax;
417 addr = mb_correct_addr_and_bit(&fix, addr);
418 tmpmax = max + fix;
419 start += fix;
420
421 ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
422 if (ret > max)
423 return max;
424 return ret;
425}
426
427static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
428{
429 char *bb;
430
431 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
432 BUG_ON(max == NULL);
433
434 if (order > e4b->bd_blkbits + 1) {
435 *max = 0;
436 return NULL;
437 }
438
439
440 if (order == 0) {
441 *max = 1 << (e4b->bd_blkbits + 3);
442 return e4b->bd_bitmap;
443 }
444
445 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
446 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
447
448 return bb;
449}
450
451#ifdef DOUBLE_CHECK
452static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
453 int first, int count)
454{
455 int i;
456 struct super_block *sb = e4b->bd_sb;
457
458 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
459 return;
460 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
461 for (i = 0; i < count; i++) {
462 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
463 ext4_fsblk_t blocknr;
464
465 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
466 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
467 ext4_grp_locked_error(sb, e4b->bd_group,
468 inode ? inode->i_ino : 0,
469 blocknr,
470 "freeing block already freed "
471 "(bit %u)",
472 first + i);
473 }
474 mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
475 }
476}
477
478static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
479{
480 int i;
481
482 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
483 return;
484 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
485 for (i = 0; i < count; i++) {
486 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
487 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
488 }
489}
490
491static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
492{
493 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
494 unsigned char *b1, *b2;
495 int i;
496 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
497 b2 = (unsigned char *) bitmap;
498 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
499 if (b1[i] != b2[i]) {
500 ext4_msg(e4b->bd_sb, KERN_ERR,
501 "corruption in group %u "
502 "at byte %u(%u): %x in copy != %x "
503 "on disk/prealloc",
504 e4b->bd_group, i, i * 8, b1[i], b2[i]);
505 BUG();
506 }
507 }
508 }
509}
510
511#else
512static inline void mb_free_blocks_double(struct inode *inode,
513 struct ext4_buddy *e4b, int first, int count)
514{
515 return;
516}
517static inline void mb_mark_used_double(struct ext4_buddy *e4b,
518 int first, int count)
519{
520 return;
521}
522static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
523{
524 return;
525}
526#endif
527
528#ifdef AGGRESSIVE_CHECK
529
530#define MB_CHECK_ASSERT(assert) \
531do { \
532 if (!(assert)) { \
533 printk(KERN_EMERG \
534 "Assertion failure in %s() at %s:%d: \"%s\"\n", \
535 function, file, line, # assert); \
536 BUG(); \
537 } \
538} while (0)
539
540static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
541 const char *function, int line)
542{
543 struct super_block *sb = e4b->bd_sb;
544 int order = e4b->bd_blkbits + 1;
545 int max;
546 int max2;
547 int i;
548 int j;
549 int k;
550 int count;
551 struct ext4_group_info *grp;
552 int fragments = 0;
553 int fstart;
554 struct list_head *cur;
555 void *buddy;
556 void *buddy2;
557
558 {
559 static int mb_check_counter;
560 if (mb_check_counter++ % 100 != 0)
561 return 0;
562 }
563
564 while (order > 1) {
565 buddy = mb_find_buddy(e4b, order, &max);
566 MB_CHECK_ASSERT(buddy);
567 buddy2 = mb_find_buddy(e4b, order - 1, &max2);
568 MB_CHECK_ASSERT(buddy2);
569 MB_CHECK_ASSERT(buddy != buddy2);
570 MB_CHECK_ASSERT(max * 2 == max2);
571
572 count = 0;
573 for (i = 0; i < max; i++) {
574
575 if (mb_test_bit(i, buddy)) {
576
577 if (!mb_test_bit(i << 1, buddy2)) {
578 MB_CHECK_ASSERT(
579 mb_test_bit((i<<1)+1, buddy2));
580 } else if (!mb_test_bit((i << 1) + 1, buddy2)) {
581 MB_CHECK_ASSERT(
582 mb_test_bit(i << 1, buddy2));
583 }
584 continue;
585 }
586
587
588 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
589 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
590
591 for (j = 0; j < (1 << order); j++) {
592 k = (i * (1 << order)) + j;
593 MB_CHECK_ASSERT(
594 !mb_test_bit(k, e4b->bd_bitmap));
595 }
596 count++;
597 }
598 MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
599 order--;
600 }
601
602 fstart = -1;
603 buddy = mb_find_buddy(e4b, 0, &max);
604 for (i = 0; i < max; i++) {
605 if (!mb_test_bit(i, buddy)) {
606 MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
607 if (fstart == -1) {
608 fragments++;
609 fstart = i;
610 }
611 continue;
612 }
613 fstart = -1;
614
615 for (j = 0; j < e4b->bd_blkbits + 1; j++) {
616 buddy2 = mb_find_buddy(e4b, j, &max2);
617 k = i >> j;
618 MB_CHECK_ASSERT(k < max2);
619 MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
620 }
621 }
622 MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
623 MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
624
625 grp = ext4_get_group_info(sb, e4b->bd_group);
626 list_for_each(cur, &grp->bb_prealloc_list) {
627 ext4_group_t groupnr;
628 struct ext4_prealloc_space *pa;
629 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
630 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
631 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
632 for (i = 0; i < pa->pa_len; i++)
633 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
634 }
635 return 0;
636}
637#undef MB_CHECK_ASSERT
638#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
639 __FILE__, __func__, __LINE__)
640#else
641#define mb_check_buddy(e4b)
642#endif
643
644
645
646
647
648
649
650static void ext4_mb_mark_free_simple(struct super_block *sb,
651 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
652 struct ext4_group_info *grp)
653{
654 struct ext4_sb_info *sbi = EXT4_SB(sb);
655 ext4_grpblk_t min;
656 ext4_grpblk_t max;
657 ext4_grpblk_t chunk;
658 unsigned int border;
659
660 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
661
662 border = 2 << sb->s_blocksize_bits;
663
664 while (len > 0) {
665
666 max = ffs(first | border) - 1;
667
668
669 min = fls(len) - 1;
670
671 if (max < min)
672 min = max;
673 chunk = 1 << min;
674
675
676 grp->bb_counters[min]++;
677 if (min > 0)
678 mb_clear_bit(first >> min,
679 buddy + sbi->s_mb_offsets[min]);
680
681 len -= chunk;
682 first += chunk;
683 }
684}
685
686
687
688
689
690static void
691mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
692{
693 int i;
694 int bits;
695
696 grp->bb_largest_free_order = -1;
697
698 bits = sb->s_blocksize_bits + 1;
699 for (i = bits; i >= 0; i--) {
700 if (grp->bb_counters[i] > 0) {
701 grp->bb_largest_free_order = i;
702 break;
703 }
704 }
705}
706
707static noinline_for_stack
708void ext4_mb_generate_buddy(struct super_block *sb,
709 void *buddy, void *bitmap, ext4_group_t group)
710{
711 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
712 struct ext4_sb_info *sbi = EXT4_SB(sb);
713 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
714 ext4_grpblk_t i = 0;
715 ext4_grpblk_t first;
716 ext4_grpblk_t len;
717 unsigned free = 0;
718 unsigned fragments = 0;
719 unsigned long long period = get_cycles();
720
721
722
723 i = mb_find_next_zero_bit(bitmap, max, 0);
724 grp->bb_first_free = i;
725 while (i < max) {
726 fragments++;
727 first = i;
728 i = mb_find_next_bit(bitmap, max, i);
729 len = i - first;
730 free += len;
731 if (len > 1)
732 ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
733 else
734 grp->bb_counters[0]++;
735 if (i < max)
736 i = mb_find_next_zero_bit(bitmap, max, i);
737 }
738 grp->bb_fragments = fragments;
739
740 if (free != grp->bb_free) {
741 ext4_grp_locked_error(sb, group, 0, 0,
742 "block bitmap and bg descriptor "
743 "inconsistent: %u vs %u free clusters",
744 free, grp->bb_free);
745
746
747
748
749 grp->bb_free = free;
750 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
751 percpu_counter_sub(&sbi->s_freeclusters_counter,
752 grp->bb_free);
753 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
754 }
755 mb_set_largest_free_order(sb, grp);
756
757 clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
758
759 period = get_cycles() - period;
760 spin_lock(&sbi->s_bal_lock);
761 sbi->s_mb_buddies_generated++;
762 sbi->s_mb_generation_time += period;
763 spin_unlock(&sbi->s_bal_lock);
764}
765
766static void mb_regenerate_buddy(struct ext4_buddy *e4b)
767{
768 int count;
769 int order = 1;
770 void *buddy;
771
772 while ((buddy = mb_find_buddy(e4b, order++, &count))) {
773 ext4_set_bits(buddy, 0, count);
774 }
775 e4b->bd_info->bb_fragments = 0;
776 memset(e4b->bd_info->bb_counters, 0,
777 sizeof(*e4b->bd_info->bb_counters) *
778 (e4b->bd_sb->s_blocksize_bits + 2));
779
780 ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
781 e4b->bd_bitmap, e4b->bd_group);
782}
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
805{
806 ext4_group_t ngroups;
807 int blocksize;
808 int blocks_per_page;
809 int groups_per_page;
810 int err = 0;
811 int i;
812 ext4_group_t first_group, group;
813 int first_block;
814 struct super_block *sb;
815 struct buffer_head *bhs;
816 struct buffer_head **bh = NULL;
817 struct inode *inode;
818 char *data;
819 char *bitmap;
820 struct ext4_group_info *grinfo;
821
822 mb_debug(1, "init page %lu\n", page->index);
823
824 inode = page->mapping->host;
825 sb = inode->i_sb;
826 ngroups = ext4_get_groups_count(sb);
827 blocksize = i_blocksize(inode);
828 blocks_per_page = PAGE_SIZE / blocksize;
829
830 groups_per_page = blocks_per_page >> 1;
831 if (groups_per_page == 0)
832 groups_per_page = 1;
833
834
835 if (groups_per_page > 1) {
836 i = sizeof(struct buffer_head *) * groups_per_page;
837 bh = kzalloc(i, gfp);
838 if (bh == NULL) {
839 err = -ENOMEM;
840 goto out;
841 }
842 } else
843 bh = &bhs;
844
845 first_group = page->index * blocks_per_page / 2;
846
847
848 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
849 if (group >= ngroups)
850 break;
851
852 grinfo = ext4_get_group_info(sb, group);
853
854
855
856
857
858
859 if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
860 bh[i] = NULL;
861 continue;
862 }
863 bh[i] = ext4_read_block_bitmap_nowait(sb, group);
864 if (IS_ERR(bh[i])) {
865 err = PTR_ERR(bh[i]);
866 bh[i] = NULL;
867 goto out;
868 }
869 mb_debug(1, "read bitmap for group %u\n", group);
870 }
871
872
873 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
874 int err2;
875
876 if (!bh[i])
877 continue;
878 err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
879 if (!err)
880 err = err2;
881 }
882
883 first_block = page->index * blocks_per_page;
884 for (i = 0; i < blocks_per_page; i++) {
885 group = (first_block + i) >> 1;
886 if (group >= ngroups)
887 break;
888
889 if (!bh[group - first_group])
890
891 continue;
892
893 if (!buffer_verified(bh[group - first_group]))
894
895 continue;
896 err = 0;
897
898
899
900
901
902
903
904 data = page_address(page) + (i * blocksize);
905 bitmap = bh[group - first_group]->b_data;
906
907
908
909
910
911 if ((first_block + i) & 1) {
912
913 BUG_ON(incore == NULL);
914 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
915 group, page->index, i * blocksize);
916 trace_ext4_mb_buddy_bitmap_load(sb, group);
917 grinfo = ext4_get_group_info(sb, group);
918 grinfo->bb_fragments = 0;
919 memset(grinfo->bb_counters, 0,
920 sizeof(*grinfo->bb_counters) *
921 (sb->s_blocksize_bits+2));
922
923
924
925 ext4_lock_group(sb, group);
926
927 memset(data, 0xff, blocksize);
928 ext4_mb_generate_buddy(sb, data, incore, group);
929 ext4_unlock_group(sb, group);
930 incore = NULL;
931 } else {
932
933 BUG_ON(incore != NULL);
934 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
935 group, page->index, i * blocksize);
936 trace_ext4_mb_bitmap_load(sb, group);
937
938
939 ext4_lock_group(sb, group);
940 memcpy(data, bitmap, blocksize);
941
942
943 ext4_mb_generate_from_pa(sb, data, group);
944 ext4_mb_generate_from_freelist(sb, data, group);
945 ext4_unlock_group(sb, group);
946
947
948
949
950 incore = data;
951 }
952 }
953 SetPageUptodate(page);
954
955out:
956 if (bh) {
957 for (i = 0; i < groups_per_page; i++)
958 brelse(bh[i]);
959 if (bh != &bhs)
960 kfree(bh);
961 }
962 return err;
963}
964
965
966
967
968
969
970
971static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
972 ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
973{
974 struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
975 int block, pnum, poff;
976 int blocks_per_page;
977 struct page *page;
978
979 e4b->bd_buddy_page = NULL;
980 e4b->bd_bitmap_page = NULL;
981
982 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
983
984
985
986
987
988 block = group * 2;
989 pnum = block / blocks_per_page;
990 poff = block % blocks_per_page;
991 page = find_or_create_page(inode->i_mapping, pnum, gfp);
992 if (!page)
993 return -ENOMEM;
994 BUG_ON(page->mapping != inode->i_mapping);
995 e4b->bd_bitmap_page = page;
996 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
997
998 if (blocks_per_page >= 2) {
999
1000 return 0;
1001 }
1002
1003 block++;
1004 pnum = block / blocks_per_page;
1005 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1006 if (!page)
1007 return -ENOMEM;
1008 BUG_ON(page->mapping != inode->i_mapping);
1009 e4b->bd_buddy_page = page;
1010 return 0;
1011}
1012
1013static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1014{
1015 if (e4b->bd_bitmap_page) {
1016 unlock_page(e4b->bd_bitmap_page);
1017 put_page(e4b->bd_bitmap_page);
1018 }
1019 if (e4b->bd_buddy_page) {
1020 unlock_page(e4b->bd_buddy_page);
1021 put_page(e4b->bd_buddy_page);
1022 }
1023}
1024
1025
1026
1027
1028
1029
1030static noinline_for_stack
1031int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
1032{
1033
1034 struct ext4_group_info *this_grp;
1035 struct ext4_buddy e4b;
1036 struct page *page;
1037 int ret = 0;
1038
1039 might_sleep();
1040 mb_debug(1, "init group %u\n", group);
1041 this_grp = ext4_get_group_info(sb, group);
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051 ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
1052 if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
1053
1054
1055
1056
1057 goto err;
1058 }
1059
1060 page = e4b.bd_bitmap_page;
1061 ret = ext4_mb_init_cache(page, NULL, gfp);
1062 if (ret)
1063 goto err;
1064 if (!PageUptodate(page)) {
1065 ret = -EIO;
1066 goto err;
1067 }
1068
1069 if (e4b.bd_buddy_page == NULL) {
1070
1071
1072
1073
1074
1075 ret = 0;
1076 goto err;
1077 }
1078
1079 page = e4b.bd_buddy_page;
1080 ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
1081 if (ret)
1082 goto err;
1083 if (!PageUptodate(page)) {
1084 ret = -EIO;
1085 goto err;
1086 }
1087err:
1088 ext4_mb_put_buddy_page_lock(&e4b);
1089 return ret;
1090}
1091
1092
1093
1094
1095
1096
1097static noinline_for_stack int
1098ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
1099 struct ext4_buddy *e4b, gfp_t gfp)
1100{
1101 int blocks_per_page;
1102 int block;
1103 int pnum;
1104 int poff;
1105 struct page *page;
1106 int ret;
1107 struct ext4_group_info *grp;
1108 struct ext4_sb_info *sbi = EXT4_SB(sb);
1109 struct inode *inode = sbi->s_buddy_cache;
1110
1111 might_sleep();
1112 mb_debug(1, "load group %u\n", group);
1113
1114 blocks_per_page = PAGE_SIZE / sb->s_blocksize;
1115 grp = ext4_get_group_info(sb, group);
1116
1117 e4b->bd_blkbits = sb->s_blocksize_bits;
1118 e4b->bd_info = grp;
1119 e4b->bd_sb = sb;
1120 e4b->bd_group = group;
1121 e4b->bd_buddy_page = NULL;
1122 e4b->bd_bitmap_page = NULL;
1123
1124 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1125
1126
1127
1128
1129 ret = ext4_mb_init_group(sb, group, gfp);
1130 if (ret)
1131 return ret;
1132 }
1133
1134
1135
1136
1137
1138
1139 block = group * 2;
1140 pnum = block / blocks_per_page;
1141 poff = block % blocks_per_page;
1142
1143
1144
1145 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1146 if (page == NULL || !PageUptodate(page)) {
1147 if (page)
1148
1149
1150
1151
1152
1153
1154
1155
1156 put_page(page);
1157 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1158 if (page) {
1159 BUG_ON(page->mapping != inode->i_mapping);
1160 if (!PageUptodate(page)) {
1161 ret = ext4_mb_init_cache(page, NULL, gfp);
1162 if (ret) {
1163 unlock_page(page);
1164 goto err;
1165 }
1166 mb_cmp_bitmaps(e4b, page_address(page) +
1167 (poff * sb->s_blocksize));
1168 }
1169 unlock_page(page);
1170 }
1171 }
1172 if (page == NULL) {
1173 ret = -ENOMEM;
1174 goto err;
1175 }
1176 if (!PageUptodate(page)) {
1177 ret = -EIO;
1178 goto err;
1179 }
1180
1181
1182 e4b->bd_bitmap_page = page;
1183 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
1184
1185 block++;
1186 pnum = block / blocks_per_page;
1187 poff = block % blocks_per_page;
1188
1189 page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
1190 if (page == NULL || !PageUptodate(page)) {
1191 if (page)
1192 put_page(page);
1193 page = find_or_create_page(inode->i_mapping, pnum, gfp);
1194 if (page) {
1195 BUG_ON(page->mapping != inode->i_mapping);
1196 if (!PageUptodate(page)) {
1197 ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
1198 gfp);
1199 if (ret) {
1200 unlock_page(page);
1201 goto err;
1202 }
1203 }
1204 unlock_page(page);
1205 }
1206 }
1207 if (page == NULL) {
1208 ret = -ENOMEM;
1209 goto err;
1210 }
1211 if (!PageUptodate(page)) {
1212 ret = -EIO;
1213 goto err;
1214 }
1215
1216
1217 e4b->bd_buddy_page = page;
1218 e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
1219
1220 BUG_ON(e4b->bd_bitmap_page == NULL);
1221 BUG_ON(e4b->bd_buddy_page == NULL);
1222
1223 return 0;
1224
1225err:
1226 if (page)
1227 put_page(page);
1228 if (e4b->bd_bitmap_page)
1229 put_page(e4b->bd_bitmap_page);
1230 if (e4b->bd_buddy_page)
1231 put_page(e4b->bd_buddy_page);
1232 e4b->bd_buddy = NULL;
1233 e4b->bd_bitmap = NULL;
1234 return ret;
1235}
1236
1237static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1238 struct ext4_buddy *e4b)
1239{
1240 return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
1241}
1242
1243static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1244{
1245 if (e4b->bd_bitmap_page)
1246 put_page(e4b->bd_bitmap_page);
1247 if (e4b->bd_buddy_page)
1248 put_page(e4b->bd_buddy_page);
1249}
1250
1251
1252static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1253{
1254 int order = 1;
1255 int bb_incr = 1 << (e4b->bd_blkbits - 1);
1256 void *bb;
1257
1258 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1259 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1260
1261 bb = e4b->bd_buddy;
1262 while (order <= e4b->bd_blkbits + 1) {
1263 block = block >> 1;
1264 if (!mb_test_bit(block, bb)) {
1265
1266 return order;
1267 }
1268 bb += bb_incr;
1269 bb_incr >>= 1;
1270 order++;
1271 }
1272 return 0;
1273}
1274
1275static void mb_clear_bits(void *bm, int cur, int len)
1276{
1277 __u32 *addr;
1278
1279 len = cur + len;
1280 while (cur < len) {
1281 if ((cur & 31) == 0 && (len - cur) >= 32) {
1282
1283 addr = bm + (cur >> 3);
1284 *addr = 0;
1285 cur += 32;
1286 continue;
1287 }
1288 mb_clear_bit(cur, bm);
1289 cur++;
1290 }
1291}
1292
1293
1294
1295
1296static int mb_test_and_clear_bits(void *bm, int cur, int len)
1297{
1298 __u32 *addr;
1299 int zero_bit = -1;
1300
1301 len = cur + len;
1302 while (cur < len) {
1303 if ((cur & 31) == 0 && (len - cur) >= 32) {
1304
1305 addr = bm + (cur >> 3);
1306 if (*addr != (__u32)(-1) && zero_bit == -1)
1307 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1308 *addr = 0;
1309 cur += 32;
1310 continue;
1311 }
1312 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1313 zero_bit = cur;
1314 cur++;
1315 }
1316
1317 return zero_bit;
1318}
1319
1320void ext4_set_bits(void *bm, int cur, int len)
1321{
1322 __u32 *addr;
1323
1324 len = cur + len;
1325 while (cur < len) {
1326 if ((cur & 31) == 0 && (len - cur) >= 32) {
1327
1328 addr = bm + (cur >> 3);
1329 *addr = 0xffffffff;
1330 cur += 32;
1331 continue;
1332 }
1333 mb_set_bit(cur, bm);
1334 cur++;
1335 }
1336}
1337
1338
1339
1340
1341static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1342{
1343 if (mb_test_bit(*bit + side, bitmap)) {
1344 mb_clear_bit(*bit, bitmap);
1345 (*bit) -= side;
1346 return 1;
1347 }
1348 else {
1349 (*bit) += side;
1350 mb_set_bit(*bit, bitmap);
1351 return -1;
1352 }
1353}
1354
1355static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1356{
1357 int max;
1358 int order = 1;
1359 void *buddy = mb_find_buddy(e4b, order, &max);
1360
1361 while (buddy) {
1362 void *buddy2;
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393 if (first & 1)
1394 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1395 if (!(last & 1))
1396 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1397 if (first > last)
1398 break;
1399 order++;
1400
1401 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1402 mb_clear_bits(buddy, first, last - first + 1);
1403 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1404 break;
1405 }
1406 first >>= 1;
1407 last >>= 1;
1408 buddy = buddy2;
1409 }
1410}
1411
1412static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1413 int first, int count)
1414{
1415 int left_is_free = 0;
1416 int right_is_free = 0;
1417 int block;
1418 int last = first + count - 1;
1419 struct super_block *sb = e4b->bd_sb;
1420
1421 if (WARN_ON(count == 0))
1422 return;
1423 BUG_ON(last >= (sb->s_blocksize << 3));
1424 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1425
1426 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
1427 return;
1428
1429 mb_check_buddy(e4b);
1430 mb_free_blocks_double(inode, e4b, first, count);
1431
1432 e4b->bd_info->bb_free += count;
1433 if (first < e4b->bd_info->bb_first_free)
1434 e4b->bd_info->bb_first_free = first;
1435
1436
1437
1438
1439 if (first != 0)
1440 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1441 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1442 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1443 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1444
1445 if (unlikely(block != -1)) {
1446 struct ext4_sb_info *sbi = EXT4_SB(sb);
1447 ext4_fsblk_t blocknr;
1448
1449 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1450 blocknr += EXT4_C2B(sbi, block);
1451 ext4_grp_locked_error(sb, e4b->bd_group,
1452 inode ? inode->i_ino : 0,
1453 blocknr,
1454 "freeing already freed block "
1455 "(bit %u); block bitmap corrupt.",
1456 block);
1457 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
1458 percpu_counter_sub(&sbi->s_freeclusters_counter,
1459 e4b->bd_info->bb_free);
1460
1461 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1462 &e4b->bd_info->bb_state);
1463 mb_regenerate_buddy(e4b);
1464 goto done;
1465 }
1466
1467
1468 if (left_is_free && right_is_free)
1469 e4b->bd_info->bb_fragments--;
1470 else if (!left_is_free && !right_is_free)
1471 e4b->bd_info->bb_fragments++;
1472
1473
1474
1475
1476
1477
1478
1479 if (first & 1) {
1480 first += !left_is_free;
1481 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1482 }
1483 if (!(last & 1)) {
1484 last -= !right_is_free;
1485 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1486 }
1487
1488 if (first <= last)
1489 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1490
1491done:
1492 mb_set_largest_free_order(sb, e4b->bd_info);
1493 mb_check_buddy(e4b);
1494}
1495
1496static int mb_find_extent(struct ext4_buddy *e4b, int block,
1497 int needed, struct ext4_free_extent *ex)
1498{
1499 int next = block;
1500 int max, order;
1501 void *buddy;
1502
1503 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1504 BUG_ON(ex == NULL);
1505
1506 buddy = mb_find_buddy(e4b, 0, &max);
1507 BUG_ON(buddy == NULL);
1508 BUG_ON(block >= max);
1509 if (mb_test_bit(block, buddy)) {
1510 ex->fe_len = 0;
1511 ex->fe_start = 0;
1512 ex->fe_group = 0;
1513 return 0;
1514 }
1515
1516
1517 order = mb_find_order_for_block(e4b, block);
1518 block = block >> order;
1519
1520 ex->fe_len = 1 << order;
1521 ex->fe_start = block << order;
1522 ex->fe_group = e4b->bd_group;
1523
1524
1525 next = next - ex->fe_start;
1526 ex->fe_len -= next;
1527 ex->fe_start += next;
1528
1529 while (needed > ex->fe_len &&
1530 mb_find_buddy(e4b, order, &max)) {
1531
1532 if (block + 1 >= max)
1533 break;
1534
1535 next = (block + 1) * (1 << order);
1536 if (mb_test_bit(next, e4b->bd_bitmap))
1537 break;
1538
1539 order = mb_find_order_for_block(e4b, next);
1540
1541 block = next >> order;
1542 ex->fe_len += 1 << order;
1543 }
1544
1545 if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) {
1546
1547 WARN_ON(1);
1548 ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
1549 "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
1550 block, order, needed, ex->fe_group, ex->fe_start,
1551 ex->fe_len, ex->fe_logical);
1552 ex->fe_len = 0;
1553 ex->fe_start = 0;
1554 ex->fe_group = 0;
1555 }
1556 return ex->fe_len;
1557}
1558
1559static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1560{
1561 int ord;
1562 int mlen = 0;
1563 int max = 0;
1564 int cur;
1565 int start = ex->fe_start;
1566 int len = ex->fe_len;
1567 unsigned ret = 0;
1568 int len0 = len;
1569 void *buddy;
1570
1571 BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
1572 BUG_ON(e4b->bd_group != ex->fe_group);
1573 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1574 mb_check_buddy(e4b);
1575 mb_mark_used_double(e4b, start, len);
1576
1577 e4b->bd_info->bb_free -= len;
1578 if (e4b->bd_info->bb_first_free == start)
1579 e4b->bd_info->bb_first_free += len;
1580
1581
1582 if (start != 0)
1583 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1584 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1585 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1586 if (mlen && max)
1587 e4b->bd_info->bb_fragments++;
1588 else if (!mlen && !max)
1589 e4b->bd_info->bb_fragments--;
1590
1591
1592 while (len) {
1593 ord = mb_find_order_for_block(e4b, start);
1594
1595 if (((start >> ord) << ord) == start && len >= (1 << ord)) {
1596
1597 mlen = 1 << ord;
1598 buddy = mb_find_buddy(e4b, ord, &max);
1599 BUG_ON((start >> ord) >= max);
1600 mb_set_bit(start >> ord, buddy);
1601 e4b->bd_info->bb_counters[ord]--;
1602 start += mlen;
1603 len -= mlen;
1604 BUG_ON(len < 0);
1605 continue;
1606 }
1607
1608
1609 if (ret == 0)
1610 ret = len | (ord << 16);
1611
1612
1613 BUG_ON(ord <= 0);
1614 buddy = mb_find_buddy(e4b, ord, &max);
1615 mb_set_bit(start >> ord, buddy);
1616 e4b->bd_info->bb_counters[ord]--;
1617
1618 ord--;
1619 cur = (start >> ord) & ~1U;
1620 buddy = mb_find_buddy(e4b, ord, &max);
1621 mb_clear_bit(cur, buddy);
1622 mb_clear_bit(cur + 1, buddy);
1623 e4b->bd_info->bb_counters[ord]++;
1624 e4b->bd_info->bb_counters[ord]++;
1625 }
1626 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1627
1628 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1629 mb_check_buddy(e4b);
1630
1631 return ret;
1632}
1633
1634
1635
1636
1637static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1638 struct ext4_buddy *e4b)
1639{
1640 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1641 int ret;
1642
1643 BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
1644 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1645
1646 ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
1647 ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
1648 ret = mb_mark_used(e4b, &ac->ac_b_ex);
1649
1650
1651
1652 ac->ac_f_ex = ac->ac_b_ex;
1653
1654 ac->ac_status = AC_STATUS_FOUND;
1655 ac->ac_tail = ret & 0xffff;
1656 ac->ac_buddy = ret >> 16;
1657
1658
1659
1660
1661
1662
1663
1664
1665 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1666 get_page(ac->ac_bitmap_page);
1667 ac->ac_buddy_page = e4b->bd_buddy_page;
1668 get_page(ac->ac_buddy_page);
1669
1670 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1671 spin_lock(&sbi->s_md_lock);
1672 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1673 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
1674 spin_unlock(&sbi->s_md_lock);
1675 }
1676}
1677
1678
1679
1680
1681
1682static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1683 struct ext4_buddy *e4b,
1684 int finish_group)
1685{
1686 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1687 struct ext4_free_extent *bex = &ac->ac_b_ex;
1688 struct ext4_free_extent *gex = &ac->ac_g_ex;
1689 struct ext4_free_extent ex;
1690 int max;
1691
1692 if (ac->ac_status == AC_STATUS_FOUND)
1693 return;
1694
1695
1696
1697 if (ac->ac_found > sbi->s_mb_max_to_scan &&
1698 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1699 ac->ac_status = AC_STATUS_BREAK;
1700 return;
1701 }
1702
1703
1704
1705
1706 if (bex->fe_len < gex->fe_len)
1707 return;
1708
1709 if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
1710 && bex->fe_group == e4b->bd_group) {
1711
1712
1713
1714 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1715 if (max >= gex->fe_len) {
1716 ext4_mb_use_best_found(ac, e4b);
1717 return;
1718 }
1719 }
1720}
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1733 struct ext4_free_extent *ex,
1734 struct ext4_buddy *e4b)
1735{
1736 struct ext4_free_extent *bex = &ac->ac_b_ex;
1737 struct ext4_free_extent *gex = &ac->ac_g_ex;
1738
1739 BUG_ON(ex->fe_len <= 0);
1740 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1741 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1742 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1743
1744 ac->ac_found++;
1745
1746
1747
1748
1749 if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
1750 *bex = *ex;
1751 ext4_mb_use_best_found(ac, e4b);
1752 return;
1753 }
1754
1755
1756
1757
1758 if (ex->fe_len == gex->fe_len) {
1759 *bex = *ex;
1760 ext4_mb_use_best_found(ac, e4b);
1761 return;
1762 }
1763
1764
1765
1766
1767 if (bex->fe_len == 0) {
1768 *bex = *ex;
1769 return;
1770 }
1771
1772
1773
1774
1775 if (bex->fe_len < gex->fe_len) {
1776
1777
1778 if (ex->fe_len > bex->fe_len)
1779 *bex = *ex;
1780 } else if (ex->fe_len > gex->fe_len) {
1781
1782
1783
1784 if (ex->fe_len < bex->fe_len)
1785 *bex = *ex;
1786 }
1787
1788 ext4_mb_check_limits(ac, e4b, 0);
1789}
1790
1791static noinline_for_stack
1792int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1793 struct ext4_buddy *e4b)
1794{
1795 struct ext4_free_extent ex = ac->ac_b_ex;
1796 ext4_group_t group = ex.fe_group;
1797 int max;
1798 int err;
1799
1800 BUG_ON(ex.fe_len <= 0);
1801 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1802 if (err)
1803 return err;
1804
1805 ext4_lock_group(ac->ac_sb, group);
1806 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1807
1808 if (max > 0) {
1809 ac->ac_b_ex = ex;
1810 ext4_mb_use_best_found(ac, e4b);
1811 }
1812
1813 ext4_unlock_group(ac->ac_sb, group);
1814 ext4_mb_unload_buddy(e4b);
1815
1816 return 0;
1817}
1818
1819static noinline_for_stack
1820int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1821 struct ext4_buddy *e4b)
1822{
1823 ext4_group_t group = ac->ac_g_ex.fe_group;
1824 int max;
1825 int err;
1826 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1827 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1828 struct ext4_free_extent ex;
1829
1830 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1831 return 0;
1832 if (grp->bb_free == 0)
1833 return 0;
1834
1835 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1836 if (err)
1837 return err;
1838
1839 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
1840 ext4_mb_unload_buddy(e4b);
1841 return 0;
1842 }
1843
1844 ext4_lock_group(ac->ac_sb, group);
1845 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1846 ac->ac_g_ex.fe_len, &ex);
1847 ex.fe_logical = 0xDEADFA11;
1848
1849 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1850 ext4_fsblk_t start;
1851
1852 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1853 ex.fe_start;
1854
1855 if (do_div(start, sbi->s_stripe) == 0) {
1856 ac->ac_found++;
1857 ac->ac_b_ex = ex;
1858 ext4_mb_use_best_found(ac, e4b);
1859 }
1860 } else if (max >= ac->ac_g_ex.fe_len) {
1861 BUG_ON(ex.fe_len <= 0);
1862 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1863 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1864 ac->ac_found++;
1865 ac->ac_b_ex = ex;
1866 ext4_mb_use_best_found(ac, e4b);
1867 } else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
1868
1869
1870 BUG_ON(ex.fe_len <= 0);
1871 BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
1872 BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
1873 ac->ac_found++;
1874 ac->ac_b_ex = ex;
1875 ext4_mb_use_best_found(ac, e4b);
1876 }
1877 ext4_unlock_group(ac->ac_sb, group);
1878 ext4_mb_unload_buddy(e4b);
1879
1880 return 0;
1881}
1882
1883
1884
1885
1886
1887static noinline_for_stack
1888void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1889 struct ext4_buddy *e4b)
1890{
1891 struct super_block *sb = ac->ac_sb;
1892 struct ext4_group_info *grp = e4b->bd_info;
1893 void *buddy;
1894 int i;
1895 int k;
1896 int max;
1897
1898 BUG_ON(ac->ac_2order <= 0);
1899 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1900 if (grp->bb_counters[i] == 0)
1901 continue;
1902
1903 buddy = mb_find_buddy(e4b, i, &max);
1904 BUG_ON(buddy == NULL);
1905
1906 k = mb_find_next_zero_bit(buddy, max, 0);
1907 BUG_ON(k >= max);
1908
1909 ac->ac_found++;
1910
1911 ac->ac_b_ex.fe_len = 1 << i;
1912 ac->ac_b_ex.fe_start = k << i;
1913 ac->ac_b_ex.fe_group = e4b->bd_group;
1914
1915 ext4_mb_use_best_found(ac, e4b);
1916
1917 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1918
1919 if (EXT4_SB(sb)->s_mb_stats)
1920 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1921
1922 break;
1923 }
1924}
1925
1926
1927
1928
1929
1930
1931static noinline_for_stack
1932void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1933 struct ext4_buddy *e4b)
1934{
1935 struct super_block *sb = ac->ac_sb;
1936 void *bitmap = e4b->bd_bitmap;
1937 struct ext4_free_extent ex;
1938 int i;
1939 int free;
1940
1941 free = e4b->bd_info->bb_free;
1942 BUG_ON(free <= 0);
1943
1944 i = e4b->bd_info->bb_first_free;
1945
1946 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1947 i = mb_find_next_zero_bit(bitmap,
1948 EXT4_CLUSTERS_PER_GROUP(sb), i);
1949 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1950
1951
1952
1953
1954
1955 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1956 "%d free clusters as per "
1957 "group info. But bitmap says 0",
1958 free);
1959 break;
1960 }
1961
1962 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1963 BUG_ON(ex.fe_len <= 0);
1964 if (free < ex.fe_len) {
1965 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1966 "%d free clusters as per "
1967 "group info. But got %d blocks",
1968 free, ex.fe_len);
1969
1970
1971
1972
1973
1974 break;
1975 }
1976 ex.fe_logical = 0xDEADC0DE;
1977 ext4_mb_measure_extent(ac, &ex, e4b);
1978
1979 i += ex.fe_len;
1980 free -= ex.fe_len;
1981 }
1982
1983 ext4_mb_check_limits(ac, e4b, 1);
1984}
1985
1986
1987
1988
1989
1990static noinline_for_stack
1991void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1992 struct ext4_buddy *e4b)
1993{
1994 struct super_block *sb = ac->ac_sb;
1995 struct ext4_sb_info *sbi = EXT4_SB(sb);
1996 void *bitmap = e4b->bd_bitmap;
1997 struct ext4_free_extent ex;
1998 ext4_fsblk_t first_group_block;
1999 ext4_fsblk_t a;
2000 ext4_grpblk_t i;
2001 int max;
2002
2003 BUG_ON(sbi->s_stripe == 0);
2004
2005
2006 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
2007
2008 a = first_group_block + sbi->s_stripe - 1;
2009 do_div(a, sbi->s_stripe);
2010 i = (a * sbi->s_stripe) - first_group_block;
2011
2012 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
2013 if (!mb_test_bit(i, bitmap)) {
2014 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
2015 if (max >= sbi->s_stripe) {
2016 ac->ac_found++;
2017 ex.fe_logical = 0xDEADF00D;
2018 ac->ac_b_ex = ex;
2019 ext4_mb_use_best_found(ac, e4b);
2020 break;
2021 }
2022 }
2023 i += sbi->s_stripe;
2024 }
2025}
2026
2027
2028
2029
2030
2031
2032
2033static int ext4_mb_good_group(struct ext4_allocation_context *ac,
2034 ext4_group_t group, int cr)
2035{
2036 unsigned free, fragments;
2037 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
2038 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
2039
2040 BUG_ON(cr < 0 || cr >= 4);
2041
2042 free = grp->bb_free;
2043 if (free == 0)
2044 return 0;
2045 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
2046 return 0;
2047
2048 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
2049 return 0;
2050
2051
2052 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
2053 int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
2054 if (ret)
2055 return ret;
2056 }
2057
2058 fragments = grp->bb_fragments;
2059 if (fragments == 0)
2060 return 0;
2061
2062 switch (cr) {
2063 case 0:
2064 BUG_ON(ac->ac_2order == 0);
2065
2066
2067 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
2068 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
2069 ((group % flex_size) == 0))
2070 return 0;
2071
2072 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
2073 (free / fragments) >= ac->ac_g_ex.fe_len)
2074 return 1;
2075
2076 if (grp->bb_largest_free_order < ac->ac_2order)
2077 return 0;
2078
2079 return 1;
2080 case 1:
2081 if ((free / fragments) >= ac->ac_g_ex.fe_len)
2082 return 1;
2083 break;
2084 case 2:
2085 if (free >= ac->ac_g_ex.fe_len)
2086 return 1;
2087 break;
2088 case 3:
2089 return 1;
2090 default:
2091 BUG();
2092 }
2093
2094 return 0;
2095}
2096
2097static noinline_for_stack int
2098ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
2099{
2100 ext4_group_t ngroups, group, i;
2101 int cr;
2102 int err = 0, first_err = 0;
2103 struct ext4_sb_info *sbi;
2104 struct super_block *sb;
2105 struct ext4_buddy e4b;
2106
2107 sb = ac->ac_sb;
2108 sbi = EXT4_SB(sb);
2109 ngroups = ext4_get_groups_count(sb);
2110
2111 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
2112 ngroups = sbi->s_blockfile_groups;
2113
2114 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
2115
2116
2117 err = ext4_mb_find_by_goal(ac, &e4b);
2118 if (err || ac->ac_status == AC_STATUS_FOUND)
2119 goto out;
2120
2121 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2122 goto out;
2123
2124
2125
2126
2127
2128
2129 i = fls(ac->ac_g_ex.fe_len);
2130 ac->ac_2order = 0;
2131
2132
2133
2134
2135
2136
2137
2138 if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) {
2139
2140
2141
2142 if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
2143 ac->ac_2order = i - 1;
2144 }
2145
2146
2147 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
2148
2149 spin_lock(&sbi->s_md_lock);
2150 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
2151 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
2152 spin_unlock(&sbi->s_md_lock);
2153 }
2154
2155
2156 cr = ac->ac_2order ? 0 : 1;
2157
2158
2159
2160
2161repeat:
2162 for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
2163 ac->ac_criteria = cr;
2164
2165
2166
2167
2168 group = ac->ac_g_ex.fe_group;
2169
2170 for (i = 0; i < ngroups; group++, i++) {
2171 int ret = 0;
2172 cond_resched();
2173
2174
2175
2176
2177 if (group >= ngroups)
2178 group = 0;
2179
2180
2181 ret = ext4_mb_good_group(ac, group, cr);
2182 if (ret <= 0) {
2183 if (!first_err)
2184 first_err = ret;
2185 continue;
2186 }
2187
2188 err = ext4_mb_load_buddy(sb, group, &e4b);
2189 if (err)
2190 goto out;
2191
2192 ext4_lock_group(sb, group);
2193
2194
2195
2196
2197
2198 ret = ext4_mb_good_group(ac, group, cr);
2199 if (ret <= 0) {
2200 ext4_unlock_group(sb, group);
2201 ext4_mb_unload_buddy(&e4b);
2202 if (!first_err)
2203 first_err = ret;
2204 continue;
2205 }
2206
2207 ac->ac_groups_scanned++;
2208 if (cr == 0)
2209 ext4_mb_simple_scan_group(ac, &e4b);
2210 else if (cr == 1 && sbi->s_stripe &&
2211 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
2212 ext4_mb_scan_aligned(ac, &e4b);
2213 else
2214 ext4_mb_complex_scan_group(ac, &e4b);
2215
2216 ext4_unlock_group(sb, group);
2217 ext4_mb_unload_buddy(&e4b);
2218
2219 if (ac->ac_status != AC_STATUS_CONTINUE)
2220 break;
2221 }
2222 }
2223
2224 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
2225 !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
2226
2227
2228
2229
2230
2231 ext4_mb_try_best_found(ac, &e4b);
2232 if (ac->ac_status != AC_STATUS_FOUND) {
2233
2234
2235
2236
2237
2238
2239 ac->ac_b_ex.fe_group = 0;
2240 ac->ac_b_ex.fe_start = 0;
2241 ac->ac_b_ex.fe_len = 0;
2242 ac->ac_status = AC_STATUS_CONTINUE;
2243 ac->ac_flags |= EXT4_MB_HINT_FIRST;
2244 cr = 3;
2245 atomic_inc(&sbi->s_mb_lost_chunks);
2246 goto repeat;
2247 }
2248 }
2249out:
2250 if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
2251 err = first_err;
2252 return err;
2253}
2254
2255static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2256{
2257 struct super_block *sb = seq->private;
2258 ext4_group_t group;
2259
2260 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2261 return NULL;
2262 group = *pos + 1;
2263 return (void *) ((unsigned long) group);
2264}
2265
2266static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2267{
2268 struct super_block *sb = seq->private;
2269 ext4_group_t group;
2270
2271 ++*pos;
2272 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2273 return NULL;
2274 group = *pos + 1;
2275 return (void *) ((unsigned long) group);
2276}
2277
2278static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2279{
2280 struct super_block *sb = seq->private;
2281 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2282 int i;
2283 int err, buddy_loaded = 0;
2284 struct ext4_buddy e4b;
2285 struct ext4_group_info *grinfo;
2286 unsigned char blocksize_bits = min_t(unsigned char,
2287 sb->s_blocksize_bits,
2288 EXT4_MAX_BLOCK_LOG_SIZE);
2289 struct sg {
2290 struct ext4_group_info info;
2291 ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
2292 } sg;
2293
2294 group--;
2295 if (group == 0)
2296 seq_puts(seq, "#group: free frags first ["
2297 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2298 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2299
2300 i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2301 sizeof(struct ext4_group_info);
2302
2303 grinfo = ext4_get_group_info(sb, group);
2304
2305 if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
2306 err = ext4_mb_load_buddy(sb, group, &e4b);
2307 if (err) {
2308 seq_printf(seq, "#%-5u: I/O error\n", group);
2309 return 0;
2310 }
2311 buddy_loaded = 1;
2312 }
2313
2314 memcpy(&sg, ext4_get_group_info(sb, group), i);
2315
2316 if (buddy_loaded)
2317 ext4_mb_unload_buddy(&e4b);
2318
2319 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2320 sg.info.bb_fragments, sg.info.bb_first_free);
2321 for (i = 0; i <= 13; i++)
2322 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
2323 sg.info.bb_counters[i] : 0);
2324 seq_printf(seq, " ]\n");
2325
2326 return 0;
2327}
2328
2329static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2330{
2331}
2332
2333static const struct seq_operations ext4_mb_seq_groups_ops = {
2334 .start = ext4_mb_seq_groups_start,
2335 .next = ext4_mb_seq_groups_next,
2336 .stop = ext4_mb_seq_groups_stop,
2337 .show = ext4_mb_seq_groups_show,
2338};
2339
2340static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2341{
2342 struct super_block *sb = PDE_DATA(inode);
2343 int rc;
2344
2345 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2346 if (rc == 0) {
2347 struct seq_file *m = file->private_data;
2348 m->private = sb;
2349 }
2350 return rc;
2351
2352}
2353
2354const struct file_operations ext4_seq_mb_groups_fops = {
2355 .open = ext4_mb_seq_groups_open,
2356 .read = seq_read,
2357 .llseek = seq_lseek,
2358 .release = seq_release,
2359};
2360
2361static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2362{
2363 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2364 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2365
2366 BUG_ON(!cachep);
2367 return cachep;
2368}
2369
2370
2371
2372
2373
2374int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2375{
2376 struct ext4_sb_info *sbi = EXT4_SB(sb);
2377 unsigned size;
2378 struct ext4_group_info ***new_groupinfo;
2379
2380 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2381 EXT4_DESC_PER_BLOCK_BITS(sb);
2382 if (size <= sbi->s_group_info_size)
2383 return 0;
2384
2385 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2386 new_groupinfo = kvzalloc(size, GFP_KERNEL);
2387 if (!new_groupinfo) {
2388 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2389 return -ENOMEM;
2390 }
2391 if (sbi->s_group_info) {
2392 memcpy(new_groupinfo, sbi->s_group_info,
2393 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2394 kvfree(sbi->s_group_info);
2395 }
2396 sbi->s_group_info = new_groupinfo;
2397 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2398 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2399 sbi->s_group_info_size);
2400 return 0;
2401}
2402
2403
2404int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2405 struct ext4_group_desc *desc)
2406{
2407 int i;
2408 int metalen = 0;
2409 struct ext4_sb_info *sbi = EXT4_SB(sb);
2410 struct ext4_group_info **meta_group_info;
2411 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2412
2413
2414
2415
2416
2417
2418 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2419 metalen = sizeof(*meta_group_info) <<
2420 EXT4_DESC_PER_BLOCK_BITS(sb);
2421 meta_group_info = kmalloc(metalen, GFP_NOFS);
2422 if (meta_group_info == NULL) {
2423 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2424 "for a buddy group");
2425 goto exit_meta_group_info;
2426 }
2427 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2428 meta_group_info;
2429 }
2430
2431 meta_group_info =
2432 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2433 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2434
2435 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
2436 if (meta_group_info[i] == NULL) {
2437 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2438 goto exit_group_info;
2439 }
2440 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2441 &(meta_group_info[i]->bb_state));
2442
2443
2444
2445
2446
2447 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2448 meta_group_info[i]->bb_free =
2449 ext4_free_clusters_after_init(sb, group, desc);
2450 } else {
2451 meta_group_info[i]->bb_free =
2452 ext4_free_group_clusters(sb, desc);
2453 }
2454
2455 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2456 init_rwsem(&meta_group_info[i]->alloc_sem);
2457 meta_group_info[i]->bb_free_root = RB_ROOT;
2458 meta_group_info[i]->bb_largest_free_order = -1;
2459
2460#ifdef DOUBLE_CHECK
2461 {
2462 struct buffer_head *bh;
2463 meta_group_info[i]->bb_bitmap =
2464 kmalloc(sb->s_blocksize, GFP_NOFS);
2465 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2466 bh = ext4_read_block_bitmap(sb, group);
2467 BUG_ON(IS_ERR_OR_NULL(bh));
2468 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2469 sb->s_blocksize);
2470 put_bh(bh);
2471 }
2472#endif
2473
2474 return 0;
2475
2476exit_group_info:
2477
2478 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2479 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2480 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
2481 }
2482exit_meta_group_info:
2483 return -ENOMEM;
2484}
2485
2486static int ext4_mb_init_backend(struct super_block *sb)
2487{
2488 ext4_group_t ngroups = ext4_get_groups_count(sb);
2489 ext4_group_t i;
2490 struct ext4_sb_info *sbi = EXT4_SB(sb);
2491 int err;
2492 struct ext4_group_desc *desc;
2493 struct kmem_cache *cachep;
2494
2495 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2496 if (err)
2497 return err;
2498
2499 sbi->s_buddy_cache = new_inode(sb);
2500 if (sbi->s_buddy_cache == NULL) {
2501 ext4_msg(sb, KERN_ERR, "can't get new inode");
2502 goto err_freesgi;
2503 }
2504
2505
2506
2507
2508 sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
2509 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2510 for (i = 0; i < ngroups; i++) {
2511 desc = ext4_get_group_desc(sb, i, NULL);
2512 if (desc == NULL) {
2513 ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
2514 goto err_freebuddy;
2515 }
2516 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
2517 goto err_freebuddy;
2518 }
2519
2520 return 0;
2521
2522err_freebuddy:
2523 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2524 while (i-- > 0)
2525 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2526 i = sbi->s_group_info_size;
2527 while (i-- > 0)
2528 kfree(sbi->s_group_info[i]);
2529 iput(sbi->s_buddy_cache);
2530err_freesgi:
2531 kvfree(sbi->s_group_info);
2532 return -ENOMEM;
2533}
2534
2535static void ext4_groupinfo_destroy_slabs(void)
2536{
2537 int i;
2538
2539 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2540 if (ext4_groupinfo_caches[i])
2541 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2542 ext4_groupinfo_caches[i] = NULL;
2543 }
2544}
2545
2546static int ext4_groupinfo_create_slab(size_t size)
2547{
2548 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2549 int slab_size;
2550 int blocksize_bits = order_base_2(size);
2551 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2552 struct kmem_cache *cachep;
2553
2554 if (cache_index >= NR_GRPINFO_CACHES)
2555 return -EINVAL;
2556
2557 if (unlikely(cache_index < 0))
2558 cache_index = 0;
2559
2560 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2561 if (ext4_groupinfo_caches[cache_index]) {
2562 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2563 return 0;
2564 }
2565
2566 slab_size = offsetof(struct ext4_group_info,
2567 bb_counters[blocksize_bits + 2]);
2568
2569 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2570 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2571 NULL);
2572
2573 ext4_groupinfo_caches[cache_index] = cachep;
2574
2575 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2576 if (!cachep) {
2577 printk(KERN_EMERG
2578 "EXT4-fs: no memory for groupinfo slab cache\n");
2579 return -ENOMEM;
2580 }
2581
2582 return 0;
2583}
2584
2585int ext4_mb_init(struct super_block *sb)
2586{
2587 struct ext4_sb_info *sbi = EXT4_SB(sb);
2588 unsigned i, j;
2589 unsigned offset, offset_incr;
2590 unsigned max;
2591 int ret;
2592
2593 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2594
2595 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2596 if (sbi->s_mb_offsets == NULL) {
2597 ret = -ENOMEM;
2598 goto out;
2599 }
2600
2601 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2602 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2603 if (sbi->s_mb_maxs == NULL) {
2604 ret = -ENOMEM;
2605 goto out;
2606 }
2607
2608 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2609 if (ret < 0)
2610 goto out;
2611
2612
2613 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
2614 sbi->s_mb_offsets[0] = 0;
2615
2616 i = 1;
2617 offset = 0;
2618 offset_incr = 1 << (sb->s_blocksize_bits - 1);
2619 max = sb->s_blocksize << 2;
2620 do {
2621 sbi->s_mb_offsets[i] = offset;
2622 sbi->s_mb_maxs[i] = max;
2623 offset += offset_incr;
2624 offset_incr = offset_incr >> 1;
2625 max = max >> 1;
2626 i++;
2627 } while (i <= sb->s_blocksize_bits + 1);
2628
2629 spin_lock_init(&sbi->s_md_lock);
2630 spin_lock_init(&sbi->s_bal_lock);
2631 sbi->s_mb_free_pending = 0;
2632 INIT_LIST_HEAD(&sbi->s_freed_data_list);
2633
2634 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
2635 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
2636 sbi->s_mb_stats = MB_DEFAULT_STATS;
2637 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2638 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2652 sbi->s_cluster_bits, 32);
2653
2654
2655
2656
2657
2658
2659
2660
2661 if (sbi->s_stripe > 1) {
2662 sbi->s_mb_group_prealloc = roundup(
2663 sbi->s_mb_group_prealloc, sbi->s_stripe);
2664 }
2665
2666 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2667 if (sbi->s_locality_groups == NULL) {
2668 ret = -ENOMEM;
2669 goto out;
2670 }
2671 for_each_possible_cpu(i) {
2672 struct ext4_locality_group *lg;
2673 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2674 mutex_init(&lg->lg_mutex);
2675 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2676 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2677 spin_lock_init(&lg->lg_prealloc_lock);
2678 }
2679
2680
2681 ret = ext4_mb_init_backend(sb);
2682 if (ret != 0)
2683 goto out_free_locality_groups;
2684
2685 return 0;
2686
2687out_free_locality_groups:
2688 free_percpu(sbi->s_locality_groups);
2689 sbi->s_locality_groups = NULL;
2690out:
2691 kfree(sbi->s_mb_offsets);
2692 sbi->s_mb_offsets = NULL;
2693 kfree(sbi->s_mb_maxs);
2694 sbi->s_mb_maxs = NULL;
2695 return ret;
2696}
2697
2698
2699static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2700{
2701 struct ext4_prealloc_space *pa;
2702 struct list_head *cur, *tmp;
2703 int count = 0;
2704
2705 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2706 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2707 list_del(&pa->pa_group_list);
2708 count++;
2709 kmem_cache_free(ext4_pspace_cachep, pa);
2710 }
2711 if (count)
2712 mb_debug(1, "mballoc: %u PAs left\n", count);
2713
2714}
2715
2716int ext4_mb_release(struct super_block *sb)
2717{
2718 ext4_group_t ngroups = ext4_get_groups_count(sb);
2719 ext4_group_t i;
2720 int num_meta_group_infos;
2721 struct ext4_group_info *grinfo;
2722 struct ext4_sb_info *sbi = EXT4_SB(sb);
2723 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2724
2725 if (sbi->s_group_info) {
2726 for (i = 0; i < ngroups; i++) {
2727 grinfo = ext4_get_group_info(sb, i);
2728#ifdef DOUBLE_CHECK
2729 kfree(grinfo->bb_bitmap);
2730#endif
2731 ext4_lock_group(sb, i);
2732 ext4_mb_cleanup_pa(grinfo);
2733 ext4_unlock_group(sb, i);
2734 kmem_cache_free(cachep, grinfo);
2735 }
2736 num_meta_group_infos = (ngroups +
2737 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2738 EXT4_DESC_PER_BLOCK_BITS(sb);
2739 for (i = 0; i < num_meta_group_infos; i++)
2740 kfree(sbi->s_group_info[i]);
2741 kvfree(sbi->s_group_info);
2742 }
2743 kfree(sbi->s_mb_offsets);
2744 kfree(sbi->s_mb_maxs);
2745 iput(sbi->s_buddy_cache);
2746 if (sbi->s_mb_stats) {
2747 ext4_msg(sb, KERN_INFO,
2748 "mballoc: %u blocks %u reqs (%u success)",
2749 atomic_read(&sbi->s_bal_allocated),
2750 atomic_read(&sbi->s_bal_reqs),
2751 atomic_read(&sbi->s_bal_success));
2752 ext4_msg(sb, KERN_INFO,
2753 "mballoc: %u extents scanned, %u goal hits, "
2754 "%u 2^N hits, %u breaks, %u lost",
2755 atomic_read(&sbi->s_bal_ex_scanned),
2756 atomic_read(&sbi->s_bal_goals),
2757 atomic_read(&sbi->s_bal_2orders),
2758 atomic_read(&sbi->s_bal_breaks),
2759 atomic_read(&sbi->s_mb_lost_chunks));
2760 ext4_msg(sb, KERN_INFO,
2761 "mballoc: %lu generated and it took %Lu",
2762 sbi->s_mb_buddies_generated,
2763 sbi->s_mb_generation_time);
2764 ext4_msg(sb, KERN_INFO,
2765 "mballoc: %u preallocated, %u discarded",
2766 atomic_read(&sbi->s_mb_preallocated),
2767 atomic_read(&sbi->s_mb_discarded));
2768 }
2769
2770 free_percpu(sbi->s_locality_groups);
2771
2772 return 0;
2773}
2774
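/*
 * Convert a cluster range inside a group to a block range and issue a
 * discard for it: chained onto *biop when a bio pointer is supplied,
 * otherwise synchronously via sb_issue_discard().
 */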
2775static inline int ext4_issue_discard(struct super_block *sb,
2776 ext4_group_t block_group, ext4_grpblk_t cluster, int count,
2777 struct bio **biop)
2778{
2779 ext4_fsblk_t discard_block;
2780
2781 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2782 ext4_group_first_block_no(sb, block_group));
2783 count = EXT4_C2B(EXT4_SB(sb), count);
2784 trace_ext4_discard_blocks(sb,
2785 (unsigned long long) discard_block, count);
2786 if (biop) {
2787 return __blkdev_issue_discard(sb->s_bdev,
2788 (sector_t)discard_block << (sb->s_blocksize_bits - 9),
2789 (sector_t)count << (sb->s_blocksize_bits - 9),
2790 GFP_NOFS, 0, biop);
2791 } else
2792 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2793}
2794
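/*
 * Return a freed extent to the buddy cache: drop it from the group's
 * free-data rbtree and mark the clusters free in the buddy bitmap.
 */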
2795static void ext4_free_data_in_buddy(struct super_block *sb,
2796 struct ext4_free_data *entry)
2797{
2798 struct ext4_buddy e4b;
2799 struct ext4_group_info *db;
2800 int err, count = 0, count2 = 0;
2801
2802 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2803 entry->efd_count, entry->efd_group, entry);
2804
2805 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2806
2807 BUG_ON(err != 0);
2808
2809 spin_lock(&EXT4_SB(sb)->s_md_lock);
2810 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
2811 spin_unlock(&EXT4_SB(sb)->s_md_lock);
2812
2813 db = e4b.bd_info;
2814
2815 count += entry->efd_count;
2816 count2++;
2817 ext4_lock_group(sb, entry->efd_group);
2818
2819 rb_erase(&entry->efd_node, &(db->bb_free_root));
2820 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2821
2822
2823
2824
2825
2826
2827
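	/*
	 * Clear the trimmed flag for the group so that the next
	 * ext4_trim_fs can trim it.  If the file system is mounted
	 * with -o discard the range is discarded online instead.
	 */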
2828 if (!test_opt(sb, DISCARD))
2829 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2830
2831 if (!db->bb_free_root.rb_node) {
2832
2833
2834
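		/*
		 * No freed extents remain for this group: drop the page
		 * references taken in ext4_mb_free_metadata().
		 */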
2835 put_page(e4b.bd_buddy_page);
2836 put_page(e4b.bd_bitmap_page);
2837 }
2838 ext4_unlock_group(sb, entry->efd_group);
2839 kmem_cache_free(ext4_free_data_cachep, entry);
2840 ext4_mb_unload_buddy(&e4b);
2841
2842 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2843}
2844
2845
2846
2847
2848
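/*
 * Called once a jbd2 commit has finished: the extents freed in that
 * transaction can now be discarded (with -o discard) and returned to
 * the buddy cache.
 */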
2849void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
2850{
2851 struct ext4_sb_info *sbi = EXT4_SB(sb);
2852 struct ext4_free_data *entry, *tmp;
2853 struct bio *discard_bio = NULL;
2854 struct list_head freed_data_list;
2855 struct list_head *cut_pos = NULL;
2856 int err;
2857
2858 INIT_LIST_HEAD(&freed_data_list);
2859
2860 spin_lock(&sbi->s_md_lock);
2861 list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
2862 if (entry->efd_tid != commit_tid)
2863 break;
2864 cut_pos = &entry->efd_list;
2865 }
2866 if (cut_pos)
2867 list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
2868 cut_pos);
2869 spin_unlock(&sbi->s_md_lock);
2870
2871 if (test_opt(sb, DISCARD)) {
2872 list_for_each_entry(entry, &freed_data_list, efd_list) {
2873 err = ext4_issue_discard(sb, entry->efd_group,
2874 entry->efd_start_cluster,
2875 entry->efd_count,
2876 &discard_bio);
2877 if (err && err != -EOPNOTSUPP) {
2878 ext4_msg(sb, KERN_WARNING, "discard request in"
2879 " group:%d block:%d count:%d failed"
2880 " with %d", entry->efd_group,
2881 entry->efd_start_cluster,
2882 entry->efd_count, err);
2883 } else if (err == -EOPNOTSUPP)
2884 break;
2885 }
2886
2887 if (discard_bio) {
2888 submit_bio_wait(discard_bio);
2889 bio_put(discard_bio);
2890 }
2891 }
2892
2893 list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
2894 ext4_free_data_in_buddy(sb, entry);
2895}
2896
2897int __init ext4_init_mballoc(void)
2898{
2899 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2900 SLAB_RECLAIM_ACCOUNT);
2901 if (ext4_pspace_cachep == NULL)
2902 return -ENOMEM;
2903
2904 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2905 SLAB_RECLAIM_ACCOUNT);
2906 if (ext4_ac_cachep == NULL) {
2907 kmem_cache_destroy(ext4_pspace_cachep);
2908 return -ENOMEM;
2909 }
2910
2911 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2912 SLAB_RECLAIM_ACCOUNT);
2913 if (ext4_free_data_cachep == NULL) {
2914 kmem_cache_destroy(ext4_pspace_cachep);
2915 kmem_cache_destroy(ext4_ac_cachep);
2916 return -ENOMEM;
2917 }
2918 return 0;
2919}
2920
2921void ext4_exit_mballoc(void)
2922{
2923
2924
2925
2926
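	/*
	 * Wait for pending call_rcu() callbacks that free preallocation
	 * descriptors before the slab caches are destroyed.
	 */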
2927 rcu_barrier();
2928 kmem_cache_destroy(ext4_pspace_cachep);
2929 kmem_cache_destroy(ext4_ac_cachep);
2930 kmem_cache_destroy(ext4_free_data_cachep);
2931 ext4_groupinfo_destroy_slabs();
2932}
2933
2934
2935
2936
2937
2938
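/*
 * Mark the best-found extent (ac->ac_b_ex) used in the on-disk block
 * bitmap and update the group descriptor and free-cluster counters.
 */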
2939static noinline_for_stack int
2940ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2941 handle_t *handle, unsigned int reserv_clstrs)
2942{
2943 struct buffer_head *bitmap_bh = NULL;
2944 struct ext4_group_desc *gdp;
2945 struct buffer_head *gdp_bh;
2946 struct ext4_sb_info *sbi;
2947 struct super_block *sb;
2948 ext4_fsblk_t block;
2949 int err, len;
2950
2951 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2952 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2953
2954 sb = ac->ac_sb;
2955 sbi = EXT4_SB(sb);
2956
2957 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2958 if (IS_ERR(bitmap_bh)) {
2959 err = PTR_ERR(bitmap_bh);
2960 bitmap_bh = NULL;
2961 goto out_err;
2962 }
2963
2964 BUFFER_TRACE(bitmap_bh, "getting write access");
2965 err = ext4_journal_get_write_access(handle, bitmap_bh);
2966 if (err)
2967 goto out_err;
2968
2969 err = -EIO;
2970 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2971 if (!gdp)
2972 goto out_err;
2973
2974 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2975 ext4_free_group_clusters(sb, gdp));
2976
2977 BUFFER_TRACE(gdp_bh, "get_write_access");
2978 err = ext4_journal_get_write_access(handle, gdp_bh);
2979 if (err)
2980 goto out_err;
2981
2982 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2983
2984 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2985 if (!ext4_data_block_valid(sbi, block, len)) {
2986 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2987 "fs metadata", block, block+len);
2988
2989
2990
2991
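		/*
		 * The extent overlaps file system metadata, so the blocks
		 * are deliberately leaked: mark them used in the bitmap
		 * and fail the allocation with -EFSCORRUPTED.
		 */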
2992 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2993 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2994 ac->ac_b_ex.fe_len);
2995 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2996 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2997 if (!err)
2998 err = -EFSCORRUPTED;
2999 goto out_err;
3000 }
3001
3002 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3003#ifdef AGGRESSIVE_CHECK
3004 {
3005 int i;
3006 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
3007 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
3008 bitmap_bh->b_data));
3009 }
3010 }
3011#endif
3012 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
3013 ac->ac_b_ex.fe_len);
3014 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3015 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3016 ext4_free_group_clusters_set(sb, gdp,
3017 ext4_free_clusters_after_init(sb,
3018 ac->ac_b_ex.fe_group, gdp));
3019 }
3020 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
3021 ext4_free_group_clusters_set(sb, gdp, len);
3022 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
3023 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
3024
3025 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3026 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
3027
3028
3029
3030 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
3031
3032 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
3033 reserv_clstrs);
3034
3035 if (sbi->s_log_groups_per_flex) {
3036 ext4_group_t flex_group = ext4_flex_group(sbi,
3037 ac->ac_b_ex.fe_group);
3038 atomic64_sub(ac->ac_b_ex.fe_len,
3039 &sbi->s_flex_groups[flex_group].free_clusters);
3040 }
3041
3042 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
3043 if (err)
3044 goto out_err;
3045 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3046
3047out_err:
3048 brelse(bitmap_bh);
3049 return err;
3050}
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
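/*
 * Normalize a locality-group request: the goal length is simply the
 * per-group preallocation size.
 */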
3061static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3062{
3063 struct super_block *sb = ac->ac_sb;
3064 struct ext4_locality_group *lg = ac->ac_lg;
3065
3066 BUG_ON(lg == NULL);
3067 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3068 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3069 current->pid, ac->ac_g_ex.fe_len);
3070}
3071
3072
3073
3074
3075
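/*
 * Normalization means making the request better in terms of size and
 * alignment: choose a preallocation window around the original request
 * based on the (predicted) file size.
 */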
3076static noinline_for_stack void
3077ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3078 struct ext4_allocation_request *ar)
3079{
3080 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3081 int bsbits, max;
3082 ext4_lblk_t end;
3083 loff_t size, start_off;
3084 loff_t orig_size __maybe_unused;
3085 ext4_lblk_t start;
3086 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3087 struct ext4_prealloc_space *pa;
3088
3089
3090
3091 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3092 return;
3093
3094
3095 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3096 return;
3097
3098
3099
3100 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3101 return;
3102
3103 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3104 ext4_mb_normalize_group_request(ac);
3105		return;
3106 }
3107
3108 bsbits = ac->ac_sb->s_blocksize_bits;
3109
3110
3111
3112 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3113 size = size << bsbits;
3114 if (size < i_size_read(ac->ac_inode))
3115 size = i_size_read(ac->ac_inode);
3116 orig_size = size;
3117
3118
3119 max = 2 << bsbits;
3120
3121#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3122 (req <= (size) || max <= (chunk_size))
3123
3124
3125
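	/*
	 * Pad the request up to one of a few fixed window sizes chosen
	 * from the predicted file size; larger files get megabyte-sized,
	 * aligned windows.
	 */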
3126 start_off = 0;
3127 if (size <= 16 * 1024) {
3128 size = 16 * 1024;
3129 } else if (size <= 32 * 1024) {
3130 size = 32 * 1024;
3131 } else if (size <= 64 * 1024) {
3132 size = 64 * 1024;
3133 } else if (size <= 128 * 1024) {
3134 size = 128 * 1024;
3135 } else if (size <= 256 * 1024) {
3136 size = 256 * 1024;
3137 } else if (size <= 512 * 1024) {
3138 size = 512 * 1024;
3139 } else if (size <= 1024 * 1024) {
3140 size = 1024 * 1024;
3141 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3142 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3143 (21 - bsbits)) << 21;
3144 size = 2 * 1024 * 1024;
3145 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3146 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3147 (22 - bsbits)) << 22;
3148 size = 4 * 1024 * 1024;
3149 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3150 (8<<20)>>bsbits, max, 8 * 1024)) {
3151 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3152 (23 - bsbits)) << 23;
3153 size = 8 * 1024 * 1024;
3154 } else {
3155 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3156 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3157 ac->ac_o_ex.fe_len) << bsbits;
3158 }
3159 size = size >> bsbits;
3160 start = start_off >> bsbits;
3161
3162
3163 if (ar->pleft && start <= ar->lleft) {
3164 size -= ar->lleft + 1 - start;
3165 start = ar->lleft + 1;
3166 }
3167 if (ar->pright && start + size - 1 >= ar->lright)
3168 size -= start + size - ar->lright;
3169
3170
3171
3172
3173
3174 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3175 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3176
3177 end = start + size;
3178
3179
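	/*
	 * Shrink the window so it does not overlap any of the inode's
	 * existing preallocations.
	 */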
3180 rcu_read_lock();
3181 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3182 ext4_lblk_t pa_end;
3183
3184 if (pa->pa_deleted)
3185 continue;
3186 spin_lock(&pa->pa_lock);
3187 if (pa->pa_deleted) {
3188 spin_unlock(&pa->pa_lock);
3189 continue;
3190 }
3191
3192 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3193 pa->pa_len);
3194
3195
3196 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3197 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3198
3199
3200 if (pa->pa_lstart >= end || pa_end <= start) {
3201 spin_unlock(&pa->pa_lock);
3202 continue;
3203 }
3204 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3205
3206
3207 if (pa_end <= ac->ac_o_ex.fe_logical) {
3208 BUG_ON(pa_end < start);
3209 start = pa_end;
3210 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3211 BUG_ON(pa->pa_lstart > end);
3212 end = pa->pa_lstart;
3213 }
3214 spin_unlock(&pa->pa_lock);
3215 }
3216 rcu_read_unlock();
3217 size = end - start;
3218
3219
3220 rcu_read_lock();
3221 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3222 ext4_lblk_t pa_end;
3223
3224 spin_lock(&pa->pa_lock);
3225 if (pa->pa_deleted == 0) {
3226 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3227 pa->pa_len);
3228 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3229 }
3230 spin_unlock(&pa->pa_lock);
3231 }
3232 rcu_read_unlock();
3233
3234 if (start + size <= ac->ac_o_ex.fe_logical &&
3235 start > ac->ac_o_ex.fe_logical) {
3236 ext4_msg(ac->ac_sb, KERN_ERR,
3237 "start %lu, size %lu, fe_logical %lu",
3238 (unsigned long) start, (unsigned long) size,
3239 (unsigned long) ac->ac_o_ex.fe_logical);
3240 BUG();
3241 }
3242 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3243
3244
3245
3246
3247
3248 ac->ac_g_ex.fe_logical = start;
3249 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3250
3251
3252 if (ar->pright && (ar->lright == (start + size))) {
3253
3254 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3255 &ac->ac_f_ex.fe_group,
3256 &ac->ac_f_ex.fe_start);
3257 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3258 }
3259 if (ar->pleft && (ar->lleft + 1 == start)) {
3260
3261 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3262 &ac->ac_f_ex.fe_group,
3263 &ac->ac_f_ex.fe_start);
3264 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3265 }
3266
3267 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3268 (unsigned) orig_size, (unsigned) start);
3269}
3270
3271static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3272{
3273 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3274
3275 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3276 atomic_inc(&sbi->s_bal_reqs);
3277 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3278 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3279 atomic_inc(&sbi->s_bal_success);
3280 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3281 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3282 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3283 atomic_inc(&sbi->s_bal_goals);
3284 if (ac->ac_found > sbi->s_mb_max_to_scan)
3285 atomic_inc(&sbi->s_bal_breaks);
3286 }
3287
3288 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3289 trace_ext4_mballoc_alloc(ac);
3290 else
3291 trace_ext4_mballoc_prealloc(ac);
3292}
3293
3294
3295
3296
3297
3298
3299
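/*
 * Called when an allocation fails after blocks were already marked
 * used: either give them back to the buddy cache directly, or, when an
 * inode preallocation was used, return them to that PA.
 */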
3300static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3301{
3302 struct ext4_prealloc_space *pa = ac->ac_pa;
3303 struct ext4_buddy e4b;
3304 int err;
3305
3306 if (pa == NULL) {
3307 if (ac->ac_f_ex.fe_len == 0)
3308 return;
3309 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3310 if (err) {
3311
3312
3313
3314
3315
3316 WARN(1, "mb_load_buddy failed (%d)", err);
3317 return;
3318 }
3319 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3320 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3321 ac->ac_f_ex.fe_len);
3322 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3323 ext4_mb_unload_buddy(&e4b);
3324 return;
3325 }
3326 if (pa->pa_type == MB_INODE_PA)
3327 pa->pa_free += ac->ac_b_ex.fe_len;
3328}
3329
3330
3331
3332
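/*
 * Carve the requested range out of an inode preallocation and record it
 * as the best-found extent for this allocation context.
 */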
3333static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3334 struct ext4_prealloc_space *pa)
3335{
3336 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3337 ext4_fsblk_t start;
3338 ext4_fsblk_t end;
3339 int len;
3340
3341
3342 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3343 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3344 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3345 len = EXT4_NUM_B2C(sbi, end - start);
3346 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3347 &ac->ac_b_ex.fe_start);
3348 ac->ac_b_ex.fe_len = len;
3349 ac->ac_status = AC_STATUS_FOUND;
3350 ac->ac_pa = pa;
3351
3352 BUG_ON(start < pa->pa_pstart);
3353 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3354 BUG_ON(pa->pa_free < len);
3355 pa->pa_free -= len;
3356
3357 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3358}
3359
3360
3361
3362
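/*
 * Use blocks preallocated to the locality group that this allocation
 * context belongs to.
 */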
3363static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3364 struct ext4_prealloc_space *pa)
3365{
3366 unsigned int len = ac->ac_o_ex.fe_len;
3367
3368 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3369 &ac->ac_b_ex.fe_group,
3370 &ac->ac_b_ex.fe_start);
3371 ac->ac_b_ex.fe_len = len;
3372 ac->ac_status = AC_STATUS_FOUND;
3373 ac->ac_pa = pa;
3374
3375
3376
3377
3378
3379
3380
3381 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3382}
3383
3384
3385
3386
3387
3388
3389
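/*
 * Pick between a candidate preallocation and the current closest one
 * (cpa, may be NULL): return whichever is nearer to the goal block,
 * taking and dropping pa_count references accordingly.
 */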
3390static struct ext4_prealloc_space *
3391ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3392 struct ext4_prealloc_space *pa,
3393 struct ext4_prealloc_space *cpa)
3394{
3395 ext4_fsblk_t cur_distance, new_distance;
3396
3397 if (cpa == NULL) {
3398 atomic_inc(&pa->pa_count);
3399 return pa;
3400 }
3401 cur_distance = abs(goal_block - cpa->pa_pstart);
3402 new_distance = abs(goal_block - pa->pa_pstart);
3403
3404 if (cur_distance <= new_distance)
3405 return cpa;
3406
3407
3408 atomic_dec(&cpa->pa_count);
3409 atomic_inc(&pa->pa_count);
3410 return pa;
3411}
3412
3413
3414
3415
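/*
 * Try to satisfy the request from already preallocated space: first the
 * inode's own preallocations, then, for group allocations, the per-cpu
 * locality-group lists.
 */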
3416static noinline_for_stack int
3417ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3418{
3419 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3420 int order, i;
3421 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3422 struct ext4_locality_group *lg;
3423 struct ext4_prealloc_space *pa, *cpa = NULL;
3424 ext4_fsblk_t goal_block;
3425
3426
3427 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3428 return 0;
3429
3430
3431 rcu_read_lock();
3432 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3433
3434
3435
3436 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3437 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3438 EXT4_C2B(sbi, pa->pa_len)))
3439 continue;
3440
3441
3442 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3443 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3444 EXT4_MAX_BLOCK_FILE_PHYS))
3445 continue;
3446
3447
3448 spin_lock(&pa->pa_lock);
3449 if (pa->pa_deleted == 0 && pa->pa_free) {
3450 atomic_inc(&pa->pa_count);
3451 ext4_mb_use_inode_pa(ac, pa);
3452 spin_unlock(&pa->pa_lock);
3453 ac->ac_criteria = 10;
3454 rcu_read_unlock();
3455 return 1;
3456 }
3457 spin_unlock(&pa->pa_lock);
3458 }
3459 rcu_read_unlock();
3460
3461
3462 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3463 return 0;
3464
3465
3466 lg = ac->ac_lg;
3467 if (lg == NULL)
3468 return 0;
3469 order = fls(ac->ac_o_ex.fe_len) - 1;
3470 if (order > PREALLOC_TB_SIZE - 1)
3471
3472 order = PREALLOC_TB_SIZE - 1;
3473
3474 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3475
3476
3477
3478
3479 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3480 rcu_read_lock();
3481 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3482 pa_inode_list) {
3483 spin_lock(&pa->pa_lock);
3484 if (pa->pa_deleted == 0 &&
3485 pa->pa_free >= ac->ac_o_ex.fe_len) {
3486
3487 cpa = ext4_mb_check_group_pa(goal_block,
3488 pa, cpa);
3489 }
3490 spin_unlock(&pa->pa_lock);
3491 }
3492 rcu_read_unlock();
3493 }
3494 if (cpa) {
3495 ext4_mb_use_group_pa(ac, cpa);
3496 ac->ac_criteria = 20;
3497 return 1;
3498 }
3499 return 0;
3500}
3501
3502
3503
3504
3505
3506
3507
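/*
 * Mark blocks that were freed but not yet committed as used in the
 * in-core bitmap so the buddy cache generated from it cannot hand them
 * out again.  Called with the ext4 group lock held.
 */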
3508static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3509 ext4_group_t group)
3510{
3511 struct rb_node *n;
3512 struct ext4_group_info *grp;
3513 struct ext4_free_data *entry;
3514
3515 grp = ext4_get_group_info(sb, group);
3516 n = rb_first(&(grp->bb_free_root));
3517
3518 while (n) {
3519 entry = rb_entry(n, struct ext4_free_data, efd_node);
3520 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3521 n = rb_next(n);
3522 }
3523 return;
3524}
3525
3526
3527
3528
3529
3530
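/*
 * Mark all preallocated ranges of the group as used in the in-core
 * bitmap the buddy cache is generated from.  Called with the ext4
 * group lock held.
 */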
3531static noinline_for_stack
3532void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3533 ext4_group_t group)
3534{
3535 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3536 struct ext4_prealloc_space *pa;
3537 struct list_head *cur;
3538 ext4_group_t groupnr;
3539 ext4_grpblk_t start;
3540 int preallocated = 0;
3541 int len;
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551 list_for_each(cur, &grp->bb_prealloc_list) {
3552 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3553 spin_lock(&pa->pa_lock);
3554 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3555 &groupnr, &start);
3556 len = pa->pa_len;
3557 spin_unlock(&pa->pa_lock);
3558 if (unlikely(len == 0))
3559 continue;
3560 BUG_ON(groupnr != group);
3561 ext4_set_bits(bitmap, start, len);
3562 preallocated += len;
3563 }
3564 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3565}
3566
3567static void ext4_mb_pa_callback(struct rcu_head *head)
3568{
3569 struct ext4_prealloc_space *pa;
3570 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3571
3572 BUG_ON(atomic_read(&pa->pa_count));
3573 BUG_ON(pa->pa_deleted == 0);
3574 kmem_cache_free(ext4_pspace_cachep, pa);
3575}
3576
3577
3578
3579
3580
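/*
 * Drop a reference to a preallocation descriptor.  The last reference
 * to a fully consumed PA unlinks it from the group and owner lists and
 * frees it after an RCU grace period.
 */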
3581static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3582 struct super_block *sb, struct ext4_prealloc_space *pa)
3583{
3584 ext4_group_t grp;
3585 ext4_fsblk_t grp_blk;
3586
3587
3588 spin_lock(&pa->pa_lock);
3589 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3590 spin_unlock(&pa->pa_lock);
3591 return;
3592 }
3593
3594 if (pa->pa_deleted == 1) {
3595 spin_unlock(&pa->pa_lock);
3596 return;
3597 }
3598
3599 pa->pa_deleted = 1;
3600 spin_unlock(&pa->pa_lock);
3601
3602 grp_blk = pa->pa_pstart;
3603
3604
3605
3606
3607 if (pa->pa_type == MB_GROUP_PA)
3608 grp_blk--;
3609
3610 grp = ext4_get_group_number(sb, grp_blk);
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626 ext4_lock_group(sb, grp);
3627 list_del(&pa->pa_group_list);
3628 ext4_unlock_group(sb, grp);
3629
3630 spin_lock(pa->pa_obj_lock);
3631 list_del_rcu(&pa->pa_inode_list);
3632 spin_unlock(pa->pa_obj_lock);
3633
3634 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3635}
3636
3637
3638
3639
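/*
 * Create a new inode preallocation covering the extent that was just
 * found, consume the requested part of it and link the PA to both the
 * inode and the group.
 */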
3640static noinline_for_stack int
3641ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3642{
3643 struct super_block *sb = ac->ac_sb;
3644 struct ext4_sb_info *sbi = EXT4_SB(sb);
3645 struct ext4_prealloc_space *pa;
3646 struct ext4_group_info *grp;
3647 struct ext4_inode_info *ei;
3648
3649
3650 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3651 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3652 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3653
3654 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3655 if (pa == NULL)
3656 return -ENOMEM;
3657
3658 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3659 int winl;
3660 int wins;
3661 int win;
3662 int offs;
3663
3664
3665
3666
3667 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3668 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3669
3670
3671
3672
3673 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3674
3675
3676 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3677
3678
3679 win = min(winl, wins);
3680
3681 offs = ac->ac_o_ex.fe_logical %
3682 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3683 if (offs && offs < win)
3684 win = offs;
3685
3686 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3687 EXT4_NUM_B2C(sbi, win);
3688 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3689 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3690 }
3691
3692
3693
3694 ac->ac_f_ex = ac->ac_b_ex;
3695
3696 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3697 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3698 pa->pa_len = ac->ac_b_ex.fe_len;
3699 pa->pa_free = pa->pa_len;
3700 atomic_set(&pa->pa_count, 1);
3701 spin_lock_init(&pa->pa_lock);
3702 INIT_LIST_HEAD(&pa->pa_inode_list);
3703 INIT_LIST_HEAD(&pa->pa_group_list);
3704 pa->pa_deleted = 0;
3705 pa->pa_type = MB_INODE_PA;
3706
3707 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3708 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3709 trace_ext4_mb_new_inode_pa(ac, pa);
3710
3711 ext4_mb_use_inode_pa(ac, pa);
3712 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3713
3714 ei = EXT4_I(ac->ac_inode);
3715 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3716
3717 pa->pa_obj_lock = &ei->i_prealloc_lock;
3718 pa->pa_inode = ac->ac_inode;
3719
3720 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3721 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3722 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3723
3724 spin_lock(pa->pa_obj_lock);
3725 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3726 spin_unlock(pa->pa_obj_lock);
3727
3728 return 0;
3729}
3730
3731
3732
3733
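/*
 * Create a new preallocation for the locality group this allocation
 * belongs to.
 */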
3734static noinline_for_stack int
3735ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3736{
3737 struct super_block *sb = ac->ac_sb;
3738 struct ext4_locality_group *lg;
3739 struct ext4_prealloc_space *pa;
3740 struct ext4_group_info *grp;
3741
3742
3743 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3744 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3745 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3746
3747 BUG_ON(ext4_pspace_cachep == NULL);
3748 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3749 if (pa == NULL)
3750 return -ENOMEM;
3751
3752
3753
3754 ac->ac_f_ex = ac->ac_b_ex;
3755
3756 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3757 pa->pa_lstart = pa->pa_pstart;
3758 pa->pa_len = ac->ac_b_ex.fe_len;
3759 pa->pa_free = pa->pa_len;
3760 atomic_set(&pa->pa_count, 1);
3761 spin_lock_init(&pa->pa_lock);
3762 INIT_LIST_HEAD(&pa->pa_inode_list);
3763 INIT_LIST_HEAD(&pa->pa_group_list);
3764 pa->pa_deleted = 0;
3765 pa->pa_type = MB_GROUP_PA;
3766
3767 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3768 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3769 trace_ext4_mb_new_group_pa(ac, pa);
3770
3771 ext4_mb_use_group_pa(ac, pa);
3772 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3773
3774 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3775 lg = ac->ac_lg;
3776 BUG_ON(lg == NULL);
3777
3778 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3779 pa->pa_inode = NULL;
3780
3781 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3782 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3783 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3784
3785
3786
3787
3788
3789 return 0;
3790}
3791
3792static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3793{
3794 int err;
3795
3796 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3797 err = ext4_mb_new_group_pa(ac);
3798 else
3799 err = ext4_mb_new_inode_pa(ac);
3800 return err;
3801}
3802
3803
3804
3805
3806
3807
3808
3809
3810
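/*
 * Release an inode preallocation: return to the buddy cache every
 * cluster in the PA range that is still clear in the on-disk bitmap and
 * complain if the count does not match pa_free.
 */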
3811static noinline_for_stack int
3812ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3813 struct ext4_prealloc_space *pa)
3814{
3815 struct super_block *sb = e4b->bd_sb;
3816 struct ext4_sb_info *sbi = EXT4_SB(sb);
3817 unsigned int end;
3818 unsigned int next;
3819 ext4_group_t group;
3820 ext4_grpblk_t bit;
3821 unsigned long long grp_blk_start;
3822 int err = 0;
3823 int free = 0;
3824
3825 BUG_ON(pa->pa_deleted == 0);
3826 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3827 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3828 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3829 end = bit + pa->pa_len;
3830
3831 while (bit < end) {
3832 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3833 if (bit >= end)
3834 break;
3835 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3836 mb_debug(1, " free preallocated %u/%u in group %u\n",
3837 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3838 (unsigned) next - bit, (unsigned) group);
3839 free += next - bit;
3840
3841 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3842 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3843 EXT4_C2B(sbi, bit)),
3844 next - bit);
3845 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3846 bit = next + 1;
3847 }
3848 if (free != pa->pa_free) {
3849 ext4_msg(e4b->bd_sb, KERN_CRIT,
3850 "pa %p: logic %lu, phys. %lu, len %lu",
3851 pa, (unsigned long) pa->pa_lstart,
3852 (unsigned long) pa->pa_pstart,
3853 (unsigned long) pa->pa_len);
3854 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3855 free, pa->pa_free);
3856
3857
3858
3859
3860 }
3861 atomic_add(free, &sbi->s_mb_discarded);
3862
3863 return err;
3864}
3865
3866static noinline_for_stack int
3867ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3868 struct ext4_prealloc_space *pa)
3869{
3870 struct super_block *sb = e4b->bd_sb;
3871 ext4_group_t group;
3872 ext4_grpblk_t bit;
3873
3874 trace_ext4_mb_release_group_pa(sb, pa);
3875 BUG_ON(pa->pa_deleted == 0);
3876 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3877 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3878 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3879 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3880 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3881
3882 return 0;
3883}
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
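/*
 * Discard the unused preallocations of a group, trying to free at least
 * "needed" clusters.  PAs that are currently referenced are skipped and
 * the scan is retried; returns the number of clusters freed.
 */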
3894static noinline_for_stack int
3895ext4_mb_discard_group_preallocations(struct super_block *sb,
3896 ext4_group_t group, int needed)
3897{
3898 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3899 struct buffer_head *bitmap_bh = NULL;
3900 struct ext4_prealloc_space *pa, *tmp;
3901 struct list_head list;
3902 struct ext4_buddy e4b;
3903 int err;
3904 int busy = 0;
3905 int free = 0;
3906
3907 mb_debug(1, "discard preallocation for group %u\n", group);
3908
3909 if (list_empty(&grp->bb_prealloc_list))
3910 return 0;
3911
3912 bitmap_bh = ext4_read_block_bitmap(sb, group);
3913 if (IS_ERR(bitmap_bh)) {
3914 err = PTR_ERR(bitmap_bh);
3915 ext4_error(sb, "Error %d reading block bitmap for %u",
3916 err, group);
3917 return 0;
3918 }
3919
3920 err = ext4_mb_load_buddy(sb, group, &e4b);
3921 if (err) {
3922 ext4_warning(sb, "Error %d loading buddy information for %u",
3923 err, group);
3924 put_bh(bitmap_bh);
3925 return 0;
3926 }
3927
3928 if (needed == 0)
3929 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3930
3931 INIT_LIST_HEAD(&list);
3932repeat:
3933 ext4_lock_group(sb, group);
3934 list_for_each_entry_safe(pa, tmp,
3935 &grp->bb_prealloc_list, pa_group_list) {
3936 spin_lock(&pa->pa_lock);
3937 if (atomic_read(&pa->pa_count)) {
3938 spin_unlock(&pa->pa_lock);
3939 busy = 1;
3940 continue;
3941 }
3942 if (pa->pa_deleted) {
3943 spin_unlock(&pa->pa_lock);
3944 continue;
3945 }
3946
3947
3948 pa->pa_deleted = 1;
3949
3950
3951 free += pa->pa_free;
3952
3953 spin_unlock(&pa->pa_lock);
3954
3955 list_del(&pa->pa_group_list);
3956 list_add(&pa->u.pa_tmp_list, &list);
3957 }
3958
3959
3960 if (free < needed && busy) {
3961 busy = 0;
3962 ext4_unlock_group(sb, group);
3963 cond_resched();
3964 goto repeat;
3965 }
3966
3967
3968 if (list_empty(&list)) {
3969 BUG_ON(free != 0);
3970 goto out;
3971 }
3972
3973
3974 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3975
3976
3977 spin_lock(pa->pa_obj_lock);
3978 list_del_rcu(&pa->pa_inode_list);
3979 spin_unlock(pa->pa_obj_lock);
3980
3981 if (pa->pa_type == MB_GROUP_PA)
3982 ext4_mb_release_group_pa(&e4b, pa);
3983 else
3984 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3985
3986 list_del(&pa->u.pa_tmp_list);
3987 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3988 }
3989
3990out:
3991 ext4_unlock_group(sb, group);
3992 ext4_mb_unload_buddy(&e4b);
3993 put_bh(bitmap_bh);
3994 return free;
3995}
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
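/*
 * Release all unused preallocations of an inode, typically on truncate
 * or when the file is released.  If a PA is unexpectedly still in use
 * the function warns, waits and retries.
 */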
4006void ext4_discard_preallocations(struct inode *inode)
4007{
4008 struct ext4_inode_info *ei = EXT4_I(inode);
4009 struct super_block *sb = inode->i_sb;
4010 struct buffer_head *bitmap_bh = NULL;
4011 struct ext4_prealloc_space *pa, *tmp;
4012 ext4_group_t group = 0;
4013 struct list_head list;
4014 struct ext4_buddy e4b;
4015 int err;
4016
4017 if (!S_ISREG(inode->i_mode)) {
4018
4019 return;
4020 }
4021
4022 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
4023 trace_ext4_discard_preallocations(inode);
4024
4025 INIT_LIST_HEAD(&list);
4026
4027repeat:
4028
4029 spin_lock(&ei->i_prealloc_lock);
4030 while (!list_empty(&ei->i_prealloc_list)) {
4031 pa = list_entry(ei->i_prealloc_list.next,
4032 struct ext4_prealloc_space, pa_inode_list);
4033 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
4034 spin_lock(&pa->pa_lock);
4035 if (atomic_read(&pa->pa_count)) {
4036
4037
4038 spin_unlock(&pa->pa_lock);
4039 spin_unlock(&ei->i_prealloc_lock);
4040 ext4_msg(sb, KERN_ERR,
4041 "uh-oh! used pa while discarding");
4042 WARN_ON(1);
4043 schedule_timeout_uninterruptible(HZ);
4044 goto repeat;
4045
4046 }
4047 if (pa->pa_deleted == 0) {
4048 pa->pa_deleted = 1;
4049 spin_unlock(&pa->pa_lock);
4050 list_del_rcu(&pa->pa_inode_list);
4051 list_add(&pa->u.pa_tmp_list, &list);
4052 continue;
4053 }
4054
4055
4056 spin_unlock(&pa->pa_lock);
4057 spin_unlock(&ei->i_prealloc_lock);
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071 schedule_timeout_uninterruptible(HZ);
4072 goto repeat;
4073 }
4074 spin_unlock(&ei->i_prealloc_lock);
4075
4076 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4077 BUG_ON(pa->pa_type != MB_INODE_PA);
4078 group = ext4_get_group_number(sb, pa->pa_pstart);
4079
4080 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4081 GFP_NOFS|__GFP_NOFAIL);
4082 if (err) {
4083 ext4_error(sb, "Error %d loading buddy information for %u",
4084 err, group);
4085 continue;
4086 }
4087
4088 bitmap_bh = ext4_read_block_bitmap(sb, group);
4089 if (IS_ERR(bitmap_bh)) {
4090 err = PTR_ERR(bitmap_bh);
4091 ext4_error(sb, "Error %d reading block bitmap for %u",
4092 err, group);
4093 ext4_mb_unload_buddy(&e4b);
4094 continue;
4095 }
4096
4097 ext4_lock_group(sb, group);
4098 list_del(&pa->pa_group_list);
4099 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4100 ext4_unlock_group(sb, group);
4101
4102 ext4_mb_unload_buddy(&e4b);
4103 put_bh(bitmap_bh);
4104
4105 list_del(&pa->u.pa_tmp_list);
4106 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4107 }
4108}
4109
4110#ifdef CONFIG_EXT4_DEBUG
4111static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4112{
4113 struct super_block *sb = ac->ac_sb;
4114 ext4_group_t ngroups, i;
4115
4116 if (!ext4_mballoc_debug ||
4117 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4118 return;
4119
4120 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4121 " Allocation context details:");
4122 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4123 ac->ac_status, ac->ac_flags);
4124 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4125 "goal %lu/%lu/%lu@%lu, "
4126 "best %lu/%lu/%lu@%lu cr %d",
4127 (unsigned long)ac->ac_o_ex.fe_group,
4128 (unsigned long)ac->ac_o_ex.fe_start,
4129 (unsigned long)ac->ac_o_ex.fe_len,
4130 (unsigned long)ac->ac_o_ex.fe_logical,
4131 (unsigned long)ac->ac_g_ex.fe_group,
4132 (unsigned long)ac->ac_g_ex.fe_start,
4133 (unsigned long)ac->ac_g_ex.fe_len,
4134 (unsigned long)ac->ac_g_ex.fe_logical,
4135 (unsigned long)ac->ac_b_ex.fe_group,
4136 (unsigned long)ac->ac_b_ex.fe_start,
4137 (unsigned long)ac->ac_b_ex.fe_len,
4138 (unsigned long)ac->ac_b_ex.fe_logical,
4139 (int)ac->ac_criteria);
4140 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4141 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4142 ngroups = ext4_get_groups_count(sb);
4143 for (i = 0; i < ngroups; i++) {
4144 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4145 struct ext4_prealloc_space *pa;
4146 ext4_grpblk_t start;
4147 struct list_head *cur;
4148 ext4_lock_group(sb, i);
4149 list_for_each(cur, &grp->bb_prealloc_list) {
4150 pa = list_entry(cur, struct ext4_prealloc_space,
4151 pa_group_list);
4152 spin_lock(&pa->pa_lock);
4153 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4154 NULL, &start);
4155 spin_unlock(&pa->pa_lock);
4156 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4157 start, pa->pa_len);
4158 }
4159 ext4_unlock_group(sb, i);
4160
4161 if (grp->bb_free == 0)
4162 continue;
4163 printk(KERN_ERR "%u: %d/%d \n",
4164 i, grp->bb_free, grp->bb_fragments);
4165 }
4166 printk(KERN_ERR "\n");
4167}
4168#else
4169static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4170{
4171 return;
4172}
4173#endif
4174
4175
4176
4177
4178
4179
4180
4181
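/*
 * Decide between locality-group allocation (small files) and per-inode
 * stream allocation.  The cut-off is sbi->s_mb_stream_request; group
 * allocations are serialized on the locality group's mutex.
 */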
4182static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4183{
4184 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4185 int bsbits = ac->ac_sb->s_blocksize_bits;
4186 loff_t size, isize;
4187
4188 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4189 return;
4190
4191 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4192 return;
4193
4194 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4195 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4196 >> bsbits;
4197
4198 if ((size == isize) &&
4199 !ext4_fs_is_busy(sbi) &&
4200 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4201 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4202 return;
4203 }
4204
4205 if (sbi->s_mb_group_prealloc <= 0) {
4206 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4207 return;
4208 }
4209
4210
4211 size = max(size, isize);
4212 if (size > sbi->s_mb_stream_request) {
4213 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4214 return;
4215 }
4216
4217 BUG_ON(ac->ac_lg != NULL);
4218
4219
4220
4221
4222
4223 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
4224
4225
4226 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4227
4228
4229 mutex_lock(&ac->ac_lg->lg_mutex);
4230}
4231
4232static noinline_for_stack int
4233ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4234 struct ext4_allocation_request *ar)
4235{
4236 struct super_block *sb = ar->inode->i_sb;
4237 struct ext4_sb_info *sbi = EXT4_SB(sb);
4238 struct ext4_super_block *es = sbi->s_es;
4239 ext4_group_t group;
4240 unsigned int len;
4241 ext4_fsblk_t goal;
4242 ext4_grpblk_t block;
4243
4244
4245 len = ar->len;
4246
4247
4248 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4249 len = EXT4_CLUSTERS_PER_GROUP(sb);
4250
4251
4252 goal = ar->goal;
4253 if (goal < le32_to_cpu(es->s_first_data_block) ||
4254 goal >= ext4_blocks_count(es))
4255 goal = le32_to_cpu(es->s_first_data_block);
4256 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4257
4258
4259 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4260 ac->ac_status = AC_STATUS_CONTINUE;
4261 ac->ac_sb = sb;
4262 ac->ac_inode = ar->inode;
4263 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4264 ac->ac_o_ex.fe_group = group;
4265 ac->ac_o_ex.fe_start = block;
4266 ac->ac_o_ex.fe_len = len;
4267 ac->ac_g_ex = ac->ac_o_ex;
4268 ac->ac_flags = ar->flags;
4269
4270
4271
4272 ext4_mb_group_or_file(ac);
4273
4274 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4275 "left: %u/%u, right %u/%u to %swritable\n",
4276 (unsigned) ar->len, (unsigned) ar->logical,
4277 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4278 (unsigned) ar->lleft, (unsigned) ar->pleft,
4279 (unsigned) ar->lright, (unsigned) ar->pright,
4280 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4281 return 0;
4282
4283}
4284
4285static noinline_for_stack void
4286ext4_mb_discard_lg_preallocations(struct super_block *sb,
4287 struct ext4_locality_group *lg,
4288 int order, int total_entries)
4289{
4290 ext4_group_t group = 0;
4291 struct ext4_buddy e4b;
4292 struct list_head discard_list;
4293 struct ext4_prealloc_space *pa, *tmp;
4294
4295 mb_debug(1, "discard locality group preallocation\n");
4296
4297 INIT_LIST_HEAD(&discard_list);
4298
4299 spin_lock(&lg->lg_prealloc_lock);
4300 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4301 pa_inode_list) {
4302 spin_lock(&pa->pa_lock);
4303 if (atomic_read(&pa->pa_count)) {
4304
4305
4306
4307
4308
4309 spin_unlock(&pa->pa_lock);
4310 continue;
4311 }
4312 if (pa->pa_deleted) {
4313 spin_unlock(&pa->pa_lock);
4314 continue;
4315 }
4316
4317 BUG_ON(pa->pa_type != MB_GROUP_PA);
4318
4319
4320 pa->pa_deleted = 1;
4321 spin_unlock(&pa->pa_lock);
4322
4323 list_del_rcu(&pa->pa_inode_list);
4324 list_add(&pa->u.pa_tmp_list, &discard_list);
4325
4326 total_entries--;
4327 if (total_entries <= 5) {
4328
4329
4330
4331
4332
4333
4334 break;
4335 }
4336 }
4337 spin_unlock(&lg->lg_prealloc_lock);
4338
4339 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4340 int err;
4341
4342 group = ext4_get_group_number(sb, pa->pa_pstart);
4343 err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
4344 GFP_NOFS|__GFP_NOFAIL);
4345 if (err) {
4346 ext4_error(sb, "Error %d loading buddy information for %u",
4347 err, group);
4348 continue;
4349 }
4350 ext4_lock_group(sb, group);
4351 list_del(&pa->pa_group_list);
4352 ext4_mb_release_group_pa(&e4b, pa);
4353 ext4_unlock_group(sb, group);
4354
4355 ext4_mb_unload_buddy(&e4b);
4356 list_del(&pa->u.pa_tmp_list);
4357 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4358 }
4359}
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
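/*
 * Put a partially used group PA back on the locality-group list,
 * keeping the list roughly sorted by pa_free, and discard older entries
 * once the list grows beyond a handful of elements.
 */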
4370static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4371{
4372 int order, added = 0, lg_prealloc_count = 1;
4373 struct super_block *sb = ac->ac_sb;
4374 struct ext4_locality_group *lg = ac->ac_lg;
4375 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4376
4377 order = fls(pa->pa_free) - 1;
4378 if (order > PREALLOC_TB_SIZE - 1)
4379
4380 order = PREALLOC_TB_SIZE - 1;
4381
4382 spin_lock(&lg->lg_prealloc_lock);
4383 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4384 pa_inode_list) {
4385 spin_lock(&tmp_pa->pa_lock);
4386 if (tmp_pa->pa_deleted) {
4387 spin_unlock(&tmp_pa->pa_lock);
4388 continue;
4389 }
4390 if (!added && pa->pa_free < tmp_pa->pa_free) {
4391
4392 list_add_tail_rcu(&pa->pa_inode_list,
4393 &tmp_pa->pa_inode_list);
4394 added = 1;
4395
4396
4397
4398
4399 }
4400 spin_unlock(&tmp_pa->pa_lock);
4401 lg_prealloc_count++;
4402 }
4403 if (!added)
4404 list_add_tail_rcu(&pa->pa_inode_list,
4405 &lg->lg_prealloc_list[order]);
4406 spin_unlock(&lg->lg_prealloc_lock);
4407
4408
4409 if (lg_prealloc_count > 8) {
4410 ext4_mb_discard_lg_preallocations(sb, lg,
4411 order, lg_prealloc_count);
4412 return;
4413 }
4414	return;
4415}
4416
4417
4418
4419
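/*
 * Finish an allocation context: update the group PA that was used, drop
 * page and PA references, release the locality group mutex and collect
 * statistics.
 */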
4420static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4421{
4422 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4423 struct ext4_prealloc_space *pa = ac->ac_pa;
4424 if (pa) {
4425 if (pa->pa_type == MB_GROUP_PA) {
4426
4427 spin_lock(&pa->pa_lock);
4428 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4429 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4430 pa->pa_free -= ac->ac_b_ex.fe_len;
4431 pa->pa_len -= ac->ac_b_ex.fe_len;
4432 spin_unlock(&pa->pa_lock);
4433 }
4434 }
4435 if (pa) {
4436
4437
4438
4439
4440
4441
4442 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4443 spin_lock(pa->pa_obj_lock);
4444 list_del_rcu(&pa->pa_inode_list);
4445 spin_unlock(pa->pa_obj_lock);
4446 ext4_mb_add_n_trim(ac);
4447 }
4448 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4449 }
4450 if (ac->ac_bitmap_page)
4451 put_page(ac->ac_bitmap_page);
4452 if (ac->ac_buddy_page)
4453 put_page(ac->ac_buddy_page);
4454 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4455 mutex_unlock(&ac->ac_lg->lg_mutex);
4456 ext4_mb_collect_stats(ac);
4457 return 0;
4458}
4459
4460static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4461{
4462 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4463 int ret;
4464 int freed = 0;
4465
4466 trace_ext4_mb_discard_preallocations(sb, needed);
4467 for (i = 0; i < ngroups && needed > 0; i++) {
4468 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4469 freed += ret;
4470 needed -= ret;
4471 }
4472
4473 return freed;
4474}
4475
4476
4477
4478
4479
4480
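/*
 * Main entry point of mballoc: claim free clusters and quota, try
 * preallocated space first, fall back to the regular allocator and, on
 * ENOSPC, retry after discarding other preallocations.
 */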
4481ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4482 struct ext4_allocation_request *ar, int *errp)
4483{
4484 int freed;
4485 struct ext4_allocation_context *ac = NULL;
4486 struct ext4_sb_info *sbi;
4487 struct super_block *sb;
4488 ext4_fsblk_t block = 0;
4489 unsigned int inquota = 0;
4490 unsigned int reserv_clstrs = 0;
4491
4492 might_sleep();
4493 sb = ar->inode->i_sb;
4494 sbi = EXT4_SB(sb);
4495
4496 trace_ext4_request_blocks(ar);
4497
4498
4499 if (ext4_is_quota_file(ar->inode))
4500 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4501
4502 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
4503
4504
4505
4506
4507 while (ar->len &&
4508 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4509
4510
4511 cond_resched();
4512 ar->len = ar->len >> 1;
4513 }
4514 if (!ar->len) {
4515 *errp = -ENOSPC;
4516 return 0;
4517 }
4518 reserv_clstrs = ar->len;
4519 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4520 dquot_alloc_block_nofail(ar->inode,
4521 EXT4_C2B(sbi, ar->len));
4522 } else {
4523 while (ar->len &&
4524 dquot_alloc_block(ar->inode,
4525 EXT4_C2B(sbi, ar->len))) {
4526
4527 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4528 ar->len--;
4529 }
4530 }
4531 inquota = ar->len;
4532 if (ar->len == 0) {
4533 *errp = -EDQUOT;
4534 goto out;
4535 }
4536 }
4537
4538 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4539 if (!ac) {
4540 ar->len = 0;
4541 *errp = -ENOMEM;
4542 goto out;
4543 }
4544
4545 *errp = ext4_mb_initialize_context(ac, ar);
4546 if (*errp) {
4547 ar->len = 0;
4548 goto out;
4549 }
4550
4551 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4552 if (!ext4_mb_use_preallocated(ac)) {
4553 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4554 ext4_mb_normalize_request(ac, ar);
4555repeat:
4556
4557 *errp = ext4_mb_regular_allocator(ac);
4558 if (*errp)
4559 goto discard_and_exit;
4560
4561
4562
4563
4564 if (ac->ac_status == AC_STATUS_FOUND &&
4565 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4566 *errp = ext4_mb_new_preallocation(ac);
4567 if (*errp) {
4568 discard_and_exit:
4569 ext4_discard_allocated_blocks(ac);
4570 goto errout;
4571 }
4572 }
4573 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4574 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4575 if (*errp) {
4576 ext4_discard_allocated_blocks(ac);
4577 goto errout;
4578 } else {
4579 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4580 ar->len = ac->ac_b_ex.fe_len;
4581 }
4582 } else {
4583 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4584 if (freed)
4585 goto repeat;
4586 *errp = -ENOSPC;
4587 }
4588
4589errout:
4590 if (*errp) {
4591 ac->ac_b_ex.fe_len = 0;
4592 ar->len = 0;
4593 ext4_mb_show_ac(ac);
4594 }
4595 ext4_mb_release_context(ac);
4596out:
4597 if (ac)
4598 kmem_cache_free(ext4_ac_cachep, ac);
4599 if (inquota && ar->len < inquota)
4600 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4601 if (!ar->len) {
4602 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
4603
4604 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4605 reserv_clstrs);
4606 }
4607
4608 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4609
4610 return block;
4611}
4612
4613
4614
4615
4616
4617
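/*
 * Merge two freed extents when they belong to the same transaction and
 * group and are physically adjacent; the surviving entry is new_entry.
 */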
4618static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
4619 struct ext4_free_data *entry,
4620 struct ext4_free_data *new_entry,
4621 struct rb_root *entry_rb_root)
4622{
4623 if ((entry->efd_tid != new_entry->efd_tid) ||
4624 (entry->efd_group != new_entry->efd_group))
4625 return;
4626 if (entry->efd_start_cluster + entry->efd_count ==
4627 new_entry->efd_start_cluster) {
4628 new_entry->efd_start_cluster = entry->efd_start_cluster;
4629 new_entry->efd_count += entry->efd_count;
4630 } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
4631 entry->efd_start_cluster) {
4632 new_entry->efd_count += entry->efd_count;
4633 } else
4634 return;
4635 spin_lock(&sbi->s_md_lock);
4636 list_del(&entry->efd_list);
4637 spin_unlock(&sbi->s_md_lock);
4638 rb_erase(&entry->efd_node, entry_rb_root);
4639 kmem_cache_free(ext4_free_data_cachep, entry);
4640}
4641
4642static noinline_for_stack int
4643ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4644 struct ext4_free_data *new_entry)
4645{
4646 ext4_group_t group = e4b->bd_group;
4647 ext4_grpblk_t cluster;
4648 ext4_grpblk_t clusters = new_entry->efd_count;
4649 struct ext4_free_data *entry;
4650 struct ext4_group_info *db = e4b->bd_info;
4651 struct super_block *sb = e4b->bd_sb;
4652 struct ext4_sb_info *sbi = EXT4_SB(sb);
4653 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4654 struct rb_node *parent = NULL, *new_node;
4655
4656 BUG_ON(!ext4_handle_valid(handle));
4657 BUG_ON(e4b->bd_bitmap_page == NULL);
4658 BUG_ON(e4b->bd_buddy_page == NULL);
4659
4660 new_node = &new_entry->efd_node;
4661 cluster = new_entry->efd_start_cluster;
4662
4663 if (!*n) {
4664
4665
4666
4667
4668
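		/*
		 * First freed extent for this group: pin the bitmap and
		 * buddy pages so the buddy cache cannot be reclaimed
		 * before the extent is returned to it at commit time.
		 */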
4669 get_page(e4b->bd_buddy_page);
4670 get_page(e4b->bd_bitmap_page);
4671 }
4672 while (*n) {
4673 parent = *n;
4674 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4675 if (cluster < entry->efd_start_cluster)
4676 n = &(*n)->rb_left;
4677 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4678 n = &(*n)->rb_right;
4679 else {
4680 ext4_grp_locked_error(sb, group, 0,
4681 ext4_group_first_block_no(sb, group) +
4682 EXT4_C2B(sbi, cluster),
4683 "Block already on to-be-freed list");
4684 return 0;
4685 }
4686 }
4687
4688 rb_link_node(new_node, parent, n);
4689 rb_insert_color(new_node, &db->bb_free_root);
4690
4691
4692 node = rb_prev(new_node);
4693 if (node) {
4694 entry = rb_entry(node, struct ext4_free_data, efd_node);
4695 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4696 &(db->bb_free_root));
4697 }
4698
4699 node = rb_next(new_node);
4700 if (node) {
4701 entry = rb_entry(node, struct ext4_free_data, efd_node);
4702 ext4_try_merge_freed_extent(sbi, entry, new_entry,
4703 &(db->bb_free_root));
4704 }
4705
4706 spin_lock(&sbi->s_md_lock);
4707 list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list);
4708 sbi->s_mb_free_pending += clusters;
4709 spin_unlock(&sbi->s_md_lock);
4710 return 0;
4711}
4712
4713
4714
4715
4716
4717
4718
4719
4720
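/*
 * ext4_free_blocks() -- free the given blocks and update the block
 * bitmap, group descriptor, quota and counters.  Metadata, and data
 * unless the inode uses writeback mode, stays on the freed-data list
 * until the transaction commits.
 */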
4721void ext4_free_blocks(handle_t *handle, struct inode *inode,
4722 struct buffer_head *bh, ext4_fsblk_t block,
4723 unsigned long count, int flags)
4724{
4725 struct buffer_head *bitmap_bh = NULL;
4726 struct super_block *sb = inode->i_sb;
4727 struct ext4_group_desc *gdp;
4728 unsigned int overflow;
4729 ext4_grpblk_t bit;
4730 struct buffer_head *gd_bh;
4731 ext4_group_t block_group;
4732 struct ext4_sb_info *sbi;
4733 struct ext4_buddy e4b;
4734 unsigned int count_clusters;
4735 int err = 0;
4736 int ret;
4737
4738 might_sleep();
4739 if (bh) {
4740 if (block)
4741 BUG_ON(block != bh->b_blocknr);
4742 else
4743 block = bh->b_blocknr;
4744 }
4745
4746 sbi = EXT4_SB(sb);
4747 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4748 !ext4_data_block_valid(sbi, block, count)) {
4749 ext4_error(sb, "Freeing blocks not in datazone - "
4750 "block = %llu, count = %lu", block, count);
4751 goto error_return;
4752 }
4753
4754 ext4_debug("freeing block %llu\n", block);
4755 trace_ext4_free_blocks(inode, block, count, flags);
4756
4757 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4758 BUG_ON(count > 1);
4759
4760 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4761 inode, bh, block);
4762 }
4763
4764
4765
4766
4767
4768
4769
4770
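	/*
	 * If the range to free does not start or end on a cluster
	 * boundary, widen or trim it so whole clusters are freed,
	 * honouring the NOFREE_{FIRST,LAST}_CLUSTER flags.
	 */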
4771 overflow = EXT4_PBLK_COFF(sbi, block);
4772 if (overflow) {
4773 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4774 overflow = sbi->s_cluster_ratio - overflow;
4775 block += overflow;
4776 if (count > overflow)
4777 count -= overflow;
4778 else
4779 return;
4780 } else {
4781 block -= overflow;
4782 count += overflow;
4783 }
4784 }
4785 overflow = EXT4_LBLK_COFF(sbi, count);
4786 if (overflow) {
4787 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4788 if (count > overflow)
4789 count -= overflow;
4790 else
4791 return;
4792 } else
4793 count += sbi->s_cluster_ratio - overflow;
4794 }
4795
4796 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4797 int i;
4798 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
4799
4800 for (i = 0; i < count; i++) {
4801 cond_resched();
4802 if (is_metadata)
4803 bh = sb_find_get_block(inode->i_sb, block + i);
4804 ext4_forget(handle, is_metadata, inode, bh, block + i);
4805 }
4806 }
4807
4808do_more:
4809 overflow = 0;
4810 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4811
4812 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4813 ext4_get_group_info(sb, block_group))))
4814 return;
4815
4816
4817
4818
4819
4820 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4821 overflow = EXT4_C2B(sbi, bit) + count -
4822 EXT4_BLOCKS_PER_GROUP(sb);
4823 count -= overflow;
4824 }
4825 count_clusters = EXT4_NUM_B2C(sbi, count);
4826 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4827 if (IS_ERR(bitmap_bh)) {
4828 err = PTR_ERR(bitmap_bh);
4829 bitmap_bh = NULL;
4830 goto error_return;
4831 }
4832 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4833 if (!gdp) {
4834 err = -EIO;
4835 goto error_return;
4836 }
4837
4838 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4839 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4840 in_range(block, ext4_inode_table(sb, gdp),
4841 sbi->s_itb_per_group) ||
4842 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4843 sbi->s_itb_per_group)) {
4844
4845 ext4_error(sb, "Freeing blocks in system zone - "
4846 "Block = %llu, count = %lu", block, count);
4847
4848 goto error_return;
4849 }
4850
4851 BUFFER_TRACE(bitmap_bh, "getting write access");
4852 err = ext4_journal_get_write_access(handle, bitmap_bh);
4853 if (err)
4854 goto error_return;
4855
4856
4857
4858
4859
4860
4861 BUFFER_TRACE(gd_bh, "get_write_access");
4862 err = ext4_journal_get_write_access(handle, gd_bh);
4863 if (err)
4864 goto error_return;
4865#ifdef AGGRESSIVE_CHECK
4866 {
4867 int i;
4868 for (i = 0; i < count_clusters; i++)
4869 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4870 }
4871#endif
4872 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4873
4874
4875 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
4876 GFP_NOFS|__GFP_NOFAIL);
4877 if (err)
4878 goto error_return;
4879
4880
4881
4882
4883
4884
4885
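	/*
	 * Blocks freed from metadata, or from data outside writeback
	 * mode, must not be reused before the transaction commits, so
	 * queue them on the freed-data list instead of freeing them in
	 * the buddy right away.
	 */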
4886 if (ext4_handle_valid(handle) &&
4887 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
4888 !ext4_should_writeback_data(inode))) {
4889 struct ext4_free_data *new_entry;
4890
4891
4892
4893
4894 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4895 GFP_NOFS|__GFP_NOFAIL);
4896 new_entry->efd_start_cluster = bit;
4897 new_entry->efd_group = block_group;
4898 new_entry->efd_count = count_clusters;
4899 new_entry->efd_tid = handle->h_transaction->t_tid;
4900
4901 ext4_lock_group(sb, block_group);
4902 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4903 ext4_mb_free_metadata(handle, &e4b, new_entry);
4904 } else {
4905
4906
4907
4908
4909 if (test_opt(sb, DISCARD)) {
4910 err = ext4_issue_discard(sb, block_group, bit, count,
4911 NULL);
4912 if (err && err != -EOPNOTSUPP)
4913 ext4_msg(sb, KERN_WARNING, "discard request in"
4914 " group:%d block:%d count:%lu failed"
4915 " with %d", block_group, bit, count,
4916 err);
4917 } else
4918 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4919
4920 ext4_lock_group(sb, block_group);
4921 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4922 mb_free_blocks(inode, &e4b, bit, count_clusters);
4923 }
4924
4925 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4926 ext4_free_group_clusters_set(sb, gdp, ret);
4927 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4928 ext4_group_desc_csum_set(sb, block_group, gdp);
4929 ext4_unlock_group(sb, block_group);
4930
4931 if (sbi->s_log_groups_per_flex) {
4932 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4933 atomic64_add(count_clusters,
4934 &sbi->s_flex_groups[flex_group].free_clusters);
4935 }
4936
4937 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4938 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4939 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4940
4941 ext4_mb_unload_buddy(&e4b);
4942
4943 /* We dirtied the bitmap block */
4944 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4945 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4946
4947 /* And the group descriptor block */
4948 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4949 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4950 if (!err)
4951 err = ret;
4952
4953 if (overflow && !err) {
4954 block += count;
4955 count = overflow;
4956 put_bh(bitmap_bh);
4957 goto do_more;
4958 }
4959error_return:
4960 brelse(bitmap_bh);
4961 ext4_std_error(sb, err);
4962 return;
4963}
4964
4965/**
4966 * ext4_group_add_blocks() -- Add given blocks to an existing group
4967 * @handle:	handle to this transaction
4968 * @sb:		super block
4969 * @block:	start physical block to add to the block group
4970 * @count:	number of blocks to add
4971 *
4972 * This marks the blocks as free in the bitmap and buddy.
4973 */
4974int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4975 ext4_fsblk_t block, unsigned long count)
4976{
4977 struct buffer_head *bitmap_bh = NULL;
4978 struct buffer_head *gd_bh;
4979 ext4_group_t block_group;
4980 ext4_grpblk_t bit;
4981 unsigned int i;
4982 struct ext4_group_desc *desc;
4983 struct ext4_sb_info *sbi = EXT4_SB(sb);
4984 struct ext4_buddy e4b;
4985 int err = 0, ret, free_clusters_count;
4986 ext4_grpblk_t clusters_freed;
4987 ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
4988 ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
4989 unsigned long cluster_count = last_cluster - first_cluster + 1;
4990
4991 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4992
4993 if (count == 0)
4994 return 0;
4995
4996 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4997
4998 /*
4999 * Check to see if we are adding blocks across a group boundary.
5000 */
5001 if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
5002 ext4_warning(sb, "too many blocks added to group %u",
5003 block_group);
5004 err = -EINVAL;
5005 goto error_return;
5006 }
5007
5008 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
5009 if (IS_ERR(bitmap_bh)) {
5010 err = PTR_ERR(bitmap_bh);
5011 bitmap_bh = NULL;
5012 goto error_return;
5013 }
5014
5015 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
5016 if (!desc) {
5017 err = -EIO;
5018 goto error_return;
5019 }
5020
5021 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
5022 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
5023 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
5024 in_range(block + count - 1, ext4_inode_table(sb, desc),
5025 sbi->s_itb_per_group)) {
5026 ext4_error(sb, "Adding blocks in system zones - "
5027 "Block = %llu, count = %lu",
5028 block, count);
5029 err = -EINVAL;
5030 goto error_return;
5031 }
5032
5033 BUFFER_TRACE(bitmap_bh, "getting write access");
5034 err = ext4_journal_get_write_access(handle, bitmap_bh);
5035 if (err)
5036 goto error_return;
5037
5038 /*
5039 * We are about to modify some metadata. Call the journal APIs
5040 * to unshare ->b_data if a currently-committing transaction is
5041 * using it.
5042 */
5043 BUFFER_TRACE(gd_bh, "get_write_access");
5044 err = ext4_journal_get_write_access(handle, gd_bh);
5045 if (err)
5046 goto error_return;
5047
5048 for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
5049 BUFFER_TRACE(bitmap_bh, "clear bit");
5050 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
5051 ext4_error(sb, "bit already cleared for block %llu",
5052 (ext4_fsblk_t)(block + i));
5053 BUFFER_TRACE(bitmap_bh, "bit already cleared");
5054 } else {
5055 clusters_freed++;
5056 }
5057 }
5058
5059 err = ext4_mb_load_buddy(sb, block_group, &e4b);
5060 if (err)
5061 goto error_return;
5062
5063 /*
5064 * group_info->bb_free and the on-disk bitmap must be updated
5065 * with the group lock held, since the buddy-generation code
5066 * reads them under that same lock.
5067 */
5068 ext4_lock_group(sb, block_group);
5069 mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
5070 mb_free_blocks(NULL, &e4b, bit, cluster_count);
5071 free_clusters_count = clusters_freed +
5072 ext4_free_group_clusters(sb, desc);
5073 ext4_free_group_clusters_set(sb, desc, free_clusters_count);
5074 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
5075 ext4_group_desc_csum_set(sb, block_group, desc);
5076 ext4_unlock_group(sb, block_group);
5077 percpu_counter_add(&sbi->s_freeclusters_counter,
5078 clusters_freed);
5079
5080 if (sbi->s_log_groups_per_flex) {
5081 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
5082 atomic64_add(clusters_freed,
5083 &sbi->s_flex_groups[flex_group].free_clusters);
5084 }
5085
5086 ext4_mb_unload_buddy(&e4b);
5087
5088 /* We dirtied the bitmap block */
5089 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
5090 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
5091
5092 /* And the group descriptor block */
5093 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
5094 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
5095 if (!err)
5096 err = ret;
5097
5098error_return:
5099 brelse(bitmap_bh);
5100 ext4_std_error(sb, err);
5101 return err;
5102}
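/*
 * Illustrative caller (an editorial sketch, not part of this file): the
 * online resize path is expected to hand newly added blocks to mballoc
 * through ext4_group_add_blocks(), roughly like:
 *
 *	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
 *
 * where o_blocks_count (the old filesystem size) and add (the number of
 * blocks appended) are placeholder names used only for this example.
 */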
5103
5104/**
5105 * ext4_trim_extent -- function to TRIM one single free extent in the group
5106 * @sb:		super block for the file system
5107 * @start:	starting cluster of the free extent in the group
5108 * @count:	number of clusters to TRIM
5109 * @group:	group number the extent belongs to
5110 * @e4b:	ext4 buddy for the group
5111 *
5112 * Trim "count" clusters starting at "start" in "group". The caller must
5113 * hold the group lock; it is dropped around the discard request and
5114 * re-taken before returning.
5115 */
5116static int ext4_trim_extent(struct super_block *sb, int start, int count,
5117 ext4_group_t group, struct ext4_buddy *e4b)
5118__releases(bitlock)
5119__acquires(bitlock)
5120{
5121 struct ext4_free_extent ex;
5122 int ret = 0;
5123
5124 trace_ext4_trim_extent(sb, group, start, count);
5125
5126 assert_spin_locked(ext4_group_lock_ptr(sb, group));
5127
5128 ex.fe_start = start;
5129 ex.fe_group = group;
5130 ex.fe_len = count;
5131
5132 /*
5133 * Mark the clusters used, so no one can reuse them while
5134 * they are being trimmed.
5135 */
5136 mb_mark_used(e4b, &ex);
5137 ext4_unlock_group(sb, group);
5138 ret = ext4_issue_discard(sb, group, start, count, NULL);
5139 ext4_lock_group(sb, group);
5140 mb_free_blocks(NULL, e4b, start, ex.fe_len);
5141 return ret;
5142}
5143
5144/**
5145 * ext4_trim_all_free -- function to trim all free space in alloc. group
5146 * @sb:		super block for file system
5147 * @group:	group to be trimmed
5148 * @start:	first cluster to examine
5149 * @max:	last cluster to examine
5150 * @minblocks:	minimum extent length (in clusters)
5151 *
5152 * ext4_trim_all_free walks through the group's block bitmap searching for
5153 * free extents. When a free extent of at least @minblocks clusters is
5154 * found, it is marked as used in the group buddy bitmap, a TRIM command
5155 * is issued on the extent, and the extent is freed again in the buddy
5156 * bitmap. This is repeated until the whole group has been scanned or the
5157 * remaining free space drops below @minblocks.
5158 *
5159 * Returns the number of clusters trimmed, or a negative error code
5160 * (for example -ERESTARTSYS if a fatal signal is pending).
5161 */
5162static ext4_grpblk_t
5163ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
5164 ext4_grpblk_t start, ext4_grpblk_t max,
5165 ext4_grpblk_t minblocks)
5166{
5167 void *bitmap;
5168 ext4_grpblk_t next, count = 0, free_count = 0;
5169 struct ext4_buddy e4b;
5170 int ret = 0;
5171
5172 trace_ext4_trim_all_free(sb, group, start, max);
5173
5174 ret = ext4_mb_load_buddy(sb, group, &e4b);
5175 if (ret) {
5176 ext4_warning(sb, "Error %d loading buddy information for %u",
5177 ret, group);
5178 return ret;
5179 }
5180 bitmap = e4b.bd_bitmap;
5181
5182 ext4_lock_group(sb, group);
5183 if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
5184 minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
5185 goto out;
5186
5187 start = (e4b.bd_info->bb_first_free > start) ?
5188 e4b.bd_info->bb_first_free : start;
5189
5190 while (start <= max) {
5191 start = mb_find_next_zero_bit(bitmap, max + 1, start);
5192 if (start > max)
5193 break;
5194 next = mb_find_next_bit(bitmap, max + 1, start);
5195
5196 if ((next - start) >= minblocks) {
5197 ret = ext4_trim_extent(sb, start,
5198 next - start, group, &e4b);
5199 if (ret && ret != -EOPNOTSUPP)
5200 break;
5201 ret = 0;
5202 count += next - start;
5203 }
5204 free_count += next - start;
5205 start = next + 1;
5206
5207 if (fatal_signal_pending(current)) {
5208 count = -ERESTARTSYS;
5209 break;
5210 }
5211
5212 if (need_resched()) {
5213 ext4_unlock_group(sb, group);
5214 cond_resched();
5215 ext4_lock_group(sb, group);
5216 }
5217
5218 if ((e4b.bd_info->bb_free - free_count) < minblocks)
5219 break;
5220 }
5221
5222 if (!ret) {
5223 ret = count;
5224 EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
5225 }
5226out:
5227 ext4_unlock_group(sb, group);
5228 ext4_mb_unload_buddy(&e4b);
5229
5230 ext4_debug("trimmed %d blocks in the group %d\n",
5231 count, group);
5232
5233 return ret;
5234}
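/*
 * Worked example (illustrative): with minblocks = 4, a scan that finds free
 * runs of 2, 6 and 3 clusters trims only the 6-cluster run and returns 6,
 * while free_count still accounts for all 11 clusters so the early-exit
 * check against bb_free above remains correct.
 */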
5235
5236/**
5237 * ext4_trim_fs() -- trim ioctl handle function
5238 * @sb:		superblock for filesystem
5239 * @range:	fstrim_range structure
5240 *
5241 * start:	first byte to trim
5242 * len:		number of bytes to trim from start
5243 * minlen:	minimum extent length in bytes
5244 * ext4_trim_fs goes through all allocation groups containing bytes from
5245 * start to start+len. For each such group, ext4_trim_all_free is invoked
5246 * to trim all free space.
5247 */
5248int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5249{
5250 struct ext4_group_info *grp;
5251 ext4_group_t group, first_group, last_group;
5252 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
5253 uint64_t start, end, minlen, trimmed = 0;
5254 ext4_fsblk_t first_data_blk =
5255 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5256 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
5257 int ret = 0;
5258
5259 start = range->start >> sb->s_blocksize_bits;
5260 end = start + (range->len >> sb->s_blocksize_bits) - 1;
5261 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
5262 range->minlen >> sb->s_blocksize_bits);
5263
5264 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
5265 start >= max_blks ||
5266 range->len < sb->s_blocksize)
5267 return -EINVAL;
5268 if (end >= max_blks)
5269 end = max_blks - 1;
5270 if (end <= first_data_blk)
5271 goto out;
5272 if (start < first_data_blk)
5273 start = first_data_blk;
5274
5275 /* Determine first and last group to examine based on start and end */
5276 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5277 &first_group, &first_cluster);
5278 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5279 &last_group, &last_cluster);
5280
5281 /* end now represents the last cluster to discard in this group */
5282 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5283
5284 for (group = first_group; group <= last_group; group++) {
5285 grp = ext4_get_group_info(sb, group);
5286 /* We only do this if the grp has never been initialized */
5287 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
5288 ret = ext4_mb_init_group(sb, group, GFP_NOFS);
5289 if (ret)
5290 break;
5291 }
5292
5293 /*
5294 * For all the groups except the last one, the last cluster will
5295 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
5296 * change it for the last group; note that last_cluster was
5297 * already computed earlier by ext4_get_group_no_and_offset().
5298 */
5299 if (group == last_group)
5300 end = last_cluster;
5301
5302 if (grp->bb_free >= minlen) {
5303 cnt = ext4_trim_all_free(sb, group, first_cluster,
5304 end, minlen);
5305 if (cnt < 0) {
5306 ret = cnt;
5307 break;
5308 }
5309 trimmed += cnt;
5310 }
5311
5312 /*
5313 * For every group except the first one, we are sure
5314 * that the first cluster to discard will be cluster #0.
5315 */
5316 first_cluster = 0;
5317 }
5318
5319 if (!ret)
5320 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5321
5322out:
5323 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
5324 return ret;
5325}
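/*
 * Illustrative caller (not part of this file): ext4_trim_fs() backs the
 * FITRIM ioctl, which userspace typically drives as:
 *
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = ULLONG_MAX,
 *		.minlen = 0,
 *	};
 *	ioctl(fd, FITRIM, &range);
 *
 * On return, range.len has been rewritten to the number of bytes trimmed,
 * matching the range->len assignment above.
 */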
5326
5327/* Report every free extent of a group in [start, end] via the formatter callback. */
5328int
5329ext4_mballoc_query_range(
5330 struct super_block *sb,
5331 ext4_group_t group,
5332 ext4_grpblk_t start,
5333 ext4_grpblk_t end,
5334 ext4_mballoc_query_range_fn formatter,
5335 void *priv)
5336{
5337 void *bitmap;
5338 ext4_grpblk_t next;
5339 struct ext4_buddy e4b;
5340 int error;
5341
5342 error = ext4_mb_load_buddy(sb, group, &e4b);
5343 if (error)
5344 return error;
5345 bitmap = e4b.bd_bitmap;
5346
5347 ext4_lock_group(sb, group);
5348
5349 start = (e4b.bd_info->bb_first_free > start) ?
5350 e4b.bd_info->bb_first_free : start;
5351 if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
5352 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5353
5354 while (start <= end) {
5355 start = mb_find_next_zero_bit(bitmap, end + 1, start);
5356 if (start > end)
5357 break;
5358 next = mb_find_next_bit(bitmap, end + 1, start);
5359
5360 ext4_unlock_group(sb, group);
5361 error = formatter(sb, group, start, next - start, priv);
5362 if (error)
5363 goto out_unload;
5364 ext4_lock_group(sb, group);
5365
5366 start = next + 1;
5367 }
5368
5369 ext4_unlock_group(sb, group);
5370out_unload:
5371 ext4_mb_unload_buddy(&e4b);
5372
5373 return error;
5374}
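/*
 * Illustrative formatter (hypothetical, not part of this file): a callback
 * with the ext4_mballoc_query_range_fn signature that simply totals the free
 * clusters it is handed; the _example names are placeholders.
 *
 *	static int count_free_example(struct super_block *sb, ext4_group_t group,
 *				      ext4_grpblk_t start, ext4_grpblk_t len,
 *				      void *priv)
 *	{
 *		*(ext4_grpblk_t *)priv += len;
 *		return 0;
 *	}
 *
 *	ext4_grpblk_t free = 0;
 *	error = ext4_mballoc_query_range(sb, group, 0,
 *					 EXT4_CLUSTERS_PER_GROUP(sb) - 1,
 *					 count_free_example, &free);
 */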
5375