1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "mballoc.h"
25#include <linux/debugfs.h>
26#include <trace/events/ext4.h>
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
/* Slab caches for preallocation-space, allocation-context and free-extent
 * objects used throughout the multi-block allocator. */
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;
/* Overlay a group's preallocations onto an in-memory bitmap copy. */
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
/* Overlay a group's not-yet-committed freed blocks onto a bitmap copy. */
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
/* Journal commit callback that releases blocks freed in the transaction. */
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
345
346static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
347{
348#if BITS_PER_LONG == 64
349 *bit += ((unsigned long) addr & 7UL) << 3;
350 addr = (void *) ((unsigned long) addr & ~7UL);
351#elif BITS_PER_LONG == 32
352 *bit += ((unsigned long) addr & 3UL) << 3;
353 addr = (void *) ((unsigned long) addr & ~3UL);
354#else
355#error "how many bits you are?!"
356#endif
357 return addr;
358}
359
/* Test a bit in a possibly misaligned bitmap base. */
static inline int mb_test_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_bit(bit, aligned);
}
369
/* Set a bit in a possibly misaligned bitmap base. */
static inline void mb_set_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_set_bit(bit, aligned);
}
375
/* Clear a bit in a possibly misaligned bitmap base. */
static inline void mb_clear_bit(int bit, void *addr)
{
	void *aligned = mb_correct_addr_and_bit(&bit, addr);

	ext4_clear_bit(bit, aligned);
}
381
/*
 * find-next-zero-bit on a possibly misaligned bitmap; the result is
 * clamped to 'max' so callers never see an index past the search limit.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int offset = 0;
	int found;

	addr = mb_correct_addr_and_bit(&offset, addr);
	found = ext4_find_next_zero_bit(addr, max + offset, start + offset)
			- offset;
	return found > max ? max : found;
}
394
/*
 * find-next-set-bit on a possibly misaligned bitmap; the result is
 * clamped to 'max' so callers never see an index past the search limit.
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int offset = 0;
	int found;

	addr = mb_correct_addr_and_bit(&offset, addr);
	found = ext4_find_next_bit(addr, max + offset, start + offset)
			- offset;
	return found > max ? max : found;
}
407
408static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
409{
410 char *bb;
411
412 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
413 BUG_ON(max == NULL);
414
415 if (order > e4b->bd_blkbits + 1) {
416 *max = 0;
417 return NULL;
418 }
419
420
421 *max = 1 << (e4b->bd_blkbits + 3);
422 if (order == 0)
423 return EXT4_MB_BITMAP(e4b);
424
425 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
426 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
427
428 return bb;
429}
430
431#ifdef DOUBLE_CHECK
/*
 * DOUBLE_CHECK shadow of mb_free_blocks(): clear the freed range in the
 * group's in-memory shadow bitmap, reporting any bit that is already
 * clear (a double free).  Caller must hold the group spinlock.
 */
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
				   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	/* shadow bitmap may be absent if its allocation failed */
	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;
			/* translate group-relative bit to an absolute
			 * filesystem block number for the error report */
			blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
			blocknr += first + i;
			blocknr +=
			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
			ext4_grp_locked_error(sb, e4b->bd_group,
				   __func__, "double-free of inode"
				   " %lu's block %llu(bit %u in group %u)",
				   inode ? inode->i_ino : 0, blocknr,
				   first + i, e4b->bd_group);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}
457
458static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
459{
460 int i;
461
462 if (unlikely(e4b->bd_info->bb_bitmap == NULL))
463 return;
464 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
465 for (i = 0; i < count; i++) {
466 BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
467 mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
468 }
469}
470
471static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
472{
473 if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
474 unsigned char *b1, *b2;
475 int i;
476 b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
477 b2 = (unsigned char *) bitmap;
478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
479 if (b1[i] != b2[i]) {
480 printk(KERN_ERR "corruption in group %u "
481 "at byte %u(%u): %x in copy != %x "
482 "on disk/prealloc\n",
483 e4b->bd_group, i, i * 8, b1[i], b2[i]);
484 BUG();
485 }
486 }
487 }
488}
489
490#else
/* No-op stand-ins used when DOUBLE_CHECK is not enabled. */
static inline void mb_free_blocks_double(struct inode *inode,
		struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
505#endif
506
507#ifdef AGGRESSIVE_CHECK
508
/*
 * AGGRESSIVE_CHECK-only assertion used by __mb_check_buddy(): on failure
 * print the failing expression with the caller-supplied file/function/line
 * and crash the kernel.
 */
#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)
518
/*
 * AGGRESSIVE_CHECK-only consistency check of a group's buddy state:
 * verifies buddy-bitmap invariants across all orders, recounts free
 * fragments against bb_fragments, and checks that every preallocation
 * registered against the group lies on allocated blocks.  Rate-limited
 * to every 100th invocation.  Returns 0 (failures BUG via
 * MB_CHECK_ASSERT).
 */
static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		/* only run the (expensive) check every 100th call */
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	/* walk orders top-down, checking each against the order below */
	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* block i used: at most one of its halves
				 * may be free at the order below */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* block i free: both halves must be marked used
			 * at order-1 (they merged up into this block) */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			/* ...and every underlying bitmap bit must be free */
			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	/* scan the order-0 bitmap: recount fragments and verify that every
	 * free block is covered by a free buddy at some order */
	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* an allocated block must be marked used at every order */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	/* every preallocation in this group must sit on allocated blocks */
	grp = ext4_get_group_info(sb, e4b->bd_group);
	buddy = mb_find_buddy(e4b, 0, &max);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
617#undef MB_CHECK_ASSERT
618#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
619 __FILE__, __func__, __LINE__)
620#else
621#define mb_check_buddy(e4b)
622#endif
623
624
/*
 * Record the free range [first, first+len) in the buddy arrays: split it
 * into power-of-two chunks, each aligned on its own size, and for each
 * chunk bump the order's counter and clear the corresponding buddy bit
 * (order 0 is tracked in the block bitmap by the caller).
 */
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned short border;

	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));

	/* one above the highest order the buddy arrays track */
	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* largest order permitted by the alignment of 'first' */
		max = ffs(first | border) - 1;

		/* largest order that fits in the remaining length */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark the chunk free at order 'min' */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}
660
/*
 * Build the buddy bitmap for a group from its on-disk block bitmap:
 * scan for free extents, feed them into the buddy arrays via
 * ext4_mb_mark_free_simple(), and refresh the group's first-free,
 * fragment and free-block statistics.  If the counted free blocks
 * disagree with the group descriptor, report the corruption and trust
 * the bitmap.  Also accounts generation time in superblock stats.
 */
static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* walk the bitmap, one free extent per iteration */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			/* single blocks are tracked only at order 0 */
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group,  __func__,
			"EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
			group, free, grp->bb_free);
		/* bitmap is authoritative; fix up the cached count so the
		 * allocator keeps working on this group */
		grp->bb_free = free;
	}

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	spin_lock(&EXT4_SB(sb)->s_bal_lock);
	EXT4_SB(sb)->s_mb_buddies_generated++;
	EXT4_SB(sb)->s_mb_generation_time += period;
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
}
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
/*
 * Populate one page of the buddy cache.  Pages in the buddy-cache inode
 * alternate between block bitmaps (even "blocks") and buddy bitmaps (odd
 * "blocks"), two per group, so one page may cover several groups.  The
 * function reads (or initializes, for BLOCK_UNINIT groups) all the block
 * bitmaps the page needs, then fills each block of the page: a bitmap
 * block gets the on-disk bitmap with preallocations and pending frees
 * overlaid; a buddy block is generated from the preceding bitmap, which
 * is passed in via 'incore'.  Returns 0 or a negative errno.
 */
static int ext4_mb_init_cache(struct page *page, char *incore)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh;
	struct inode *inode;
	char *data;
	char *bitmap;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = 1 << inode->i_blkbits;
	blocks_per_page = PAGE_CACHE_SIZE / blocksize;

	/* two blocks (bitmap + buddy) per group */
	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_head pointers for all bitmaps on this page;
	 * the single-group case uses an on-stack pointer instead */
	if (groups_per_page > 1) {
		err = -ENOMEM;
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, GFP_NOFS);
		if (bh == NULL)
			goto out;
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* kick off reads (or in-place init) of every needed block bitmap */
	for (i = 0; i < groups_per_page; i++) {
		struct ext4_group_desc *desc;

		if (first_group + i >= ngroups)
			break;

		err = -EIO;
		desc = ext4_get_group_desc(sb, first_group + i, NULL);
		if (desc == NULL)
			goto out;

		err = -ENOMEM;
		bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
		if (bh[i] == NULL)
			goto out;

		if (bitmap_uptodate(bh[i]))
			continue;

		lock_buffer(bh[i]);
		/* re-check under the buffer lock: someone may have made
		 * the bitmap uptodate while we waited */
		if (bitmap_uptodate(bh[i])) {
			unlock_buffer(bh[i]);
			continue;
		}
		ext4_lock_group(sb, first_group + i);
		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
			/* never-used group: synthesize the bitmap instead
			 * of reading it from disk */
			ext4_init_block_bitmap(sb, bh[i],
						first_group + i, desc);
			set_bitmap_uptodate(bh[i]);
			set_buffer_uptodate(bh[i]);
			ext4_unlock_group(sb, first_group + i);
			unlock_buffer(bh[i]);
			continue;
		}
		ext4_unlock_group(sb, first_group + i);
		if (buffer_uptodate(bh[i])) {
			/* buffer contents already valid; only the bitmap
			 * flag needed setting */
			set_bitmap_uptodate(bh[i]);
			unlock_buffer(bh[i]);
			continue;
		}
		get_bh(bh[i]);
		/* flag set before I/O: end_buffer_read_sync() will mark
		 * buffer_uptodate on completion */
		set_bitmap_uptodate(bh[i]);
		bh[i]->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh[i]);
		mb_debug(1, "read bitmap for group %u\n", first_group + i);
	}

	/* wait for all submitted bitmap reads */
	for (i = 0; i < groups_per_page && bh[i]; i++)
		wait_on_buffer(bh[i]);

	err = -EIO;
	for (i = 0; i < groups_per_page && bh[i]; i++)
		if (!buffer_uptodate(bh[i]))
			goto out;

	err = 0;
	first_block = page->index * blocks_per_page;

	/* start from all-ones; bits beyond the group are "in use" */
	memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
	for (i = 0; i < blocks_per_page; i++) {
		int group;
		struct ext4_group_info *grinfo;

		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		if ((first_block + i) & 1) {
			/* odd block: generate the buddy from the bitmap
			 * copy built in the previous (even) iteration */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			ext4_lock_group(sb, group);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* even block: in-memory bitmap copy with
			 * preallocations and pending frees marked used */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);
			/* this copy is the source for the buddy built in
			 * the next iteration */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page && bh[i]; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
912
/*
 * Initialize the buddy-cache pages for one group: locate (or create) the
 * page holding the group's bitmap block and the page holding its buddy
 * block, and run ext4_mb_init_cache() on each.  The buddy-cache lock for
 * the group range is held across the whole operation so concurrent
 * initializers serialize.  Returns 0 or a negative errno.
 */
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{

	int ret = 0;
	void *bitmap;
	int blocks_per_page;
	int block, pnum, poff;
	int num_grp_locked = 0;
	struct ext4_group_info *this_grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;
	struct page *page = NULL, *bitmap_page = NULL;

	mb_debug(1, "init group %u\n", group);
	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	this_grp = ext4_get_group_info(sb, group);

	/* serialize against other initializers of the same cache range */
	num_grp_locked =  ext4_mb_get_buddy_cache_lock(sb, group);
	if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/* somebody initialized the group while we waited for
		 * the lock; nothing left to do */
		ret = 0;
		goto err;
	}

	/* locate the page/offset of the group's bitmap block (block 2g) */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (page) {
		BUG_ON(page->mapping != inode->i_mapping);
		ret = ext4_mb_init_cache(page, NULL);
		if (ret) {
			unlock_page(page);
			goto err;
		}
		unlock_page(page);
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);
	bitmap_page = page;
	bitmap = page_address(page) + (poff * sb->s_blocksize);

	/* now the buddy block (block 2g + 1) */
	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (page == bitmap_page) {
		/* bitmap and buddy share a page; the first
		 * ext4_mb_init_cache() call already filled it */
		unlock_page(page);
	} else if (page) {
		BUG_ON(page->mapping != inode->i_mapping);
		ret = ext4_mb_init_cache(page, bitmap);
		if (ret) {
			unlock_page(page);
			goto err;
		}
		unlock_page(page);
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);
err:
	ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
	if (bitmap_page)
		page_cache_release(bitmap_page);
	if (page)
		page_cache_release(page);
	return ret;
}
1006
/*
 * Load the bitmap and buddy pages for 'group' into *e4b, taking the
 * group's alloc_sem in read mode and a page-cache reference on each
 * page.  If the group's buddy state is not initialized yet, drop the
 * semaphore, initialize via ext4_mb_init_group() and retry.  On success
 * the caller must release everything with ext4_mb_release_desc().
 * Returns 0 or a negative errno (with the semaphore released).
 */
static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = ext4_get_group_info(sb, group);
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;
	e4b->alloc_semp = &grp->alloc_sem;

repeat_load_buddy:
	down_read(e4b->alloc_semp);

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/* cannot initialize under a read lock: drop it, init the
		 * group, then retry the whole load */
		up_read(e4b->alloc_semp);

		ret = ext4_mb_init_group(sb, group);
		if (ret)
			return ret;
		goto repeat_load_buddy;
	}

	/* bitmap lives in buddy-cache block 2g */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/* stale/incomplete page: drop the reference and
			 * rebuild it below */
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	/* buddy lives in buddy-cache block 2g + 1 */
	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				/* generate buddy from the bitmap loaded
				 * just above */
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	if (e4b->bd_bitmap_page)
		page_cache_release(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		page_cache_release(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;

	/* error path releases the semaphore; caller must not */
	up_read(e4b->alloc_semp);
	return ret;
}
1152
1153static void ext4_mb_release_desc(struct ext4_buddy *e4b)
1154{
1155 if (e4b->bd_bitmap_page)
1156 page_cache_release(e4b->bd_bitmap_page);
1157 if (e4b->bd_buddy_page)
1158 page_cache_release(e4b->bd_buddy_page);
1159
1160 if (e4b->alloc_semp)
1161 up_read(e4b->alloc_semp);
1162}
1163
1164
1165static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1166{
1167 int order = 1;
1168 void *bb;
1169
1170 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
1171 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1172
1173 bb = EXT4_MB_BUDDY(e4b);
1174 while (order <= e4b->bd_blkbits + 1) {
1175 block = block >> 1;
1176 if (!mb_test_bit(block, bb)) {
1177
1178 return order;
1179 }
1180 bb += 1 << (e4b->bd_blkbits - order);
1181 order++;
1182 }
1183 return 0;
1184}
1185
1186static void mb_clear_bits(void *bm, int cur, int len)
1187{
1188 __u32 *addr;
1189
1190 len = cur + len;
1191 while (cur < len) {
1192 if ((cur & 31) == 0 && (len - cur) >= 32) {
1193
1194 addr = bm + (cur >> 3);
1195 *addr = 0;
1196 cur += 32;
1197 continue;
1198 }
1199 mb_clear_bit(cur, bm);
1200 cur++;
1201 }
1202}
1203
1204static void mb_set_bits(void *bm, int cur, int len)
1205{
1206 __u32 *addr;
1207
1208 len = cur + len;
1209 while (cur < len) {
1210 if ((cur & 31) == 0 && (len - cur) >= 32) {
1211
1212 addr = bm + (cur >> 3);
1213 *addr = 0xffffffff;
1214 cur += 32;
1215 continue;
1216 }
1217 mb_set_bit(cur, bm);
1218 cur++;
1219 }
1220}
1221
/*
 * Free 'count' blocks starting at 'first' in the group's buddy state:
 * clear them in the bitmap, update free/first-free/fragment statistics,
 * and merge freed blocks upward through the buddy orders while both
 * halves of a pair are free.  Reports (but proceeds past) double frees.
 * Caller must hold the group spinlock.
 */
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			  int first, int count)
{
	int block = 0;
	int max = 0;
	int order;
	void *buddy;
	void *buddy2;
	struct super_block *sb = e4b->bd_sb;

	BUG_ON(first + count > (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* fragment accounting: look at the blocks adjoining the range.
	 * both neighbors free -> two fragments merge into one;
	 * both used -> a brand new fragment appears */
	if (first != 0)
		block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
		max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
	if (block && max)
		e4b->bd_info->bb_fragments--;
	else if (!block && !max)
		e4b->bd_info->bb_fragments++;

	/* free each block and bubble it up the buddy orders */
	while (count-- > 0) {
		block = first++;
		order = 0;

		if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
			ext4_fsblk_t blocknr;
			/* bit already clear: double free; report with the
			 * absolute block number */
			blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
			blocknr += block;
			blocknr +=
			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
			ext4_grp_locked_error(sb, e4b->bd_group,
				   __func__, "double-free of inode"
				   " %lu's block %llu(bit %u in group %u)",
				   inode ? inode->i_ino : 0, blocknr, block,
				   e4b->bd_group);
		}
		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
		e4b->bd_info->bb_counters[order]++;

		/* merge with the buddy while both halves are free */
		buddy = mb_find_buddy(e4b, order, &max);

		do {
			block &= ~1UL;
			if (mb_test_bit(block, buddy) ||
					mb_test_bit(block + 1, buddy))
				break;

			/* both chunks free: promote one order up */
			buddy2 = mb_find_buddy(e4b, order + 1, &max);

			if (!buddy2)
				break;

			if (order > 0) {
				/* mark the pair used at this order; the
				 * merged chunk is tracked one order up
				 * (order 0 lives in the block bitmap) */
				mb_set_bit(block, buddy);
				mb_set_bit(block + 1, buddy);
			}
			e4b->bd_info->bb_counters[order]--;
			e4b->bd_info->bb_counters[order]--;

			block = block >> 1;
			order++;
			e4b->bd_info->bb_counters[order]++;

			mb_clear_bit(block, buddy2);
			buddy = buddy2;
		} while (1);
	}
	mb_check_buddy(e4b);
}
1305
/*
 * Find the free extent containing 'block' (given at 'order') and
 * describe it in *ex, extending rightward through adjacent free buddies
 * until at least 'needed' blocks are covered or the free run ends.
 * Returns the extent length (0 if 'block' is in use).  Caller must hold
 * the group spinlock.
 */
static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max;
	int ord;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, order, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		/* block is allocated: empty extent */
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* for order-0 queries, jump to the highest order at which the
	 * covering buddy is free */
	if (likely(order == 0)) {
		order = mb_find_order_for_block(e4b, block);
		block = block >> order;
	}

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* trim the part of the buddy chunk before the requested block */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	/* grow rightward, one free buddy chunk at a time */
	while (needed > ex->fe_len &&
	       (buddy = mb_find_buddy(e4b, order, &max))) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
			break;

		ord = mb_find_order_for_block(e4b, next);

		order = ord;
		block = next >> order;
		ex->fe_len += 1 << order;
	}

	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
	return ex->fe_len;
}
1363
/*
 * Mark the extent *ex as allocated in the group's buddy state: update
 * free/first-free/fragment statistics, split buddy chunks downward as
 * needed to carve out the exact range, and finally set the bits in the
 * block bitmap.  Returns 0, or (on the first split) the remaining length
 * in the low 16 bits and the split order in the high bits — used by the
 * caller for tail/buddy statistics.  Caller must hold the group spinlock.
 */
static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* fragment accounting by the range's neighbors: both free ->
	 * one fragment splits in two; both used -> a fragment vanishes */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* consume the range, chunk by chunk */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole aligned buddy chunk fits: take it */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* remember remainder/order of the first forced split */
		if (ret == 0)
			ret = len | (ord << 16);

		/* split the chunk: mark it used at 'ord', free both
		 * halves one order down, and retry */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}

	/* finally mark the range used in the block bitmap itself */
	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}
1437
1438
1439
1440
/*
 * Commit the best-found extent of the allocation context: trim it to the
 * goal length, mark it used via mb_mark_used(), and move the context to
 * AC_STATUS_FOUND.  Pins the bitmap/buddy pages and takes over the
 * group's alloc_sem from the buddy descriptor so the allocation survives
 * ext4_mb_release_desc().  For stream allocations, records the position
 * as the per-sb goal for the next stream allocation.
 */
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* never take more than the goal length */
	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preserve the originally found extent before preallocation
	 * logic may modify ac_b_ex */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	/* mb_mark_used() encodes tail length and split order in 'ret' */
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/* hold page references so the buddy cache entries cannot be
	 * reclaimed until the allocation is finalized */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);

	/* transfer alloc_sem ownership: ext4_mb_release_desc() must not
	 * drop it now; the context will */
	ac->alloc_semp =  e4b->alloc_semp;
	e4b->alloc_semp = NULL;

	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}
1484
1485
1486
1487
1488
/*
 * Decide whether to stop scanning: break the search once more than
 * s_mb_max_to_scan extents have been examined (unless the caller
 * demanded the first available), and accept the current best extent
 * once it reaches the goal length and either the group scan finished
 * or s_mb_min_to_scan extents were seen.
 */
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;

	/* give up the search after scanning too many extents */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/* best extent still shorter than the goal: keep scanning */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* re-measure the best extent (buddy state may have
		 * changed) and take it if it still satisfies the goal */
		max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
/*
 * Evaluate a candidate free extent against the current best: take it
 * immediately if the caller wants the first hit or it matches the goal
 * length exactly; otherwise keep whichever candidate tracks the goal
 * length better (larger while under the goal, smaller while over it),
 * then let ext4_mb_check_limits() decide whether to stop.
 */
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/* caller wants any free extent: take this one */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/* exact goal-length match: cannot do better */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/* first candidate becomes the initial best */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/* otherwise prefer whichever length is closer to the goal */
	if (bex->fe_len < gex->fe_len) {
		/* still under the goal: bigger is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* both over the goal: smaller wastes less */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}
1597
/*
 * Revisit the previously recorded best extent: reload its group's buddy
 * state, re-measure the extent under the group lock (it may have shrunk
 * or vanished), and commit whatever is still free there.  Returns 0 on
 * success or the error from ext4_mb_load_buddy().
 */
static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_release_desc(e4b);

	return 0;
}
1625
/*
 * Try to allocate exactly at the caller's goal position.  Accepts the
 * extent if it fully satisfies the goal length (and, on a striped
 * filesystem, starts on a stripe boundary), or if MERGE is set and at
 * least some blocks at the goal are free.  Returns 0 on success or the
 * error from ext4_mb_load_buddy(); "no luck" is also 0, with ac_status
 * unchanged.
 */
static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_super_block *es = sbi->s_es;
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		/* goal length equals the stripe size: only accept a
		 * stripe-aligned start */
		start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) +
			ex.fe_start + le32_to_cpu(es->s_first_data_block);

		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* partial hit at the goal is acceptable when the caller
		 * wants to merge with an adjacent allocation */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_release_desc(e4b);

	return 0;
}
1681
1682
1683
1684
1685
1686static noinline_for_stack
1687void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1688 struct ext4_buddy *e4b)
1689{
1690 struct super_block *sb = ac->ac_sb;
1691 struct ext4_group_info *grp = e4b->bd_info;
1692 void *buddy;
1693 int i;
1694 int k;
1695 int max;
1696
1697 BUG_ON(ac->ac_2order <= 0);
1698 for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
1699 if (grp->bb_counters[i] == 0)
1700 continue;
1701
1702 buddy = mb_find_buddy(e4b, i, &max);
1703 BUG_ON(buddy == NULL);
1704
1705 k = mb_find_next_zero_bit(buddy, max, 0);
1706 BUG_ON(k >= max);
1707
1708 ac->ac_found++;
1709
1710 ac->ac_b_ex.fe_len = 1 << i;
1711 ac->ac_b_ex.fe_start = k << i;
1712 ac->ac_b_ex.fe_group = e4b->bd_group;
1713
1714 ext4_mb_use_best_found(ac, e4b);
1715
1716 BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);
1717
1718 if (EXT4_SB(sb)->s_mb_stats)
1719 atomic_inc(&EXT4_SB(sb)->s_bal_2orders);
1720
1721 break;
1722 }
1723}
1724
1725
1726
1727
1728
1729
/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, caller must pass number of
 * free blocks in the group, so the routine can know upper limit.
 */
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = EXT4_MB_BITMAP(e4b);
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_BLOCKS_PER_GROUP(sb), i);
		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
			/*
			 * If we have a corrupt bitmap, we won't find any
			 * free blocks even though the group info says we
			 * have free blocks.
			 */
			ext4_grp_locked_error(sb, e4b->bd_group,
					__func__, "%d free blocks as per "
					"group info. But bitmap says 0",
					free);
			break;
		}

		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group,
					__func__, "%d free blocks as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}

		ext4_mb_measure_extent(ac, &ex, e4b);

		/* advance past the extent just measured */
		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}
1784
1785
1786
1787
1788
1789
/*
 * This is a special case for storages like raid5
 * we try to find stripe-aligned chunks for stripe-size-ed requests
 */
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = EXT4_MB_BITMAP(e4b);
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb)
		+ le32_to_cpu(sbi->s_es->s_first_data_block);
	/* round first_group_block up to the next stripe boundary;
	 * note do_div() divides 'a' in place */
	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	/* step through the group one stripe at a time, taking the first
	 * fully free stripe-sized chunk */
	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}
1825
1826static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1827 ext4_group_t group, int cr)
1828{
1829 unsigned free, fragments;
1830 unsigned i, bits;
1831 int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
1832 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1833
1834 BUG_ON(cr < 0 || cr >= 4);
1835 BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
1836
1837 free = grp->bb_free;
1838 fragments = grp->bb_fragments;
1839 if (free == 0)
1840 return 0;
1841 if (fragments == 0)
1842 return 0;
1843
1844 switch (cr) {
1845 case 0:
1846 BUG_ON(ac->ac_2order == 0);
1847
1848
1849 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1850 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1851 ((group % flex_size) == 0))
1852 return 0;
1853
1854 bits = ac->ac_sb->s_blocksize_bits + 1;
1855 for (i = ac->ac_2order; i <= bits; i++)
1856 if (grp->bb_counters[i] > 0)
1857 return 1;
1858 break;
1859 case 1:
1860 if ((free / fragments) >= ac->ac_g_ex.fe_len)
1861 return 1;
1862 break;
1863 case 2:
1864 if (free >= ac->ac_g_ex.fe_len)
1865 return 1;
1866 break;
1867 case 3:
1868 return 1;
1869 default:
1870 BUG();
1871 }
1872
1873 return 0;
1874}
1875
1876
1877
1878
1879
1880
1881
1882
1883int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1884{
1885 int i;
1886 int block, pnum;
1887 int blocks_per_page;
1888 int groups_per_page;
1889 ext4_group_t ngroups = ext4_get_groups_count(sb);
1890 ext4_group_t first_group;
1891 struct ext4_group_info *grp;
1892
1893 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1894
1895
1896
1897
1898
1899 block = group * 2;
1900 pnum = block / blocks_per_page;
1901 first_group = pnum * blocks_per_page / 2;
1902
1903 groups_per_page = blocks_per_page >> 1;
1904 if (groups_per_page == 0)
1905 groups_per_page = 1;
1906
1907 for (i = 0; i < groups_per_page; i++) {
1908
1909 if ((first_group + i) >= ngroups)
1910 break;
1911 grp = ext4_get_group_info(sb, first_group + i);
1912
1913
1914
1915
1916
1917 down_write_nested(&grp->alloc_sem, i);
1918 }
1919 return i;
1920}
1921
1922void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1923 ext4_group_t group, int locked_group)
1924{
1925 int i;
1926 int block, pnum;
1927 int blocks_per_page;
1928 ext4_group_t first_group;
1929 struct ext4_group_info *grp;
1930
1931 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1932
1933
1934
1935
1936
1937 block = group * 2;
1938 pnum = block / blocks_per_page;
1939 first_group = pnum * blocks_per_page / 2;
1940
1941 for (i = 0; i < locked_group; i++) {
1942
1943 grp = ext4_get_group_info(sb, first_group + i);
1944
1945
1946
1947
1948
1949 up_write(&grp->alloc_sem);
1950 }
1951
1952}
1953
/*
 * Main iterative allocator: first try the goal block, then walk the
 * block groups under increasingly permissive criteria (cr 0..3) until
 * an extent satisfying the request is found or everything is exhausted.
 */
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0;
	int bsbits;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the length of the request is a
	 * power of 2 and is large enough; it enables the fast buddy
	 * scan at criteria 0 below.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the request
	 * is greater than or equal to sbi->s_mb_order2_reqs (tunable).
	 */
	if (i >= sbi->s_mb_order2_reqs) {
		/* this tells whether fe_len is exactly a power of 2 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = i - 1;
	}

	bsbits = ac->ac_sb->s_blocksize_bits;

	/* if stream allocation is enabled, use the global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-or-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group starts
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			struct ext4_group_info *grp;
			struct ext4_group_desc *desc;

			/* wrap around to group 0 */
			if (group == ngroups)
				group = 0;

			/* quick check to skip empty groups */
			grp = ext4_get_group_info(sb, group);
			if (grp->bb_free == 0)
				continue;

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);
			if (!ext4_mb_good_group(ac, group, cr)) {
				/* someone did allocation from this group */
				ext4_unlock_group(sb, group);
				ext4_mb_release_desc(&e4b);
				continue;
			}

			ac->ac_groups_scanned++;
			desc = ext4_get_group_desc(sb, group, NULL);
			/* pick the scan strategy matching the criteria */
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 &&
					ac->ac_g_ex.fe_len == sbi->s_stripe)
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_release_desc(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long. Let's try to allocate
		 * the best chunk we've found so far.
		 */
		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take the
			 * first found block(s).
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	return err;
}
2098
2099static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2100{
2101 struct super_block *sb = seq->private;
2102 ext4_group_t group;
2103
2104 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2105 return NULL;
2106 group = *pos + 1;
2107 return (void *) ((unsigned long) group);
2108}
2109
2110static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2111{
2112 struct super_block *sb = seq->private;
2113 ext4_group_t group;
2114
2115 ++*pos;
2116 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2117 return NULL;
2118 group = *pos + 1;
2119 return (void *) ((unsigned long) group);
2120}
2121
2122static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2123{
2124 struct super_block *sb = seq->private;
2125 ext4_group_t group = (ext4_group_t) ((unsigned long) v);
2126 int i;
2127 int err;
2128 struct ext4_buddy e4b;
2129 struct sg {
2130 struct ext4_group_info info;
2131 ext4_grpblk_t counters[16];
2132 } sg;
2133
2134 group--;
2135 if (group == 0)
2136 seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
2137 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
2138 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
2139 "group", "free", "frags", "first",
2140 "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
2141 "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
2142
2143 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2144 sizeof(struct ext4_group_info);
2145 err = ext4_mb_load_buddy(sb, group, &e4b);
2146 if (err) {
2147 seq_printf(seq, "#%-5u: I/O error\n", group);
2148 return 0;
2149 }
2150 ext4_lock_group(sb, group);
2151 memcpy(&sg, ext4_get_group_info(sb, group), i);
2152 ext4_unlock_group(sb, group);
2153 ext4_mb_release_desc(&e4b);
2154
2155 seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
2156 sg.info.bb_fragments, sg.info.bb_first_free);
2157 for (i = 0; i <= 13; i++)
2158 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
2159 sg.info.bb_counters[i] : 0);
2160 seq_printf(seq, " ]\n");
2161
2162 return 0;
2163}
2164
/* seq_file ->stop: nothing to release; iteration state lives in *pos. */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
2168
/* seq_file iterator exposing per-group buddy statistics (mb_groups) */
static const struct seq_operations ext4_mb_seq_groups_ops = {
	.start = ext4_mb_seq_groups_start,
	.next = ext4_mb_seq_groups_next,
	.stop = ext4_mb_seq_groups_stop,
	.show = ext4_mb_seq_groups_show,
};
2175
2176static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2177{
2178 struct super_block *sb = PDE(inode)->data;
2179 int rc;
2180
2181 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2182 if (rc == 0) {
2183 struct seq_file *m = (struct seq_file *)file->private_data;
2184 m->private = sb;
2185 }
2186 return rc;
2187
2188}
2189
/* file operations for /proc/fs/ext4/<dev>/mb_groups */
static const struct file_operations ext4_mb_seq_groups_fops = {
	.owner = THIS_MODULE,
	.open = ext4_mb_seq_groups_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
2197
2198
2199
2200int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2201 struct ext4_group_desc *desc)
2202{
2203 int i, len;
2204 int metalen = 0;
2205 struct ext4_sb_info *sbi = EXT4_SB(sb);
2206 struct ext4_group_info **meta_group_info;
2207
2208
2209
2210
2211
2212
2213 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2214 metalen = sizeof(*meta_group_info) <<
2215 EXT4_DESC_PER_BLOCK_BITS(sb);
2216 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2217 if (meta_group_info == NULL) {
2218 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2219 "buddy group\n");
2220 goto exit_meta_group_info;
2221 }
2222 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2223 meta_group_info;
2224 }
2225
2226
2227
2228
2229
2230 len = offsetof(typeof(**meta_group_info),
2231 bb_counters[sb->s_blocksize_bits + 2]);
2232
2233 meta_group_info =
2234 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2235 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2236
2237 meta_group_info[i] = kzalloc(len, GFP_KERNEL);
2238 if (meta_group_info[i] == NULL) {
2239 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2240 goto exit_group_info;
2241 }
2242 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2243 &(meta_group_info[i]->bb_state));
2244
2245
2246
2247
2248
2249 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2250 meta_group_info[i]->bb_free =
2251 ext4_free_blocks_after_init(sb, group, desc);
2252 } else {
2253 meta_group_info[i]->bb_free =
2254 ext4_free_blks_count(sb, desc);
2255 }
2256
2257 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2258 init_rwsem(&meta_group_info[i]->alloc_sem);
2259 meta_group_info[i]->bb_free_root.rb_node = NULL;
2260
2261#ifdef DOUBLE_CHECK
2262 {
2263 struct buffer_head *bh;
2264 meta_group_info[i]->bb_bitmap =
2265 kmalloc(sb->s_blocksize, GFP_KERNEL);
2266 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2267 bh = ext4_read_block_bitmap(sb, group);
2268 BUG_ON(bh == NULL);
2269 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2270 sb->s_blocksize);
2271 put_bh(bh);
2272 }
2273#endif
2274
2275 return 0;
2276
2277exit_group_info:
2278
2279 if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
2280 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2281exit_meta_group_info:
2282 return -ENOMEM;
2283}
2284
/*
 * Build the in-core mballoc backend: the two-level s_group_info table,
 * the buddy-cache inode, and one ext4_group_info per block group.
 */
static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int num_meta_group_infos;
	int num_meta_group_infos_max;
	int array_size;
	struct ext4_group_desc *desc;

	/* This is the number of blocks used by GDT */
	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);

	/*
	 * This is the total number of blocks used by GDT including
	 * the number of reserved blocks for GDT.
	 * The s_group_info array is allocated with this value
	 * to allow a clean online resize without a complex
	 * manipulation of pointers.
	 * The drawback is the unused memory when no resize
	 * occurs, but it's very low in terms of pages.
	 */
	num_meta_group_infos_max = num_meta_group_infos +
				le16_to_cpu(es->s_reserved_gdt_blocks);

	/*
	 * array_size is the size of s_group_info array. We round it
	 * up to the next power of two because kmalloc internally
	 * rounds up anyway, so the extra room comes for free (e.g. it
	 * may later be used for resize).
	 */
	array_size = 1;
	while (array_size < sizeof(*sbi->s_group_info) *
	       num_meta_group_infos_max)
		array_size = array_size << 1;

	/* A two-level scheme keeps each kmalloc small enough even for
	 * very large filesystems. */
	sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
	if (sbi->s_group_info == NULL) {
		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
		return -ENOMEM;
	}
	/* in-memory inode backing the buddy-cache page cache */
	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
		goto err_freesgi;
	}
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			printk(KERN_ERR
				"EXT4-fs: can't read descriptor %u\n", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	/* free the group infos created so far, then the pointer tables */
	while (i-- > 0)
		kfree(ext4_get_group_info(sb, i));
	i = num_meta_group_infos;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	kfree(sbi->s_group_info);
	return -ENOMEM;
}
2362
/*
 * Initialize the multi-block allocator at mount time: per-order buddy
 * offset/size tables, the backend group info, tunables, per-cpu
 * locality groups, the procfs entry, and the journal commit callback.
 */
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset;
	unsigned max;
	int ret;

	/* one slot per buddy order: orders 0 .. blocksize_bits + 1 */
	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		return -ENOMEM;
	}

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		kfree(sbi->s_mb_offsets);
		return -ENOMEM;
	}

	/* order 0 is the regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	/* compute offset into the buddy block and the number of bits
	 * for each higher order */
	i = 1;
	offset = 0;
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += 1 << (sb->s_blocksize_bits - i);
		max = max >> 1;
		i++;
	} while (i <= sb->s_blocksize_bits + 1);

	/* init file for buddy data */
	ret = ext4_mb_init_backend(sb);
	if (ret != 0) {
		kfree(sbi->s_mb_offsets);
		kfree(sbi->s_mb_maxs);
		return ret;
	}

	spin_lock_init(&sbi->s_md_lock);
	spin_lock_init(&sbi->s_bal_lock);

	/* allocator tunables (see /sys/fs/ext4) */
	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		/* NOTE(review): the state built by ext4_mb_init_backend()
		 * is not torn down on this path — looks like a leak;
		 * confirm against ext4_mb_release(). */
		kfree(sbi->s_mb_offsets);
		kfree(sbi->s_mb_maxs);
		return -ENOMEM;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	if (sbi->s_proc)
		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
				 &ext4_mb_seq_groups_fops, sb);

	/* free blocks released by a commit become usable via callback */
	if (sbi->s_journal)
		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
	return 0;
}
2441
2442
2443static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2444{
2445 struct ext4_prealloc_space *pa;
2446 struct list_head *cur, *tmp;
2447 int count = 0;
2448
2449 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2450 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2451 list_del(&pa->pa_group_list);
2452 count++;
2453 kmem_cache_free(ext4_pspace_cachep, pa);
2454 }
2455 if (count)
2456 mb_debug(1, "mballoc: %u PAs left\n", count);
2457
2458}
2459
/*
 * Tear down all mballoc state for @sb at unmount: per-group info and
 * preallocations, the pointer tables, the buddy-cache inode, per-cpu
 * locality groups and the procfs entry; optionally print statistics.
 */
int ext4_mb_release(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	int num_meta_group_infos;
	struct ext4_group_info *grinfo;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_group_info) {
		for (i = 0; i < ngroups; i++) {
			grinfo = ext4_get_group_info(sb, i);
#ifdef DOUBLE_CHECK
			kfree(grinfo->bb_bitmap);
#endif
			/* discard leftover preallocations under group lock */
			ext4_lock_group(sb, i);
			ext4_mb_cleanup_pa(grinfo);
			ext4_unlock_group(sb, i);
			kfree(grinfo);
		}
		/* free the second-level pointer tables, then the array */
		num_meta_group_infos = (ngroups +
				EXT4_DESC_PER_BLOCK(sb) - 1) >>
			EXT4_DESC_PER_BLOCK_BITS(sb);
		for (i = 0; i < num_meta_group_infos; i++)
			kfree(sbi->s_group_info[i]);
		kfree(sbi->s_group_info);
	}
	kfree(sbi->s_mb_offsets);
	kfree(sbi->s_mb_maxs);
	if (sbi->s_buddy_cache)
		iput(sbi->s_buddy_cache);
	if (sbi->s_mb_stats) {
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
				atomic_read(&sbi->s_bal_allocated),
				atomic_read(&sbi->s_bal_reqs),
				atomic_read(&sbi->s_bal_success));
		printk(KERN_INFO
		      "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
				"%u 2^N hits, %u breaks, %u lost\n",
				atomic_read(&sbi->s_bal_ex_scanned),
				atomic_read(&sbi->s_bal_goals),
				atomic_read(&sbi->s_bal_2orders),
				atomic_read(&sbi->s_bal_breaks),
				atomic_read(&sbi->s_mb_lost_chunks));
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
				sbi->s_mb_buddies_generated++,
				sbi->s_mb_generation_time);
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
				atomic_read(&sbi->s_mb_preallocated),
				atomic_read(&sbi->s_mb_discarded));
	}

	free_percpu(sbi->s_locality_groups);
	if (sbi->s_proc)
		remove_proc_entry("mb_groups", sbi->s_proc);

	return 0;
}
2520
2521
2522
2523
2524
/*
 * This function is called by the jbd2 layer once the commit has
 * finished, so we know we can free the blocks that were released with
 * that commit.
 */
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
{
	struct super_block *sb = journal->j_private;
	struct ext4_buddy e4b;
	struct ext4_group_info *db;
	int err, count = 0, count2 = 0;
	struct ext4_free_data *entry;
	ext4_fsblk_t discard_block;
	struct list_head *l, *ltmp;

	list_for_each_safe(l, ltmp, &txn->t_private_list) {
		entry = list_entry(l, struct ext4_free_data, list);

		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
			 entry->count, entry->group, entry);

		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
		/* we expect to find an existing buddy because it's pinned */
		BUG_ON(err != 0);

		db = e4b.bd_info;
		/* there are blocks to put in buddy to make them really free */
		count += entry->count;
		count2++;
		ext4_lock_group(sb, entry->group);
		/* Take it out of per group rb tree */
		rb_erase(&entry->node, &(db->bb_free_root));
		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);

		if (!db->bb_free_root.rb_node) {
			/* No more items in the per group rb tree;
			 * balance refcounts from ext4_mb_free_metadata()
			 */
			page_cache_release(e4b.bd_buddy_page);
			page_cache_release(e4b.bd_bitmap_page);
		}
		ext4_unlock_group(sb, entry->group);
		/* translate group-relative start to an absolute fs block */
		discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
			+ entry->start_blk
			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
		trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
					  entry->count);
		sb_issue_discard(sb, discard_block, entry->count);

		kmem_cache_free(ext4_free_ext_cachep, entry);
		ext4_mb_release_desc(&e4b);
	}

	mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
2575
#ifdef CONFIG_EXT4_DEBUG
/* runtime switch for mb_debug() verbosity, exposed via debugfs */
u8 mb_enable_debug __read_mostly;

static struct dentry *debugfs_dir;
static struct dentry *debugfs_debug;

/* Create /sys/kernel/debug/ext4/mballoc-debug (writable debug level). */
static void __init ext4_create_debugfs_entry(void)
{
	debugfs_dir = debugfs_create_dir("ext4", NULL);
	if (debugfs_dir)
		debugfs_debug = debugfs_create_u8("mballoc-debug",
						  S_IRUGO | S_IWUSR,
						  debugfs_dir,
						  &mb_enable_debug);
}

/* Remove the debugfs file and directory created above. */
static void ext4_remove_debugfs_entry(void)
{
	debugfs_remove(debugfs_debug);
	debugfs_remove(debugfs_dir);
}

#else

/* no-op stubs when CONFIG_EXT4_DEBUG is disabled */
static void __init ext4_create_debugfs_entry(void)
{
}

static void ext4_remove_debugfs_entry(void)
{
}

#endif
2609
2610int __init init_ext4_mballoc(void)
2611{
2612 ext4_pspace_cachep =
2613 kmem_cache_create("ext4_prealloc_space",
2614 sizeof(struct ext4_prealloc_space),
2615 0, SLAB_RECLAIM_ACCOUNT, NULL);
2616 if (ext4_pspace_cachep == NULL)
2617 return -ENOMEM;
2618
2619 ext4_ac_cachep =
2620 kmem_cache_create("ext4_alloc_context",
2621 sizeof(struct ext4_allocation_context),
2622 0, SLAB_RECLAIM_ACCOUNT, NULL);
2623 if (ext4_ac_cachep == NULL) {
2624 kmem_cache_destroy(ext4_pspace_cachep);
2625 return -ENOMEM;
2626 }
2627
2628 ext4_free_ext_cachep =
2629 kmem_cache_create("ext4_free_block_extents",
2630 sizeof(struct ext4_free_data),
2631 0, SLAB_RECLAIM_ACCOUNT, NULL);
2632 if (ext4_free_ext_cachep == NULL) {
2633 kmem_cache_destroy(ext4_pspace_cachep);
2634 kmem_cache_destroy(ext4_ac_cachep);
2635 return -ENOMEM;
2636 }
2637 ext4_create_debugfs_entry();
2638 return 0;
2639}
2640
/* Module exit: destroy the slab caches and remove the debugfs entry. */
void exit_ext4_mballoc(void)
{
	/*
	 * Wait for any outstanding RCU callbacks to finish before
	 * destroying the caches their objects came from.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_pspace_cachep);
	kmem_cache_destroy(ext4_ac_cachep);
	kmem_cache_destroy(ext4_free_ext_cachep);
	ext4_remove_debugfs_entry();
}
2653
2654
2655
2656
2657
2658
/*
 * Check quota and mark chosen space (ac->ac_b_ex) as used on disk:
 * set bits in the block bitmap and update the group descriptor and
 * counters, all under the journal's protection.
 */
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
				handle_t *handle, unsigned int reserv_blks)
{
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_super_block *es;
	struct ext4_group_desc *gdp;
	struct buffer_head *gdp_bh;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block;
	int err, len;

	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(ac->ac_b_ex.fe_len <= 0);

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	es = sbi->s_es;

	/* read the bitmap and get write access before modifying it */
	err = -EIO;
	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
	if (!bitmap_bh)
		goto out_err;

	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto out_err;

	err = -EIO;
	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
	if (!gdp)
		goto out_err;

	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
			ext4_free_blks_count(sb, gdp));

	err = ext4_journal_get_write_access(handle, gdp_bh);
	if (err)
		goto out_err;

	/* absolute first block of the allocated extent */
	block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb)
		+ ac->ac_b_ex.fe_start
		+ le32_to_cpu(es->s_first_data_block);

	len = ac->ac_b_ex.fe_len;
	if (!ext4_data_block_valid(sbi, block, len)) {
		ext4_error(sb, __func__,
			   "Allocating blocks %llu-%llu which overlap "
			   "fs metadata\n", block, block+len);
		/*
		 * File system mounted not to panic on error.
		 * Fix the bitmap and repeat the block allocation.
		 * We leak some of the blocks here.
		 */
		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
			    ac->ac_b_ex.fe_len);
		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!err)
			err = -EAGAIN;
		goto out_err;
	}

	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
#ifdef AGGRESSIVE_CHECK
	{
		int i;
		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
						bitmap_bh->b_data));
		}
	}
#endif
	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		/* first allocation from an uninitialized group: compute
		 * the real free count before accounting */
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
		ext4_free_blks_set(sb, gdp,
					ext4_free_blocks_after_init(sb,
					ac->ac_b_ex.fe_group, gdp));
	}
	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
	ext4_free_blks_set(sb, gdp, len);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);

	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
	/*
	 * Now reduce the dirty block count also. Should not go negative
	 */
	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release all the reserved blocks if non delalloc */
		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
	else {
		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
						ac->ac_b_ex.fe_len);
		/* convert reserved quota blocks to real quota blocks */
		vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
	}

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi,
							  ac->ac_b_ex.fe_group);
		atomic_sub(ac->ac_b_ex.fe_len,
			   &sbi->s_flex_groups[flex_group].free_blocks);
	}

	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
	if (err)
		goto out_err;
	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
	sb->s_dirt = 1;
	brelse(bitmap_bh);
	return err;
}
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2787{
2788 struct super_block *sb = ac->ac_sb;
2789 struct ext4_locality_group *lg = ac->ac_lg;
2790
2791 BUG_ON(lg == NULL);
2792 if (EXT4_SB(sb)->s_stripe)
2793 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
2794 else
2795 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2796 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2797 current->pid, ac->ac_g_ex.fe_len);
2798}
2799
2800
2801
2802
2803
/*
 * Normalization means making the request better in terms of
 * size and alignment.
 */
static noinline_for_stack void
ext4_mb_normalize_request(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	int bsbits, max;
	ext4_lblk_t end;
	loff_t size, orig_size, start_off;
	ext4_lblk_t start, orig_start;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_prealloc_space *pa;

	/* do normalize only for data requests; metadata requests
	   do not need preallocation */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	/* sometimes the caller may want exact blocks */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	/* the caller may indicate that preallocation isn't
	 * required (it's a tail, for example) */
	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
		return;

	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
		ext4_mb_normalize_group_request(ac);
		return ;
	}

	bsbits = ac->ac_sb->s_blocksize_bits;

	/* first, let's learn the actual file size
	 * given the current request is allocated */
	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
	size = size << bsbits;
	if (size < i_size_read(ac->ac_inode))
		size = i_size_read(ac->ac_inode);

	/* max size of free chunks */
	max = 2 << bsbits;

#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
		(req <= (size) || max <= (chunk_size))

	/* first, try to predict filesize */
	/* XXX: should this table be tunable? */
	start_off = 0;
	if (size <= 16 * 1024) {
		size = 16 * 1024;
	} else if (size <= 32 * 1024) {
		size = 32 * 1024;
	} else if (size <= 64 * 1024) {
		size = 64 * 1024;
	} else if (size <= 128 * 1024) {
		size = 128 * 1024;
	} else if (size <= 256 * 1024) {
		size = 256 * 1024;
	} else if (size <= 512 * 1024) {
		size = 512 * 1024;
	} else if (size <= 1024 * 1024) {
		size = 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
						(21 - bsbits)) << 21;
		size = 2 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(22 - bsbits)) << 22;
		size = 4 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
					(8<<20)>>bsbits, max, 8 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(23 - bsbits)) << 23;
		size = 8 * 1024 * 1024;
	} else {
		start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
		size = ac->ac_o_ex.fe_len << bsbits;
	}
	orig_size = size = size >> bsbits;
	orig_start = start = start_off >> bsbits;

	/* don't cover already allocated blocks in selected range */
	if (ar->pleft && start <= ar->lleft) {
		size -= ar->lleft + 1 - start;
		start = ar->lleft + 1;
	}
	if (ar->pright && start + size - 1 >= ar->lright)
		size -= start + size - ar->lright;

	end = start + size;

	/* check we don't cross already preallocated blocks */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		ext4_lblk_t pa_end;

		if (pa->pa_deleted)
			continue;
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}

		pa_end = pa->pa_lstart + pa->pa_len;

		/* PA must not overlap original request */
		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
			ac->ac_o_ex.fe_logical < pa->pa_lstart));

		/* skip PAs this normalized request doesn't overlap with */
		if (pa->pa_lstart >= end || pa_end <= start) {
			spin_unlock(&pa->pa_lock);
			continue;
		}
		BUG_ON(pa->pa_lstart <= start && pa_end >= end);

		/* adjust start or end to be adjacent to this pa */
		if (pa_end <= ac->ac_o_ex.fe_logical) {
			BUG_ON(pa_end < start);
			start = pa_end;
		} else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
			BUG_ON(pa->pa_lstart > end);
			end = pa->pa_lstart;
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();
	size = end - start;

	/* XXX: extra loop to check we really don't overlap preallocations */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		ext4_lblk_t pa_end;
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0) {
			pa_end = pa->pa_lstart + pa->pa_len;
			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();

	if (start + size <= ac->ac_o_ex.fe_logical &&
			start > ac->ac_o_ex.fe_logical) {
		printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
			(unsigned long) start, (unsigned long) size,
			(unsigned long) ac->ac_o_ex.fe_logical);
	}
	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
			start > ac->ac_o_ex.fe_logical);
	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));

	/* now prepare goal request */

	/* XXX: is it better to align blocks WRT to logical
	 * placement or satisfy big request as is */
	ac->ac_g_ex.fe_logical = start;
	ac->ac_g_ex.fe_len = size;

	/* define goal start in order to merge */
	if (ar->pright && (ar->lright == (start + size))) {
		/* merge to the right */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
						&ac->ac_f_ex.fe_group,
						&ac->ac_f_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}
	if (ar->pleft && (ar->lleft + 1 == start)) {
		/* merge to the left */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
						&ac->ac_f_ex.fe_group,
						&ac->ac_f_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}

	mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
		(unsigned) orig_size, (unsigned) start);
}
2984
2985static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
2986{
2987 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2988
2989 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
2990 atomic_inc(&sbi->s_bal_reqs);
2991 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
2992 if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len)
2993 atomic_inc(&sbi->s_bal_success);
2994 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
2995 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
2996 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
2997 atomic_inc(&sbi->s_bal_goals);
2998 if (ac->ac_found > sbi->s_mb_max_to_scan)
2999 atomic_inc(&sbi->s_bal_breaks);
3000 }
3001
3002 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3003 trace_ext4_mballoc_alloc(ac);
3004 else
3005 trace_ext4_mballoc_prealloc(ac);
3006}
3007
3008
3009
3010
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3012 struct ext4_prealloc_space *pa)
3013{
3014 ext4_fsblk_t start;
3015 ext4_fsblk_t end;
3016 int len;
3017
3018
3019 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3020 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
3021 len = end - start;
3022 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3023 &ac->ac_b_ex.fe_start);
3024 ac->ac_b_ex.fe_len = len;
3025 ac->ac_status = AC_STATUS_FOUND;
3026 ac->ac_pa = pa;
3027
3028 BUG_ON(start < pa->pa_pstart);
3029 BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
3030 BUG_ON(pa->pa_free < len);
3031 pa->pa_free -= len;
3032
3033 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3034}
3035
3036
3037
3038
/*
 * use blocks preallocated to locality group
 */
static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	unsigned int len = ac->ac_o_ex.fe_len;

	ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
					&ac->ac_b_ex.fe_group,
					&ac->ac_b_ex.fe_start);
	ac->ac_b_ex.fe_len = len;
	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_pa = pa;

	/* we don't correct pa_pstart or pa_len here to avoid a
	 * possible race when the group is being loaded concurrently;
	 * instead the pa is corrected later, after blocks are marked
	 * in the on-disk bitmap -- see ext4_mb_release_context(),
	 * which advances pa_pstart/pa_lstart and shrinks pa_free/pa_len.
	 * Other CPUs are kept out of this pa by lg_mutex (taken in
	 * ext4_mb_group_or_file()).
	 */
	mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
}
3059
3060
3061
3062
3063
3064
3065
3066static struct ext4_prealloc_space *
3067ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3068 struct ext4_prealloc_space *pa,
3069 struct ext4_prealloc_space *cpa)
3070{
3071 ext4_fsblk_t cur_distance, new_distance;
3072
3073 if (cpa == NULL) {
3074 atomic_inc(&pa->pa_count);
3075 return pa;
3076 }
3077 cur_distance = abs(goal_block - cpa->pa_pstart);
3078 new_distance = abs(goal_block - pa->pa_pstart);
3079
3080 if (cur_distance < new_distance)
3081 return cpa;
3082
3083
3084 atomic_dec(&cpa->pa_count);
3085 atomic_inc(&pa->pa_count);
3086 return pa;
3087}
3088
3089
3090
3091
/*
 * search goal blocks in preallocated space; returns 1 and sets up
 * ac->ac_b_ex if a suitable PA was found, 0 otherwise
 */
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
	int order, i;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *pa, *cpa = NULL;
	ext4_fsblk_t goal_block;

	/* only data requests use preallocated space */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return 0;

	/* first, try per-inode preallocation */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {

		/* pa_lstart/pa_len don't change once the pa is published,
		 * so this range check needs no pa_lock */
		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
			ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
			continue;

		/* non-extent files can't have physical blocks past 2^32 */
		if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) &&
			pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
			continue;

		/* found usable preallocated blocks: take a ref and use them */
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0 && pa->pa_free) {
			atomic_inc(&pa->pa_count);
			ext4_mb_use_inode_pa(ac, pa);
			spin_unlock(&pa->pa_lock);
			ac->ac_criteria = 10;
			rcu_read_unlock();
			return 1;
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();

	/* can we use group (locality) allocation? */
	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
		return 0;

	/* inode may have no locality group for allocation */
	lg = ac->ac_lg;
	if (lg == NULL)
		return 0;
	order = fls(ac->ac_o_ex.fe_len) - 1;
	if (order > PREALLOC_TB_SIZE - 1)
		/* clamp to the last bucket of the per-order table */
		order = PREALLOC_TB_SIZE - 1;

	goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
		     ac->ac_g_ex.fe_start +
		     le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);

	/*
	 * scan buckets of this order and above, picking the prealloc
	 * space with minimal distance from the goal block
	 */
	for (i = order; i < PREALLOC_TB_SIZE; i++) {
		rcu_read_lock();
		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
					pa_inode_list) {
			spin_lock(&pa->pa_lock);
			if (pa->pa_deleted == 0 &&
					pa->pa_free >= ac->ac_o_ex.fe_len) {

				cpa = ext4_mb_check_group_pa(goal_block,
								pa, cpa);
			}
			spin_unlock(&pa->pa_lock);
		}
		rcu_read_unlock();
	}
	if (cpa) {
		ext4_mb_use_group_pa(ac, cpa);
		ac->ac_criteria = 20;
		return 1;
	}
	return 0;
}
3176
3177
3178
3179
3180
3181
3182
3183static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3184 ext4_group_t group)
3185{
3186 struct rb_node *n;
3187 struct ext4_group_info *grp;
3188 struct ext4_free_data *entry;
3189
3190 grp = ext4_get_group_info(sb, group);
3191 n = rb_first(&(grp->bb_free_root));
3192
3193 while (n) {
3194 entry = rb_entry(n, struct ext4_free_data, node);
3195 mb_set_bits(bitmap, entry->start_blk, entry->count);
3196 n = rb_next(n);
3197 }
3198 return;
3199}
3200
3201
3202
3203
3204
3205
3206static noinline_for_stack
3207void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3208 ext4_group_t group)
3209{
3210 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3211 struct ext4_prealloc_space *pa;
3212 struct list_head *cur;
3213 ext4_group_t groupnr;
3214 ext4_grpblk_t start;
3215 int preallocated = 0;
3216 int count = 0;
3217 int len;
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227 list_for_each(cur, &grp->bb_prealloc_list) {
3228 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3229 spin_lock(&pa->pa_lock);
3230 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3231 &groupnr, &start);
3232 len = pa->pa_len;
3233 spin_unlock(&pa->pa_lock);
3234 if (unlikely(len == 0))
3235 continue;
3236 BUG_ON(groupnr != group);
3237 mb_set_bits(bitmap, start, len);
3238 preallocated += len;
3239 count++;
3240 }
3241 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3242}
3243
3244static void ext4_mb_pa_callback(struct rcu_head *head)
3245{
3246 struct ext4_prealloc_space *pa;
3247 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3248 kmem_cache_free(ext4_pspace_cachep, pa);
3249}
3250
3251
3252
3253
3254
/*
 * drop a reference to the preallocation descriptor; the dropper of the
 * last reference of a fully-consumed pa unlinks it from the group and
 * owner lists and frees it via RCU
 */
static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
			struct super_block *sb, struct ext4_prealloc_space *pa)
{
	ext4_group_t grp;
	ext4_fsblk_t grp_blk;

	/* nothing to do unless we dropped the last ref of an empty pa */
	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
		return;

	/* in this short window a concurrent discard can set pa_deleted */
	spin_lock(&pa->pa_lock);
	if (pa->pa_deleted == 1) {
		spin_unlock(&pa->pa_lock);
		return;
	}

	pa->pa_deleted = 1;
	spin_unlock(&pa->pa_lock);

	grp_blk = pa->pa_pstart;
	/*
	 * for a group PA, pa_pstart is advanced as blocks are consumed
	 * (see ext4_mb_release_context()) and may point just past the
	 * group when the pa is used up, so back up one block
	 */
	if (pa->pa_type == MB_GROUP_PA)
		grp_blk--;

	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);

	/*
	 * unlink from the group under the group lock: the removal must be
	 * serialized against buddy initialization, which copies the
	 * on-disk bitmap and then marks all PAs on bb_prealloc_list;
	 * dropping the pa outside the lock could let the buddy be built
	 * with this pa's blocks shown as available
	 */
	ext4_lock_group(sb, grp);
	list_del(&pa->pa_group_list);
	ext4_unlock_group(sb, grp);

	/* then unlink from the owner (inode or locality group) list */
	spin_lock(pa->pa_obj_lock);
	list_del_rcu(&pa->pa_inode_list);
	spin_unlock(pa->pa_obj_lock);

	/* defer the actual free until RCU readers are done */
	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
3308
3309
3310
3311
/*
 * creates new preallocated space for the given inode from the extra
 * blocks obtained beyond the original request
 */
static noinline_for_stack int
ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;
	struct ext4_inode_info *ei;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));

	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
	if (pa == NULL)
		return -ENOMEM;

	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
		int winl;
		int wins;
		int win;
		int offs;

		/* we couldn't allocate as much as the normalized goal
		 * wanted, so the found extent must be shifted to a
		 * logical start that still covers the original request */
		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);

		/* winl: how far left of the original logical block the
		 * goal allows the extent to start */
		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;

		/* wins: slack between found length and original length */
		wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;

		/* the smaller of the two defines the real window */
		win = min(winl, wins);

		/* keep the extent aligned to its own length if possible */
		offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
		if (offs && offs < win)
			win = offs;

		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
	}

	/* preallocation can change ac_b_ex below, so remember the
	 * actually allocated blocks first */
	ac->ac_f_ex = ac->ac_b_ex;

	pa->pa_lstart = ac->ac_b_ex.fe_logical;
	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
	pa->pa_len = ac->ac_b_ex.fe_len;
	pa->pa_free = pa->pa_len;
	atomic_set(&pa->pa_count, 1);
	spin_lock_init(&pa->pa_lock);
	INIT_LIST_HEAD(&pa->pa_inode_list);
	INIT_LIST_HEAD(&pa->pa_group_list);
	pa->pa_deleted = 0;
	pa->pa_type = MB_INODE_PA;

	mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
	trace_ext4_mb_new_inode_pa(ac, pa);

	/* serve the original request from the new pa right away */
	ext4_mb_use_inode_pa(ac, pa);
	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);

	ei = EXT4_I(ac->ac_inode);
	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);

	pa->pa_obj_lock = &ei->i_prealloc_lock;
	pa->pa_inode = ac->ac_inode;

	/* publish on the group list under the group lock ... */
	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);

	/* ... and on the inode list for RCU readers */
	spin_lock(pa->pa_obj_lock);
	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
	spin_unlock(pa->pa_obj_lock);

	return 0;
}
3399
3400
3401
3402
/*
 * creates new preallocated space for the locality group the inode
 * belongs to
 */
static noinline_for_stack int
ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));

	BUG_ON(ext4_pspace_cachep == NULL);
	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
	if (pa == NULL)
		return -ENOMEM;

	/* preallocation can change ac_b_ex below, so remember the
	 * actually allocated blocks first */
	ac->ac_f_ex = ac->ac_b_ex;

	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
	/* group PAs have no owning file; logical start mirrors physical */
	pa->pa_lstart = pa->pa_pstart;
	pa->pa_len = ac->ac_b_ex.fe_len;
	pa->pa_free = pa->pa_len;
	atomic_set(&pa->pa_count, 1);
	spin_lock_init(&pa->pa_lock);
	INIT_LIST_HEAD(&pa->pa_inode_list);
	INIT_LIST_HEAD(&pa->pa_group_list);
	pa->pa_deleted = 0;
	pa->pa_type = MB_GROUP_PA;

	mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
	trace_ext4_mb_new_group_pa(ac, pa);

	/* serve the original request from the new pa right away */
	ext4_mb_use_group_pa(ac, pa);
	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);

	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
	lg = ac->ac_lg;
	BUG_ON(lg == NULL);

	pa->pa_obj_lock = &lg->lg_prealloc_lock;
	pa->pa_inode = NULL;

	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);

	/*
	 * the pa is added to the locality group's per-order bucket later,
	 * once pa_free is final -- see ext4_mb_release_context() calling
	 * ext4_mb_add_n_trim()
	 */
	return 0;
}
3460
3461static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3462{
3463 int err;
3464
3465 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3466 err = ext4_mb_new_group_pa(ac);
3467 else
3468 err = ext4_mb_new_inode_pa(ac);
3469 return err;
3470}
3471
3472
3473
3474
3475
3476
3477
3478
3479
/*
 * release an inode preallocation: give every block the pa still owns
 * (zero in the on-disk bitmap within the pa's range) back to the buddy.
 * The pa must already be marked deleted; the bitmap is caller-loaded.
 */
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
			struct ext4_prealloc_space *pa,
			struct ext4_allocation_context *ac)
{
	struct super_block *sb = e4b->bd_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned int end;
	unsigned int next;
	ext4_group_t group;
	ext4_grpblk_t bit;
	unsigned long long grp_blk_start;
	sector_t start;
	int err = 0;
	int free = 0;

	BUG_ON(pa->pa_deleted == 0);
	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
	grp_blk_start = pa->pa_pstart - bit;
	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
	end = bit + pa->pa_len;

	if (ac) {
		ac->ac_sb = sb;
		ac->ac_inode = pa->pa_inode;
	}

	/* scan the pa's bitmap range; each run of zero bits is a chunk
	 * never handed out and must be returned to the buddy */
	while (bit < end) {
		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
		if (bit >= end)
			break;
		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
		start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
				le32_to_cpu(sbi->s_es->s_first_data_block);
		mb_debug(1, " free preallocated %u/%u in group %u\n",
				(unsigned) start, (unsigned) next - bit,
				(unsigned) group);
		free += next - bit;

		/* record the discarded chunk for tracing */
		if (ac) {
			ac->ac_b_ex.fe_group = group;
			ac->ac_b_ex.fe_start = bit;
			ac->ac_b_ex.fe_len = next - bit;
			ac->ac_b_ex.fe_logical = 0;
			trace_ext4_mballoc_discard(ac);
		}

		trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
					       next - bit);
		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
		bit = next + 1;
	}
	if (free != pa->pa_free) {
		/* bitmap and pa accounting disagree -- report loudly */
		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
			pa, (unsigned long) pa->pa_lstart,
			(unsigned long) pa->pa_pstart,
			(unsigned long) pa->pa_len);
		ext4_grp_locked_error(sb, group,
					__func__, "free %u, pa_free %u",
					free, pa->pa_free);
		/*
		 * pa is already deleted, so we use the value obtained
		 * from the bitmap and just continue
		 */
	}
	atomic_add(free, &sbi->s_mb_discarded);

	return err;
}
3549
3550static noinline_for_stack int
3551ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3552 struct ext4_prealloc_space *pa,
3553 struct ext4_allocation_context *ac)
3554{
3555 struct super_block *sb = e4b->bd_sb;
3556 ext4_group_t group;
3557 ext4_grpblk_t bit;
3558
3559 trace_ext4_mb_release_group_pa(ac, pa);
3560 BUG_ON(pa->pa_deleted == 0);
3561 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3562 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3563 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3564 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3565
3566 if (ac) {
3567 ac->ac_sb = sb;
3568 ac->ac_inode = NULL;
3569 ac->ac_b_ex.fe_group = group;
3570 ac->ac_b_ex.fe_start = bit;
3571 ac->ac_b_ex.fe_len = pa->pa_len;
3572 ac->ac_b_ex.fe_logical = 0;
3573 trace_ext4_mballoc_discard(ac);
3574 }
3575
3576 return 0;
3577}
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
/*
 * releases all unused preallocations in the given group, freeing up to
 * @needed blocks (0 means "everything"); returns the number of blocks
 * actually freed
 */
static noinline_for_stack int
ext4_mb_discard_group_preallocations(struct super_block *sb,
					ext4_group_t group, int needed)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_prealloc_space *pa, *tmp;
	struct ext4_allocation_context *ac;
	struct list_head list;
	struct ext4_buddy e4b;
	int err;
	int busy = 0;
	int free = 0;

	mb_debug(1, "discard preallocation for group %u\n", group);

	if (list_empty(&grp->bb_prealloc_list))
		return 0;

	bitmap_bh = ext4_read_block_bitmap(sb, group);
	if (bitmap_bh == NULL) {
		ext4_error(sb, __func__, "Error in reading block "
				"bitmap for %u", group);
		return 0;
	}

	err = ext4_mb_load_buddy(sb, group, &e4b);
	if (err) {
		ext4_error(sb, __func__, "Error in loading buddy "
				"information for %u", group);
		put_bh(bitmap_bh);
		return 0;
	}

	if (needed == 0)
		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;

	INIT_LIST_HEAD(&list);
	/* ac is only used to feed the discard tracepoints; a failed
	 * allocation here is tolerated (callees accept NULL) */
	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (ac)
		ac->ac_sb = sb;
repeat:
	/* pass 1: under the group lock, unhook every idle pa */
	ext4_lock_group(sb, group);
	list_for_each_entry_safe(pa, tmp,
				&grp->bb_prealloc_list, pa_group_list) {
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/* pa is in use right now; remember and retry */
			spin_unlock(&pa->pa_lock);
			busy = 1;
			continue;
		}
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}

		/* seems this one can be freed ... */
		pa->pa_deleted = 1;

		/* we can trust pa_free now that pa_deleted blocks users */
		free += pa->pa_free;

		spin_unlock(&pa->pa_lock);

		list_del(&pa->pa_group_list);
		list_add(&pa->u.pa_tmp_list, &list);
	}

	/* not enough freed yet and some pa's were busy: retry after
	 * giving their users a chance to drop them */
	if (free < needed && busy) {
		busy = 0;
		ext4_unlock_group(sb, group);
		/*
		 * Yield the CPU here so that we don't get soft lockup
		 * in non preempt case.
		 */
		yield();
		goto repeat;
	}

	/* found anything to free? */
	if (list_empty(&list)) {
		BUG_ON(free != 0);
		goto out;
	}

	/* pass 2: release each collected pa */
	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {

		/* remove from object (inode or locality group) */
		spin_lock(pa->pa_obj_lock);
		list_del_rcu(&pa->pa_inode_list);
		spin_unlock(pa->pa_obj_lock);

		if (pa->pa_type == MB_GROUP_PA)
			ext4_mb_release_group_pa(&e4b, pa, ac);
		else
			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);

		list_del(&pa->u.pa_tmp_list);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}

out:
	ext4_unlock_group(sb, group);
	if (ac)
		kmem_cache_free(ext4_ac_cachep, ac);
	ext4_mb_release_desc(&e4b);
	put_bh(bitmap_bh);
	return free;
}
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
/*
 * releases all unused preallocated blocks of the given inode: every pa
 * on ei->i_prealloc_list is marked deleted, unlinked, its blocks are
 * returned to the buddy, and the descriptor is freed via RCU
 */
void ext4_discard_preallocations(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_prealloc_space *pa, *tmp;
	struct ext4_allocation_context *ac;
	ext4_group_t group = 0;
	struct list_head list;
	struct ext4_buddy e4b;
	int err;

	if (!S_ISREG(inode->i_mode)) {
		/* only regular files have preallocations */
		return;
	}

	mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
	trace_ext4_discard_preallocations(inode);

	INIT_LIST_HEAD(&list);

	/* ac only feeds the discard tracepoints; NULL is tolerated */
	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (ac) {
		ac->ac_sb = sb;
		ac->ac_inode = inode;
	}
repeat:
	/* phase 1: collect all pa's of the inode onto a private list */
	spin_lock(&ei->i_prealloc_lock);
	while (!list_empty(&ei->i_prealloc_list)) {
		pa = list_entry(ei->i_prealloc_list.next,
				struct ext4_prealloc_space, pa_inode_list);
		BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/* this shouldn't happen -- nobody should be
			 * using a pa while we're discarding the inode */
			spin_unlock(&pa->pa_lock);
			spin_unlock(&ei->i_prealloc_lock);
			printk(KERN_ERR "uh-oh! used pa while discarding\n");
			WARN_ON(1);
			schedule_timeout_uninterruptible(HZ);
			goto repeat;

		}
		if (pa->pa_deleted == 0) {
			pa->pa_deleted = 1;
			spin_unlock(&pa->pa_lock);
			list_del_rcu(&pa->pa_inode_list);
			list_add(&pa->u.pa_tmp_list, &list);
			continue;
		}

		/* someone else is deleting this pa right now */
		spin_unlock(&pa->pa_lock);
		spin_unlock(&ei->i_prealloc_lock);

		/* we have to wait here: pa_deleted alone doesn't mean
		 * the pa is already unlinked from our list.  If we are
		 * called on the inode-destruction path, the inode may
		 * be freed while the concurrent deleter still touches
		 * the pa's list linkage -- use-after-free.
		 *
		 * NOTE(review): a fixed 1s back-off (HZ) is used for
		 * both wait sites below; presumably this path is rare
		 * enough that the coarse delay is acceptable. */
		schedule_timeout_uninterruptible(HZ);
		goto repeat;
	}
	spin_unlock(&ei->i_prealloc_lock);

	/* phase 2: release each collected pa back to its group */
	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
		BUG_ON(pa->pa_type != MB_INODE_PA);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);

		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			ext4_error(sb, __func__, "Error in loading buddy "
					"information for %u", group);
			continue;
		}

		bitmap_bh = ext4_read_block_bitmap(sb, group);
		if (bitmap_bh == NULL) {
			ext4_error(sb, __func__, "Error in reading block "
					"bitmap for %u", group);
			ext4_mb_release_desc(&e4b);
			continue;
		}

		ext4_lock_group(sb, group);
		list_del(&pa->pa_group_list);
		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
		ext4_unlock_group(sb, group);

		ext4_mb_release_desc(&e4b);
		put_bh(bitmap_bh);

		list_del(&pa->u.pa_tmp_list);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}
	if (ac)
		kmem_cache_free(ext4_ac_cachep, ac);
}
3817
3818
3819
3820
3821
3822
3823
3824
3825static void ext4_mb_return_to_preallocation(struct inode *inode,
3826 struct ext4_buddy *e4b,
3827 sector_t block, int count)
3828{
3829 BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
3830}
3831#ifdef CONFIG_EXT4_DEBUG
/* dump the allocation context and all groups' preallocations to the
 * kernel log when an allocation cannot be satisfied (debug builds) */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t ngroups, i;

	printk(KERN_ERR "EXT4-fs: Can't allocate:"
			" Allocation context details:\n");
	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
			ac->ac_status, ac->ac_flags);
	/* original, goal and best extents as group/start/len@logical */
	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d\n",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
		ac->ac_found);
	printk(KERN_ERR "EXT4-fs: groups: \n");
	ngroups = ext4_get_groups_count(sb);
	for (i = 0; i < ngroups; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;
		struct list_head *cur;
		/* list every preallocation attached to this group */
		ext4_lock_group(sb, i);
		list_for_each(cur, &grp->bb_prealloc_list) {
			pa = list_entry(cur, struct ext4_prealloc_space,
					pa_group_list);
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%u:%d:%u \n", i,
							start, pa->pa_len);
		}
		ext4_unlock_group(sb, i);

		if (grp->bb_free == 0)
			continue;
		printk(KERN_ERR "%u: %d/%d \n",
				i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
3885#else
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	/* no-op when CONFIG_EXT4_DEBUG is disabled */
}
3890#endif
3891
3892
3893
3894
3895
3896
3897
3898
/*
 * decide how this data allocation should be served: no preallocation,
 * stream (inode) allocation for large files, or per-CPU locality-group
 * allocation for small files; modifies ac->ac_flags and may take the
 * locality group's mutex
 */
static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int bsbits = ac->ac_sb->s_blocksize_bits;
	loff_t size, isize;

	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	/* size: end of the request in blocks; isize: i_size in blocks */
	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
		>> bsbits;

	/* request ends exactly at EOF, the fs is not busy and nobody
	 * holds the file open for write: skip preallocation entirely */
	if ((size == isize) &&
	    !ext4_fs_is_busy(sbi) &&
	    (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
		ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
		return;
	}

	/* don't use group allocation for large files */
	size = max(size, isize);
	if (size >= sbi->s_mb_stream_request) {
		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
		return;
	}

	BUG_ON(ac->ac_lg != NULL);
	/*
	 * locality group prealloc space is per-CPU; having one per CPU
	 * reduces contention between block requests from multiple CPUs
	 */
	ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());

	/* we're going to use group allocation */
	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;

	/* serialize all allocations in the group */
	mutex_lock(&ac->ac_lg->lg_mutex);
}
3943
/*
 * initialize the allocation context from an allocation request: clamp
 * the requested length, validate the goal block and fill in the
 * original ("o") and goal ("g") extents; always returns 0
 */
static noinline_for_stack int
ext4_mb_initialize_context(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	struct super_block *sb = ar->inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_group_t group;
	unsigned int len;
	ext4_fsblk_t goal;
	ext4_grpblk_t block;

	len = ar->len;

	/* we can't allocate more than a group holds; clamp with a
	 * small safety margin */
	if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
		len = EXT4_BLOCKS_PER_GROUP(sb) - 10;

	/* start searching from the goal, falling back to the first
	 * data block if the goal is out of range */
	goal = ar->goal;
	if (goal < le32_to_cpu(es->s_first_data_block) ||
			goal >= ext4_blocks_count(es))
		goal = le32_to_cpu(es->s_first_data_block);
	ext4_get_group_no_and_offset(sb, goal, &group, &block);

	/* set up allocation goals */
	memset(ac, 0, sizeof(struct ext4_allocation_context));
	ac->ac_b_ex.fe_logical = ar->logical;
	ac->ac_status = AC_STATUS_CONTINUE;
	ac->ac_sb = sb;
	ac->ac_inode = ar->inode;
	ac->ac_o_ex.fe_logical = ar->logical;
	ac->ac_o_ex.fe_group = group;
	ac->ac_o_ex.fe_start = block;
	ac->ac_o_ex.fe_len = len;
	ac->ac_g_ex.fe_logical = ar->logical;
	ac->ac_g_ex.fe_group = group;
	ac->ac_g_ex.fe_start = block;
	ac->ac_g_ex.fe_len = len;
	ac->ac_flags = ar->flags;

	/* decide file vs locality-group allocation; this may modify
	 * ac->ac_flags and take the locality group mutex */
	ext4_mb_group_or_file(ac);

	mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
			"left: %u/%u, right %u/%u to %swritable\n",
			(unsigned) ar->len, (unsigned) ar->logical,
			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
			(unsigned) ar->lleft, (unsigned) ar->pleft,
			(unsigned) ar->lright, (unsigned) ar->pright,
			atomic_read(&ar->inode->i_writecount) ? "" : "non-");
	return 0;

}
4000
/*
 * discard unused preallocations from the locality group's per-order
 * bucket @order, trimming the list down while keeping a few entries
 * for future group allocations
 */
static noinline_for_stack void
ext4_mb_discard_lg_preallocations(struct super_block *sb,
					struct ext4_locality_group *lg,
					int order, int total_entries)
{
	ext4_group_t group = 0;
	struct ext4_buddy e4b;
	struct list_head discard_list;
	struct ext4_prealloc_space *pa, *tmp;
	struct ext4_allocation_context *ac;

	mb_debug(1, "discard locality group preallocation\n");

	INIT_LIST_HEAD(&discard_list);
	/* ac only feeds the discard tracepoints; NULL is tolerated */
	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (ac)
		ac->ac_sb = sb;

	/* pass 1: unhook deletable pa's onto a private list */
	spin_lock(&lg->lg_prealloc_lock);
	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
						pa_inode_list) {
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/* referenced pa (e.g. the one just used for
			 * allocation) must not be freed */
			spin_unlock(&pa->pa_lock);
			continue;
		}
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}
		/* only locality-group pa's live on these lists */
		BUG_ON(pa->pa_type != MB_GROUP_PA);

		/* seems this one can be freed ... */
		pa->pa_deleted = 1;
		spin_unlock(&pa->pa_lock);

		list_del_rcu(&pa->pa_inode_list);
		list_add(&pa->u.pa_tmp_list, &discard_list);

		total_entries--;
		if (total_entries <= 5) {
			/*
			 * keep roughly 5 entries so the bucket can grow
			 * back to 8 before the next trim; this keeps
			 * the lg_prealloc list from getting too long
			 */
			break;
		}
	}
	spin_unlock(&lg->lg_prealloc_lock);

	/* pass 2: release each collected pa back to its group */
	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {

		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
		if (ext4_mb_load_buddy(sb, group, &e4b)) {
			ext4_error(sb, __func__, "Error in loading buddy "
					"information for %u", group);
			continue;
		}
		ext4_lock_group(sb, group);
		list_del(&pa->pa_group_list);
		ext4_mb_release_group_pa(&e4b, pa, ac);
		ext4_unlock_group(sb, group);

		ext4_mb_release_desc(&e4b);
		list_del(&pa->u.pa_tmp_list);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}
	if (ac)
		kmem_cache_free(ext4_ac_cachep, ac);
}
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4090{
4091 int order, added = 0, lg_prealloc_count = 1;
4092 struct super_block *sb = ac->ac_sb;
4093 struct ext4_locality_group *lg = ac->ac_lg;
4094 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4095
4096 order = fls(pa->pa_free) - 1;
4097 if (order > PREALLOC_TB_SIZE - 1)
4098
4099 order = PREALLOC_TB_SIZE - 1;
4100
4101 rcu_read_lock();
4102 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4103 pa_inode_list) {
4104 spin_lock(&tmp_pa->pa_lock);
4105 if (tmp_pa->pa_deleted) {
4106 spin_unlock(&tmp_pa->pa_lock);
4107 continue;
4108 }
4109 if (!added && pa->pa_free < tmp_pa->pa_free) {
4110
4111 list_add_tail_rcu(&pa->pa_inode_list,
4112 &tmp_pa->pa_inode_list);
4113 added = 1;
4114
4115
4116
4117
4118 }
4119 spin_unlock(&tmp_pa->pa_lock);
4120 lg_prealloc_count++;
4121 }
4122 if (!added)
4123 list_add_tail_rcu(&pa->pa_inode_list,
4124 &lg->lg_prealloc_list[order]);
4125 rcu_read_unlock();
4126
4127
4128 if (lg_prealloc_count > 8) {
4129 ext4_mb_discard_lg_preallocations(sb, lg,
4130 order, lg_prealloc_count);
4131 return;
4132 }
4133 return ;
4134}
4135
4136
4137
4138
/*
 * release all resources used by the allocation context: update a used
 * group PA's window, drop the buddy/bitmap page and PA references, and
 * unlock the locality group; always returns 0
 */
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
{
	struct ext4_prealloc_space *pa = ac->ac_pa;
	if (pa) {
		if (pa->pa_type == MB_GROUP_PA) {
			/* a group PA is consumed from its front: now that
			 * the blocks are marked used, advance its start
			 * and shrink its remaining length/free count */
			spin_lock(&pa->pa_lock);
			pa->pa_pstart += ac->ac_b_ex.fe_len;
			pa->pa_lstart += ac->ac_b_ex.fe_len;
			pa->pa_free -= ac->ac_b_ex.fe_len;
			pa->pa_len -= ac->ac_b_ex.fe_len;
			spin_unlock(&pa->pa_lock);
		}
	}
	if (ac->alloc_semp)
		up_read(ac->alloc_semp);
	if (pa) {
		/*
		 * We want to add the pa to the right per-order bucket:
		 * remove it from its list and re-insert via
		 * ext4_mb_add_n_trim(), which also keeps the target list
		 * from growing too big.  alloc_semp must be released
		 * before calling ext4_mb_add_n_trim().
		 */
		if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
			spin_lock(pa->pa_obj_lock);
			list_del_rcu(&pa->pa_inode_list);
			spin_unlock(pa->pa_obj_lock);
			ext4_mb_add_n_trim(ac);
		}
		ext4_mb_put_pa(ac, ac->ac_sb, pa);
	}
	if (ac->ac_bitmap_page)
		page_cache_release(ac->ac_bitmap_page);
	if (ac->ac_buddy_page)
		page_cache_release(ac->ac_buddy_page);
	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
		mutex_unlock(&ac->ac_lg->lg_mutex);
	ext4_mb_collect_stats(ac);
	return 0;
}
4180
4181static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4182{
4183 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4184 int ret;
4185 int freed = 0;
4186
4187 trace_ext4_mb_discard_preallocations(sb, needed);
4188 for (i = 0; i < ngroups && needed > 0; i++) {
4189 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4190 freed += ret;
4191 needed -= ret;
4192 }
4193
4194 return freed;
4195}
4196
4197
4198
4199
4200
4201
/*
 * Main entry point into mballoc to allocate blocks.
 * It tries to use preallocation first, then falls back to the regular
 * group allocator. On failure it discards existing preallocations and
 * retries. Returns the first allocated block number, or 0 with *errp
 * set to a negative errno.
 */
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
				 struct ext4_allocation_request *ar, int *errp)
{
	int freed;
	struct ext4_allocation_context *ac = NULL;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block = 0;
	unsigned int inquota = 0;		/* blocks charged to quota */
	unsigned int reserv_blks = 0;		/* blocks claimed from fs free count */

	sb = ar->inode->i_sb;
	sbi = EXT4_SB(sb);

	trace_ext4_request_blocks(ar);

	/*
	 * For delayed allocation, we could skip the ENOSPC and
	 * EDQUOT check, as blocks and quotas have been already
	 * reserved when data was being copied into pagecache.
	 */
	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
	else {
		/*
		 * Without delayed allocation we need to verify
		 * there are enough free blocks to do block allocation
		 * and verify allocation doesn't exceed the quota limits.
		 */
		while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
			/* let others free some space, then retry smaller */
			yield();
			ar->len = ar->len >> 1;
		}
		if (!ar->len) {
			*errp = -ENOSPC;
			return 0;
		}
		reserv_blks = ar->len;
		while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) {
			/* quota exceeded: shrink request, forbid preallocation */
			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
			ar->len--;
		}
		inquota = ar->len;
		if (ar->len == 0) {
			*errp = -EDQUOT;
			goto out3;
		}
	}

	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (!ac) {
		ar->len = 0;
		*errp = -ENOMEM;
		goto out1;
	}

	*errp = ext4_mb_initialize_context(ac, ar);
	if (*errp) {
		ar->len = 0;
		goto out2;
	}

	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
	if (!ext4_mb_use_preallocated(ac)) {
		/* no suitable preallocation: do a regular allocation */
		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
		ext4_mb_normalize_request(ac, ar);
repeat:
		/* allocate space in core */
		ext4_mb_regular_allocator(ac);

		/*
		 * If we got more than the originally requested length,
		 * preallocate the surplus for this inode/group.
		 */
		if (ac->ac_status == AC_STATUS_FOUND &&
				ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
			ext4_mb_new_preallocation(ac);
	}
	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
		if (*errp ==  -EAGAIN) {
			/*
			 * drop the reference that we took
			 * in ext4_mb_use_best_found
			 */
			ext4_mb_release_context(ac);
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			goto repeat;
		} else if (*errp) {
			ac->ac_b_ex.fe_len = 0;
			ar->len = 0;
			ext4_mb_show_ac(ac);
		} else {
			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
			ar->len = ac->ac_b_ex.fe_len;
		}
	} else {
		/* no space found: discard preallocations and retry */
		freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
		if (freed)
			goto repeat;
		*errp = -ENOSPC;
		ac->ac_b_ex.fe_len = 0;
		ar->len = 0;
		ext4_mb_show_ac(ac);
	}

	ext4_mb_release_context(ac);

out2:
	kmem_cache_free(ext4_ac_cachep, ac);
out1:
	/* return unused quota: we charged inquota but used only ar->len */
	if (inquota && ar->len < inquota)
		vfs_dq_free_block(ar->inode, inquota - ar->len);
out3:
	if (!ar->len) {
		if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
			/* release all the reserved blocks if non delalloc */
			percpu_counter_sub(&sbi->s_dirtyblocks_counter,
						reserv_blks);
	}

	trace_ext4_allocate_blocks(ar, (unsigned long long)block);

	return block;
}
4329
4330
4331
4332
4333
4334
4335static int can_merge(struct ext4_free_data *entry1,
4336 struct ext4_free_data *entry2)
4337{
4338 if ((entry1->t_tid == entry2->t_tid) &&
4339 (entry1->group == entry2->group) &&
4340 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4341 return 1;
4342 return 0;
4343}
4344
/*
 * Queue freed metadata blocks on the transaction's private list so they
 * are only returned to the buddy cache after the transaction commits
 * (see release_blocks_on_commit()). The pending extents are kept in a
 * per-group rb-tree keyed by start block; adjacent same-transaction
 * extents are coalesced on insert. Caller holds the group lock.
 * Returns 0 (a detected double free is reported and ignored).
 */
static noinline_for_stack int
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
		      struct ext4_free_data *new_entry)
{
	ext4_grpblk_t block;
	struct ext4_free_data *entry;
	struct ext4_group_info *db = e4b->bd_info;
	struct super_block *sb = e4b->bd_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct rb_node **n = &db->bb_free_root.rb_node, *node;
	struct rb_node *parent = NULL, *new_node;

	BUG_ON(!ext4_handle_valid(handle));
	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	new_node = &new_entry->node;
	block = new_entry->start_blk;

	if (!*n) {
		/*
		 * First free extent for this group: pin the buddy and
		 * bitmap pages so the buddy cache is not reclaimed and
		 * re-read from the on-disk bitmap, which would lose the
		 * not-yet-committed (still unavailable) blocks.
		 */
		page_cache_get(e4b->bd_buddy_page);
		page_cache_get(e4b->bd_bitmap_page);
	}
	while (*n) {
		parent = *n;
		entry = rb_entry(parent, struct ext4_free_data, node);
		if (block < entry->start_blk)
			n = &(*n)->rb_left;
		else if (block >= (entry->start_blk + entry->count))
			n = &(*n)->rb_right;
		else {
			/* overlap with an already-pending extent */
			ext4_grp_locked_error(sb, e4b->bd_group, __func__,
					"Double free of blocks %d (%d %d)",
					block, entry->start_blk, entry->count);
			return 0;
		}
	}

	rb_link_node(new_node, parent, n);
	rb_insert_color(new_node, &db->bb_free_root);

	/* Now try to see the extent can be merged to left and right */
	node = rb_prev(new_node);
	if (node) {
		entry = rb_entry(node, struct ext4_free_data, node);
		if (can_merge(entry, new_entry)) {
			/* absorb the left neighbour into new_entry */
			new_entry->start_blk = entry->start_blk;
			new_entry->count += entry->count;
			rb_erase(node, &(db->bb_free_root));
			spin_lock(&sbi->s_md_lock);
			list_del(&entry->list);
			spin_unlock(&sbi->s_md_lock);
			kmem_cache_free(ext4_free_ext_cachep, entry);
		}
	}

	node = rb_next(new_node);
	if (node) {
		entry = rb_entry(node, struct ext4_free_data, node);
		if (can_merge(new_entry, entry)) {
			/* absorb the right neighbour into new_entry */
			new_entry->count += entry->count;
			rb_erase(node, &(db->bb_free_root));
			spin_lock(&sbi->s_md_lock);
			list_del(&entry->list);
			spin_unlock(&sbi->s_md_lock);
			kmem_cache_free(ext4_free_ext_cachep, entry);
		}
	}
	/* defer the actual buddy release until the transaction commits */
	spin_lock(&sbi->s_md_lock);
	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
	spin_unlock(&sbi->s_md_lock);
	return 0;
}
4424
4425
4426
4427
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count,
4430 int metadata, unsigned long *freed)
4431{
4432 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es;
4437 unsigned int overflow;
4438 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh;
4440 ext4_group_t block_group;
4441 struct ext4_sb_info *sbi;
4442 struct ext4_buddy e4b;
4443 int err = 0;
4444 int ret;
4445
4446 *freed = 0;
4447
4448 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) ||
4451 block + count < block ||
4452 block + count > ext4_blocks_count(es)) {
4453 ext4_error(sb, __func__,
4454 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count);
4456 goto error_return;
4457 }
4458
4459 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata);
4461
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) {
4464 ac->ac_inode = inode;
4465 ac->ac_sb = sb;
4466 }
4467
4468do_more:
4469 overflow = 0;
4470 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4471
4472
4473
4474
4475
4476 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4477 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
4478 count -= overflow;
4479 }
4480 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4481 if (!bitmap_bh) {
4482 err = -EIO;
4483 goto error_return;
4484 }
4485 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4486 if (!gdp) {
4487 err = -EIO;
4488 goto error_return;
4489 }
4490
4491 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4492 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4493 in_range(block, ext4_inode_table(sb, gdp),
4494 EXT4_SB(sb)->s_itb_per_group) ||
4495 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4496 EXT4_SB(sb)->s_itb_per_group)) {
4497
4498 ext4_error(sb, __func__,
4499 "Freeing blocks in system zone - "
4500 "Block = %llu, count = %lu", block, count);
4501
4502 goto error_return;
4503 }
4504
4505 BUFFER_TRACE(bitmap_bh, "getting write access");
4506 err = ext4_journal_get_write_access(handle, bitmap_bh);
4507 if (err)
4508 goto error_return;
4509
4510
4511
4512
4513
4514
4515 BUFFER_TRACE(gd_bh, "get_write_access");
4516 err = ext4_journal_get_write_access(handle, gd_bh);
4517 if (err)
4518 goto error_return;
4519#ifdef AGGRESSIVE_CHECK
4520 {
4521 int i;
4522 for (i = 0; i < count; i++)
4523 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4524 }
4525#endif
4526 if (ac) {
4527 ac->ac_b_ex.fe_group = block_group;
4528 ac->ac_b_ex.fe_start = bit;
4529 ac->ac_b_ex.fe_len = count;
4530 trace_ext4_mballoc_free(ac);
4531 }
4532
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err)
4535 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry;
4538
4539
4540
4541
4542 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4543 new_entry->start_blk = bit;
4544 new_entry->group = block_group;
4545 new_entry->count = count;
4546 new_entry->t_tid = handle->h_transaction->t_tid;
4547
4548 ext4_lock_group(sb, block_group);
4549 mb_clear_bits(bitmap_bh->b_data, bit, count);
4550 ext4_mb_free_metadata(handle, &e4b, new_entry);
4551 } else {
4552
4553
4554
4555
4556 ext4_lock_group(sb, block_group);
4557 mb_clear_bits(bitmap_bh->b_data, bit, count);
4558 mb_free_blocks(inode, &e4b, bit, count);
4559 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
4560 }
4561
4562 ret = ext4_free_blks_count(sb, gdp) + count;
4563 ext4_free_blks_set(sb, gdp, ret);
4564 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4565 ext4_unlock_group(sb, block_group);
4566 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4567
4568 if (sbi->s_log_groups_per_flex) {
4569 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4570 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
4571 }
4572
4573 ext4_mb_release_desc(&e4b);
4574
4575 *freed += count;
4576
4577
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4579 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4580
4581
4582 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4583 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4584 if (!err)
4585 err = ret;
4586
4587 if (overflow && !err) {
4588 block += count;
4589 count = overflow;
4590 put_bh(bitmap_bh);
4591 goto do_more;
4592 }
4593 sb->s_dirt = 1;
4594error_return:
4595 brelse(bitmap_bh);
4596 ext4_std_error(sb, err);
4597 if (ac)
4598 kmem_cache_free(ext4_ac_cachep, ac);
4599 return;
4600}
4601