1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include "mballoc.h"
25#include <linux/debugfs.h>
26#include <linux/slab.h>
27#include <trace/events/ext4.h>
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
/* Slab caches used by the multiblock allocator. */
static struct kmem_cache *ext4_pspace_cachep;	/* preallocation-space descriptors */
static struct kmem_cache *ext4_ac_cachep;	/* allocation contexts */
static struct kmem_cache *ext4_free_ext_cachep;	/* free-extent records (freed at commit) */

/*
 * One group-info slab per size class; names suggest one cache per
 * supported block size from 1k up to 128k.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

/* Forward declarations for helpers defined later in this file. */
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
359
360static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
361{
362#if BITS_PER_LONG == 64
363 *bit += ((unsigned long) addr & 7UL) << 3;
364 addr = (void *) ((unsigned long) addr & ~7UL);
365#elif BITS_PER_LONG == 32
366 *bit += ((unsigned long) addr & 3UL) << 3;
367 addr = (void *) ((unsigned long) addr & ~3UL);
368#else
369#error "how many bits you are?!"
370#endif
371 return addr;
372}
373
/* Test a bit in a possibly-unaligned bitmap (base is realigned first). */
static inline int mb_test_bit(int bit, void *addr)
{
	void *base = mb_correct_addr_and_bit(&bit, addr);

	return ext4_test_bit(bit, base);
}
383
/* Set a bit in a possibly-unaligned bitmap (base is realigned first). */
static inline void mb_set_bit(int bit, void *addr)
{
	void *base = mb_correct_addr_and_bit(&bit, addr);

	ext4_set_bit(bit, base);
}
389
/* Clear a bit in a possibly-unaligned bitmap (base is realigned first). */
static inline void mb_clear_bit(int bit, void *addr)
{
	void *base = mb_correct_addr_and_bit(&bit, addr);

	ext4_clear_bit(bit, base);
}
395
/*
 * find_next_zero_bit over a possibly-unaligned bitmap.  The result is
 * clamped to @max so the alignment fixup can never push it past the
 * caller's limit.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0;
	int ret;

	addr = mb_correct_addr_and_bit(&fix, addr);
	ret = ext4_find_next_zero_bit(addr, max + fix, start + fix) - fix;

	return ret > max ? max : ret;
}
408
/*
 * find_next_bit over a possibly-unaligned bitmap; result clamped to
 * @max (see mb_find_next_zero_bit()).
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0;
	int ret;

	addr = mb_correct_addr_and_bit(&fix, addr);
	ret = ext4_find_next_bit(addr, max + fix, start + fix) - fix;

	return ret > max ? max : ret;
}
421
422static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
423{
424 char *bb;
425
426 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
427 BUG_ON(max == NULL);
428
429 if (order > e4b->bd_blkbits + 1) {
430 *max = 0;
431 return NULL;
432 }
433
434
435 if (order == 0) {
436 *max = 1 << (e4b->bd_blkbits + 3);
437 return EXT4_MB_BITMAP(e4b);
438 }
439
440 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
441 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
442
443 return bb;
444}
445
#ifdef DOUBLE_CHECK
/*
 * With DOUBLE_CHECK enabled each group keeps a private shadow copy of
 * the block bitmap (bb_bitmap); every free/allocate is replayed on it so
 * divergence between buddy state and bitmap can be caught immediately.
 */
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
					int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	/* Shadow bitmap may be absent (e.g. allocation failed); skip. */
	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	/* caller must hold the group lock */
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		/* freeing an already-free block indicates corruption */
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += first + i;
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

/* Replay an allocation of @count blocks at @first on the shadow bitmap. */
static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		/* allocating an already-allocated block is corruption */
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

/* Compare the shadow bitmap with @bitmap; BUG on the first mismatch. */
static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				printk(KERN_ERR "corruption in group %u "
				       "at byte %u(%u): %x in copy != %x "
				       "on disk/prealloc\n",
				       e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

#else
/* DOUBLE_CHECK disabled: the shadow-bitmap hooks compile to no-ops. */
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
#endif
521
#ifdef AGGRESSIVE_CHECK

/* Assert with caller location; @function/@file/@line come from the macro
 * wrapper at the bottom of this section. */
#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

/*
 * Exhaustive consistency check of one group's buddy data: each order's
 * bitmap against the one below it, free counters, fragment count, and
 * the prealloc list.  Expensive, so only every 100th call actually runs.
 * Always returns 0 (failures BUG()).
 */
static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		/* throttle: run the full check only once per 100 calls */
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 0 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	/* walk the block bitmap, counting fragments of free blocks */
	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* an allocated block must be covered at every order */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	/* every preallocated extent must be marked used in the bitmap */
	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
637
638
639
640
641
642
643
/*
 * Record a free range [first, first+len) in the buddy bitmaps.  The
 * range is split greedily into power-of-two chunks, each aligned to its
 * own size; for every chunk bb_counters[order] is bumped and, for
 * orders > 0, the corresponding buddy bit is cleared (clear appears to
 * mean "free at this order" -- cf. ext4_mb_generate_buddy() which
 * pre-fills the buddy with 0xff).
 */
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned short border;

	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));

	/* sentinel bit just above the largest possible order */
	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* largest order permitted by the alignment of 'first' */
		max = ffs(first | border) - 1;

		/* largest order that still fits in 'len' */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark one chunk of order 'min' free */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}
679
680
681
682
683
684static void
685mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
686{
687 int i;
688 int bits;
689
690 grp->bb_largest_free_order = -1;
691
692 bits = sb->s_blocksize_bits + 1;
693 for (i = bits; i >= 0; i--) {
694 if (grp->bb_counters[i] > 0) {
695 grp->bb_largest_free_order = i;
696 break;
697 }
698 }
699}
700
/*
 * Build the buddy bitmaps and per-order counters for @group from its
 * on-disk block bitmap, and refresh bb_first_free / bb_fragments /
 * bb_free.  Caller is expected to hold the group lock (cf. the locked
 * call site in ext4_mb_init_cache()).
 */
static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* walk the bitmap one free extent at a time */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "%u blocks in bitmap, %u in gd",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, consider the group descriptor
		 * corrupt and trust the bitmap-derived count instead.
		 */
		grp->bb_free = free;
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	/* mballoc statistics: time spent generating this buddy */
	period = get_cycles() - period;
	spin_lock(&EXT4_SB(sb)->s_bal_lock);
	EXT4_SB(sb)->s_mb_buddies_generated++;
	EXT4_SB(sb)->s_mb_generation_time += period;
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
}
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
/*
 * Fill one page of the buddy-cache inode.  The cache stores two blocks
 * per group: even blocks hold a copy of the on-disk block bitmap (with
 * preallocations and pending frees folded in), odd blocks hold the
 * generated buddy data -- hence group = block >> 1 below.  @incore, when
 * non-NULL, is a bitmap already loaded by the caller to seed buddy
 * generation for the first (odd) block on this page.
 *
 * The page must be locked by the caller.  Returns 0 or -errno.
 */
static int ext4_mb_init_cache(struct page *page, char *incore)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = 1 << inode->i_blkbits;
	blocks_per_page = PAGE_CACHE_SIZE / blocksize;

	/* two cache blocks (bitmap + buddy) per group */
	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps; single group reuses &bhs */
	if (groups_per_page > 1) {
		err = -ENOMEM;
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, GFP_NOFS);
		if (bh == NULL)
			goto out;
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0; i < groups_per_page; i++) {
		struct ext4_group_desc *desc;

		if (first_group + i >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, first_group + i);
		/*
		 * If the page is up to date and this group does not need
		 * init, its bitmap/buddy blocks are already valid -- skip
		 * the disk read entirely (bh[i] stays NULL).
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}

		err = -EIO;
		desc = ext4_get_group_desc(sb, first_group + i, NULL);
		if (desc == NULL)
			goto out;

		err = -ENOMEM;
		bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
		if (bh[i] == NULL)
			goto out;

		if (bitmap_uptodate(bh[i]))
			continue;

		lock_buffer(bh[i]);
		/* re-check under the buffer lock: someone may have raced us */
		if (bitmap_uptodate(bh[i])) {
			unlock_buffer(bh[i]);
			continue;
		}
		ext4_lock_group(sb, first_group + i);
		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
			/* uninitialized group: synthesize the bitmap */
			ext4_init_block_bitmap(sb, bh[i],
						first_group + i, desc);
			set_bitmap_uptodate(bh[i]);
			set_buffer_uptodate(bh[i]);
			ext4_unlock_group(sb, first_group + i);
			unlock_buffer(bh[i]);
			continue;
		}
		ext4_unlock_group(sb, first_group + i);
		if (buffer_uptodate(bh[i])) {
			/*
			 * Buffer data valid but the bitmap flag was not set
			 * yet; mark it and move on without a read.
			 */
			set_bitmap_uptodate(bh[i]);
			unlock_buffer(bh[i]);
			continue;
		}
		get_bh(bh[i]);
		/*
		 * Set the bitmap-uptodate flag before submitting the read:
		 * any concurrent updater will wait on the buffer lock and
		 * then see fully valid bitmap data.
		 */
		set_bitmap_uptodate(bh[i]);
		bh[i]->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh[i]);
		mb_debug(1, "read bitmap for group %u\n", first_group + i);
	}

	/* wait for all submitted I/O to complete */
	for (i = 0; i < groups_per_page; i++)
		if (bh[i])
			wait_on_buffer(bh[i]);

	err = -EIO;
	for (i = 0; i < groups_per_page; i++)
		if (bh[i] && !buffer_uptodate(bh[i]))
			goto out;

	err = 0;
	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		int group;

		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skip initialized uptodate buddy */
			continue;

		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		if ((first_block + i) & 1) {
			/* odd block: (re)generate buddy from @incore bitmap */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			/*
			 * incore was set to the group's bitmap block by the
			 * previous (even) iteration
			 */
			ext4_lock_group(sb, group);
			/* init the buddy: all bits used, then carve out free */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* even block: copy bitmap and fold in reservations */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark preallocated and not-yet-committed-free blocks
			 * as used in the in-core copy */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* hand this bitmap to the next (buddy) iteration */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}
975
976
977
978
979
980
981
/*
 * Find and lock the page(s) in the buddy-cache inode that hold this
 * group's bitmap block and buddy block.  bd_bitmap_page is always set on
 * success; bd_buddy_page stays NULL when both blocks share one page
 * (blocks_per_page >= 2).  Pages are returned locked and referenced --
 * release via ext4_mb_put_buddy_page_lock().  Returns 0 or -EIO.
 */
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	/*
	 * Cache layout: block 2*group is the bitmap, block 2*group+1 the
	 * buddy (see ext4_mb_init_cache()).
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (!page)
		return -EIO;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2) {
		/* buddy and bitmap are on the same page */
		return 0;
	}

	/* blocks_per_page == 1: buddy lives on the following page */
	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (!page)
		return -EIO;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}
1024
1025static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
1026{
1027 if (e4b->bd_bitmap_page) {
1028 unlock_page(e4b->bd_bitmap_page);
1029 page_cache_release(e4b->bd_bitmap_page);
1030 }
1031 if (e4b->bd_buddy_page) {
1032 unlock_page(e4b->bd_buddy_page);
1033 page_cache_release(e4b->bd_buddy_page);
1034 }
1035}
1036
1037
1038
1039
1040
1041
/*
 * Initialize the buddy cache for one group: lock its cache page(s),
 * then populate first the bitmap block and then the buddy block via
 * ext4_mb_init_cache().  Holding the page locks for the whole sequence
 * keeps the two halves consistent with each other.  Returns 0 or -errno.
 */
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{

	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	mb_debug(1, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * Take the page locks first, then re-check NEED_INIT: another
	 * CPU may have initialized the group while we were waiting.
	 */
	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group already (or page-lock
		 * setup failed); nothing left to do here
		 */
		goto err;
	}

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);

	if (e4b.bd_buddy_page == NULL) {
		/*
		 * Bitmap and buddy share one page; the init_cache call
		 * above already generated the buddy too.
		 */
		ret = 0;
		goto err;
	}
	/* init buddy from the bitmap we just loaded */
	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}
1102
1103
1104
1105
1106
1107
/*
 * Pin this group's bitmap and buddy pages from the buddy-cache inode
 * and fill in @e4b.  Initializes the group first if it still carries
 * NEED_INIT.  On success both pages hold an extra reference; release
 * with ext4_mb_unload_buddy().  Returns 0 or -errno.
 */
static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = ext4_get_group_info(sb, group);
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/*
		 * First use of this group: build its bitmap and buddy
		 * cache blocks before we can pin them.
		 */
		ret = ext4_mb_init_group(sb, group);
		if (ret)
			return ret;
	}

	/*
	 * Cache layout: block 2*group is the bitmap, 2*group+1 the buddy
	 * (see ext4_mb_init_cache()).
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* fast path: page already cached and up to date */
	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the stale/partial reference; we will retry
			 * with find_or_create_page() below, which returns
			 * the page locked so we can (re)initialize it
			 */
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	/* now the buddy block, same fast-path/retry dance */
	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				/* seed buddy generation from the bitmap
				 * pinned above */
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	/* drop the in-flight page plus anything already pinned in e4b */
	if (page)
		page_cache_release(page);
	if (e4b->bd_bitmap_page)
		page_cache_release(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		page_cache_release(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}
1235
1236static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
1237{
1238 if (e4b->bd_bitmap_page)
1239 page_cache_release(e4b->bd_bitmap_page);
1240 if (e4b->bd_buddy_page)
1241 page_cache_release(e4b->bd_buddy_page);
1242}
1243
1244
/*
 * Return the order of the smallest buddy (>= 1) in which @block lies
 * inside a free chunk (a clear bit in an order's buddy bitmap marks a
 * free chunk at that order), or 0 if no order covers it as free.
 */
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1;
	void *bb;

	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	bb = EXT4_MB_BUDDY(e4b);
	while (order <= e4b->bd_blkbits + 1) {
		block = block >> 1;
		if (!mb_test_bit(block, bb)) {
			/* this block is part of a free chunk of this order */
			return order;
		}
		/* advance to the next order's bitmap within the buddy block */
		bb += 1 << (e4b->bd_blkbits - order);
		order++;
	}
	return 0;
}
1265
1266static void mb_clear_bits(void *bm, int cur, int len)
1267{
1268 __u32 *addr;
1269
1270 len = cur + len;
1271 while (cur < len) {
1272 if ((cur & 31) == 0 && (len - cur) >= 32) {
1273
1274 addr = bm + (cur >> 3);
1275 *addr = 0;
1276 cur += 32;
1277 continue;
1278 }
1279 mb_clear_bit(cur, bm);
1280 cur++;
1281 }
1282}
1283
1284static void mb_set_bits(void *bm, int cur, int len)
1285{
1286 __u32 *addr;
1287
1288 len = cur + len;
1289 while (cur < len) {
1290 if ((cur & 31) == 0 && (len - cur) >= 32) {
1291
1292 addr = bm + (cur >> 3);
1293 *addr = 0xffffffff;
1294 cur += 32;
1295 continue;
1296 }
1297 mb_set_bit(cur, bm);
1298 cur++;
1299 }
1300}
1301
/*
 * Return @count blocks starting at @first to the buddy system: clear
 * them in the block bitmap, update free/fragment accounting, and merge
 * freed blocks upward through the buddy orders while both halves of a
 * pair are free.  Caller must hold the group lock.
 */
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			  int first, int count)
{
	int block = 0;
	int max = 0;
	int order;
	void *buddy;
	void *buddy2;
	struct super_block *sb = e4b->bd_sb;

	BUG_ON(first + count > (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/*
	 * Fragment accounting: look at the neighbours of the freed range.
	 * Free on both sides joins two fragments into one; used on both
	 * sides creates a new fragment.
	 */
	if (first != 0)
		block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
		max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
	if (block && max)
		e4b->bd_info->bb_fragments--;
	else if (!block && !max)
		e4b->bd_info->bb_fragments++;

	/* free each block and bubble it up through the buddy orders */
	while (count-- > 0) {
		block = first++;
		order = 0;

		if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += block;
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing already freed block "
					      "(bit %u)", block);
		}
		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
		e4b->bd_info->bb_counters[order]++;

		/* start of the buddy merge loop */
		buddy = mb_find_buddy(e4b, order, &max);

		do {
			block &= ~1UL;
			/* stop if either half of the pair is still used */
			if (mb_test_bit(block, buddy) ||
			    mb_test_bit(block + 1, buddy))
				break;

			/* both halves free: merge into the next order */
			buddy2 = mb_find_buddy(e4b, order + 1, &max);

			if (!buddy2)
				break;

			if (order > 0) {
				/* re-mark the pair used at this order; the
				 * merged chunk is tracked one order up */
				mb_set_bit(block, buddy);
				mb_set_bit(block + 1, buddy);
			}
			e4b->bd_info->bb_counters[order]--;
			e4b->bd_info->bb_counters[order]--;

			block = block >> 1;
			order++;
			e4b->bd_info->bb_counters[order]++;

			mb_clear_bit(block, buddy2);
			buddy = buddy2;
		} while (1);
	}
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}
1385
/*
 * Describe the free extent containing @block (at @order) in *@ex,
 * extending rightward across adjacent free chunks until at least
 * @needed blocks are covered or the free run ends.  Returns the extent
 * length (0 if @block is in use).  Caller must hold the group lock.
 */
static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max;
	int ord;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, order, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		/* block is in use: empty extent */
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find actual order the free chunk lives at */
	if (likely(order == 0)) {
		order = mb_find_order_for_block(e4b, block);
		block = block >> order;
	}

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* trim the leading part of the chunk before the requested block */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	/* grow rightward across adjacent free chunks while more is needed */
	while (needed > ex->fe_len &&
	       (buddy = mb_find_buddy(e4b, order, &max))) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
			break;

		ord = mb_find_order_for_block(e4b, next);

		order = ord;
		block = next >> order;
		ex->fe_len += 1 << order;
	}

	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
	return ex->fe_len;
}
1443
/*
 * Allocate the extent @ex from the buddy system: mark it used in the
 * block bitmap, update free/fragment accounting, and split buddy chunks
 * downward where the extent only partially covers them.  Returns 0, or
 * (remaining_len | (order << 16)) for the first chunk that had to be
 * split -- consumed as ac_tail/ac_buddy by ext4_mb_use_best_found().
 * Caller must hold the group lock.
 */
static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/*
	 * Fragment accounting: free neighbours on both sides mean we cut
	 * a fragment in two; used on both sides removes one entirely.
	 */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* consume the extent chunk by chunk through the buddy orders */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the chunk is fully covered: take it whole */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* remember the first split for the caller (tail/buddy info) */
		if (ret == 0)
			ret = len | (ord << 16);

		/* partial coverage: split the chunk one order down */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}
1518
1519
1520
1521
/*
 * Commit the best-found extent: trim it to the goal length, mark it
 * used in the buddy data, pin the cache pages it came from, and record
 * the allocation in the context.  Caller holds the group lock.
 */
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* never take more than the goal asked for */
	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, so record the original found
	 * extent separately */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	/* mb_mark_used() packed (len | order<<16) of the first split chunk */
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * Hold extra references on the bitmap/buddy pages so the cached
	 * data survives until the allocation is fully committed (released
	 * when the allocation context is torn down).
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);

	/* stream allocation: remember where we stopped for the next one */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}
1562
1563
1564
1565
1566
/*
 * Decide whether scanning should stop.  Breaks the search once too many
 * extents were examined (s_mb_max_to_scan), and takes the current best
 * extent if the scan has gone on long enough (s_mb_min_to_scan, or
 * @finish_group) and the best extent already satisfies the goal length.
 */
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;

	/* scanned too many extents: give up (unless FIRST-fit demanded) */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/* best extent is still shorter than the goal: keep scanning */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/*
		 * re-measure the best extent (it may have changed since it
		 * was recorded) and take it if it still meets the goal
		 */
		max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
/*
 * Judge the free extent @ex against the current best (ac_b_ex) and the
 * goal (ac_g_ex).  Takes @ex immediately on an exact goal match or in
 * FIRST-fit mode; otherwise keeps whichever candidate is closer to the
 * goal, then lets ext4_mb_check_limits() decide whether to stop.
 */
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If new found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied yet, any bigger
		 * extent is an improvement */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* both candidates exceed the goal: prefer the one that
		 * overshoots less */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}
1675
/*
 * Re-check the previously recorded best extent under the group lock and
 * commit it if any usable part is still free.  Returns 0 or -errno from
 * loading the buddy.
 */
static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	/* the extent may have shrunk/vanished since it was recorded */
	max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
1703
/*
 * Try to allocate exactly at the goal (ac->ac_g_ex). Only does anything
 * when EXT4_MB_HINT_TRY_GOAL is set. On success the allocation context is
 * switched to AC_STATUS_FOUND via ext4_mb_use_best_found().
 */
static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, caller may want to merge even small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}
1758
1759
1760
1761
1762
/*
 * The routine scans buddy structures (not bitmap!) from the given order
 * up to the max order and tries to find a big enough chunk to satisfy
 * the request (fe_len must be a power of two for this path).
 */
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		/* counters said a free chunk of this order exists */
		k = mb_find_next_zero_bit(buddy, max, 0);
		BUG_ON(k >= max);

		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}
1801
1802
1803
1804
1805
1806
/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, caller must pass number of
 * free blocks in the group, so the routine can know upper limit.
 */
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = EXT4_MB_BITMAP(e4b);
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_BLOCKS_PER_GROUP(sb), i);
		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
			/*
			 * IF we have corrupt bitmap, we won't find any
			 * free blocks even though group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free blocks as per "
					"group info. But bitmap says 0",
					free);
			break;
		}

		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free blocks as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}

		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}
1861
1862
1863
1864
1865
/*
 * This is a special case for storages like raid5
 * we try to find stripe-aligned chunks for stripe-size requests
 */
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = EXT4_MB_BITMAP(e4b);
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}
1901
1902
/*
 * Decide whether a group is worth scanning under the given criteria
 * (cr 0 = exact buddy-order allocation ... cr 3 = take anything).
 * This is now called BEFORE we load the buddy bitmap.
 * Returns non-zero if the group should be scanned.
 */
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	unsigned free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group);
		if (ret)
			return 0;
	}

	free = grp->bb_free;
	fragments = grp->bb_fragments;
	if (free == 0)
		return 0;
	if (fragments == 0)
		return 0;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return 0;

		return 1;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 3:
		return 1;
	default:
		BUG();
	}

	return 0;
}
1956
/*
 * Main entry point of the per-group allocator: first tries the goal,
 * then scans block groups in increasingly permissive passes
 * (criteria cr 0..3) until something satisfies ac->ac_g_ex.
 */
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the fe_len is a power of 2;
	 * if ac_2order is set we also set criteria to 0 so that we
	 * try exact allocation using buddy.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the request
	 * is greater than or equal to sbi->s_mb_order2_reqs.
	 */
	if (i >= sbi->s_mb_order2_reqs) {
		/*
		 * This should tell if fe_len is exactly power of 2
		 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = i - 1;
	}

	/* if stream allocation is enabled, use global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group start
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			if (group == ngroups)
				group = 0;

			/* This now checks without needing the buddy page */
			if (!ext4_mb_good_group(ac, group, cr))
				continue;

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			if (!ext4_mb_good_group(ac, group, cr)) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long. Let's try to allocate
		 * the best chunk we've found so far
		 */

		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take the first
			 * found block(s).
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	return err;
}
2097
2098static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2099{
2100 struct super_block *sb = seq->private;
2101 ext4_group_t group;
2102
2103 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2104 return NULL;
2105 group = *pos + 1;
2106 return (void *) ((unsigned long) group);
2107}
2108
2109static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2110{
2111 struct super_block *sb = seq->private;
2112 ext4_group_t group;
2113
2114 ++*pos;
2115 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2116 return NULL;
2117 group = *pos + 1;
2118 return (void *) ((unsigned long) group);
2119}
2120
/*
 * seq_file .show: print one line of /proc/.../mb_groups for group (v - 1):
 * free blocks, fragments, first free block, and the 2^0..2^13 buddy counters.
 */
static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = seq->private;
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err;
	struct ext4_buddy e4b;
	/* local snapshot so the group lock is held only for a memcpy */
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[16];
	} sg;

	group--;
	if (group == 0)
		seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
				"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
				  "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
			   "group", "free", "frags", "first",
			   "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
			   "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");

	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);
	/* loading the buddy keeps group info populated while we copy */
	err = ext4_mb_load_buddy(sb, group, &e4b);
	if (err) {
		seq_printf(seq, "#%-5u: I/O error\n", group);
		return 0;
	}
	ext4_lock_group(sb, group);
	memcpy(&sg, ext4_get_group_info(sb, group), i);
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_printf(seq, " ]\n");

	return 0;
}
2163
/* seq_file .stop: nothing to release - tokens are plain integers */
static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}
2167
/* seq_file iterator for the per-group mballoc statistics (mb_groups) */
static const struct seq_operations ext4_mb_seq_groups_ops = {
	.start  = ext4_mb_seq_groups_start,
	.next   = ext4_mb_seq_groups_next,
	.stop   = ext4_mb_seq_groups_stop,
	.show   = ext4_mb_seq_groups_show,
};
2174
2175static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2176{
2177 struct super_block *sb = PDE(inode)->data;
2178 int rc;
2179
2180 rc = seq_open(file, &ext4_mb_seq_groups_ops);
2181 if (rc == 0) {
2182 struct seq_file *m = file->private_data;
2183 m->private = sb;
2184 }
2185 return rc;
2186
2187}
2188
/* proc file_operations for mb_groups, backed by the seq_file iterator */
static const struct file_operations ext4_mb_seq_groups_fops = {
	.owner		= THIS_MODULE,
	.open		= ext4_mb_seq_groups_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
2196
2197static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2198{
2199 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2200 struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
2201
2202 BUG_ON(!cachep);
2203 return cachep;
2204}
2205
2206
2207int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2208 struct ext4_group_desc *desc)
2209{
2210 int i;
2211 int metalen = 0;
2212 struct ext4_sb_info *sbi = EXT4_SB(sb);
2213 struct ext4_group_info **meta_group_info;
2214 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2215
2216
2217
2218
2219
2220
2221 if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
2222 metalen = sizeof(*meta_group_info) <<
2223 EXT4_DESC_PER_BLOCK_BITS(sb);
2224 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2225 if (meta_group_info == NULL) {
2226 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2227 "buddy group\n");
2228 goto exit_meta_group_info;
2229 }
2230 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
2231 meta_group_info;
2232 }
2233
2234 meta_group_info =
2235 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2236 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2237
2238 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2239 if (meta_group_info[i] == NULL) {
2240 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2241 goto exit_group_info;
2242 }
2243 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2244 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2245 &(meta_group_info[i]->bb_state));
2246
2247
2248
2249
2250
2251 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2252 meta_group_info[i]->bb_free =
2253 ext4_free_blocks_after_init(sb, group, desc);
2254 } else {
2255 meta_group_info[i]->bb_free =
2256 ext4_free_blks_count(sb, desc);
2257 }
2258
2259 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2260 init_rwsem(&meta_group_info[i]->alloc_sem);
2261 meta_group_info[i]->bb_free_root = RB_ROOT;
2262 meta_group_info[i]->bb_largest_free_order = -1;
2263
2264#ifdef DOUBLE_CHECK
2265 {
2266 struct buffer_head *bh;
2267 meta_group_info[i]->bb_bitmap =
2268 kmalloc(sb->s_blocksize, GFP_KERNEL);
2269 BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
2270 bh = ext4_read_block_bitmap(sb, group);
2271 BUG_ON(bh == NULL);
2272 memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
2273 sb->s_blocksize);
2274 put_bh(bh);
2275 }
2276#endif
2277
2278 return 0;
2279
2280exit_group_info:
2281
2282 if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
2283 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
2284exit_meta_group_info:
2285 return -ENOMEM;
2286}
2287
/*
 * Set up the mballoc in-core backend: the two-level s_group_info table,
 * the buddy-cache inode, and one ext4_group_info per block group.
 * Returns 0 or -ENOMEM, undoing partial work on failure.
 */
static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int num_meta_group_infos;
	int num_meta_group_infos_max;
	int array_size;
	struct ext4_group_desc *desc;
	struct kmem_cache *cachep;

	/* This is the number of blocks used by GDT */
	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);

	/*
	 * This is the total number of blocks used by GDT including
	 * the number of reserved blocks for GDT.
	 * The s_group_info array is allocated with this value
	 * to allow a clean online resize without a complex
	 * manipulation of pointers.
	 * The drawback is the unused memory when no resize
	 * occurs but it's very low in terms of pages.
	 */
	num_meta_group_infos_max = num_meta_group_infos +
				le16_to_cpu(es->s_reserved_gdt_blocks);

	/*
	 * array_size is the size of s_group_info array. We round it
	 * to the next power of two because this approximation is done
	 * internally by kmalloc so we can have some more memory
	 * before hitting this limit.
	 */
	array_size = 1;
	while (array_size < sizeof(*sbi->s_group_info) *
	       num_meta_group_infos_max)
		array_size = array_size << 1;
	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
	 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
	 * So a two level scheme suffices for now. */
	sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
	if (sbi->s_group_info == NULL) {
		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
		return -ENOMEM;
	}
	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
		goto err_freesgi;
	}
	sbi->s_buddy_cache->i_ino = get_next_ino();
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			printk(KERN_ERR
				"EXT4-fs: can't read descriptor %u\n", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	/* free the group infos created so far, then the pointer tables */
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = num_meta_group_infos;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	kfree(sbi->s_group_info);
	return -ENOMEM;
}
2368
2369static void ext4_groupinfo_destroy_slabs(void)
2370{
2371 int i;
2372
2373 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2374 if (ext4_groupinfo_caches[i])
2375 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2376 ext4_groupinfo_caches[i] = NULL;
2377 }
2378}
2379
2380static int ext4_groupinfo_create_slab(size_t size)
2381{
2382 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2383 int slab_size;
2384 int blocksize_bits = order_base_2(size);
2385 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2386 struct kmem_cache *cachep;
2387
2388 if (cache_index >= NR_GRPINFO_CACHES)
2389 return -EINVAL;
2390
2391 if (unlikely(cache_index < 0))
2392 cache_index = 0;
2393
2394 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2395 if (ext4_groupinfo_caches[cache_index]) {
2396 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2397 return 0;
2398 }
2399
2400 slab_size = offsetof(struct ext4_group_info,
2401 bb_counters[blocksize_bits + 2]);
2402
2403 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2404 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2405 NULL);
2406
2407 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2408 if (!cachep) {
2409 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2410 return -ENOMEM;
2411 }
2412
2413 ext4_groupinfo_caches[cache_index] = cachep;
2414
2415 return 0;
2416}
2417
/*
 * Initialize the multiblock allocator for this filesystem: buddy offset/max
 * tables, the group-info backend, tunables, per-cpu locality groups, and the
 * mb_groups proc file. Returns 0 or a negative errno.
 */
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset;
	unsigned max;
	int ret;

	/* one slot per buddy order: bitmap + orders 0..blocksize_bits */
	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	i = 1;
	offset = 0;
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += 1 << (sb->s_blocksize_bits - i);
		max = max >> 1;
		i++;
	} while (i <= sb->s_blocksize_bits + 1);

	/* init file for buddy data */
	ret = ext4_mb_init_backend(sb);
	if (ret != 0) {
		goto out;
	}

	spin_lock_init(&sbi->s_md_lock);
	spin_lock_init(&sbi->s_bal_lock);

	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		/*
		 * NOTE(review): on this failure the backend set up by
		 * ext4_mb_init_backend() is not torn down here; confirm
		 * the caller's error path handles that.
		 */
		ret = -ENOMEM;
		goto out;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	if (sbi->s_proc)
		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
				 &ext4_mb_seq_groups_fops, sb);

	if (sbi->s_journal)
		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
out:
	if (ret) {
		kfree(sbi->s_mb_offsets);
		kfree(sbi->s_mb_maxs);
	}
	return ret;
}
2503
2504
2505static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2506{
2507 struct ext4_prealloc_space *pa;
2508 struct list_head *cur, *tmp;
2509 int count = 0;
2510
2511 list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
2512 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2513 list_del(&pa->pa_group_list);
2514 count++;
2515 kmem_cache_free(ext4_pspace_cachep, pa);
2516 }
2517 if (count)
2518 mb_debug(1, "mballoc: %u PAs left\n", count);
2519
2520}
2521
/*
 * Tear down the multiblock allocator at unmount: free per-group info
 * (discarding leftover preallocations), the pointer tables, the buddy
 * cache inode and per-cpu locality groups, and optionally print stats.
 */
int ext4_mb_release(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	int num_meta_group_infos;
	struct ext4_group_info *grinfo;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	if (sbi->s_group_info) {
		for (i = 0; i < ngroups; i++) {
			grinfo = ext4_get_group_info(sb, i);
#ifdef DOUBLE_CHECK
			kfree(grinfo->bb_bitmap);
#endif
			ext4_lock_group(sb, i);
			ext4_mb_cleanup_pa(grinfo);
			ext4_unlock_group(sb, i);
			kmem_cache_free(cachep, grinfo);
		}
		num_meta_group_infos = (ngroups +
				EXT4_DESC_PER_BLOCK(sb) - 1) >>
			EXT4_DESC_PER_BLOCK_BITS(sb);
		for (i = 0; i < num_meta_group_infos; i++)
			kfree(sbi->s_group_info[i]);
		kfree(sbi->s_group_info);
	}
	kfree(sbi->s_mb_offsets);
	kfree(sbi->s_mb_maxs);
	if (sbi->s_buddy_cache)
		iput(sbi->s_buddy_cache);
	if (sbi->s_mb_stats) {
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
				atomic_read(&sbi->s_bal_allocated),
				atomic_read(&sbi->s_bal_reqs),
				atomic_read(&sbi->s_bal_success));
		printk(KERN_INFO
		      "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
				"%u 2^N hits, %u breaks, %u lost\n",
				atomic_read(&sbi->s_bal_ex_scanned),
				atomic_read(&sbi->s_bal_goals),
				atomic_read(&sbi->s_bal_2orders),
				atomic_read(&sbi->s_bal_breaks),
				atomic_read(&sbi->s_mb_lost_chunks));
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
				sbi->s_mb_buddies_generated++,
				sbi->s_mb_generation_time);
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
				atomic_read(&sbi->s_mb_preallocated),
				atomic_read(&sbi->s_mb_discarded));
	}

	free_percpu(sbi->s_locality_groups);
	if (sbi->s_proc)
		remove_proc_entry("mb_groups", sbi->s_proc);

	return 0;
}
2583
2584static inline int ext4_issue_discard(struct super_block *sb,
2585 ext4_group_t block_group, ext4_grpblk_t block, int count)
2586{
2587 ext4_fsblk_t discard_block;
2588
2589 discard_block = block + ext4_group_first_block_no(sb, block_group);
2590 trace_ext4_discard_blocks(sb,
2591 (unsigned long long) discard_block, count);
2592 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2593}
2594
2595
2596
2597
2598
/*
 * This function is called by the jbd2 layer once the commit has finished,
 * so we know we can free the blocks that were released with that commit.
 */
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
{
	struct super_block *sb = journal->j_private;
	struct ext4_buddy e4b;
	struct ext4_group_info *db;
	int err, count = 0, count2 = 0;
	struct ext4_free_data *entry;
	struct list_head *l, *ltmp;

	list_for_each_safe(l, ltmp, &txn->t_private_list) {
		entry = list_entry(l, struct ext4_free_data, list);

		mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
			 entry->count, entry->group, entry);

		if (test_opt(sb, DISCARD))
			ext4_issue_discard(sb, entry->group,
					   entry->start_blk, entry->count);

		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
		/* we expect to find existing buddy because it's pinned */
		BUG_ON(err != 0);

		db = e4b.bd_info;
		/* there are blocks to put in buddy to make them really free */
		count += entry->count;
		count2++;
		ext4_lock_group(sb, entry->group);
		/* Take it out of per group rb tree */
		rb_erase(&entry->node, &(db->bb_free_root));
		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);

		if (!db->bb_free_root.rb_node) {
			/* No more items in the per group rb tree
			 * balance refcounts from ext4_mb_free_metadata()
			 */
			page_cache_release(e4b.bd_buddy_page);
			page_cache_release(e4b.bd_bitmap_page);
		}
		ext4_unlock_group(sb, entry->group);
		kmem_cache_free(ext4_free_ext_cachep, entry);
		ext4_mb_unload_buddy(&e4b);
	}

	mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
}
2645
2646#ifdef CONFIG_EXT4_DEBUG
2647u8 mb_enable_debug __read_mostly;
2648
2649static struct dentry *debugfs_dir;
2650static struct dentry *debugfs_debug;
2651
/* Create /sys/kernel/debug/ext4/mballoc-debug controlling mb_enable_debug */
static void __init ext4_create_debugfs_entry(void)
{
	debugfs_dir = debugfs_create_dir("ext4", NULL);
	if (debugfs_dir)
		debugfs_debug = debugfs_create_u8("mballoc-debug",
						  S_IRUGO | S_IWUSR,
						  debugfs_dir,
						  &mb_enable_debug);
}
2661
/* Remove the mballoc debugfs knob and its directory (NULL-safe) */
static void ext4_remove_debugfs_entry(void)
{
	debugfs_remove(debugfs_debug);
	debugfs_remove(debugfs_dir);
}
2667
2668#else
2669
/* no-op stub when CONFIG_EXT4_DEBUG is disabled */
static void __init ext4_create_debugfs_entry(void)
{
}
2673
/* no-op stub when CONFIG_EXT4_DEBUG is disabled */
static void ext4_remove_debugfs_entry(void)
{
}
2677
2678#endif
2679
2680int __init ext4_init_mballoc(void)
2681{
2682 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2683 SLAB_RECLAIM_ACCOUNT);
2684 if (ext4_pspace_cachep == NULL)
2685 return -ENOMEM;
2686
2687 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2688 SLAB_RECLAIM_ACCOUNT);
2689 if (ext4_ac_cachep == NULL) {
2690 kmem_cache_destroy(ext4_pspace_cachep);
2691 return -ENOMEM;
2692 }
2693
2694 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2695 SLAB_RECLAIM_ACCOUNT);
2696 if (ext4_free_ext_cachep == NULL) {
2697 kmem_cache_destroy(ext4_pspace_cachep);
2698 kmem_cache_destroy(ext4_ac_cachep);
2699 return -ENOMEM;
2700 }
2701 ext4_create_debugfs_entry();
2702 return 0;
2703}
2704
/* Module exit: destroy all mballoc slab caches and the debugfs knob */
void ext4_exit_mballoc(void)
{
	/*
	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
	 * before destroying the slab cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_pspace_cachep);
	kmem_cache_destroy(ext4_ac_cachep);
	kmem_cache_destroy(ext4_free_ext_cachep);
	ext4_groupinfo_destroy_slabs();
	ext4_remove_debugfs_entry();
}
2718
2719
2720
2721
2722
2723
/*
 * Mark the chosen space (ac->ac_b_ex) as in-use on disk: set the bits in
 * the block bitmap and update the group descriptor, both under the
 * journal's write access. Returns 0 if success or error code.
 */
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
				handle_t *handle, unsigned int reserv_blks)
{
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_group_desc *gdp;
	struct buffer_head *gdp_bh;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block;
	int err, len;

	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(ac->ac_b_ex.fe_len <= 0);

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);

	err = -EIO;
	bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
	if (!bitmap_bh)
		goto out_err;

	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto out_err;

	err = -EIO;
	gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
	if (!gdp)
		goto out_err;

	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
			ext4_free_blks_count(sb, gdp));

	err = ext4_journal_get_write_access(handle, gdp_bh);
	if (err)
		goto out_err;

	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);

	len = ac->ac_b_ex.fe_len;
	if (!ext4_data_block_valid(sbi, block, len)) {
		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
			   "fs metadata\n", block, block+len);
		/*
		 * Fix the bitmap and repeat the block allocation
		 * We leak some of the blocks here.
		 */
		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
			    ac->ac_b_ex.fe_len);
		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!err)
			err = -EAGAIN;
		goto out_err;
	}

	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
#ifdef AGGRESSIVE_CHECK
	{
		int i;
		for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
			BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
						bitmap_bh->b_data));
		}
	}
#endif
	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
		ext4_free_blks_set(sb, gdp,
					ext4_free_blocks_after_init(sb,
					ac->ac_b_ex.fe_group, gdp));
	}
	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
	ext4_free_blks_set(sb, gdp, len);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);

	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
	/*
	 * Now reduce the dirty block count also. Should not go negative
	 */
	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
		/* release all the reserved blocks if non delalloc */
		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi,
							  ac->ac_b_ex.fe_group);
		atomic_sub(ac->ac_b_ex.fe_len,
			   &sbi->s_flex_groups[flex_group].free_blocks);
	}

	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
	if (err)
		goto out_err;
	err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
	ext4_mark_super_dirty(sb);
	brelse(bitmap_bh);
	return err;
}
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2840{
2841 struct super_block *sb = ac->ac_sb;
2842 struct ext4_locality_group *lg = ac->ac_lg;
2843
2844 BUG_ON(lg == NULL);
2845 if (EXT4_SB(sb)->s_stripe)
2846 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
2847 else
2848 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2849 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2850 current->pid, ac->ac_g_ex.fe_len);
2851}
2852
2853
2854
2855
2856
/*
 * Normalize the allocation request: stretch the requested extent to one
 * of a few well-known sizes (up to 8MB) and align its start, so that the
 * blocks beyond the original request can be kept as inode preallocation.
 * The normalized goal is stored in ac->ac_g_ex.
 */
static noinline_for_stack void
ext4_mb_normalize_request(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	int bsbits, max;
	ext4_lblk_t end;
	loff_t size, orig_size, start_off;
	ext4_lblk_t start;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_prealloc_space *pa;

	/* do normalize only data requests, metadata requests
	   do not need preallocation */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	/* sometime caller may want exact blocks */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	/* caller may indicate that preallocation isn't
	 * required (it's a tail, for example) */
	if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
		return;

	if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
		ext4_mb_normalize_group_request(ac);
		return ;
	}

	bsbits = ac->ac_sb->s_blocksize_bits;

	/* first, let's learn actual file size
	 * given current request is allocated */
	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
	size = size << bsbits;
	if (size < i_size_read(ac->ac_inode))
		size = i_size_read(ac->ac_inode);
	orig_size = size;

	/* max size of free chunks */
	max = 2 << bsbits;

#define NRL_CHECK_SIZE(req, size, max, chunk_size)	\
		(req <= (size) || max <= (chunk_size))

	/* first, try to predict filesize */
	/* XXX: should this table be tunable? */
	start_off = 0;
	if (size <= 16 * 1024) {
		size = 16 * 1024;
	} else if (size <= 32 * 1024) {
		size = 32 * 1024;
	} else if (size <= 64 * 1024) {
		size = 64 * 1024;
	} else if (size <= 128 * 1024) {
		size = 128 * 1024;
	} else if (size <= 256 * 1024) {
		size = 256 * 1024;
	} else if (size <= 512 * 1024) {
		size = 512 * 1024;
	} else if (size <= 1024 * 1024) {
		size = 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
						(21 - bsbits)) << 21;
		size = 2 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(22 - bsbits)) << 22;
		size = 4 * 1024 * 1024;
	} else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
					(8<<20)>>bsbits, max, 8 * 1024)) {
		start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
							(23 - bsbits)) << 23;
		size = 8 * 1024 * 1024;
	} else {
		start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
		size	  = ac->ac_o_ex.fe_len << bsbits;
	}
	size = size >> bsbits;
	start = start_off >> bsbits;

	/* don't cover already allocated blocks in selected range */
	if (ar->pleft && start <= ar->lleft) {
		size -= ar->lleft + 1 - start;
		start = ar->lleft + 1;
	}
	if (ar->pright && start + size - 1 >= ar->lright)
		size -= start + size - ar->lright;

	end = start + size;

	/* check we don't cross already preallocated blocks */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		ext4_lblk_t pa_end;

		if (pa->pa_deleted)
			continue;
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}

		pa_end = pa->pa_lstart + pa->pa_len;

		/* PA must not overlap original request */
		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
			ac->ac_o_ex.fe_logical < pa->pa_lstart));

		/* skip PAs this normalized request doesn't overlap with */
		if (pa->pa_lstart >= end || pa_end <= start) {
			spin_unlock(&pa->pa_lock);
			continue;
		}
		BUG_ON(pa->pa_lstart <= start && pa_end >= end);

		/* adjust start or end to be adjacent to this pa */
		if (pa_end <= ac->ac_o_ex.fe_logical) {
			BUG_ON(pa_end < start);
			start = pa_end;
		} else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
			BUG_ON(pa->pa_lstart > end);
			end = pa->pa_lstart;
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();
	size = end - start;

	/* XXX: extra loop to check we really don't overlap preallocations */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
		ext4_lblk_t pa_end;
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0) {
			pa_end = pa->pa_lstart + pa->pa_len;
			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();

	if (start + size <= ac->ac_o_ex.fe_logical &&
			start > ac->ac_o_ex.fe_logical) {
		printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
			(unsigned long) start, (unsigned long) size,
			(unsigned long) ac->ac_o_ex.fe_logical);
	}
	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
			start > ac->ac_o_ex.fe_logical);
	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));

	/* now prepare goal request */

	/* XXX: is it better to align blocks WRT to logical
	 * placement or satisfy big request as is */
	ac->ac_g_ex.fe_logical = start;
	ac->ac_g_ex.fe_len = size;

	/* define goal start in order to merge */
	if (ar->pright && (ar->lright == (start + size))) {
		/* merge to the right */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
						&ac->ac_f_ex.fe_group,
						&ac->ac_f_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}
	if (ar->pleft && (ar->lleft + 1 == start)) {
		/* merge to the left */
		ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
						&ac->ac_f_ex.fe_group,
						&ac->ac_f_ex.fe_start);
		ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
	}

	mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
		(unsigned) orig_size, (unsigned) start);
}
3038
3039static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3040{
3041 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3042
3043 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3044 atomic_inc(&sbi->s_bal_reqs);
3045 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3046 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3047 atomic_inc(&sbi->s_bal_success);
3048 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3049 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3050 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3051 atomic_inc(&sbi->s_bal_goals);
3052 if (ac->ac_found > sbi->s_mb_max_to_scan)
3053 atomic_inc(&sbi->s_bal_breaks);
3054 }
3055
3056 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3057 trace_ext4_mballoc_alloc(ac);
3058 else
3059 trace_ext4_mballoc_prealloc(ac);
3060}
3061
3062
3063
3064
3065
3066
3067
3068static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3069{
3070 struct ext4_prealloc_space *pa = ac->ac_pa;
3071 int len;
3072
3073 if (pa && pa->pa_type == MB_INODE_PA) {
3074 len = ac->ac_b_ex.fe_len;
3075 pa->pa_free += len;
3076 }
3077
3078}
3079
3080
3081
3082
/*
 * Use blocks preallocated to an inode to satisfy this request: take the
 * slice of the PA that covers the original logical offset, fill in
 * ac->ac_b_ex and mark the context AC_STATUS_FOUND.
 * Called with pa->pa_lock held and a reference taken on @pa (see
 * ext4_mb_use_preallocated() and ext4_mb_new_inode_pa()).
 */
static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	ext4_fsblk_t start;
	ext4_fsblk_t end;
	int len;

	/* found preallocated blocks, use them */
	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
	end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
	len = end - start;
	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
					&ac->ac_b_ex.fe_start);
	ac->ac_b_ex.fe_len = len;
	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_pa = pa;

	/* the chosen slice must lie fully inside the PA */
	BUG_ON(start < pa->pa_pstart);
	BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
	BUG_ON(pa->pa_free < len);
	pa->pa_free -= len;

	mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
}
3107
3108
3109
3110
3111static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3112 struct ext4_prealloc_space *pa)
3113{
3114 unsigned int len = ac->ac_o_ex.fe_len;
3115
3116 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3117 &ac->ac_b_ex.fe_group,
3118 &ac->ac_b_ex.fe_start);
3119 ac->ac_b_ex.fe_len = len;
3120 ac->ac_status = AC_STATUS_FOUND;
3121 ac->ac_pa = pa;
3122
3123
3124
3125
3126
3127
3128
3129 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3130}
3131
3132
3133
3134
3135
3136
3137
3138static struct ext4_prealloc_space *
3139ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3140 struct ext4_prealloc_space *pa,
3141 struct ext4_prealloc_space *cpa)
3142{
3143 ext4_fsblk_t cur_distance, new_distance;
3144
3145 if (cpa == NULL) {
3146 atomic_inc(&pa->pa_count);
3147 return pa;
3148 }
3149 cur_distance = abs(goal_block - cpa->pa_pstart);
3150 new_distance = abs(goal_block - pa->pa_pstart);
3151
3152 if (cur_distance <= new_distance)
3153 return cpa;
3154
3155
3156 atomic_dec(&cpa->pa_count);
3157 atomic_inc(&pa->pa_count);
3158 return pa;
3159}
3160
3161
3162
3163
/*
 * Try to satisfy the allocation from existing preallocated space.
 * First search the inode's own PA list, then (for group allocations)
 * the locality group's size-indexed PA lists for the PA closest to the
 * goal block.  Returns 1 when the request was served from a PA.
 */
static noinline_for_stack int
ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
	int order, i;
	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *pa, *cpa = NULL;
	ext4_fsblk_t goal_block;

	/* only data can be preallocated */
	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return 0;

	/* first, try per-file preallocation */
	rcu_read_lock();
	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {

		/* all fields in this condition don't change,
		 * so we can skip locking for them */
		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
			ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
			continue;

		/* non-extent files can't have physical blocks past 2^32 */
		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
			pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
			continue;

		/* found preallocated blocks, use them */
		spin_lock(&pa->pa_lock);
		if (pa->pa_deleted == 0 && pa->pa_free) {
			atomic_inc(&pa->pa_count);
			ext4_mb_use_inode_pa(ac, pa);
			spin_unlock(&pa->pa_lock);
			ac->ac_criteria = 10;
			rcu_read_unlock();
			return 1;
		}
		spin_unlock(&pa->pa_lock);
	}
	rcu_read_unlock();

	/* can we use group allocation? */
	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
		return 0;

	/* inode may have no locality group for allocation */
	lg = ac->ac_lg;
	if (lg == NULL)
		return 0;
	order = fls(ac->ac_o_ex.fe_len) - 1;
	if (order > PREALLOC_TB_SIZE - 1)
		/* the max size of hash table is PREALLOC_TB_SIZE */
		order = PREALLOC_TB_SIZE - 1;

	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
	/*
	 * search for the prealloc space that is having
	 * minimal distance from the goal block.
	 */
	for (i = order; i < PREALLOC_TB_SIZE; i++) {
		rcu_read_lock();
		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
					pa_inode_list) {
			spin_lock(&pa->pa_lock);
			if (pa->pa_deleted == 0 &&
					pa->pa_free >= ac->ac_o_ex.fe_len) {

				cpa = ext4_mb_check_group_pa(goal_block,
								pa, cpa);
			}
			spin_unlock(&pa->pa_lock);
		}
		rcu_read_unlock();
	}
	if (cpa) {
		ext4_mb_use_group_pa(ac, cpa);
		ac->ac_criteria = 20;
		return 1;
	}
	return 0;
}
3246
3247
3248
3249
3250
3251
3252
3253static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3254 ext4_group_t group)
3255{
3256 struct rb_node *n;
3257 struct ext4_group_info *grp;
3258 struct ext4_free_data *entry;
3259
3260 grp = ext4_get_group_info(sb, group);
3261 n = rb_first(&(grp->bb_free_root));
3262
3263 while (n) {
3264 entry = rb_entry(n, struct ext4_free_data, node);
3265 mb_set_bits(bitmap, entry->start_blk, entry->count);
3266 n = rb_next(n);
3267 }
3268 return;
3269}
3270
3271
3272
3273
3274
3275
3276static noinline_for_stack
3277void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3278 ext4_group_t group)
3279{
3280 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3281 struct ext4_prealloc_space *pa;
3282 struct list_head *cur;
3283 ext4_group_t groupnr;
3284 ext4_grpblk_t start;
3285 int preallocated = 0;
3286 int count = 0;
3287 int len;
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297 list_for_each(cur, &grp->bb_prealloc_list) {
3298 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3299 spin_lock(&pa->pa_lock);
3300 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3301 &groupnr, &start);
3302 len = pa->pa_len;
3303 spin_unlock(&pa->pa_lock);
3304 if (unlikely(len == 0))
3305 continue;
3306 BUG_ON(groupnr != group);
3307 mb_set_bits(bitmap, start, len);
3308 preallocated += len;
3309 count++;
3310 }
3311 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3312}
3313
3314static void ext4_mb_pa_callback(struct rcu_head *head)
3315{
3316 struct ext4_prealloc_space *pa;
3317 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3318 kmem_cache_free(ext4_pspace_cachep, pa);
3319}
3320
3321
3322
3323
3324
/*
 * Drop a reference on a preallocation; if that was the last reference
 * and the PA is fully consumed (pa_free == 0), tear it down: mark it
 * deleted, unlink it from the group and owner lists, and free it via
 * RCU.
 */
static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
			struct super_block *sb, struct ext4_prealloc_space *pa)
{
	ext4_group_t grp;
	ext4_fsblk_t grp_blk;

	/* nothing to do if others still hold it or space remains */
	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
		return;

	/* a concurrent discard may already have marked it deleted */
	spin_lock(&pa->pa_lock);
	if (pa->pa_deleted == 1) {
		spin_unlock(&pa->pa_lock);
		return;
	}

	pa->pa_deleted = 1;
	spin_unlock(&pa->pa_lock);

	grp_blk = pa->pa_pstart;
	/*
	 * For group PAs pa_pstart advances as blocks are handed out (see
	 * ext4_mb_release_context()), so a fully used PA may point at the
	 * first block of the next group; step back one block to resolve
	 * the group the PA actually belongs to.
	 */
	if (pa->pa_type == MB_GROUP_PA)
		grp_blk--;

	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);

	/*
	 * Take the group lock around the list removal: buddy generation
	 * (ext4_mb_generate_from_pa()) walks bb_prealloc_list under the
	 * same lock, so a PA is either still on the list and marked in
	 * the buddy, or fully removed -- never half-visible.
	 */
	ext4_lock_group(sb, grp);
	list_del(&pa->pa_group_list);
	ext4_unlock_group(sb, grp);

	spin_lock(pa->pa_obj_lock);
	list_del_rcu(&pa->pa_inode_list);
	spin_unlock(pa->pa_obj_lock);

	call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
3378
3379
3380
3381
/*
 * Create a new inode preallocation from the surplus of the found
 * extent (ac_b_ex larger than the original request), link it to the
 * inode's PA list and the group's PA list, and consume the originally
 * requested part immediately.
 */
static noinline_for_stack int
ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;
	struct ext4_inode_info *ei;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));

	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
	if (pa == NULL)
		return -ENOMEM;

	if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
		int winl;
		int wins;
		int win;
		int offs;

		/* we can't allocate as much as normalizer wants, so the
		 * found space must get a proper lstart to cover the
		 * original request */
		BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
		BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);

		/* winl: how far the chunk may slide left while still
		 * starting at or before the original logical block */
		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;

		/* wins: slack after covering the whole original request */
		wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;

		/* the smaller one bounds the real window */
		win = min(winl, wins);

		/* prefer an alignment that is a multiple of the chunk */
		offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
		if (offs && offs < win)
			win = offs;

		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
	}

	/* preallocation can change ac_b_ex, so record the actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	pa->pa_lstart = ac->ac_b_ex.fe_logical;
	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
	pa->pa_len = ac->ac_b_ex.fe_len;
	pa->pa_free = pa->pa_len;
	atomic_set(&pa->pa_count, 1);
	spin_lock_init(&pa->pa_lock);
	INIT_LIST_HEAD(&pa->pa_inode_list);
	INIT_LIST_HEAD(&pa->pa_group_list);
	pa->pa_deleted = 0;
	pa->pa_type = MB_INODE_PA;

	mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
	trace_ext4_mb_new_inode_pa(ac, pa);

	/* consume the requested slice right away */
	ext4_mb_use_inode_pa(ac, pa);
	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);

	ei = EXT4_I(ac->ac_inode);
	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);

	pa->pa_obj_lock = &ei->i_prealloc_lock;
	pa->pa_inode = ac->ac_inode;

	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);

	spin_lock(pa->pa_obj_lock);
	list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
	spin_unlock(pa->pa_obj_lock);

	return 0;
}
3469
3470
3471
3472
/*
 * Create a new locality-group preallocation from the surplus of the
 * found extent and attach it to the group's PA list.  It is added to
 * the locality group's lists later (see ext4_mb_add_n_trim(), called
 * from ext4_mb_release_context()).
 */
static noinline_for_stack int
ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_locality_group *lg;
	struct ext4_prealloc_space *pa;
	struct ext4_group_info *grp;

	/* preallocate only when found space is larger than requested */
	BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
	BUG_ON(ac->ac_status != AC_STATUS_FOUND);
	BUG_ON(!S_ISREG(ac->ac_inode->i_mode));

	BUG_ON(ext4_pspace_cachep == NULL);
	pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
	if (pa == NULL)
		return -ENOMEM;

	/* preallocation can change ac_b_ex, so record the actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
	pa->pa_lstart = pa->pa_pstart;
	pa->pa_len = ac->ac_b_ex.fe_len;
	pa->pa_free = pa->pa_len;
	atomic_set(&pa->pa_count, 1);
	spin_lock_init(&pa->pa_lock);
	INIT_LIST_HEAD(&pa->pa_inode_list);
	INIT_LIST_HEAD(&pa->pa_group_list);
	pa->pa_deleted = 0;
	pa->pa_type = MB_GROUP_PA;

	mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
	trace_ext4_mb_new_group_pa(ac, pa);

	ext4_mb_use_group_pa(ac, pa);
	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);

	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
	lg = ac->ac_lg;
	BUG_ON(lg == NULL);

	pa->pa_obj_lock = &lg->lg_prealloc_lock;
	pa->pa_inode = NULL;

	ext4_lock_group(sb, ac->ac_b_ex.fe_group);
	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);

	/*
	 * We will later add the new pa to the right bucket
	 * after updating the pa_free in ext4_mb_release_context
	 */
	return 0;
}
3530
3531static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3532{
3533 int err;
3534
3535 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3536 err = ext4_mb_new_group_pa(ac);
3537 else
3538 err = ext4_mb_new_inode_pa(ac);
3539 return err;
3540}
3541
3542
3543
3544
3545
3546
3547
3548
3549
/*
 * Release the still-unused blocks of a deleted inode PA back to the
 * buddy: walk the on-disk bitmap over the PA's range and free every
 * run of set bits.  Cross-checks the freed total against pa_free and
 * reports an error on mismatch.  Caller holds the group lock and has
 * the buddy loaded.
 */
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
			struct ext4_prealloc_space *pa)
{
	struct super_block *sb = e4b->bd_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned int end;
	unsigned int next;
	ext4_group_t group;
	ext4_grpblk_t bit;
	unsigned long long grp_blk_start;
	int err = 0;
	int free = 0;

	BUG_ON(pa->pa_deleted == 0);
	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
	grp_blk_start = pa->pa_pstart - bit;
	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
	end = bit + pa->pa_len;

	/* free each run of set bits within [bit, end) */
	while (bit < end) {
		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
		if (bit >= end)
			break;
		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
		mb_debug(1, "    free preallocated %u/%u in group %u\n",
			 (unsigned) ext4_group_first_block_no(sb, group) + bit,
			 (unsigned) next - bit, (unsigned) group);
		free += next - bit;

		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
		trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
					       next - bit);
		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
		bit = next + 1;
	}
	if (free != pa->pa_free) {
		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
			pa, (unsigned long) pa->pa_lstart,
			(unsigned long) pa->pa_pstart,
			(unsigned long) pa->pa_len);
		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
					free, pa->pa_free);
		/*
		 * pa is already deleted so we use the value obtained
		 * from the bitmap and continue.
		 */
	}
	atomic_add(free, &sbi->s_mb_discarded);

	return err;
}
3602
/*
 * Release all remaining blocks of a deleted locality-group PA back to
 * the buddy in one contiguous chunk.  Caller holds the group lock and
 * has the buddy loaded.
 */
static noinline_for_stack int
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
				struct ext4_prealloc_space *pa)
{
	struct super_block *sb = e4b->bd_sb;
	ext4_group_t group;
	ext4_grpblk_t bit;

	trace_ext4_mb_release_group_pa(pa);
	BUG_ON(pa->pa_deleted == 0);
	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
	trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);

	return 0;
}
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
/*
 * Discard unused preallocations of a group to free up at least @needed
 * blocks (0 means "everything").  PAs whose refcount is nonzero are
 * skipped; if not enough was collected, we yield and retry once the
 * busy PAs may have been dropped.  Returns the number of blocks freed.
 */
static noinline_for_stack int
ext4_mb_discard_group_preallocations(struct super_block *sb,
					ext4_group_t group, int needed)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_prealloc_space *pa, *tmp;
	struct list_head list;
	struct ext4_buddy e4b;
	int err;
	int busy = 0;
	int free = 0;

	mb_debug(1, "discard preallocation for group %u\n", group);

	if (list_empty(&grp->bb_prealloc_list))
		return 0;

	bitmap_bh = ext4_read_block_bitmap(sb, group);
	if (bitmap_bh == NULL) {
		ext4_error(sb, "Error reading block bitmap for %u", group);
		return 0;
	}

	err = ext4_mb_load_buddy(sb, group, &e4b);
	if (err) {
		ext4_error(sb, "Error loading buddy information for %u", group);
		put_bh(bitmap_bh);
		return 0;
	}

	if (needed == 0)
		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;

	INIT_LIST_HEAD(&list);
repeat:
	ext4_lock_group(sb, group);
	list_for_each_entry_safe(pa, tmp,
				&grp->bb_prealloc_list, pa_group_list) {
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/* in use: remember it and retry later */
			spin_unlock(&pa->pa_lock);
			busy = 1;
			continue;
		}
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}

		/* seems this one can be freed ... */
		pa->pa_deleted = 1;

		/* we can trust pa_free here */
		free += pa->pa_free;

		spin_unlock(&pa->pa_lock);

		list_del(&pa->pa_group_list);
		list_add(&pa->u.pa_tmp_list, &list);
	}

	/* not enough collected and some PAs were busy: retry */
	if (free < needed && busy) {
		busy = 0;
		ext4_unlock_group(sb, group);
		/*
		 * Yield the CPU here so that we don't get soft lockup
		 * in the non-preempt case.
		 */
		yield();
		goto repeat;
	}

	/* found anything to free? */
	if (list_empty(&list)) {
		BUG_ON(free != 0);
		goto out;
	}

	/* now free all selected PAs */
	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {

		/* remove from object (inode or locality group) */
		spin_lock(pa->pa_obj_lock);
		list_del_rcu(&pa->pa_inode_list);
		spin_unlock(pa->pa_obj_lock);

		if (pa->pa_type == MB_GROUP_PA)
			ext4_mb_release_group_pa(&e4b, pa);
		else
			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);

		list_del(&pa->u.pa_tmp_list);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}

out:
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);
	put_bh(bitmap_bh);
	return free;
}
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
/*
 * Release all preallocations attached to an inode.  Each PA is marked
 * deleted under its lock, moved to a private list, and then freed back
 * to the buddy group by group.  A PA that is still referenced forces a
 * sleep-and-retry, since the holder is expected to drop it shortly.
 */
void ext4_discard_preallocations(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bitmap_bh = NULL;
	struct ext4_prealloc_space *pa, *tmp;
	ext4_group_t group = 0;
	struct list_head list;
	struct ext4_buddy e4b;
	int err;

	if (!S_ISREG(inode->i_mode)) {
		/* only regular files carry inode preallocations */
		return;
	}

	mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
	trace_ext4_discard_preallocations(inode);

	INIT_LIST_HEAD(&list);

repeat:
	/* first, collect all pa's in the inode */
	spin_lock(&ei->i_prealloc_lock);
	while (!list_empty(&ei->i_prealloc_list)) {
		pa = list_entry(ei->i_prealloc_list.next,
				struct ext4_prealloc_space, pa_inode_list);
		BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/* this shouldn't happen often - nobody should
			 * use preallocation while we're discarding it */
			spin_unlock(&pa->pa_lock);
			spin_unlock(&ei->i_prealloc_lock);
			printk(KERN_ERR "uh-oh! used pa while discarding\n");
			WARN_ON(1);
			schedule_timeout_uninterruptible(HZ);
			goto repeat;

		}
		if (pa->pa_deleted == 0) {
			pa->pa_deleted = 1;
			spin_unlock(&pa->pa_lock);
			list_del_rcu(&pa->pa_inode_list);
			list_add(&pa->u.pa_tmp_list, &list);
			continue;
		}

		/* someone is deleting pa right now */
		spin_unlock(&pa->pa_lock);
		spin_unlock(&ei->i_prealloc_lock);

		/*
		 * The PA is marked deleted but still on the list: the
		 * concurrent remover hasn't unlinked it yet.  Wait for
		 * it to finish rather than spinning on the list head.
		 * (NOTE: a full HZ sleep here looks generous -- the
		 * remover only needs to finish list manipulation.)
		 */
		schedule_timeout_uninterruptible(HZ);
		goto repeat;
	}
	spin_unlock(&ei->i_prealloc_lock);

	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
		BUG_ON(pa->pa_type != MB_INODE_PA);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);

		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			ext4_error(sb, "Error loading buddy information for %u",
					group);
			continue;
		}

		bitmap_bh = ext4_read_block_bitmap(sb, group);
		if (bitmap_bh == NULL) {
			ext4_error(sb, "Error reading block bitmap for %u",
					group);
			ext4_mb_unload_buddy(&e4b);
			continue;
		}

		ext4_lock_group(sb, group);
		list_del(&pa->pa_group_list);
		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
		ext4_unlock_group(sb, group);

		ext4_mb_unload_buddy(&e4b);
		put_bh(bitmap_bh);

		list_del(&pa->u.pa_tmp_list);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}
}
3844
#ifdef CONFIG_EXT4_DEBUG
/*
 * Dump the allocation context and every group's preallocations to the
 * kernel log when an allocation fails.  Debug builds only; gated on
 * mb_enable_debug and skipped on an aborted filesystem.
 */
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;
	ext4_group_t ngroups, i;

	if (!mb_enable_debug ||
	    (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
		return;

	printk(KERN_ERR "EXT4-fs: Can't allocate:"
			" Allocation context details:\n");
	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
			ac->ac_status, ac->ac_flags);
	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
			"best %lu/%lu/%lu@%lu cr %d\n",
			(unsigned long)ac->ac_o_ex.fe_group,
			(unsigned long)ac->ac_o_ex.fe_start,
			(unsigned long)ac->ac_o_ex.fe_len,
			(unsigned long)ac->ac_o_ex.fe_logical,
			(unsigned long)ac->ac_g_ex.fe_group,
			(unsigned long)ac->ac_g_ex.fe_start,
			(unsigned long)ac->ac_g_ex.fe_len,
			(unsigned long)ac->ac_g_ex.fe_logical,
			(unsigned long)ac->ac_b_ex.fe_group,
			(unsigned long)ac->ac_b_ex.fe_start,
			(unsigned long)ac->ac_b_ex.fe_len,
			(unsigned long)ac->ac_b_ex.fe_logical,
			(int)ac->ac_criteria);
	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
		ac->ac_found);
	printk(KERN_ERR "EXT4-fs: groups: \n");
	ngroups = ext4_get_groups_count(sb);
	for (i = 0; i < ngroups; i++) {
		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
		struct ext4_prealloc_space *pa;
		ext4_grpblk_t start;
		struct list_head *cur;
		ext4_lock_group(sb, i);
		list_for_each(cur, &grp->bb_prealloc_list) {
			pa = list_entry(cur, struct ext4_prealloc_space,
					pa_group_list);
			spin_lock(&pa->pa_lock);
			ext4_get_group_no_and_offset(sb, pa->pa_pstart,
						     NULL, &start);
			spin_unlock(&pa->pa_lock);
			printk(KERN_ERR "PA:%u:%d:%u \n", i,
			       start, pa->pa_len);
		}
		ext4_unlock_group(sb, i);

		if (grp->bb_free == 0)
			continue;
		printk(KERN_ERR "%u: %d/%d \n",
		       i, grp->bb_free, grp->bb_fragments);
	}
	printk(KERN_ERR "\n");
}
#else
/* No-op stand-in when CONFIG_EXT4_DEBUG is off. */
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	return;
}
#endif
3909
3910
3911
3912
3913
3914
3915
3916
/*
 * Decide the allocation strategy for this request: no preallocation
 * (file being closed at its final size), stream allocation (large
 * files), or locality-group allocation (small files).  In the group
 * case the per-CPU locality group is picked and its mutex taken; it is
 * released in ext4_mb_release_context().
 */
static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int bsbits = ac->ac_sb->s_blocksize_bits;
	loff_t size, isize;

	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
		return;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		return;

	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
		>> bsbits;

	/* request reaches exactly the end of a no-longer-written file
	 * on an idle fs: don't bother preallocating */
	if ((size == isize) &&
	    !ext4_fs_is_busy(sbi) &&
	    (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
		ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
		return;
	}

	/* don't use group allocation for large files */
	size = max(size, isize);
	if (size > sbi->s_mb_stream_request) {
		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
		return;
	}

	BUG_ON(ac->ac_lg != NULL);
	/*
	 * locality group prealloc space is per cpu. The reason for
	 * having per cpu locality group is to reduce the contention
	 * on the lg mutex below.
	 */
	ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);

	/* we're going to use group allocation */
	ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;

	/* serialize all allocations in the group */
	mutex_lock(&ac->ac_lg->lg_mutex);
}
3961
/*
 * Fill a fresh allocation context from the caller's request: clamp the
 * length, validate/normalize the goal block, copy it into both the
 * original and goal extents, and pick the allocation strategy.
 * Always returns 0.
 */
static noinline_for_stack int
ext4_mb_initialize_context(struct ext4_allocation_context *ac,
				struct ext4_allocation_request *ar)
{
	struct super_block *sb = ar->inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_group_t group;
	unsigned int len;
	ext4_fsblk_t goal;
	ext4_grpblk_t block;

	/* we can't allocate > group size */
	len = ar->len;

	/* just a dirty hack to filter too big requests  */
	if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
		len = EXT4_BLOCKS_PER_GROUP(sb) - 10;

	/* start searching from the goal */
	goal = ar->goal;
	if (goal < le32_to_cpu(es->s_first_data_block) ||
			goal >= ext4_blocks_count(es))
		goal = le32_to_cpu(es->s_first_data_block);
	ext4_get_group_no_and_offset(sb, goal, &group, &block);

	/* set up allocation goals */
	memset(ac, 0, sizeof(struct ext4_allocation_context));
	ac->ac_b_ex.fe_logical = ar->logical;
	ac->ac_status = AC_STATUS_CONTINUE;
	ac->ac_sb = sb;
	ac->ac_inode = ar->inode;
	ac->ac_o_ex.fe_logical = ar->logical;
	ac->ac_o_ex.fe_group = group;
	ac->ac_o_ex.fe_start = block;
	ac->ac_o_ex.fe_len = len;
	ac->ac_g_ex.fe_logical = ar->logical;
	ac->ac_g_ex.fe_group = group;
	ac->ac_g_ex.fe_start = block;
	ac->ac_g_ex.fe_len = len;
	ac->ac_flags = ar->flags;

	/* we have to define context: we'll work with a file or
	 * locality group. this is a policy, actually */
	ext4_mb_group_or_file(ac);

	mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
			"left: %u/%u, right %u/%u to %swritable\n",
			(unsigned) ar->len, (unsigned) ar->logical,
			(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
			(unsigned) ar->lleft, (unsigned) ar->pleft,
			(unsigned) ar->lright, (unsigned) ar->pright,
			atomic_read(&ar->inode->i_writecount) ? "" : "non-");
	return 0;

}
4018
/*
 * Trim a locality group's PA list of the given order: unlink unused,
 * undeleted PAs until at most 5 entries would remain, then release
 * their blocks group by group and free them via RCU.
 */
static noinline_for_stack void
ext4_mb_discard_lg_preallocations(struct super_block *sb,
					struct ext4_locality_group *lg,
					int order, int total_entries)
{
	ext4_group_t group = 0;
	struct ext4_buddy e4b;
	struct list_head discard_list;
	struct ext4_prealloc_space *pa, *tmp;

	mb_debug(1, "discard locality group preallocation\n");

	INIT_LIST_HEAD(&discard_list);

	spin_lock(&lg->lg_prealloc_lock);
	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
						pa_inode_list) {
		spin_lock(&pa->pa_lock);
		if (atomic_read(&pa->pa_count)) {
			/*
			 * This is the pa that we just used
			 * for block allocation. So don't
			 * free that
			 */
			spin_unlock(&pa->pa_lock);
			continue;
		}
		if (pa->pa_deleted) {
			spin_unlock(&pa->pa_lock);
			continue;
		}
		/* only lg prealloc space */
		BUG_ON(pa->pa_type != MB_GROUP_PA);

		/* seems this one can be freed ... */
		pa->pa_deleted = 1;
		spin_unlock(&pa->pa_lock);

		list_del_rcu(&pa->pa_inode_list);
		list_add(&pa->u.pa_tmp_list, &discard_list);

		total_entries--;
		if (total_entries <= 5) {
			/*
			 * we want to keep only 5 entries
			 * allowing it to grow to 8. This
			 * makes sure we select a small set of
			 * candidates for discard.
			 */
			break;
		}
	}
	spin_unlock(&lg->lg_prealloc_lock);

	list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {

		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
		if (ext4_mb_load_buddy(sb, group, &e4b)) {
			ext4_error(sb, "Error loading buddy information for %u",
					group);
			continue;
		}
		ext4_lock_group(sb, group);
		list_del(&pa->pa_group_list);
		ext4_mb_release_group_pa(&e4b, pa);
		ext4_unlock_group(sb, group);

		ext4_mb_unload_buddy(&e4b);
		list_del(&pa->u.pa_tmp_list);
		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
	}
}
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
/*
 * Insert the context's group PA into the locality group's list for its
 * size order, keeping the list sorted by pa_free (ascending).  While
 * walking, count the entries; if the list has grown past 8, trim it
 * back via ext4_mb_discard_lg_preallocations().
 */
static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
{
	int order, added = 0, lg_prealloc_count = 1;
	struct super_block *sb = ac->ac_sb;
	struct ext4_locality_group *lg = ac->ac_lg;
	struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;

	order = fls(pa->pa_free) - 1;
	if (order > PREALLOC_TB_SIZE - 1)
		/* the max size of the hash table is PREALLOC_TB_SIZE */
		order = PREALLOC_TB_SIZE - 1;
	/* add the prealloc space to lg, keeping pa_free order */
	rcu_read_lock();
	list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
						pa_inode_list) {
		spin_lock(&tmp_pa->pa_lock);
		if (tmp_pa->pa_deleted) {
			spin_unlock(&tmp_pa->pa_lock);
			continue;
		}
		if (!added && pa->pa_free < tmp_pa->pa_free) {
			/* insert before the first bigger entry */
			list_add_tail_rcu(&pa->pa_inode_list,
						&tmp_pa->pa_inode_list);
			added = 1;
			/*
			 * we keep iterating only to count the total
			 * number of entries in the list
			 */
		}
		spin_unlock(&tmp_pa->pa_lock);
		lg_prealloc_count++;
	}
	if (!added)
		list_add_tail_rcu(&pa->pa_inode_list,
					&lg->lg_prealloc_list[order]);
	rcu_read_unlock();

	/* now trim the list to be not more than 8 elements */
	if (lg_prealloc_count > 8) {
		ext4_mb_discard_lg_preallocations(sb, lg,
						order, lg_prealloc_count);
		return;
	}
	return ;
}
4147
4148
4149
4150
4151static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4152{
4153 struct ext4_prealloc_space *pa = ac->ac_pa;
4154 if (pa) {
4155 if (pa->pa_type == MB_GROUP_PA) {
4156
4157 spin_lock(&pa->pa_lock);
4158 pa->pa_pstart += ac->ac_b_ex.fe_len;
4159 pa->pa_lstart += ac->ac_b_ex.fe_len;
4160 pa->pa_free -= ac->ac_b_ex.fe_len;
4161 pa->pa_len -= ac->ac_b_ex.fe_len;
4162 spin_unlock(&pa->pa_lock);
4163 }
4164 }
4165 if (pa) {
4166
4167
4168
4169
4170
4171
4172 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4173 spin_lock(pa->pa_obj_lock);
4174 list_del_rcu(&pa->pa_inode_list);
4175 spin_unlock(pa->pa_obj_lock);
4176 ext4_mb_add_n_trim(ac);
4177 }
4178 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4179 }
4180 if (ac->ac_bitmap_page)
4181 page_cache_release(ac->ac_bitmap_page);
4182 if (ac->ac_buddy_page)
4183 page_cache_release(ac->ac_buddy_page);
4184 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4185 mutex_unlock(&ac->ac_lg->lg_mutex);
4186 ext4_mb_collect_stats(ac);
4187 return 0;
4188}
4189
4190static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4191{
4192 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4193 int ret;
4194 int freed = 0;
4195
4196 trace_ext4_mb_discard_preallocations(sb, needed);
4197 for (i = 0; i < ngroups && needed > 0; i++) {
4198 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4199 freed += ret;
4200 needed -= ret;
4201 }
4202
4203 return freed;
4204}
4205
4206
4207
4208
4209
4210
/*
 * Main entry point into mballoc to allocate blocks.  Claims free-space
 * and quota reservations for the request, then satisfies it either from
 * preallocated space or from the regular (buddy) allocator.
 *
 * Returns the first allocated physical block (0 on failure) and the
 * error, if any, through *errp; ar->len is updated to the number of
 * blocks actually allocated.
 */
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
				struct ext4_allocation_request *ar, int *errp)
{
	int freed;
	struct ext4_allocation_context *ac = NULL;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	ext4_fsblk_t block = 0;		/* physical block finally allocated */
	unsigned int inquota = 0;	/* blocks charged against quota */
	unsigned int reserv_blks = 0;	/* blocks claimed from free space */

	sb = ar->inode->i_sb;
	sbi = EXT4_SB(sb);

	trace_ext4_request_blocks(ar);

	/*
	 * For delayed allocation we can skip the ENOSPC and EDQUOT
	 * checks here: blocks and quota were already reserved when the
	 * data was copied into the page cache.
	 */
	if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
	else {
		/*
		 * Non-delalloc path: verify there are enough free blocks
		 * and enough quota for the allocation, halving the
		 * request while free space cannot be claimed.
		 */
		while (ar->len &&
			ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {

			/* let others free some space before retrying */
			yield();
			ar->len = ar->len >> 1;
		}
		if (!ar->len) {
			*errp = -ENOSPC;
			return 0;
		}
		reserv_blks = ar->len;
		if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
			/* root-reserved allocations must not fail on quota */
			dquot_alloc_block_nofail(ar->inode, ar->len);
		} else {
			while (ar->len &&
				dquot_alloc_block(ar->inode, ar->len)) {
				/* over quota: shrink and skip preallocation */
				ar->flags |= EXT4_MB_HINT_NOPREALLOC;
				ar->len--;
			}
		}
		inquota = ar->len;
		if (ar->len == 0) {
			*errp = -EDQUOT;
			goto out;
		}
	}

	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
	if (!ac) {
		ar->len = 0;
		*errp = -ENOMEM;
		goto out;
	}

	*errp = ext4_mb_initialize_context(ac, ar);
	if (*errp) {
		ar->len = 0;
		goto out;
	}

	ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
	if (!ext4_mb_use_preallocated(ac)) {
		ac->ac_op = EXT4_MB_HISTORY_ALLOC;
		ext4_mb_normalize_request(ac, ar);
repeat:
		/* allocate space in core */
		*errp = ext4_mb_regular_allocator(ac);
		if (*errp)
			goto errout;

		/*
		 * If more space was allocated than originally requested,
		 * store the surplus as a new preallocation.
		 */
		if (ac->ac_status == AC_STATUS_FOUND &&
			ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
			ext4_mb_new_preallocation(ac);
	}
	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
		if (*errp == -EAGAIN) {
			/*
			 * Drop the reference taken by
			 * ext4_mb_use_best_found, reset the best-found
			 * extent and retry the allocation from scratch.
			 */
			ext4_mb_release_context(ac);
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			goto repeat;
		} else if (*errp)
		errout:
			/* also reached when the regular allocator fails */
			ext4_discard_allocated_blocks(ac);
		else {
			block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
			ar->len = ac->ac_b_ex.fe_len;
		}
	} else {
		/* nothing found: discard preallocations and retry */
		freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
		if (freed)
			goto repeat;
		*errp = -ENOSPC;
	}

	if (*errp) {
		ac->ac_b_ex.fe_len = 0;
		ar->len = 0;
		ext4_mb_show_ac(ac);
	}
	ext4_mb_release_context(ac);
out:
	if (ac)
		kmem_cache_free(ext4_ac_cachep, ac);
	/* return unused quota reservation */
	if (inquota && ar->len < inquota)
		dquot_free_block(ar->inode, inquota - ar->len);
	if (!ar->len) {
		if (!ext4_test_inode_state(ar->inode,
						EXT4_STATE_DELALLOC_RESERVED))
			/* release all reserved blocks if non-delalloc */
			percpu_counter_sub(&sbi->s_dirtyblocks_counter,
						reserv_blks);
	}

	trace_ext4_allocate_blocks(ar, (unsigned long long)block);

	return block;
}
4348
4349
4350
4351
4352
4353
4354static int can_merge(struct ext4_free_data *entry1,
4355 struct ext4_free_data *entry2)
4356{
4357 if ((entry1->t_tid == entry2->t_tid) &&
4358 (entry1->group == entry2->group) &&
4359 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4360 return 1;
4361 return 0;
4362}
4363
/*
 * Queue freed metadata blocks on the committing transaction's private
 * list so they are not reused before the transaction commits.  Entries
 * are kept in a per-group rb-tree ordered by start block and merged
 * with physically adjacent entries from the same transaction.
 *
 * Called with the group lock held.
 */
static noinline_for_stack int
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
		      struct ext4_free_data *new_entry)
{
	ext4_group_t group = e4b->bd_group;
	ext4_grpblk_t block;
	struct ext4_free_data *entry;
	struct ext4_group_info *db = e4b->bd_info;
	struct super_block *sb = e4b->bd_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct rb_node **n = &db->bb_free_root.rb_node, *node;
	struct rb_node *parent = NULL, *new_node;

	BUG_ON(!ext4_handle_valid(handle));
	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	new_node = &new_entry->node;
	block = new_entry->start_blk;

	if (!*n) {
		/*
		 * First free extent queued for this group: pin the buddy
		 * and bitmap pages so the buddy cache cannot be dropped
		 * and later rebuilt from the on-disk bitmap, which would
		 * lose track of the not-yet-committed free blocks.
		 */
		page_cache_get(e4b->bd_buddy_page);
		page_cache_get(e4b->bd_bitmap_page);
	}
	/* find the insertion point in the rb-tree, keyed by start block */
	while (*n) {
		parent = *n;
		entry = rb_entry(parent, struct ext4_free_data, node);
		if (block < entry->start_blk)
			n = &(*n)->rb_left;
		else if (block >= (entry->start_blk + entry->count))
			n = &(*n)->rb_right;
		else {
			/* overlap: this range is already queued to be freed */
			ext4_grp_locked_error(sb, group, 0,
				ext4_group_first_block_no(sb, group) + block,
				"Block already on to-be-freed list");
			return 0;
		}
	}

	rb_link_node(new_node, parent, n);
	rb_insert_color(new_node, &db->bb_free_root);

	/* try to merge with the extent physically to the left */
	node = rb_prev(new_node);
	if (node) {
		entry = rb_entry(node, struct ext4_free_data, node);
		if (can_merge(entry, new_entry)) {
			new_entry->start_blk = entry->start_blk;
			new_entry->count += entry->count;
			rb_erase(node, &(db->bb_free_root));
			spin_lock(&sbi->s_md_lock);
			list_del(&entry->list);
			spin_unlock(&sbi->s_md_lock);
			kmem_cache_free(ext4_free_ext_cachep, entry);
		}
	}

	/* ... and with the extent physically to the right */
	node = rb_next(new_node);
	if (node) {
		entry = rb_entry(node, struct ext4_free_data, node);
		if (can_merge(new_entry, entry)) {
			new_entry->count += entry->count;
			rb_erase(node, &(db->bb_free_root));
			spin_lock(&sbi->s_md_lock);
			list_del(&entry->list);
			spin_unlock(&sbi->s_md_lock);
			kmem_cache_free(ext4_free_ext_cachep, entry);
		}
	}

	/* publish the (possibly merged) entry on the transaction's list */
	spin_lock(&sbi->s_md_lock);
	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
	spin_unlock(&sbi->s_md_lock);
	return 0;
}
4444
4445
4446
4447
4448
4449
4450
4451
4452
/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode owning the blocks being freed
 * @bh:		optional buffer_head of the block (single-block frees only)
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	EXT4_FREE_BLOCKS_* flags
 */
void ext4_free_blocks(handle_t *handle, struct inode *inode,
		      struct buffer_head *bh, ext4_fsblk_t block,
		      unsigned long count, int flags)
{
	struct buffer_head *bitmap_bh = NULL;
	struct super_block *sb = inode->i_sb;
	struct ext4_group_desc *gdp;
	unsigned long freed = 0;	/* total blocks freed, for quota */
	unsigned int overflow;		/* blocks spilling past the group end */
	ext4_grpblk_t bit;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	struct ext4_sb_info *sbi;
	struct ext4_buddy e4b;
	int err = 0;
	int ret;

	/* if both @bh and @block are given they must agree */
	if (bh) {
		if (block)
			BUG_ON(block != bh->b_blocknr);
		else
			block = bh->b_blocknr;
	}

	sbi = EXT4_SB(sb);
	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
	    !ext4_data_block_valid(sbi, block, count)) {
		ext4_error(sb, "Freeing blocks not in datazone - "
			   "block = %llu, count = %lu", block, count);
		goto error_return;
	}

	ext4_debug("freeing block %llu\n", block);
	trace_ext4_free_blocks(inode, block, count, flags);

	if (flags & EXT4_FREE_BLOCKS_FORGET) {
		struct buffer_head *tbh = bh;
		int i;

		/* a caller-supplied bh only makes sense for one block */
		BUG_ON(bh && (count > 1));

		/* forget each block's buffer so the journal drops it */
		for (i = 0; i < count; i++) {
			if (!bh)
				tbh = sb_find_get_block(inode->i_sb,
							block + i);
			if (unlikely(!tbh))
				continue;
			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
				    inode, tbh, block + i);
		}
	}

	/*
	 * When data is not written back directly (ordered/journalled
	 * data modes), treat the blocks as metadata below so they are
	 * kept on the to-be-freed list and not reused until the
	 * transaction that freed them has committed.
	 */
	if (!ext4_should_writeback_data(inode))
		flags |= EXT4_FREE_BLOCKS_METADATA;

do_more:
	overflow = 0;
	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);

	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary; if so, only handle this group's part now and loop
	 * back (via "goto do_more") for the remainder.
	 */
	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
		count -= overflow;
	}
	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (!bitmap_bh) {
		err = -EIO;
		goto error_return;
	}
	gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!gdp) {
		err = -EIO;
		goto error_return;
	}

	/* never free blocks belonging to the group's own metadata */
	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
	    in_range(block, ext4_inode_table(sb, gdp),
		     EXT4_SB(sb)->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
		     EXT4_SB(sb)->s_itb_per_group)) {

		ext4_error(sb, "Freeing blocks in system zone - "
			   "Block = %llu, count = %lu", block, count);
		/* err is still 0: ext4_error above already reported it */
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify the free-blocks count in the group
	 * descriptor; get journal write access to its block first.
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;
#ifdef AGGRESSIVE_CHECK
	{
		int i;
		for (i = 0; i < count; i++)
			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
	}
#endif
	trace_ext4_mballoc_free(sb, inode, block_group, bit, count);

	err = ext4_mb_load_buddy(sb, block_group, &e4b);
	if (err)
		goto error_return;

	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
		struct ext4_free_data *new_entry;
		/*
		 * Metadata blocks must not be reused until the freeing
		 * transaction commits; queue them on the transaction's
		 * to-be-freed list instead of returning them to the
		 * buddy cache immediately.
		 */
		new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
		if (!new_entry) {
			err = -ENOMEM;
			goto error_return;
		}
		new_entry->start_blk = bit;
		new_entry->group = block_group;
		new_entry->count = count;
		new_entry->t_tid = handle->h_transaction->t_tid;

		ext4_lock_group(sb, block_group);
		mb_clear_bits(bitmap_bh->b_data, bit, count);
		ext4_mb_free_metadata(handle, &e4b, new_entry);
	} else {
		/*
		 * Update bb_free and the bitmap with the group lock
		 * held so the buddy cache sees a consistent state.
		 */
		ext4_lock_group(sb, block_group);
		mb_clear_bits(bitmap_bh->b_data, bit, count);
		mb_free_blocks(inode, &e4b, bit, count);
	}

	/* bump the on-disk free-block count for this group */
	ret = ext4_free_blks_count(sb, gdp) + count;
	ext4_free_blks_set(sb, gdp, ret);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
	ext4_unlock_group(sb, block_group);
	percpu_counter_add(&sbi->s_freeblocks_counter, count);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
	}

	ext4_mb_unload_buddy(&e4b);

	freed += count;

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

	/* continue with the remainder that fell into the next group */
	if (overflow && !err) {
		block += count;
		count = overflow;
		put_bh(bitmap_bh);
		goto do_more;
	}
	ext4_mark_super_dirty(sb);
error_return:
	if (freed)
		dquot_free_block(inode, freed);
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return;
}
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
4657 ext4_fsblk_t block, unsigned long count)
4658{
4659 struct buffer_head *bitmap_bh = NULL;
4660 struct buffer_head *gd_bh;
4661 ext4_group_t block_group;
4662 ext4_grpblk_t bit;
4663 unsigned int i;
4664 struct ext4_group_desc *desc;
4665 struct ext4_sb_info *sbi = EXT4_SB(sb);
4666 struct ext4_buddy e4b;
4667 int err = 0, ret, blk_free_count;
4668 ext4_grpblk_t blocks_freed;
4669 struct ext4_group_info *grp;
4670
4671 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
4672
4673 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4674 grp = ext4_get_group_info(sb, block_group);
4675
4676
4677
4678
4679 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
4680 goto error_return;
4681
4682 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4683 if (!bitmap_bh)
4684 goto error_return;
4685 desc = ext4_get_group_desc(sb, block_group, &gd_bh);
4686 if (!desc)
4687 goto error_return;
4688
4689 if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
4690 in_range(ext4_inode_bitmap(sb, desc), block, count) ||
4691 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
4692 in_range(block + count - 1, ext4_inode_table(sb, desc),
4693 sbi->s_itb_per_group)) {
4694 ext4_error(sb, "Adding blocks in system zones - "
4695 "Block = %llu, count = %lu",
4696 block, count);
4697 goto error_return;
4698 }
4699
4700 BUFFER_TRACE(bitmap_bh, "getting write access");
4701 err = ext4_journal_get_write_access(handle, bitmap_bh);
4702 if (err)
4703 goto error_return;
4704
4705
4706
4707
4708
4709
4710 BUFFER_TRACE(gd_bh, "get_write_access");
4711 err = ext4_journal_get_write_access(handle, gd_bh);
4712 if (err)
4713 goto error_return;
4714
4715 for (i = 0, blocks_freed = 0; i < count; i++) {
4716 BUFFER_TRACE(bitmap_bh, "clear bit");
4717 if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
4718 ext4_error(sb, "bit already cleared for block %llu",
4719 (ext4_fsblk_t)(block + i));
4720 BUFFER_TRACE(bitmap_bh, "bit already cleared");
4721 } else {
4722 blocks_freed++;
4723 }
4724 }
4725
4726 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4727 if (err)
4728 goto error_return;
4729
4730
4731
4732
4733
4734
4735 ext4_lock_group(sb, block_group);
4736 mb_clear_bits(bitmap_bh->b_data, bit, count);
4737 mb_free_blocks(NULL, &e4b, bit, count);
4738 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
4739 ext4_free_blks_set(sb, desc, blk_free_count);
4740 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
4741 ext4_unlock_group(sb, block_group);
4742 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
4743
4744 if (sbi->s_log_groups_per_flex) {
4745 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4746 atomic_add(blocks_freed,
4747 &sbi->s_flex_groups[flex_group].free_blocks);
4748 }
4749
4750 ext4_mb_unload_buddy(&e4b);
4751
4752
4753 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4754 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4755
4756
4757 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4758 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4759 if (!err)
4760 err = ret;
4761
4762error_return:
4763 brelse(bitmap_bh);
4764 ext4_std_error(sb, err);
4765 return;
4766}
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780static void ext4_trim_extent(struct super_block *sb, int start, int count,
4781 ext4_group_t group, struct ext4_buddy *e4b)
4782{
4783 struct ext4_free_extent ex;
4784
4785 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4786
4787 ex.fe_start = start;
4788 ex.fe_group = group;
4789 ex.fe_len = count;
4790
4791
4792
4793
4794
4795 mb_mark_used(e4b, &ex);
4796 ext4_unlock_group(sb, group);
4797 ext4_issue_discard(sb, group, start, count);
4798 ext4_lock_group(sb, group);
4799 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4800}
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
/*
 * Trim (discard) every free extent of at least @minblocks blocks in
 * group @group within the range [@start, @max).  Returns the number of
 * blocks trimmed, or a negative error (-ERESTARTSYS on a fatal signal,
 * or the ext4_mb_load_buddy() error).
 */
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		ext4_grpblk_t start, ext4_grpblk_t max,
		ext4_grpblk_t minblocks)
{
	void *bitmap;
	ext4_grpblk_t next, count = 0;	/* count: blocks trimmed so far */
	struct ext4_buddy e4b;
	int ret;

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_error(sb, "Error in loading buddy "
				"information for %u", group);
		return ret;
	}
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);
	/* skip straight to the group's first free block */
	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;

	while (start < max) {
		/* [start, next) is the next run of free blocks */
		start = mb_find_next_zero_bit(bitmap, max, start);
		if (start >= max)
			break;
		next = mb_find_next_bit(bitmap, max, start);

		if ((next - start) >= minblocks) {
			/* drops and retakes the group lock internally */
			ext4_trim_extent(sb, start,
				next - start, group, &e4b);
			count += next - start;
		}
		start = next + 1;

		if (fatal_signal_pending(current)) {
			count = -ERESTARTSYS;
			break;
		}

		/* be nice to other tasks while holding the group lock */
		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

		/* stop early if the rest can't hold a minblocks extent */
		if ((e4b.bd_info->bb_free - count) < minblocks)
			break;
	}
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n",
		count, group);

	return count;
}
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4891{
4892 struct ext4_group_info *grp;
4893 ext4_group_t first_group, last_group;
4894 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4895 ext4_grpblk_t cnt = 0, first_block, last_block;
4896 uint64_t start, len, minlen, trimmed = 0;
4897 ext4_fsblk_t first_data_blk =
4898 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4899 int ret = 0;
4900
4901 start = range->start >> sb->s_blocksize_bits;
4902 len = range->len >> sb->s_blocksize_bits;
4903 minlen = range->minlen >> sb->s_blocksize_bits;
4904
4905 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4906 return -EINVAL;
4907 if (start < first_data_blk) {
4908 len -= first_data_blk - start;
4909 start = first_data_blk;
4910 }
4911
4912
4913 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4914 &first_group, &first_block);
4915 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4916 &last_group, &last_block);
4917 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4918 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4919
4920 if (first_group > last_group)
4921 return -EINVAL;
4922
4923 for (group = first_group; group <= last_group; group++) {
4924 grp = ext4_get_group_info(sb, group);
4925
4926 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
4927 ret = ext4_mb_init_group(sb, group);
4928 if (ret)
4929 break;
4930 }
4931
4932
4933
4934
4935
4936
4937
4938 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
4939 last_block = first_block + len;
4940 len -= last_block - first_block;
4941
4942 if (grp->bb_free >= minlen) {
4943 cnt = ext4_trim_all_free(sb, group, first_block,
4944 last_block, minlen);
4945 if (cnt < 0) {
4946 ret = cnt;
4947 break;
4948 }
4949 }
4950 trimmed += cnt;
4951 first_block = 0;
4952 }
4953 range->len = trimmed * sb->s_blocksize;
4954
4955 return ret;
4956}
4957