/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * mballoc.c contains the multiblock allocation routines.
 */

#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <trace/events/ext4.h>

/*
 * MULTIBLOCK ALLOCATOR OVERVIEW
 *
 * An allocation request asks for multiple contiguous blocks.  Two
 * preallocation schemes feed such requests: per-inode preallocation
 * space (for larger files) and per-CPU locality-group preallocation
 * (for small files, so that blocks of related small files end up close
 * together on disk).
 *
 * Free space is tracked through a "buddy cache": for each block group
 * we keep a copy of the on-disk block bitmap and a derived buddy
 * bitmap in the page cache of a per-filesystem dummy inode
 * (sbi->s_buddy_cache).  Each group occupies two blocks there --
 * block 2*group holds the bitmap and block 2*group+1 the buddy
 * information, which packs one bitmap per order; a clear bit at
 * order k means the corresponding 2^k-block chunk is entirely free.
 *
 * The regular allocator scans the groups in up to four passes with a
 * progressively relaxed criterion (cr 0..3): exact power-of-two buddy
 * scan, average-fragment-size heuristic, "enough free blocks", and
 * finally "take anything".  Behaviour is controlled by tunables such
 * as mb_min_to_scan, mb_max_to_scan, mb_order2_req, mb_stream_req and
 * mb_group_prealloc.
 *
 * Locking: changes to a group's bitmap, buddy data and preallocation
 * list are serialized by the group lock (ext4_lock_group()); a
 * group's alloc_sem protects against reinitialization of the buddy
 * cache page while an allocation in that group is in flight.
 */

static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;

/*
 * We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each possible block size.
 */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);

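/*
 * ext4_test_bit() and friends on some architectures (e.g. powerpc)
 * require an address aligned to sizeof(unsigned long).  The helper
 * below rounds the address down to that boundary and folds the skipped
 * bytes into the bit number.  For example, on a 64-bit machine an
 * address ending in 0x6 is rounded down by 6 bytes while the bit index
 * grows by 6 * 8 = 48 bits, so the very same bit is still addressed.
 */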
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
	return addr;
}

static inline int mb_test_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return EXT4_MB_BITMAP(e4b);
	}

	bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}

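/*
 * Buddy bitmap layout, taking a 4KB block size as an example
 * (bd_blkbits == 12): order 0 is the block bitmap itself, with
 * 1 << (12 + 3) == 32768 bits.  Orders 1 through 13 are packed into
 * the buddy block: order 1 starts at byte offset s_mb_offsets[1] == 0
 * with 16384 bits, order 2 at offset 2048 with 8192 bits, and so on,
 * each order holding half as many bits as the one below it.
 */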
#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
				   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += first + i;
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				printk(KERN_ERR "corruption in group %u "
				       "at byte %u(%u): %x in copy != %x "
				       "on disk/prealloc\n",
				       e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 0 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, EXT4_MB_BITMAP(e4b)));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif

/*
 * Divide blocks started from @first with length @len into
 * smaller chunks with power of 2 blocks.
 * Clear the bits in bitmap which the blocks of the chunk(s) covered,
 * then increase bb_counters[] for the corresponding chunk size.
 */
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned short border;

	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find how many blocks can be covered since this position */
		max = ffs(first | border) - 1;

		/* find how many blocks of power 2 we need to mark */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}

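/*
 * Example: with first == 6 and len == 12 the loop carves the range
 * into power-of-two, naturally aligned chunks: a 2-block chunk at 6
 * (6 is only 2-aligned), then an 8-block chunk at 8, then a 2-block
 * chunk at 16, bumping bb_counters[1], bb_counters[3] and
 * bb_counters[1] respectively and clearing the matching buddy bits.
 */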
/*
 * Cache the order of the largest free extent we have available in this
 * block group.
 */
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	int i;
	int bits;

	grp->bb_largest_free_order = -1; /* uninit */

	bits = sb->s_blocksize_bits + 1;
	for (i = bits; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* initialize buddy from bitmap which is aggregation
	 * of on-disk bitmap and preallocations */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "%u blocks in bitmap, %u in gd",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, we consider the group
		 * descriptor corrupt and update bb_free using the
		 * bitmap value.
		 */
		grp->bb_free = free;
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	spin_lock(&EXT4_SB(sb)->s_bal_lock);
	EXT4_SB(sb)->s_mb_buddies_generated++;
	EXT4_SB(sb)->s_mb_generation_time += period;
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
}

/*
 * The buddy information is attached to the buddy cache inode for
 * convenience.  The information regarding each group is loaded via
 * ext4_mb_load_buddy.  Both the block bitmap and the buddy information
 * are stored in the inode as:
 *
 *  {                        page                        }
 *  [ group 0 bitmap][ group 0 buddy] [group 1][ group 1 buddy]
 *
 * one block each for the bitmap and the buddy information.  So for
 * each group we take up 2 blocks.  A page can contain blocks_per_page
 * (PAGE_CACHE_SIZE / blocksize) blocks, and so holds information for
 * blocks_per_page / 2 groups.
 *
 * Locking note: this routine takes the block group lock of all groups
 * for this page; do not hold this lock when calling it!
 */
static int ext4_mb_init_cache(struct page *page, char *incore)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh;
	struct inode *inode;
	char *data;
	char *bitmap;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = 1 << inode->i_blkbits;
	blocks_per_page = PAGE_CACHE_SIZE / blocksize;

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps */
	if (groups_per_page > 1) {
		err = -ENOMEM;
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, GFP_NOFS);
		if (bh == NULL)
			goto out;
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0; i < groups_per_page; i++) {
		struct ext4_group_desc *desc;

		if (first_group + i >= ngroups)
			break;

		err = -EIO;
		desc = ext4_get_group_desc(sb, first_group + i, NULL);
		if (desc == NULL)
			goto out;

		err = -ENOMEM;
		bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
		if (bh[i] == NULL)
			goto out;

		if (bitmap_uptodate(bh[i]))
			continue;

		lock_buffer(bh[i]);
		if (bitmap_uptodate(bh[i])) {
			unlock_buffer(bh[i]);
			continue;
		}
		ext4_lock_group(sb, first_group + i);
		if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
			ext4_init_block_bitmap(sb, bh[i],
						first_group + i, desc);
			set_bitmap_uptodate(bh[i]);
			set_buffer_uptodate(bh[i]);
			ext4_unlock_group(sb, first_group + i);
			unlock_buffer(bh[i]);
			continue;
		}
		ext4_unlock_group(sb, first_group + i);
		if (buffer_uptodate(bh[i])) {
			/*
			 * if not uninit, an uptodate bh implies an
			 * uptodate bitmap
			 */
			set_bitmap_uptodate(bh[i]);
			unlock_buffer(bh[i]);
			continue;
		}
		get_bh(bh[i]);
		/*
		 * submit the buffer_head for read.  We can
		 * safely mark the bitmap as uptodate now;
		 * doing it here means the uptodate bit is set
		 * with the buffer lock held.
		 */
		set_bitmap_uptodate(bh[i]);
		bh[i]->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh[i]);
		mb_debug(1, "read bitmap for group %u\n", first_group + i);
	}

	/* wait for I/O completion */
	for (i = 0; i < groups_per_page && bh[i]; i++)
		wait_on_buffer(bh[i]);

	err = -EIO;
	for (i = 0; i < groups_per_page && bh[i]; i++)
		if (!buffer_uptodate(bh[i]))
			goto out;

	err = 0;
	first_block = page->index * blocks_per_page;
	/* init the page */
	memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
	for (i = 0; i < blocks_per_page; i++) {
		int group;
		struct ext4_group_info *grinfo;

		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		/*
		 * data carries the information regarding this
		 * particular group in the format specified above
		 */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		/*
		 * We place the buddy block and bitmap block
		 * close together
		 */
		if ((first_block + i) & 1) {
			/* this is buddy bitmap */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			/*
			 * incore got set to the group block bitmap below
			 */
			ext4_lock_group(sb, group);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* this is block bitmap */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark all preallocated blks used in in-core bitmap */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* set incore so that the buddy information can be
			 * generated using this
			 */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page && bh[i]; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}

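/*
 * Example of the page <-> group mapping used above: with 4KB pages and
 * a 1KB block size, blocks_per_page == 4, so one page holds the bitmap
 * and buddy blocks of two groups.  With a 4KB block size,
 * blocks_per_page == 1 and the bitmap and buddy of each group land on
 * two separate pages (page 2 * group and page 2 * group + 1).
 */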
/*
 * Lock the group_info alloc_sem of all the groups belonging to the
 * same buddy cache page.  This makes sure that no other parallel
 * operation on the buddy cache can happen while we hold it.
 */
static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
					ext4_group_t group)
{
	int i;
	int block, pnum;
	int blocks_per_page;
	int groups_per_page;
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t first_group;
	struct ext4_group_info *grp;

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	first_group = pnum * blocks_per_page / 2;

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	for (i = 0; i < groups_per_page; i++) {

		if ((first_group + i) >= ngroups)
			break;
		grp = ext4_get_group_info(sb, first_group + i);
		/* take each group's allocation semaphore for
		 * writing, so that no block allocation can go
		 * on in any of these groups
		 */
		down_write_nested(&grp->alloc_sem, i);
	}
	return i;
}

static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
					 ext4_group_t group, int locked_group)
{
	int i;
	int block, pnum;
	int blocks_per_page;
	ext4_group_t first_group;
	struct ext4_group_info *grp;

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	first_group = pnum * blocks_per_page / 2;
	/* release locks on all the groups */
	for (i = 0; i < locked_group; i++) {

		grp = ext4_get_group_info(sb, first_group + i);
		up_write(&grp->alloc_sem);
	}

}

/*
 * Locking note: this routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the BG lock
 * when calling this routine!
 */
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{
	int ret = 0;
	void *bitmap;
	int blocks_per_page;
	int block, pnum, poff;
	int num_grp_locked = 0;
	struct ext4_group_info *this_grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;
	struct page *page = NULL, *bitmap_page = NULL;

	mb_debug(1, "init group %u\n", group);
	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * This ensures that we don't reinit the buddy cache
	 * page which maps to the group from which we are already
	 * allocating.  If we are looking at the buddy cache we would
	 * have taken a reference using ext4_mb_load_buddy and that
	 * would have taken the alloc_sem lock.
	 */
	num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
	if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group;
		 * return without doing anything
		 */
		ret = 0;
		goto err;
	}
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (page) {
		BUG_ON(page->mapping != inode->i_mapping);
		ret = ext4_mb_init_cache(page, NULL);
		if (ret) {
			unlock_page(page);
			goto err;
		}
		unlock_page(page);
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);
	bitmap_page = page;
	bitmap = page_address(page) + (poff * sb->s_blocksize);

	/* init buddy cache */
	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
	if (page == bitmap_page) {
		/*
		 * If both the bitmap and buddy are in
		 * the same page we don't need to force
		 * init the buddy
		 */
		unlock_page(page);
	} else if (page) {
		BUG_ON(page->mapping != inode->i_mapping);
		ret = ext4_mb_init_cache(page, bitmap);
		if (ret) {
			unlock_page(page);
			goto err;
		}
		unlock_page(page);
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	mark_page_accessed(page);
err:
	ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
	if (bitmap_page)
		page_cache_release(bitmap_page);
	if (page)
		page_cache_release(page);
	return ret;
}

/*
 * Locking note: this routine may call ext4_mb_init_group(), which takes
 * the alloc_sem of all the groups sharing the buddy cache page; do not
 * hold any of those semaphores when calling it!
 */
static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = ext4_get_group_info(sb, group);
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;
	e4b->alloc_semp = &grp->alloc_sem;

	/* Take the read lock on the group alloc
	 * sem.  This would make sure a parallel
	 * ext4_mb_init_group happening on other
	 * groups mapped by the page is blocked
	 * till we are done with allocation
	 */
repeat_load_buddy:
	down_read(e4b->alloc_semp);

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/* we need to check for group need init flag
		 * with alloc_semp held so that we can be sure
		 * that new blocks didn't get added to the group
		 * while we are loading the buddy cache
		 */
		up_read(e4b->alloc_semp);
		/*
		 * we need full data about the group
		 * to make a good selection
		 */
		ret = ext4_mb_init_group(sb, group);
		if (ret)
			return ret;
		goto repeat_load_buddy;
	}

	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* we could use find_or_create_page(), but it locks page
	 * what we'd like to avoid in fast path ... */
	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the page reference and try
			 * to get the page with lock.  If we
			 * are not uptodate that implies
			 * somebody just created the page but
			 * is yet to initialize it.  So
			 * wait for it to initialize.
			 */
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page(inode->i_mapping, pnum);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			page_cache_release(page);
		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL || !PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
	mark_page_accessed(page);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	if (e4b->bd_bitmap_page)
		page_cache_release(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		page_cache_release(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;

	/* Done with the buddy cache */
	up_read(e4b->alloc_semp);
	return ret;
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		page_cache_release(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		page_cache_release(e4b->bd_buddy_page);
	/* Done with the buddy cache */
	if (e4b->alloc_semp)
		up_read(e4b->alloc_semp);
}


static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1;
	void *bb;

	BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	bb = EXT4_MB_BUDDY(e4b);
	while (order <= e4b->bd_blkbits + 1) {
		block = block >> 1;
		if (!mb_test_bit(block, bb)) {
			/* this block is part of buddy of order 'order' */
			return order;
		}
		bb += 1 << (e4b->bd_blkbits - order);
		order++;
	}
	return 0;
}

static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}

static void mb_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: set whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}

static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			  int first, int count)
{
	int block = 0;
	int max = 0;
	int order;
	void *buddy;
	void *buddy2;
	struct super_block *sb = e4b->bd_sb;

	BUG_ON(first + count > (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* let's maintain fragments counter */
	if (first != 0)
		block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b));
	if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
		max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b));
	if (block && max)
		e4b->bd_info->bb_fragments--;
	else if (!block && !max)
		e4b->bd_info->bb_fragments++;

	/* let's maintain buddy itself */
	while (count-- > 0) {
		block = first++;
		order = 0;

		if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += block;
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing already freed block "
					      "(bit %u)", block);
		}
		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
		e4b->bd_info->bb_counters[order]++;

		/* start of the buddy */
		buddy = mb_find_buddy(e4b, order, &max);

		do {
			block &= ~1UL;
			if (mb_test_bit(block, buddy) ||
					mb_test_bit(block + 1, buddy))
				break;

			/* both the buddies are free, try to coalesce them */
			buddy2 = mb_find_buddy(e4b, order + 1, &max);

			if (!buddy2)
				break;

			if (order > 0) {
				/* for special purposes, we don't set
				 * free bits in bitmap */
				mb_set_bit(block, buddy);
				mb_set_bit(block + 1, buddy);
			}
			e4b->bd_info->bb_counters[order]--;
			e4b->bd_info->bb_counters[order]--;

			block = block >> 1;
			order++;
			e4b->bd_info->bb_counters[order]++;

			mb_clear_bit(block, buddy2);
			buddy = buddy2;
		} while (1);
	}
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}

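/*
 * Example of the coalescing loop above: freeing block 5 while block 4
 * is already free clears bit 5 in the block bitmap and then clears bit
 * 2 in the order-1 buddy map, making blocks 4-5 a free order-1 chunk.
 * If blocks 6-7 already form a free order-1 chunk (bit 3 clear at
 * order 1), the loop runs once more: bits 2 and 3 are set at order 1
 * (the halves are no longer free "at that order") and bit 1 is cleared
 * at order 2, so blocks 4-7 become one free order-2 chunk.
 */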
static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max;
	int ord;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, order, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	if (likely(order == 0)) {
		/* find actual order */
		order = mb_find_order_for_block(e4b, block);
		block = block >> order;
	}

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* calc difference from given start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       (buddy = mb_find_buddy(e4b, order, &max))) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
			break;

		ord = mb_find_order_for_block(e4b, next);

		order = ord;
		block = next >> order;
		ex->fe_len += 1 << order;
	}

	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
	return ex->fe_len;
}

static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* let's maintain fragments counter */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b));
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b));
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* let's maintain buddy itself */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk may be allocated at once! */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* store for history */
		if (ret == 0)
			ret = len | (ord << 16);

		/* we have to split a large buddy */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}

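/*
 * Splitting example: allocating 3 blocks at offset 8 out of a free
 * order-2 chunk (blocks 8-11) first splits the chunk -- bit 2 is set
 * at order 2 and bits 4-5 cleared at order 1 -- then takes the order-1
 * chunk 8-9 whole, and finally splits the order-1 chunk 10-11 to take
 * block 10, leaving block 11 free at order 0.
 */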
/*
 * Must be called under group lock!
 */
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, thus we store actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * take the page reference.  We want the page to be pinned
	 * so that we don't get an ext4_mb_init_cache call for this
	 * group until we update the bitmap.  That would mean we
	 * could double allocate blocks.  The reference is dropped
	 * in ext4_mb_release_context.
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);
	/* keep the group's alloc_sem held until the bitmap update is
	 * committed; clear it in e4b so ext4_mb_unload_buddy() won't
	 * release it -- it is dropped in ext4_mb_release_context() */
	ac->alloc_semp = e4b->alloc_semp;
	e4b->alloc_semp = NULL;
	/* store last allocated for subsequent stream allocation */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}

/*
 * regular allocator, for general purposes allocation
 */

static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;
	/*
	 * We don't want to scan for a whole year
	 */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/*
	 * Haven't found good chunk so far, let's continue
	 */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* recheck chunk's availability - we don't know
		 * when it was found (within this lock-unlock
		 * period or not) */
		max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}

/*
 * The routine checks whether a found extent is good enough.  If it is,
 * then the extent gets marked used and a flag is set in the context to
 * stop scanning.  Otherwise, the extent is compared with the previously
 * found extent and if the new one is better, it is stored in the
 * context.  Later, the best found extent will be used if mballoc can't
 * find a good enough extent.
 *
 * FIXME: real allocation policy is to be designed yet!
 */
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is the first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If the new found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than previous best one is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfies the request, but
		 * is smaller than the previous one */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}

static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, the caller may want to merge even a small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

/*
 * The routine scans the buddy structures (not the bitmap!) from the
 * given order up to the max order and tries to find a big enough chunk
 * to satisfy the request.
 */
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		BUG_ON(k >= max);

		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}

/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, the caller must pass the number of
 * free blocks in the group, so the routine can know the upper limit.
 */
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = EXT4_MB_BITMAP(e4b);
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_BLOCKS_PER_GROUP(sb), i);
		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
			/*
			 * If we have a corrupt bitmap, we won't find any
			 * free blocks even though group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free blocks as per "
					"group info. But bitmap says 0",
					free);
			break;
		}

		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free blocks as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
			 * The number of free blocks differs.  This mostly
			 * indicates that the bitmap is corrupt.  So exit
			 * without claiming the space.
			 */
			break;
		}

		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}

/*
 * This is a special case for storages like raid5: we try to find
 * stripe-aligned chunks for stripe-size-multiple requests.
 */
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = EXT4_MB_BITMAP(e4b);
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}

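/*
 * Example of the stripe rounding above: with s_stripe == 16 and a
 * group whose first block is 1000, a = (1000 + 15) / 16 = 63 and
 * i = 63 * 16 - 1000 = 8, so scanning starts at the first in-group
 * offset that is aligned to a 16-block stripe boundary on disk.
 */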
/* This is now called BEFORE we load the buddy bitmap. */
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	unsigned free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group);
		if (ret)
			return 0;
	}

	free = grp->bb_free;
	fragments = grp->bb_fragments;
	if (free == 0)
		return 0;
	if (fragments == 0)
		return 0;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return 0;

		return 1;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 3:
		return 1;
	default:
		BUG();
	}

	return 0;
}

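/*
 * The main scanning loop below makes up to four passes over the
 * groups, relaxing its criterion each time (cr == 0: exact
 * power-of-two chunk via the buddy maps; cr == 1: average fragment
 * size is large enough; cr == 2: enough free blocks; cr == 3: take
 * anything).  A request qualifies for the cr == 0 pass only when its
 * length is a power of two of at least mb_order2_req: e.g. fe_len ==
 * 16 gives i = fls(16) = 5 and 16 & ~(1 << 4) == 0, so ac_2order == 4.
 */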
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the fe_len is a power of 2;
	 * if ac_2order is set we also set criteria to 0 so that we
	 * try exact allocation using the buddy.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the request
	 * is greater than or equal to s_mb_order2_reqs.
	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
	 */
	if (i >= sbi->s_mb_order2_reqs) {
		/*
		 * This should tell if fe_len is exactly a power of 2
		 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = i - 1;
	}

	/* if stream allocation is enabled, use the global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-or-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group starts
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			if (group == ngroups)
				group = 0;

			/* This now checks without needing the buddy page */
			if (!ext4_mb_good_group(ac, group, cr))
				continue;

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			if (!ext4_mb_good_group(ac, group, cr)) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long.  Let's try to allocate
		 * the best chunk we've found so far
		 */

		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take the first
			 * found block(s)
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	return err;
}

static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	++*pos;
	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = seq->private;
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err;
	struct ext4_buddy e4b;
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[16];
	} sg;

	group--;
	if (group == 0)
		seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
				"[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
				  "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
			   "group", "free", "frags", "first",
			   "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
			   "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");

	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);
	err = ext4_mb_load_buddy(sb, group, &e4b);
	if (err) {
		seq_printf(seq, "#%-5u: I/O error\n", group);
		return 0;
	}
	ext4_lock_group(sb, group);
	memcpy(&sg, ext4_get_group_info(sb, group), i);
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_printf(seq, " ]\n");

	return 0;
}

static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}

static const struct seq_operations ext4_mb_seq_groups_ops = {
	.start  = ext4_mb_seq_groups_start,
	.next   = ext4_mb_seq_groups_next,
	.stop   = ext4_mb_seq_groups_stop,
	.show   = ext4_mb_seq_groups_show,
};

static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
{
	struct super_block *sb = PDE(inode)->data;
	int rc;

	rc = seq_open(file, &ext4_mb_seq_groups_ops);
	if (rc == 0) {
		struct seq_file *m = file->private_data;
		m->private = sb;
	}
	return rc;
}

static const struct file_operations ext4_mb_seq_groups_fops = {
	.owner		= THIS_MODULE,
	.open		= ext4_mb_seq_groups_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];

	BUG_ON(!cachep);
	return cachep;
}

/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
			  struct ext4_group_desc *desc)
{
	int i;
	int metalen = 0;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info **meta_group_info;
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	/*
	 * First check if this group is the first of a reserved block.
	 * If it's true, we have to allocate a new table of pointers
	 * to ext4_group_info structures
	 */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		metalen = sizeof(*meta_group_info) <<
			EXT4_DESC_PER_BLOCK_BITS(sb);
		meta_group_info = kmalloc(metalen, GFP_KERNEL);
		if (meta_group_info == NULL) {
			printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
			       "buddy group\n");
			goto exit_meta_group_info;
		}
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
			meta_group_info;
	}

	meta_group_info =
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

	meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
	if (meta_group_info[i] == NULL) {
		printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
		goto exit_group_info;
	}
	memset(meta_group_info[i], 0, kmem_cache_size(cachep));
	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
		&(meta_group_info[i]->bb_state));

	/*
	 * initialize bb_free to be able to skip
	 * empty groups without initialization
	 */
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		meta_group_info[i]->bb_free =
			ext4_free_blocks_after_init(sb, group, desc);
	} else {
		meta_group_info[i]->bb_free =
			ext4_free_blks_count(sb, desc);
	}

	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */

#ifdef DOUBLE_CHECK
	{
		struct buffer_head *bh;
		meta_group_info[i]->bb_bitmap =
			kmalloc(sb->s_blocksize, GFP_KERNEL);
		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
		bh = ext4_read_block_bitmap(sb, group);
		BUG_ON(bh == NULL);
		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
			sb->s_blocksize);
		put_bh(bh);
	}
#endif

	return 0;

exit_group_info:
	/* If a meta_group_info table has been allocated, release it now */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
exit_meta_group_info:
	return -ENOMEM;
}

static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int num_meta_group_infos;
	int num_meta_group_infos_max;
	int array_size;
	struct ext4_group_desc *desc;
	struct kmem_cache *cachep;

	/* This is the number of blocks used by GDT */
	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
				1) >> EXT4_DESC_PER_BLOCK_BITS(sb);

	/*
	 * This is the total number of blocks used by GDT including
	 * the number of reserved blocks for GDT.
	 * The s_group_info array is allocated with this value
	 * to allow a clean online resize without a complex
	 * manipulation of pointers.
	 * The drawback is the unused memory when no resize
	 * occurs, but it's very low in terms of pages.
	 */
	num_meta_group_infos_max = num_meta_group_infos +
				le16_to_cpu(es->s_reserved_gdt_blocks);

	/*
	 * array_size is the size of s_group_info array.  We round it
	 * up to the next power of two because this approximation is
	 * done internally by kmalloc anyway, so we get some more
	 * memory for free here.
	 */
	array_size = 1;
	while (array_size < sizeof(*sbi->s_group_info) *
	       num_meta_group_infos_max)
		array_size = array_size << 1;
	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
	 * kmalloc; a 128kB malloc should suffice for a 256TB filesystem.
	 * So a two-level scheme suffices for now. */
	sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
	if (sbi->s_group_info == NULL) {
		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
		return -ENOMEM;
	}
	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
		goto err_freesgi;
	}
	sbi->s_buddy_cache->i_ino = get_next_ino();
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			printk(KERN_ERR
				"EXT4-fs: can't read descriptor %u\n", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = num_meta_group_infos;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	kfree(sbi->s_group_info);
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		if (ext4_groupinfo_caches[i])
			kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	ext4_groupinfo_caches[cache_index] = cachep;

	return 0;
}

int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset;
	unsigned max;
	int ret;

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	i = 1;
	offset = 0;
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += 1 << (sb->s_blocksize_bits - i);
		max = max >> 1;
		i++;
	} while (i <= sb->s_blocksize_bits + 1);

	/* init file for buddy data */
	ret = ext4_mb_init_backend(sb);
	if (ret != 0) {
		goto out;
	}

	spin_lock_init(&sbi->s_md_lock);
	spin_lock_init(&sbi->s_bal_lock);

	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	if (sbi->s_proc)
		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
				 &ext4_mb_seq_groups_fops, sb);

	if (sbi->s_journal)
		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
out:
	if (ret) {
		kfree(sbi->s_mb_offsets);
		kfree(sbi->s_mb_maxs);
	}
	return ret;
}

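/*
 * For a 4KB block size the table built above looks like this (byte
 * offsets into the buddy block, sizes in bits):
 *
 *   order:    0      1      2     3     ...  13
 *   offset:   -      0      2048  3072  ...  4095
 *   max bits: 32768  16384  8192  4096  ...  4
 *
 * Order 0 is special: it is served directly from the block bitmap.
 */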
/* must be called with the ext4 group lock held */
static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
{
	struct ext4_prealloc_space *pa;
	struct list_head *cur, *tmp;
	int count = 0;

	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		list_del(&pa->pa_group_list);
		count++;
		kmem_cache_free(ext4_pspace_cachep, pa);
	}
	if (count)
		mb_debug(1, "mballoc: %u PAs left\n", count);

}

int ext4_mb_release(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	int num_meta_group_infos;
	struct ext4_group_info *grinfo;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	if (sbi->s_group_info) {
		for (i = 0; i < ngroups; i++) {
			grinfo = ext4_get_group_info(sb, i);
#ifdef DOUBLE_CHECK
			kfree(grinfo->bb_bitmap);
#endif
			ext4_lock_group(sb, i);
			ext4_mb_cleanup_pa(grinfo);
			ext4_unlock_group(sb, i);
			kmem_cache_free(cachep, grinfo);
		}
		num_meta_group_infos = (ngroups +
				EXT4_DESC_PER_BLOCK(sb) - 1) >>
			EXT4_DESC_PER_BLOCK_BITS(sb);
		for (i = 0; i < num_meta_group_infos; i++)
			kfree(sbi->s_group_info[i]);
		kfree(sbi->s_group_info);
	}
	kfree(sbi->s_mb_offsets);
	kfree(sbi->s_mb_maxs);
	if (sbi->s_buddy_cache)
		iput(sbi->s_buddy_cache);
	if (sbi->s_mb_stats) {
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
				atomic_read(&sbi->s_bal_allocated),
				atomic_read(&sbi->s_bal_reqs),
				atomic_read(&sbi->s_bal_success));
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
				"%u 2^N hits, %u breaks, %u lost\n",
				atomic_read(&sbi->s_bal_ex_scanned),
				atomic_read(&sbi->s_bal_goals),
				atomic_read(&sbi->s_bal_2orders),
				atomic_read(&sbi->s_bal_breaks),
				atomic_read(&sbi->s_mb_lost_chunks));
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
				sbi->s_mb_buddies_generated,
				sbi->s_mb_generation_time);
		printk(KERN_INFO
		       "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
				atomic_read(&sbi->s_mb_preallocated),
				atomic_read(&sbi->s_mb_discarded));
	}

	free_percpu(sbi->s_locality_groups);
	if (sbi->s_proc)
		remove_proc_entry("mb_groups", sbi->s_proc);

	return 0;
}

static inline int ext4_issue_discard(struct super_block *sb,
		ext4_group_t block_group, ext4_grpblk_t block, int count)
{
	ext4_fsblk_t discard_block;

	discard_block = block + ext4_group_first_block_no(sb, block_group);
	trace_ext4_discard_blocks(sb,
			(unsigned long long) discard_block, count);
	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
}

2653
2654
2655
2656
2657static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2658{
2659 struct super_block *sb = journal->j_private;
2660 struct ext4_buddy e4b;
2661 struct ext4_group_info *db;
2662 int err, ret, count = 0, count2 = 0;
2663 struct ext4_free_data *entry;
2664 struct list_head *l, *ltmp;
2665
2666 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2667 entry = list_entry(l, struct ext4_free_data, list);
2668
2669 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2670 entry->count, entry->group, entry);
2671
2672 if (test_opt(sb, DISCARD)) {
2673 ret = ext4_issue_discard(sb, entry->group,
2674 entry->start_blk, entry->count);
2675 if (unlikely(ret == -EOPNOTSUPP)) {
2676 ext4_warning(sb, "discard not supported, "
2677 "disabling");
2678 clear_opt(sb, DISCARD);
2679 }
2680 }
2681
2682 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2683
2684 BUG_ON(err != 0);
2685
2686 db = e4b.bd_info;
2687
2688 count += entry->count;
2689 count2++;
2690 ext4_lock_group(sb, entry->group);
2691
2692 rb_erase(&entry->node, &(db->bb_free_root));
2693 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2694
2695 if (!db->bb_free_root.rb_node) {
2696
2697
2698
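/*
 * No more freed extents pending for this group: drop the page
 * references taken when the first extent was queued (see
 * ext4_mb_free_metadata()).
 */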
2699 page_cache_release(e4b.bd_buddy_page);
2700 page_cache_release(e4b.bd_bitmap_page);
2701 }
2702 ext4_unlock_group(sb, entry->group);
2703 kmem_cache_free(ext4_free_ext_cachep, entry);
2704 ext4_mb_unload_buddy(&e4b);
2705 }
2706
2707 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2708}
2709
2710#ifdef CONFIG_EXT4_DEBUG
2711u8 mb_enable_debug __read_mostly;
2712
2713static struct dentry *debugfs_dir;
2714static struct dentry *debugfs_debug;
2715
2716static void __init ext4_create_debugfs_entry(void)
2717{
2718 debugfs_dir = debugfs_create_dir("ext4", NULL);
2719 if (debugfs_dir)
2720 debugfs_debug = debugfs_create_u8("mballoc-debug",
2721 S_IRUGO | S_IWUSR,
2722 debugfs_dir,
2723 &mb_enable_debug);
2724}
2725
2726static void ext4_remove_debugfs_entry(void)
2727{
2728 debugfs_remove(debugfs_debug);
2729 debugfs_remove(debugfs_dir);
2730}
2731
2732#else
2733
2734static void __init ext4_create_debugfs_entry(void)
2735{
2736}
2737
2738static void ext4_remove_debugfs_entry(void)
2739{
2740}
2741
2742#endif
2743
2744int __init ext4_init_mballoc(void)
2745{
2746 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2747 SLAB_RECLAIM_ACCOUNT);
2748 if (ext4_pspace_cachep == NULL)
2749 return -ENOMEM;
2750
2751 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2752 SLAB_RECLAIM_ACCOUNT);
2753 if (ext4_ac_cachep == NULL) {
2754 kmem_cache_destroy(ext4_pspace_cachep);
2755 return -ENOMEM;
2756 }
2757
2758 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
2759 SLAB_RECLAIM_ACCOUNT);
2760 if (ext4_free_ext_cachep == NULL) {
2761 kmem_cache_destroy(ext4_pspace_cachep);
2762 kmem_cache_destroy(ext4_ac_cachep);
2763 return -ENOMEM;
2764 }
2765 ext4_create_debugfs_entry();
2766 return 0;
2767}
2768
2769void ext4_exit_mballoc(void)
2770{
2771
2772
2773
2774
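/*
 * Wait for pending call_rcu() callbacks (ext4_mb_pa_callback() frees
 * into ext4_pspace_cachep) before the slab caches are destroyed.
 */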
2775 rcu_barrier();
2776 kmem_cache_destroy(ext4_pspace_cachep);
2777 kmem_cache_destroy(ext4_ac_cachep);
2778 kmem_cache_destroy(ext4_free_ext_cachep);
2779 ext4_groupinfo_destroy_slabs();
2780 ext4_remove_debugfs_entry();
2781}
2782
2783
2784
2785
2786
2787
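/*
 * Mark the blocks chosen in ac->ac_b_ex as in use: set them in the
 * on-disk block bitmap, update the group descriptor's free-block count
 * and checksum, and adjust the superblock counters, all under a journal
 * handle.
 */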
2788static noinline_for_stack int
2789ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2790 handle_t *handle, unsigned int reserv_blks)
2791{
2792 struct buffer_head *bitmap_bh = NULL;
2793 struct ext4_group_desc *gdp;
2794 struct buffer_head *gdp_bh;
2795 struct ext4_sb_info *sbi;
2796 struct super_block *sb;
2797 ext4_fsblk_t block;
2798 int err, len;
2799
2800 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2801 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2802
2803 sb = ac->ac_sb;
2804 sbi = EXT4_SB(sb);
2805
2806 err = -EIO;
2807 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2808 if (!bitmap_bh)
2809 goto out_err;
2810
2811 err = ext4_journal_get_write_access(handle, bitmap_bh);
2812 if (err)
2813 goto out_err;
2814
2815 err = -EIO;
2816 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2817 if (!gdp)
2818 goto out_err;
2819
2820 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2821 ext4_free_blks_count(sb, gdp));
2822
2823 err = ext4_journal_get_write_access(handle, gdp_bh);
2824 if (err)
2825 goto out_err;
2826
2827 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2828
2829 len = ac->ac_b_ex.fe_len;
2830 if (!ext4_data_block_valid(sbi, block, len)) {
2831 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2832 "fs metadata\n", block, block+len);
2833
2834
2835
2836
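/*
 * The allocation overlaps filesystem metadata and ext4_error() above did
 * not abort the fs: mark the range in the bitmap anyway so it is not
 * handed out again, then return -EAGAIN so the caller retries the
 * allocation elsewhere.
 */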
2837 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2838 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2839 ac->ac_b_ex.fe_len);
2840 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2841 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2842 if (!err)
2843 err = -EAGAIN;
2844 goto out_err;
2845 }
2846
2847 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2848#ifdef AGGRESSIVE_CHECK
2849 {
2850 int i;
2851 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2852 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2853 bitmap_bh->b_data));
2854 }
2855 }
2856#endif
2857 mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
2858 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2859 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2860 ext4_free_blks_set(sb, gdp,
2861 ext4_free_blocks_after_init(sb,
2862 ac->ac_b_ex.fe_group, gdp));
2863 }
2864 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
2865 ext4_free_blks_set(sb, gdp, len);
2866 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2867
2868 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2869 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
2870
2871
2872
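/*
 * Reduce the reserved (dirty) block count as well, unless this
 * allocation is consuming blocks already reserved for delalloc.
 */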
2873 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2874
2875 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2876
2877 if (sbi->s_log_groups_per_flex) {
2878 ext4_group_t flex_group = ext4_flex_group(sbi,
2879 ac->ac_b_ex.fe_group);
2880 atomic_sub(ac->ac_b_ex.fe_len,
2881 &sbi->s_flex_groups[flex_group].free_blocks);
2882 }
2883
2884 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2885 if (err)
2886 goto out_err;
2887 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
2888
2889out_err:
2890 ext4_mark_super_dirty(sb);
2891 brelse(bitmap_bh);
2892 return err;
2893}
2894
2895
2896
2897
2898
2899
2900
2901
2902
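/*
 * Normalize a locality-group request: use the stripe size when one is
 * configured, otherwise fall back to s_mb_group_prealloc.
 */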
2903static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
2904{
2905 struct super_block *sb = ac->ac_sb;
2906 struct ext4_locality_group *lg = ac->ac_lg;
2907
2908 BUG_ON(lg == NULL);
2909 if (EXT4_SB(sb)->s_stripe)
2910 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
2911 else
2912 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
2913 mb_debug(1, "#%u: goal %u blocks for locality group\n",
2914 current->pid, ac->ac_g_ex.fe_len);
2915}
2916
2917
2918
2919
2920
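/*
 * Normalization: grow the request to a better size and alignment for
 * preallocation, based on the (predicted) file size and neighbouring
 * extents, while keeping it clear of existing preallocated ranges.
 */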
2921static noinline_for_stack void
2922ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2923 struct ext4_allocation_request *ar)
2924{
2925 int bsbits, max;
2926 ext4_lblk_t end;
2927 loff_t size, orig_size, start_off;
2928 ext4_lblk_t start;
2929 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2930 struct ext4_prealloc_space *pa;
2931
2932
2933
2934 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
2935 return;
2936
2937
2938 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
2939 return;
2940
2941
2942
2943 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
2944 return;
2945
2946 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
2947 ext4_mb_normalize_group_request(ac);
2948 return;
2949 }
2950
2951 bsbits = ac->ac_sb->s_blocksize_bits;
2952
2953
2954
2955 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
2956 size = size << bsbits;
2957 if (size < i_size_read(ac->ac_inode))
2958 size = i_size_read(ac->ac_inode);
2959 orig_size = size;
2960
2961
2962 max = 2 << bsbits;
2963
2964#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
2965 (req <= (size) || max <= (chunk_size))
2966
2967
2968
2969 start_off = 0;
2970 if (size <= 16 * 1024) {
2971 size = 16 * 1024;
2972 } else if (size <= 32 * 1024) {
2973 size = 32 * 1024;
2974 } else if (size <= 64 * 1024) {
2975 size = 64 * 1024;
2976 } else if (size <= 128 * 1024) {
2977 size = 128 * 1024;
2978 } else if (size <= 256 * 1024) {
2979 size = 256 * 1024;
2980 } else if (size <= 512 * 1024) {
2981 size = 512 * 1024;
2982 } else if (size <= 1024 * 1024) {
2983 size = 1024 * 1024;
2984 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
2985 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2986 (21 - bsbits)) << 21;
2987 size = 2 * 1024 * 1024;
2988 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
2989 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2990 (22 - bsbits)) << 22;
2991 size = 4 * 1024 * 1024;
2992 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
2993 (8<<20)>>bsbits, max, 8 * 1024)) {
2994 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2995 (23 - bsbits)) << 23;
2996 size = 8 * 1024 * 1024;
2997 } else {
2998 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
2999 size = ac->ac_o_ex.fe_len << bsbits;
3000 }
3001 size = size >> bsbits;
3002 start = start_off >> bsbits;
3003
3004
3005 if (ar->pleft && start <= ar->lleft) {
3006 size -= ar->lleft + 1 - start;
3007 start = ar->lleft + 1;
3008 }
3009 if (ar->pright && start + size - 1 >= ar->lright)
3010 size -= start + size - ar->lright;
3011
3012 end = start + size;
3013
3014
3015 rcu_read_lock();
3016 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3017 ext4_lblk_t pa_end;
3018
3019 if (pa->pa_deleted)
3020 continue;
3021 spin_lock(&pa->pa_lock);
3022 if (pa->pa_deleted) {
3023 spin_unlock(&pa->pa_lock);
3024 continue;
3025 }
3026
3027 pa_end = pa->pa_lstart + pa->pa_len;
3028
3029
3030 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3031 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3032
3033
3034 if (pa->pa_lstart >= end || pa_end <= start) {
3035 spin_unlock(&pa->pa_lock);
3036 continue;
3037 }
3038 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3039
3040
3041 if (pa_end <= ac->ac_o_ex.fe_logical) {
3042 BUG_ON(pa_end < start);
3043 start = pa_end;
3044 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3045 BUG_ON(pa->pa_lstart > end);
3046 end = pa->pa_lstart;
3047 }
3048 spin_unlock(&pa->pa_lock);
3049 }
3050 rcu_read_unlock();
3051 size = end - start;
3052
3053
3054 rcu_read_lock();
3055 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3056 ext4_lblk_t pa_end;
3057 spin_lock(&pa->pa_lock);
3058 if (pa->pa_deleted == 0) {
3059 pa_end = pa->pa_lstart + pa->pa_len;
3060 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3061 }
3062 spin_unlock(&pa->pa_lock);
3063 }
3064 rcu_read_unlock();
3065
3066 if (start + size <= ac->ac_o_ex.fe_logical &&
3067 start > ac->ac_o_ex.fe_logical) {
3068 printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
3069 (unsigned long) start, (unsigned long) size,
3070 (unsigned long) ac->ac_o_ex.fe_logical);
3071 }
3072 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3073 start > ac->ac_o_ex.fe_logical);
3074 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3075
3076
3077
3078
3079
3080 ac->ac_g_ex.fe_logical = start;
3081 ac->ac_g_ex.fe_len = size;
3082
3083
3084 if (ar->pright && (ar->lright == (start + size))) {
3085
3086 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3087 &ac->ac_f_ex.fe_group,
3088 &ac->ac_f_ex.fe_start);
3089 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3090 }
3091 if (ar->pleft && (ar->lleft + 1 == start)) {
3092
3093 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3094 &ac->ac_f_ex.fe_group,
3095 &ac->ac_f_ex.fe_start);
3096 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3097 }
3098
3099 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3100 (unsigned) orig_size, (unsigned) start);
3101}
3102
3103static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3104{
3105 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3106
3107 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3108 atomic_inc(&sbi->s_bal_reqs);
3109 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3110 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3111 atomic_inc(&sbi->s_bal_success);
3112 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3113 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3114 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3115 atomic_inc(&sbi->s_bal_goals);
3116 if (ac->ac_found > sbi->s_mb_max_to_scan)
3117 atomic_inc(&sbi->s_bal_breaks);
3118 }
3119
3120 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3121 trace_ext4_mballoc_alloc(ac);
3122 else
3123 trace_ext4_mballoc_prealloc(ac);
3124}
3125
3126
3127
3128
3129
3130
3131
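/*
 * Error-path helper: hand the blocks taken from an inode preallocation
 * back to it by restoring pa_free. Group PAs are left alone here.
 */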
3132static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3133{
3134 struct ext4_prealloc_space *pa = ac->ac_pa;
3135 int len;
3136
3137 if (pa && pa->pa_type == MB_INODE_PA) {
3138 len = ac->ac_b_ex.fe_len;
3139 pa->pa_free += len;
3140 }
3141
3142}
3143
3144
3145
3146
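/*
 * Satisfy the request from an inode's preallocated space: translate the
 * logical offset into the PA's physical range and consume it from
 * pa_free.
 */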
3147static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3148 struct ext4_prealloc_space *pa)
3149{
3150 ext4_fsblk_t start;
3151 ext4_fsblk_t end;
3152 int len;
3153
3154
3155 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3156 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
3157 len = end - start;
3158 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3159 &ac->ac_b_ex.fe_start);
3160 ac->ac_b_ex.fe_len = len;
3161 ac->ac_status = AC_STATUS_FOUND;
3162 ac->ac_pa = pa;
3163
3164 BUG_ON(start < pa->pa_pstart);
3165 BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
3166 BUG_ON(pa->pa_free < len);
3167 pa->pa_free -= len;
3168
3169 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3170}
3171
3172
3173
3174
3175static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3176 struct ext4_prealloc_space *pa)
3177{
3178 unsigned int len = ac->ac_o_ex.fe_len;
3179
3180 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3181 &ac->ac_b_ex.fe_group,
3182 &ac->ac_b_ex.fe_start);
3183 ac->ac_b_ex.fe_len = len;
3184 ac->ac_status = AC_STATUS_FOUND;
3185 ac->ac_pa = pa;
3186
3187
3188
3189
3190
3191
3192
3193 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3194}
3195
3196
3197
3198
3199
3200
3201
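/*
 * Compare a candidate group PA against the current best (cpa) and return
 * whichever starts closer to the goal block, moving the reference count
 * to the chosen one.
 */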
3202static struct ext4_prealloc_space *
3203ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3204 struct ext4_prealloc_space *pa,
3205 struct ext4_prealloc_space *cpa)
3206{
3207 ext4_fsblk_t cur_distance, new_distance;
3208
3209 if (cpa == NULL) {
3210 atomic_inc(&pa->pa_count);
3211 return pa;
3212 }
3213 cur_distance = abs(goal_block - cpa->pa_pstart);
3214 new_distance = abs(goal_block - pa->pa_pstart);
3215
3216 if (cur_distance <= new_distance)
3217 return cpa;
3218
3219
3220 atomic_dec(&cpa->pa_count);
3221 atomic_inc(&pa->pa_count);
3222 return pa;
3223}
3224
3225
3226
3227
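/*
 * Try to service the allocation from preallocated space: first the
 * per-inode PA list, then (for group allocations) the locality-group
 * lists. Returns 1 if a PA was used, 0 otherwise.
 */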
3228static noinline_for_stack int
3229ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3230{
3231 int order, i;
3232 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3233 struct ext4_locality_group *lg;
3234 struct ext4_prealloc_space *pa, *cpa = NULL;
3235 ext4_fsblk_t goal_block;
3236
3237
3238 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3239 return 0;
3240
3241
3242 rcu_read_lock();
3243 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3244
3245
3246
3247 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3248 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
3249 continue;
3250
3251
3252 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3253 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
3254 continue;
3255
3256
3257 spin_lock(&pa->pa_lock);
3258 if (pa->pa_deleted == 0 && pa->pa_free) {
3259 atomic_inc(&pa->pa_count);
3260 ext4_mb_use_inode_pa(ac, pa);
3261 spin_unlock(&pa->pa_lock);
3262 ac->ac_criteria = 10;
3263 rcu_read_unlock();
3264 return 1;
3265 }
3266 spin_unlock(&pa->pa_lock);
3267 }
3268 rcu_read_unlock();
3269
3270
3271 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3272 return 0;
3273
3274
3275 lg = ac->ac_lg;
3276 if (lg == NULL)
3277 return 0;
3278 order = fls(ac->ac_o_ex.fe_len) - 1;
3279 if (order > PREALLOC_TB_SIZE - 1)
3280
3281 order = PREALLOC_TB_SIZE - 1;
3282
3283 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3284
3285
3286
3287
3288 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3289 rcu_read_lock();
3290 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3291 pa_inode_list) {
3292 spin_lock(&pa->pa_lock);
3293 if (pa->pa_deleted == 0 &&
3294 pa->pa_free >= ac->ac_o_ex.fe_len) {
3295
3296 cpa = ext4_mb_check_group_pa(goal_block,
3297 pa, cpa);
3298 }
3299 spin_unlock(&pa->pa_lock);
3300 }
3301 rcu_read_unlock();
3302 }
3303 if (cpa) {
3304 ext4_mb_use_group_pa(ac, cpa);
3305 ac->ac_criteria = 20;
3306 return 1;
3307 }
3308 return 0;
3309}
3310
3311
3312
3313
3314
3315
3316
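/*
 * Mark all blocks freed but not yet committed in this group as used in
 * the in-core bitmap, so the buddy cache is generated without them.
 * Needs to be called with the ext4 group lock held.
 */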
3317static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3318 ext4_group_t group)
3319{
3320 struct rb_node *n;
3321 struct ext4_group_info *grp;
3322 struct ext4_free_data *entry;
3323
3324 grp = ext4_get_group_info(sb, group);
3325 n = rb_first(&(grp->bb_free_root));
3326
3327 while (n) {
3328 entry = rb_entry(n, struct ext4_free_data, node);
3329 mb_set_bits(bitmap, entry->start_blk, entry->count);
3330 n = rb_next(n);
3331 }
3332 return;
3333}
3334
3335
3336
3337
3338
3339
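/*
 * Mark every block covered by this group's preallocations as used in the
 * in-core bitmap the buddy cache is generated from. Needs to be called
 * with the ext4 group lock held.
 */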
3340static noinline_for_stack
3341void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3342 ext4_group_t group)
3343{
3344 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3345 struct ext4_prealloc_space *pa;
3346 struct list_head *cur;
3347 ext4_group_t groupnr;
3348 ext4_grpblk_t start;
3349 int preallocated = 0;
3350 int count = 0;
3351 int len;
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361 list_for_each(cur, &grp->bb_prealloc_list) {
3362 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3363 spin_lock(&pa->pa_lock);
3364 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3365 &groupnr, &start);
3366 len = pa->pa_len;
3367 spin_unlock(&pa->pa_lock);
3368 if (unlikely(len == 0))
3369 continue;
3370 BUG_ON(groupnr != group);
3371 mb_set_bits(bitmap, start, len);
3372 preallocated += len;
3373 count++;
3374 }
3375 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3376}
3377
3378static void ext4_mb_pa_callback(struct rcu_head *head)
3379{
3380 struct ext4_prealloc_space *pa;
3381 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3382 kmem_cache_free(ext4_pspace_cachep, pa);
3383}
3384
3385
3386
3387
3388
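/*
 * Drop a reference to a preallocation descriptor; the last dropper of a
 * fully-consumed PA unlinks it from the group and owner lists and frees
 * it via RCU.
 */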
3389static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3390 struct super_block *sb, struct ext4_prealloc_space *pa)
3391{
3392 ext4_group_t grp;
3393 ext4_fsblk_t grp_blk;
3394
3395 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3396 return;
3397
3398
3399 spin_lock(&pa->pa_lock);
3400 if (pa->pa_deleted == 1) {
3401 spin_unlock(&pa->pa_lock);
3402 return;
3403 }
3404
3405 pa->pa_deleted = 1;
3406 spin_unlock(&pa->pa_lock);
3407
3408 grp_blk = pa->pa_pstart;
3409
3410
3411
3412
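/*
 * For group PAs, pa_pstart is advanced past the consumed blocks (see
 * ext4_mb_release_context()), so it may already point into the next
 * group; step back one block to find the group the PA is linked on.
 */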
3413 if (pa->pa_type == MB_GROUP_PA)
3414 grp_blk--;
3415
3416 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432 ext4_lock_group(sb, grp);
3433 list_del(&pa->pa_group_list);
3434 ext4_unlock_group(sb, grp);
3435
3436 spin_lock(pa->pa_obj_lock);
3437 list_del_rcu(&pa->pa_inode_list);
3438 spin_unlock(pa->pa_obj_lock);
3439
3440 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3441}
3442
3443
3444
3445
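/*
 * Create a new preallocation descriptor for an inode from the extent we
 * just won (ac->ac_b_ex), and link it on both the group and inode lists.
 */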
3446static noinline_for_stack int
3447ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3448{
3449 struct super_block *sb = ac->ac_sb;
3450 struct ext4_prealloc_space *pa;
3451 struct ext4_group_info *grp;
3452 struct ext4_inode_info *ei;
3453
3454
3455 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3456 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3457 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3458
3459 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3460 if (pa == NULL)
3461 return -ENOMEM;
3462
3463 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3464 int winl;
3465 int wins;
3466 int win;
3467 int offs;
3468
3469
3470
3471
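/*
 * We got fewer blocks than the normalized goal, so the found space must
 * get a proper logical start to still cover the original request:
 * winl/wins bound how far the chunk may shift while covering both the
 * goal start and the whole original extent.
 */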
3472 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3473 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3474
3475
3476
3477
3478 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3479
3480
3481 wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
3482
3483
3484 win = min(winl, wins);
3485
3486 offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
3487 if (offs && offs < win)
3488 win = offs;
3489
3490 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
3491 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3492 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3493 }
3494
3495
3496
3497 ac->ac_f_ex = ac->ac_b_ex;
3498
3499 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3500 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3501 pa->pa_len = ac->ac_b_ex.fe_len;
3502 pa->pa_free = pa->pa_len;
3503 atomic_set(&pa->pa_count, 1);
3504 spin_lock_init(&pa->pa_lock);
3505 INIT_LIST_HEAD(&pa->pa_inode_list);
3506 INIT_LIST_HEAD(&pa->pa_group_list);
3507 pa->pa_deleted = 0;
3508 pa->pa_type = MB_INODE_PA;
3509
3510 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3511 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3512 trace_ext4_mb_new_inode_pa(ac, pa);
3513
3514 ext4_mb_use_inode_pa(ac, pa);
3515 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3516
3517 ei = EXT4_I(ac->ac_inode);
3518 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3519
3520 pa->pa_obj_lock = &ei->i_prealloc_lock;
3521 pa->pa_inode = ac->ac_inode;
3522
3523 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3524 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3525 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3526
3527 spin_lock(pa->pa_obj_lock);
3528 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3529 spin_unlock(pa->pa_obj_lock);
3530
3531 return 0;
3532}
3533
3534
3535
3536
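/*
 * Create a new preallocation descriptor for a locality group from the
 * extent we just won; it is linked onto the proper lg list later, in
 * ext4_mb_release_context()/ext4_mb_add_n_trim().
 */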
3537static noinline_for_stack int
3538ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3539{
3540 struct super_block *sb = ac->ac_sb;
3541 struct ext4_locality_group *lg;
3542 struct ext4_prealloc_space *pa;
3543 struct ext4_group_info *grp;
3544
3545
3546 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3547 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3548 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3549
3550 BUG_ON(ext4_pspace_cachep == NULL);
3551 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3552 if (pa == NULL)
3553 return -ENOMEM;
3554
3555
3556
3557 ac->ac_f_ex = ac->ac_b_ex;
3558
3559 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3560 pa->pa_lstart = pa->pa_pstart;
3561 pa->pa_len = ac->ac_b_ex.fe_len;
3562 pa->pa_free = pa->pa_len;
3563 atomic_set(&pa->pa_count, 1);
3564 spin_lock_init(&pa->pa_lock);
3565 INIT_LIST_HEAD(&pa->pa_inode_list);
3566 INIT_LIST_HEAD(&pa->pa_group_list);
3567 pa->pa_deleted = 0;
3568 pa->pa_type = MB_GROUP_PA;
3569
3570 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3571 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3572 trace_ext4_mb_new_group_pa(ac, pa);
3573
3574 ext4_mb_use_group_pa(ac, pa);
3575 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3576
3577 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3578 lg = ac->ac_lg;
3579 BUG_ON(lg == NULL);
3580
3581 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3582 pa->pa_inode = NULL;
3583
3584 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3585 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3586 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3587
3588
3589
3590
3591
3592 return 0;
3593}
3594
3595static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3596{
3597 int err;
3598
3599 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3600 err = ext4_mb_new_group_pa(ac);
3601 else
3602 err = ext4_mb_new_inode_pa(ac);
3603 return err;
3604}
3605
3606
3607
3608
3609
3610
3611
3612
3613
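/*
 * Free every still-unused block of an inode PA back to the buddy, using
 * the on-disk bitmap to skip the parts that were actually allocated.
 * The caller must hold the group lock and have marked the PA deleted.
 */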
3614static noinline_for_stack int
3615ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3616 struct ext4_prealloc_space *pa)
3617{
3618 struct super_block *sb = e4b->bd_sb;
3619 struct ext4_sb_info *sbi = EXT4_SB(sb);
3620 unsigned int end;
3621 unsigned int next;
3622 ext4_group_t group;
3623 ext4_grpblk_t bit;
3624 unsigned long long grp_blk_start;
3625 int err = 0;
3626 int free = 0;
3627
3628 BUG_ON(pa->pa_deleted == 0);
3629 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3630 grp_blk_start = pa->pa_pstart - bit;
3631 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3632 end = bit + pa->pa_len;
3633
3634 while (bit < end) {
3635 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3636 if (bit >= end)
3637 break;
3638 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3639 mb_debug(1, " free preallocated %u/%u in group %u\n",
3640 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3641 (unsigned) next - bit, (unsigned) group);
3642 free += next - bit;
3643
3644 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3645 trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
3646 grp_blk_start + bit, next - bit);
3647 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3648 bit = next + 1;
3649 }
3650 if (free != pa->pa_free) {
3651 printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
3652 pa, (unsigned long) pa->pa_lstart,
3653 (unsigned long) pa->pa_pstart,
3654 (unsigned long) pa->pa_len);
3655 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3656 free, pa->pa_free);
3657
3658
3659
3660
3661 }
3662 atomic_add(free, &sbi->s_mb_discarded);
3663
3664 return err;
3665}
3666
3667static noinline_for_stack int
3668ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3669 struct ext4_prealloc_space *pa)
3670{
3671 struct super_block *sb = e4b->bd_sb;
3672 ext4_group_t group;
3673 ext4_grpblk_t bit;
3674
3675 trace_ext4_mb_release_group_pa(sb, pa);
3676 BUG_ON(pa->pa_deleted == 0);
3677 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3678 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3679 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3680 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3681 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3682
3683 return 0;
3684}
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
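/*
 * Release unused preallocations in the given group, e.g. on ENOSPC.
 * "needed" limits how many blocks we try to reclaim; 0 means the whole
 * group. Returns the number of freed blocks.
 */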
3695static noinline_for_stack int
3696ext4_mb_discard_group_preallocations(struct super_block *sb,
3697 ext4_group_t group, int needed)
3698{
3699 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3700 struct buffer_head *bitmap_bh = NULL;
3701 struct ext4_prealloc_space *pa, *tmp;
3702 struct list_head list;
3703 struct ext4_buddy e4b;
3704 int err;
3705 int busy = 0;
3706 int free = 0;
3707
3708 mb_debug(1, "discard preallocation for group %u\n", group);
3709
3710 if (list_empty(&grp->bb_prealloc_list))
3711 return 0;
3712
3713 bitmap_bh = ext4_read_block_bitmap(sb, group);
3714 if (bitmap_bh == NULL) {
3715 ext4_error(sb, "Error reading block bitmap for %u", group);
3716 return 0;
3717 }
3718
3719 err = ext4_mb_load_buddy(sb, group, &e4b);
3720 if (err) {
3721 ext4_error(sb, "Error loading buddy information for %u", group);
3722 put_bh(bitmap_bh);
3723 return 0;
3724 }
3725
3726 if (needed == 0)
3727 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3728
3729 INIT_LIST_HEAD(&list);
3730repeat:
3731 ext4_lock_group(sb, group);
3732 list_for_each_entry_safe(pa, tmp,
3733 &grp->bb_prealloc_list, pa_group_list) {
3734 spin_lock(&pa->pa_lock);
3735 if (atomic_read(&pa->pa_count)) {
3736 spin_unlock(&pa->pa_lock);
3737 busy = 1;
3738 continue;
3739 }
3740 if (pa->pa_deleted) {
3741 spin_unlock(&pa->pa_lock);
3742 continue;
3743 }
3744
3745
3746 pa->pa_deleted = 1;
3747
3748
3749 free += pa->pa_free;
3750
3751 spin_unlock(&pa->pa_lock);
3752
3753 list_del(&pa->pa_group_list);
3754 list_add(&pa->u.pa_tmp_list, &list);
3755 }
3756
3757
3758 if (free < needed && busy) {
3759 busy = 0;
3760 ext4_unlock_group(sb, group);
3761
3762
3763
3764
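/*
 * Some PAs were busy and we still need blocks: give their holders a
 * chance to drop them (and avoid a soft lockup on non-preempt kernels),
 * then scan the list again.
 */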
3765 yield();
3766 goto repeat;
3767 }
3768
3769
3770 if (list_empty(&list)) {
3771 BUG_ON(free != 0);
3772 goto out;
3773 }
3774
3775
3776 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3777
3778
3779 spin_lock(pa->pa_obj_lock);
3780 list_del_rcu(&pa->pa_inode_list);
3781 spin_unlock(pa->pa_obj_lock);
3782
3783 if (pa->pa_type == MB_GROUP_PA)
3784 ext4_mb_release_group_pa(&e4b, pa);
3785 else
3786 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3787
3788 list_del(&pa->u.pa_tmp_list);
3789 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3790 }
3791
3792out:
3793 ext4_unlock_group(sb, group);
3794 ext4_mb_unload_buddy(&e4b);
3795 put_bh(bitmap_bh);
3796 return free;
3797}
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
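/*
 * Release all unused preallocated blocks owned by the given inode,
 * typically when the file is truncated or released.
 */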
3808void ext4_discard_preallocations(struct inode *inode)
3809{
3810 struct ext4_inode_info *ei = EXT4_I(inode);
3811 struct super_block *sb = inode->i_sb;
3812 struct buffer_head *bitmap_bh = NULL;
3813 struct ext4_prealloc_space *pa, *tmp;
3814 ext4_group_t group = 0;
3815 struct list_head list;
3816 struct ext4_buddy e4b;
3817 int err;
3818
3819 if (!S_ISREG(inode->i_mode)) {
3820
3821 return;
3822 }
3823
3824 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3825 trace_ext4_discard_preallocations(inode);
3826
3827 INIT_LIST_HEAD(&list);
3828
3829repeat:
3830
3831 spin_lock(&ei->i_prealloc_lock);
3832 while (!list_empty(&ei->i_prealloc_list)) {
3833 pa = list_entry(ei->i_prealloc_list.next,
3834 struct ext4_prealloc_space, pa_inode_list);
3835 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3836 spin_lock(&pa->pa_lock);
3837 if (atomic_read(&pa->pa_count)) {
3838
3839
3840 spin_unlock(&pa->pa_lock);
3841 spin_unlock(&ei->i_prealloc_lock);
3842 printk(KERN_ERR "uh-oh! used pa while discarding\n");
3843 WARN_ON(1);
3844 schedule_timeout_uninterruptible(HZ);
3845 goto repeat;
3846
3847 }
3848 if (pa->pa_deleted == 0) {
3849 pa->pa_deleted = 1;
3850 spin_unlock(&pa->pa_lock);
3851 list_del_rcu(&pa->pa_inode_list);
3852 list_add(&pa->u.pa_tmp_list, &list);
3853 continue;
3854 }
3855
3856
3857 spin_unlock(&pa->pa_lock);
3858 spin_unlock(&ei->i_prealloc_lock);
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
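/*
 * The PA is marked deleted but may not be unlinked from the inode list
 * yet; wait so that we cannot race against a concurrent unlinker on
 * freed memory, then rescan the list.
 */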
3872 schedule_timeout_uninterruptible(HZ);
3873 goto repeat;
3874 }
3875 spin_unlock(&ei->i_prealloc_lock);
3876
3877 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3878 BUG_ON(pa->pa_type != MB_INODE_PA);
3879 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3880
3881 err = ext4_mb_load_buddy(sb, group, &e4b);
3882 if (err) {
3883 ext4_error(sb, "Error loading buddy information for %u",
3884 group);
3885 continue;
3886 }
3887
3888 bitmap_bh = ext4_read_block_bitmap(sb, group);
3889 if (bitmap_bh == NULL) {
3890 ext4_error(sb, "Error reading block bitmap for %u",
3891 group);
3892 ext4_mb_unload_buddy(&e4b);
3893 continue;
3894 }
3895
3896 ext4_lock_group(sb, group);
3897 list_del(&pa->pa_group_list);
3898 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3899 ext4_unlock_group(sb, group);
3900
3901 ext4_mb_unload_buddy(&e4b);
3902 put_bh(bitmap_bh);
3903
3904 list_del(&pa->u.pa_tmp_list);
3905 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3906 }
3907}
3908
3909#ifdef CONFIG_EXT4_DEBUG
3910static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3911{
3912 struct super_block *sb = ac->ac_sb;
3913 ext4_group_t ngroups, i;
3914
3915 if (!mb_enable_debug ||
3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3917 return;
3918
3919 printk(KERN_ERR "EXT4-fs: Can't allocate:"
3920 " Allocation context details:\n");
3921 printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
3922 ac->ac_status, ac->ac_flags);
3923 printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
3924 "best %lu/%lu/%lu@%lu cr %d\n",
3925 (unsigned long)ac->ac_o_ex.fe_group,
3926 (unsigned long)ac->ac_o_ex.fe_start,
3927 (unsigned long)ac->ac_o_ex.fe_len,
3928 (unsigned long)ac->ac_o_ex.fe_logical,
3929 (unsigned long)ac->ac_g_ex.fe_group,
3930 (unsigned long)ac->ac_g_ex.fe_start,
3931 (unsigned long)ac->ac_g_ex.fe_len,
3932 (unsigned long)ac->ac_g_ex.fe_logical,
3933 (unsigned long)ac->ac_b_ex.fe_group,
3934 (unsigned long)ac->ac_b_ex.fe_start,
3935 (unsigned long)ac->ac_b_ex.fe_len,
3936 (unsigned long)ac->ac_b_ex.fe_logical,
3937 (int)ac->ac_criteria);
3938 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
3939 ac->ac_found);
3940 printk(KERN_ERR "EXT4-fs: groups:\n");
3941 ngroups = ext4_get_groups_count(sb);
3942 for (i = 0; i < ngroups; i++) {
3943 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
3944 struct ext4_prealloc_space *pa;
3945 ext4_grpblk_t start;
3946 struct list_head *cur;
3947 ext4_lock_group(sb, i);
3948 list_for_each(cur, &grp->bb_prealloc_list) {
3949 pa = list_entry(cur, struct ext4_prealloc_space,
3950 pa_group_list);
3951 spin_lock(&pa->pa_lock);
3952 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3953 NULL, &start);
3954 spin_unlock(&pa->pa_lock);
3955 printk(KERN_ERR "PA:%u:%d:%u\n", i,
3956 start, pa->pa_len);
3957 }
3958 ext4_unlock_group(sb, i);
3959
3960 if (grp->bb_free == 0)
3961 continue;
3962 printk(KERN_ERR "%u: %d/%d\n",
3963 i, grp->bb_free, grp->bb_fragments);
3964 }
3965 printk(KERN_ERR "\n");
3966}
3967#else
3968static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3969{
3970 return;
3971}
3972#endif
3973
3974
3975
3976
3977
3978
3979
3980
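/*
 * Decide the allocation policy for this request: no preallocation for a
 * closed file being fully written out, stream allocation for large
 * files, and per-CPU locality-group allocation for small ones.
 */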
3981static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3982{
3983 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3984 int bsbits = ac->ac_sb->s_blocksize_bits;
3985 loff_t size, isize;
3986
3987 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3988 return;
3989
3990 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3991 return;
3992
3993 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
3994 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3995 >> bsbits;
3996
3997 if ((size == isize) &&
3998 !ext4_fs_is_busy(sbi) &&
3999 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4000 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4001 return;
4002 }
4003
4004
4005 size = max(size, isize);
4006 if (size > sbi->s_mb_stream_request) {
4007 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4008 return;
4009 }
4010
4011 BUG_ON(ac->ac_lg != NULL);
4012
4013
4014
4015
4016
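/*
 * Locality groups are per-CPU to reduce contention between block
 * requests issued from multiple CPUs.
 */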
4017 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
4018
4019
4020 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4021
4022
4023 mutex_lock(&ac->ac_lg->lg_mutex);
4024}
4025
4026static noinline_for_stack int
4027ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4028 struct ext4_allocation_request *ar)
4029{
4030 struct super_block *sb = ar->inode->i_sb;
4031 struct ext4_sb_info *sbi = EXT4_SB(sb);
4032 struct ext4_super_block *es = sbi->s_es;
4033 ext4_group_t group;
4034 unsigned int len;
4035 ext4_fsblk_t goal;
4036 ext4_grpblk_t block;
4037
4038
4039 len = ar->len;
4040
4041
4042 if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
4043 len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
4044
4045
4046 goal = ar->goal;
4047 if (goal < le32_to_cpu(es->s_first_data_block) ||
4048 goal >= ext4_blocks_count(es))
4049 goal = le32_to_cpu(es->s_first_data_block);
4050 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4051
4052
4053 memset(ac, 0, sizeof(struct ext4_allocation_context));
4054 ac->ac_b_ex.fe_logical = ar->logical;
4055 ac->ac_status = AC_STATUS_CONTINUE;
4056 ac->ac_sb = sb;
4057 ac->ac_inode = ar->inode;
4058 ac->ac_o_ex.fe_logical = ar->logical;
4059 ac->ac_o_ex.fe_group = group;
4060 ac->ac_o_ex.fe_start = block;
4061 ac->ac_o_ex.fe_len = len;
4062 ac->ac_g_ex.fe_logical = ar->logical;
4063 ac->ac_g_ex.fe_group = group;
4064 ac->ac_g_ex.fe_start = block;
4065 ac->ac_g_ex.fe_len = len;
4066 ac->ac_flags = ar->flags;
4067
4068
4069
4070 ext4_mb_group_or_file(ac);
4071
4072 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4073 "left: %u/%u, right %u/%u to %swritable\n",
4074 (unsigned) ar->len, (unsigned) ar->logical,
4075 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4076 (unsigned) ar->lleft, (unsigned) ar->pleft,
4077 (unsigned) ar->lright, (unsigned) ar->pright,
4078 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4079 return 0;
4080
4081}
4082
4083static noinline_for_stack void
4084ext4_mb_discard_lg_preallocations(struct super_block *sb,
4085 struct ext4_locality_group *lg,
4086 int order, int total_entries)
4087{
4088 ext4_group_t group = 0;
4089 struct ext4_buddy e4b;
4090 struct list_head discard_list;
4091 struct ext4_prealloc_space *pa, *tmp;
4092
4093 mb_debug(1, "discard locality group preallocation\n");
4094
4095 INIT_LIST_HEAD(&discard_list);
4096
4097 spin_lock(&lg->lg_prealloc_lock);
4098 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4099 pa_inode_list) {
4100 spin_lock(&pa->pa_lock);
4101 if (atomic_read(&pa->pa_count)) {
4102
4103
4104
4105
4106
4107 spin_unlock(&pa->pa_lock);
4108 continue;
4109 }
4110 if (pa->pa_deleted) {
4111 spin_unlock(&pa->pa_lock);
4112 continue;
4113 }
4114
4115 BUG_ON(pa->pa_type != MB_GROUP_PA);
4116
4117
4118 pa->pa_deleted = 1;
4119 spin_unlock(&pa->pa_lock);
4120
4121 list_del_rcu(&pa->pa_inode_list);
4122 list_add(&pa->u.pa_tmp_list, &discard_list);
4123
4124 total_entries--;
4125 if (total_entries <= 5) {
4126
4127
4128
4129
4130
4131
4132 break;
4133 }
4134 }
4135 spin_unlock(&lg->lg_prealloc_lock);
4136
4137 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4138
4139 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4140 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4141 ext4_error(sb, "Error loading buddy information for %u",
4142 group);
4143 continue;
4144 }
4145 ext4_lock_group(sb, group);
4146 list_del(&pa->pa_group_list);
4147 ext4_mb_release_group_pa(&e4b, pa);
4148 ext4_unlock_group(sb, group);
4149
4150 ext4_mb_unload_buddy(&e4b);
4151 list_del(&pa->u.pa_tmp_list);
4152 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4153 }
4154}
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
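/*
 * Return a (still referenced) group PA to the lg list sorted by pa_free,
 * then trim the list back if it has grown past 8 entries. pa_count has
 * been incremented, so the PA cannot be freed under us, and lg_mutex
 * keeps parallel allocations from this lg away.
 */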
4165static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4166{
4167 int order, added = 0, lg_prealloc_count = 1;
4168 struct super_block *sb = ac->ac_sb;
4169 struct ext4_locality_group *lg = ac->ac_lg;
4170 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4171
4172 order = fls(pa->pa_free) - 1;
4173 if (order > PREALLOC_TB_SIZE - 1)
4174
4175 order = PREALLOC_TB_SIZE - 1;
4176
4177 rcu_read_lock();
4178 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4179 pa_inode_list) {
4180 spin_lock(&tmp_pa->pa_lock);
4181 if (tmp_pa->pa_deleted) {
4182 spin_unlock(&tmp_pa->pa_lock);
4183 continue;
4184 }
4185 if (!added && pa->pa_free < tmp_pa->pa_free) {
4186
4187 list_add_tail_rcu(&pa->pa_inode_list,
4188 &tmp_pa->pa_inode_list);
4189 added = 1;
4190
4191
4192
4193
4194 }
4195 spin_unlock(&tmp_pa->pa_lock);
4196 lg_prealloc_count++;
4197 }
4198 if (!added)
4199 list_add_tail_rcu(&pa->pa_inode_list,
4200 &lg->lg_prealloc_list[order]);
4201 rcu_read_unlock();
4202
4203
4204 if (lg_prealloc_count > 8) {
4205 ext4_mb_discard_lg_preallocations(sb, lg,
4206 order, lg_prealloc_count);
4207 return;
4208 }
4209 return;
4210}
4211
4212
4213
4214
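/*
 * Release all resources used by the allocation context: return a group
 * PA to its list, drop page and PA references, and unlock the locality
 * group if one was used.
 */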
4215static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4216{
4217 struct ext4_prealloc_space *pa = ac->ac_pa;
4218 if (pa) {
4219 if (pa->pa_type == MB_GROUP_PA) {
4220
4221 spin_lock(&pa->pa_lock);
4222 pa->pa_pstart += ac->ac_b_ex.fe_len;
4223 pa->pa_lstart += ac->ac_b_ex.fe_len;
4224 pa->pa_free -= ac->ac_b_ex.fe_len;
4225 pa->pa_len -= ac->ac_b_ex.fe_len;
4226 spin_unlock(&pa->pa_lock);
4227 }
4228 }
4229 if (ac->alloc_semp)
4230 up_read(ac->alloc_semp);
4231 if (pa) {
4232
4233
4234
4235
4236
4237
4238
4239 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4240 spin_lock(pa->pa_obj_lock);
4241 list_del_rcu(&pa->pa_inode_list);
4242 spin_unlock(pa->pa_obj_lock);
4243 ext4_mb_add_n_trim(ac);
4244 }
4245 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4246 }
4247 if (ac->ac_bitmap_page)
4248 page_cache_release(ac->ac_bitmap_page);
4249 if (ac->ac_buddy_page)
4250 page_cache_release(ac->ac_buddy_page);
4251 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4252 mutex_unlock(&ac->ac_lg->lg_mutex);
4253 ext4_mb_collect_stats(ac);
4254 return 0;
4255}
4256
4257static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4258{
4259 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4260 int ret;
4261 int freed = 0;
4262
4263 trace_ext4_mb_discard_preallocations(sb, needed);
4264 for (i = 0; i < ngroups && needed > 0; i++) {
4265 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4266 freed += ret;
4267 needed -= ret;
4268 }
4269
4270 return freed;
4271}
4272
4273
4274
4275
4276
4277
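/*
 * Main entry point into mballoc: try to satisfy the request from
 * preallocated space first and fall back to a regular allocation,
 * handling block reservation and quota on the way.
 */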
4278ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4279 struct ext4_allocation_request *ar, int *errp)
4280{
4281 int freed;
4282 struct ext4_allocation_context *ac = NULL;
4283 struct ext4_sb_info *sbi;
4284 struct super_block *sb;
4285 ext4_fsblk_t block = 0;
4286 unsigned int inquota = 0;
4287 unsigned int reserv_blks = 0;
4288
4289 sb = ar->inode->i_sb;
4290 sbi = EXT4_SB(sb);
4291
4292 trace_ext4_request_blocks(ar);
4293
4294
4295
4296
4297
4298
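/*
 * For delayed allocation the blocks and quota were already reserved when
 * the data was copied into the page cache, so the ENOSPC/EDQUOT checks
 * below can be skipped.
 */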
4299 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4300 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4301 else {
4302
4303
4304
4305
4306 while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
4307
4308 yield();
4309 ar->len = ar->len >> 1;
4310 }
4311 if (!ar->len) {
4312 *errp = -ENOSPC;
4313 return 0;
4314 }
4315 reserv_blks = ar->len;
4316 while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
4317 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4318 ar->len--;
4319 }
4320 inquota = ar->len;
4321 if (ar->len == 0) {
4322 *errp = -EDQUOT;
4323 goto out;
4324 }
4325 }
4326
4327 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4328 if (!ac) {
4329 ar->len = 0;
4330 *errp = -ENOMEM;
4331 goto out;
4332 }
4333
4334 *errp = ext4_mb_initialize_context(ac, ar);
4335 if (*errp) {
4336 ar->len = 0;
4337 goto out;
4338 }
4339
4340 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4341 if (!ext4_mb_use_preallocated(ac)) {
4342 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4343 ext4_mb_normalize_request(ac, ar);
4344repeat:
4345
4346 *errp = ext4_mb_regular_allocator(ac);
4347 if (*errp)
4348 goto errout;
4349
4350
4351
4352
4353 if (ac->ac_status == AC_STATUS_FOUND &&
4354 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4355 ext4_mb_new_preallocation(ac);
4356 }
4357 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4358 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4359 if (*errp == -EAGAIN) {
4360
4361
4362
4363
4364 ext4_mb_release_context(ac);
4365 ac->ac_b_ex.fe_group = 0;
4366 ac->ac_b_ex.fe_start = 0;
4367 ac->ac_b_ex.fe_len = 0;
4368 ac->ac_status = AC_STATUS_CONTINUE;
4369 goto repeat;
4370 } else if (*errp)
4371 errout:
4372 ext4_discard_allocated_blocks(ac);
4373 else {
4374 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4375 ar->len = ac->ac_b_ex.fe_len;
4376 }
4377 } else {
4378 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4379 if (freed)
4380 goto repeat;
4381 *errp = -ENOSPC;
4382 }
4383
4384 if (*errp) {
4385 ac->ac_b_ex.fe_len = 0;
4386 ar->len = 0;
4387 ext4_mb_show_ac(ac);
4388 }
4389 ext4_mb_release_context(ac);
4390out:
4391 if (ac)
4392 kmem_cache_free(ext4_ac_cachep, ac);
4393 if (inquota && ar->len < inquota)
4394 dquot_free_block(ar->inode, inquota - ar->len);
4395 if (!ar->len) {
4396 if (!ext4_test_inode_state(ar->inode,
4397 EXT4_STATE_DELALLOC_RESERVED))
4398
4399 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
4400 reserv_blks);
4401 }
4402
4403 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4404
4405 return block;
4406}
4407
4408
4409
4410
4411
4412
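/*
 * Two queued free extents can be merged if they are physically adjacent
 * and belong to the same group and transaction.
 */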
4413static int can_merge(struct ext4_free_data *entry1,
4414 struct ext4_free_data *entry2)
4415{
4416 if ((entry1->t_tid == entry2->t_tid) &&
4417 (entry1->group == entry2->group) &&
4418 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4419 return 1;
4420 return 0;
4421}
4422
4423static noinline_for_stack int
4424ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4425 struct ext4_free_data *new_entry)
4426{
4427 ext4_group_t group = e4b->bd_group;
4428 ext4_grpblk_t block;
4429 struct ext4_free_data *entry;
4430 struct ext4_group_info *db = e4b->bd_info;
4431 struct super_block *sb = e4b->bd_sb;
4432 struct ext4_sb_info *sbi = EXT4_SB(sb);
4433 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4434 struct rb_node *parent = NULL, *new_node;
4435
4436 BUG_ON(!ext4_handle_valid(handle));
4437 BUG_ON(e4b->bd_bitmap_page == NULL);
4438 BUG_ON(e4b->bd_buddy_page == NULL);
4439
4440 new_node = &new_entry->node;
4441 block = new_entry->start_blk;
4442
4443 if (!*n) {
4444
4445
4446
4447
4448
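/*
 * First freed extent for this group: pin the buddy and bitmap pages so
 * the buddy cache is not reclaimed and regenerated from the on-disk
 * bitmap before these blocks become available again; the references are
 * dropped in release_blocks_on_commit().
 */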
4449 page_cache_get(e4b->bd_buddy_page);
4450 page_cache_get(e4b->bd_bitmap_page);
4451 }
4452 while (*n) {
4453 parent = *n;
4454 entry = rb_entry(parent, struct ext4_free_data, node);
4455 if (block < entry->start_blk)
4456 n = &(*n)->rb_left;
4457 else if (block >= (entry->start_blk + entry->count))
4458 n = &(*n)->rb_right;
4459 else {
4460 ext4_grp_locked_error(sb, group, 0,
4461 ext4_group_first_block_no(sb, group) + block,
4462 "Block already on to-be-freed list");
4463 return 0;
4464 }
4465 }
4466
4467 rb_link_node(new_node, parent, n);
4468 rb_insert_color(new_node, &db->bb_free_root);
4469
4470
4471 node = rb_prev(new_node);
4472 if (node) {
4473 entry = rb_entry(node, struct ext4_free_data, node);
4474 if (can_merge(entry, new_entry)) {
4475 new_entry->start_blk = entry->start_blk;
4476 new_entry->count += entry->count;
4477 rb_erase(node, &(db->bb_free_root));
4478 spin_lock(&sbi->s_md_lock);
4479 list_del(&entry->list);
4480 spin_unlock(&sbi->s_md_lock);
4481 kmem_cache_free(ext4_free_ext_cachep, entry);
4482 }
4483 }
4484
4485 node = rb_next(new_node);
4486 if (node) {
4487 entry = rb_entry(node, struct ext4_free_data, node);
4488 if (can_merge(new_entry, entry)) {
4489 new_entry->count += entry->count;
4490 rb_erase(node, &(db->bb_free_root));
4491 spin_lock(&sbi->s_md_lock);
4492 list_del(&entry->list);
4493 spin_unlock(&sbi->s_md_lock);
4494 kmem_cache_free(ext4_free_ext_cachep, entry);
4495 }
4496 }
4497
4498 spin_lock(&sbi->s_md_lock);
4499 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4500 spin_unlock(&sbi->s_md_lock);
4501 return 0;
4502}
4503
4504
4505
4506
4507
4508
4509
4510
4511
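/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode owning the blocks
 * @bh:		optional buffer head for a single block being freed
 * @block:	starting physical block to free
 * @count:	number of blocks to free
 * @flags:	EXT4_FREE_BLOCKS_* flags
 */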
4512void ext4_free_blocks(handle_t *handle, struct inode *inode,
4513 struct buffer_head *bh, ext4_fsblk_t block,
4514 unsigned long count, int flags)
4515{
4516 struct buffer_head *bitmap_bh = NULL;
4517 struct super_block *sb = inode->i_sb;
4518 struct ext4_group_desc *gdp;
4519 unsigned long freed = 0;
4520 unsigned int overflow;
4521 ext4_grpblk_t bit;
4522 struct buffer_head *gd_bh;
4523 ext4_group_t block_group;
4524 struct ext4_sb_info *sbi;
4525 struct ext4_buddy e4b;
4526 int err = 0;
4527 int ret;
4528
4529 if (bh) {
4530 if (block)
4531 BUG_ON(block != bh->b_blocknr);
4532 else
4533 block = bh->b_blocknr;
4534 }
4535
4536 sbi = EXT4_SB(sb);
4537 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4538 !ext4_data_block_valid(sbi, block, count)) {
4539 ext4_error(sb, "Freeing blocks not in datazone - "
4540 "block = %llu, count = %lu", block, count);
4541 goto error_return;
4542 }
4543
4544 ext4_debug("freeing block %llu\n", block);
4545 trace_ext4_free_blocks(inode, block, count, flags);
4546
4547 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4548 struct buffer_head *tbh = bh;
4549 int i;
4550
4551 BUG_ON(bh && (count > 1));
4552
4553 for (i = 0; i < count; i++) {
4554 if (!bh)
4555 tbh = sb_find_get_block(inode->i_sb,
4556 block + i);
4557 if (unlikely(!tbh))
4558 continue;
4559 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4560 inode, tbh, block + i);
4561 }
4562 }
4563
4564
4565
4566
4567
4568
4569
4570
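/*
 * Freed blocks must not be reused until the transaction commits, which
 * we get by treating them as metadata below. Writeback mode is the
 * exception, since it only gives weak data-consistency guarantees
 * anyway.
 */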
4571 if (!ext4_should_writeback_data(inode))
4572 flags |= EXT4_FREE_BLOCKS_METADATA;
4573
4574do_more:
4575 overflow = 0;
4576 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4577
4578
4579
4580
4581
4582 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4583 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
4584 count -= overflow;
4585 }
4586 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4587 if (!bitmap_bh) {
4588 err = -EIO;
4589 goto error_return;
4590 }
4591 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4592 if (!gdp) {
4593 err = -EIO;
4594 goto error_return;
4595 }
4596
4597 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4598 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4599 in_range(block, ext4_inode_table(sb, gdp),
4600 EXT4_SB(sb)->s_itb_per_group) ||
4601 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4602 EXT4_SB(sb)->s_itb_per_group)) {
4603
4604 ext4_error(sb, "Freeing blocks in system zone - "
4605 "Block = %llu, count = %lu", block, count);
4606
4607 goto error_return;
4608 }
4609
4610 BUFFER_TRACE(bitmap_bh, "getting write access");
4611 err = ext4_journal_get_write_access(handle, bitmap_bh);
4612 if (err)
4613 goto error_return;
4614
4615
4616
4617
4618
4619
4620 BUFFER_TRACE(gd_bh, "get_write_access");
4621 err = ext4_journal_get_write_access(handle, gd_bh);
4622 if (err)
4623 goto error_return;
4624#ifdef AGGRESSIVE_CHECK
4625 {
4626 int i;
4627 for (i = 0; i < count; i++)
4628 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4629 }
4630#endif
4631 trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
4632
4633 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4634 if (err)
4635 goto error_return;
4636
4637 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4638 struct ext4_free_data *new_entry;
4639
4640
4641
4642
4643 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4644 if (!new_entry) {
4645 err = -ENOMEM;
4646 goto error_return;
4647 }
4648 new_entry->start_blk = bit;
4649 new_entry->group = block_group;
4650 new_entry->count = count;
4651 new_entry->t_tid = handle->h_transaction->t_tid;
4652
4653 ext4_lock_group(sb, block_group);
4654 mb_clear_bits(bitmap_bh->b_data, bit, count);
4655 ext4_mb_free_metadata(handle, &e4b, new_entry);
4656 } else {
4657
4658
4659
4660
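/*
 * Data blocks: update bb_free and the bitmap together under the group
 * lock, since the buddy-generating code looks at them under that same
 * lock.
 */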
4661 ext4_lock_group(sb, block_group);
4662 mb_clear_bits(bitmap_bh->b_data, bit, count);
4663 mb_free_blocks(inode, &e4b, bit, count);
4664 }
4665
4666 ret = ext4_free_blks_count(sb, gdp) + count;
4667 ext4_free_blks_set(sb, gdp, ret);
4668 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4669 ext4_unlock_group(sb, block_group);
4670 percpu_counter_add(&sbi->s_freeblocks_counter, count);
4671
4672 if (sbi->s_log_groups_per_flex) {
4673 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4674 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
4675 }
4676
4677 ext4_mb_unload_buddy(&e4b);
4678
4679 freed += count;
4680
4681
4682 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4683 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4684
4685
4686 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4687 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4688 if (!err)
4689 err = ret;
4690
4691 if (overflow && !err) {
4692 block += count;
4693 count = overflow;
4694 put_bh(bitmap_bh);
4695 goto do_more;
4696 }
4697 ext4_mark_super_dirty(sb);
4698error_return:
4699 if (freed)
4700 dquot_free_block(inode, freed);
4701 brelse(bitmap_bh);
4702 ext4_std_error(sb, err);
4703 return;
4704}
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
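/*
 * Trim one free extent in a group: the extent is marked used in the
 * buddy so nobody can allocate it while the group lock is dropped for
 * the discard, then freed again. Called under the group lock.
 */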
4718static int ext4_trim_extent(struct super_block *sb, int start, int count,
4719 ext4_group_t group, struct ext4_buddy *e4b)
4720{
4721 struct ext4_free_extent ex;
4722 int ret = 0;
4723
4724 assert_spin_locked(ext4_group_lock_ptr(sb, group));
4725
4726 ex.fe_start = start;
4727 ex.fe_group = group;
4728 ex.fe_len = count;
4729
4730
4731
4732
4733
4734 mb_mark_used(e4b, &ex);
4735 ext4_unlock_group(sb, group);
4736
4737 ret = ext4_issue_discard(sb, group, start, count);
4738
4739 ext4_lock_group(sb, group);
4740 mb_free_blocks(NULL, e4b, start, ex.fe_len);
4741 return ret;
4742}
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
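/*
 * Walk the group's in-core bitmap between start and max, and pass every
 * free extent of at least minblocks blocks to ext4_trim_extent().
 * Returns the number of blocks trimmed, or a negative error.
 */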
4762static ext4_grpblk_t
4763ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
4764 ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
4765{
4766 void *bitmap;
4767 ext4_grpblk_t next, count = 0;
4768 ext4_group_t group;
4769 int ret = 0;
4770
4771 BUG_ON(e4b == NULL);
4772
4773 bitmap = e4b->bd_bitmap;
4774 group = e4b->bd_group;
4775 start = (e4b->bd_info->bb_first_free > start) ?
4776 e4b->bd_info->bb_first_free : start;
4777 ext4_lock_group(sb, group);
4778
4779 while (start < max) {
4780 start = mb_find_next_zero_bit(bitmap, max, start);
4781 if (start >= max)
4782 break;
4783 next = mb_find_next_bit(bitmap, max, start);
4784
4785 if ((next - start) >= minblocks) {
4786 ret = ext4_trim_extent(sb, start,
4787 next - start, group, e4b);
4788 if (ret < 0)
4789 break;
4790 count += next - start;
4791 }
4792 start = next + 1;
4793
4794 if (fatal_signal_pending(current)) {
4795 count = -ERESTARTSYS;
4796 break;
4797 }
4798
4799 if (need_resched()) {
4800 ext4_unlock_group(sb, group);
4801 cond_resched();
4802 ext4_lock_group(sb, group);
4803 }
4804
4805 if ((e4b->bd_info->bb_free - count) < minblocks)
4806 break;
4807 }
4808 ext4_unlock_group(sb, group);
4809
4810 ext4_debug("trimmed %d blocks in the group %d\n",
4811 count, group);
4812
4813 if (ret < 0)
4814 count = ret;
4815
4816 return count;
4817}
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
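/*
 * FITRIM ioctl handler: walk all groups that intersect the byte range in
 * *range and trim their free space, honouring the minimum extent length;
 * range->len is updated to the number of bytes actually trimmed.
 */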
4831int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4832{
4833 struct ext4_buddy e4b;
4834 ext4_group_t first_group, last_group;
4835 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4836 ext4_grpblk_t cnt = 0, first_block, last_block;
4837 uint64_t start, len, minlen, trimmed;
4838 ext4_fsblk_t first_data_blk =
4839 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4840 int ret = 0;
4841
4842 start = range->start >> sb->s_blocksize_bits;
4843 len = range->len >> sb->s_blocksize_bits;
4844 minlen = range->minlen >> sb->s_blocksize_bits;
4845 trimmed = 0;
4846
4847 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
4848 return -EINVAL;
4849 if (start < first_data_blk) {
4850 len -= first_data_blk - start;
4851 start = first_data_blk;
4852 }
4853
4854
4855 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4856 &first_group, &first_block);
4857 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4858 &last_group, &last_block);
4859 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4860 last_block = EXT4_BLOCKS_PER_GROUP(sb);
4861
4862 if (first_group > last_group)
4863 return -EINVAL;
4864
4865 for (group = first_group; group <= last_group; group++) {
4866 ret = ext4_mb_load_buddy(sb, group, &e4b);
4867 if (ret) {
4868 ext4_error(sb, "Error in loading buddy "
4869 "information for %u", group);
4870 break;
4871 }
4872
4873
4874
4875
4876
4877
4878
4879 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
4880 last_block = first_block + len;
4881 len -= last_block - first_block;
4882
4883 if (e4b.bd_info->bb_free >= minlen) {
4884 cnt = ext4_trim_all_free(sb, &e4b, first_block,
4885 last_block, minlen);
4886 if (cnt < 0) {
4887 ret = cnt;
4888 ext4_mb_unload_buddy(&e4b);
4889 break;
4890 }
4891 }
4892 ext4_mb_unload_buddy(&e4b);
4893 trimmed += cnt;
4894 first_block = 0;
4895 }
4896 range->len = trimmed * sb->s_blocksize;
4897
4898 return ret;
4899}
4900