/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 */

/*
 * mballoc.c contains the multiblocks allocation routines
 */
#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <trace/events/ext4.h>

#ifdef CONFIG_EXT4_DEBUG
ushort ext4_mballoc_debug __read_mostly;

module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
#endif
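
/*
 * Overview (summarizing the code below): the allocator works per block
 * group through a "buddy cache".  For every group, the on-disk block
 * bitmap and a derived buddy bitmap are kept in consecutive blocks of a
 * private in-memory inode (s_buddy_cache).  In the buddy bitmaps a
 * cleared bit at order k marks a free, aligned chunk of 2^k clusters,
 * and per-group counters (bb_counters[order]) track how many free chunks
 * of each order exist, so large requests can be satisfied without
 * scanning the whole bitmap.
 *
 * A request is served by scanning groups under increasingly relaxed
 * criteria (cr 0..3, see ext4_mb_good_group()): first an exact
 * power-of-two chunk via the buddy data, then groups whose average free
 * extent covers the request, then any group with enough free clusters,
 * and finally anything at all.
 */
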
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_data_cachep;

/* We create slab caches for groupinfo data structures based on the
 * superblock block size.  There will be one per mounted filesystem for
 * each slab size. */
#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];

static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
	"ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
	"ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
};

static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_free_data_callback(struct super_block *sb,
				struct ext4_journal_cb_entry *jce, int rc);

static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
	*bit += ((unsigned long) addr & 7UL) << 3;
	addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
	*bit += ((unsigned long) addr & 3UL) << 3;
	addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "unsupported BITS_PER_LONG"
#endif
	return addr;
}
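
/*
 * Example: on a 64-bit machine, mb_correct_addr_and_bit(&bit, addr) with
 * addr == base + 5 turns (addr, bit) into (base, bit + 40): the bit index
 * absorbs the 5-byte misalignment so the word-sized bitops below always
 * see a long-aligned address.
 */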

static inline int mb_test_bit(int bit, void *addr)
{
	/*
	 * ext4_test_bit on architectures like powerpc
	 * needs an unsigned long aligned address
	 */
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_bit(bit, addr);
}

static inline void mb_set_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_set_bit(bit, addr);
}

static inline void mb_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	ext4_clear_bit(bit, addr);
}

static inline int mb_test_and_clear_bit(int bit, void *addr)
{
	addr = mb_correct_addr_and_bit(&bit, addr);
	return ext4_test_and_clear_bit(bit, addr);
}

static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int fix = 0, ret, tmpmax;
	addr = mb_correct_addr_and_bit(&fix, addr);
	tmpmax = max + fix;
	start += fix;

	ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
	if (ret > max)
		return max;
	return ret;
}

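/*
 * Buddy-bitmap layout (built in ext4_mb_init() below): order 0 lives in
 * the block bitmap itself (1 << (blkbits + 3) bits); orders 1..blkbits+1
 * are packed into the separate buddy block at byte offsets
 * s_mb_offsets[order], each holding s_mb_maxs[order] bits.  For a 4k
 * block size that is 32768 bits at order 0, then 16384 at order 1,
 * halving down to 4 bits at order 13.
 */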
static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
	char *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(max == NULL);

	if (order > e4b->bd_blkbits + 1) {
		*max = 0;
		return NULL;
	}

	/* at order 0 we see each particular block */
	if (order == 0) {
		*max = 1 << (e4b->bd_blkbits + 3);
		return e4b->bd_bitmap;
	}

	bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];

	return bb;
}

#ifdef DOUBLE_CHECK
static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int i;
	struct super_block *sb = e4b->bd_sb;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
			ext4_fsblk_t blocknr;

			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
			blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
			ext4_grp_locked_error(sb, e4b->bd_group,
					      inode ? inode->i_ino : 0,
					      blocknr,
					      "freeing block already freed "
					      "(bit %u)",
					      first + i);
		}
		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
{
	int i;

	if (unlikely(e4b->bd_info->bb_bitmap == NULL))
		return;
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	for (i = 0; i < count; i++) {
		BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
		mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
	}
}

static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	if (memcmp(e4b->bd_info->bb_bitmap, bitmap, e4b->bd_sb->s_blocksize)) {
		unsigned char *b1, *b2;
		int i;
		b1 = (unsigned char *) e4b->bd_info->bb_bitmap;
		b2 = (unsigned char *) bitmap;
		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
			if (b1[i] != b2[i]) {
				ext4_msg(e4b->bd_sb, KERN_ERR,
					 "corruption in group %u "
					 "at byte %u(%u): %x in copy != %x "
					 "on disk/prealloc",
					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
				BUG();
			}
		}
	}
}

#else
static inline void mb_free_blocks_double(struct inode *inode,
				struct ext4_buddy *e4b, int first, int count)
{
	return;
}
static inline void mb_mark_used_double(struct ext4_buddy *e4b,
						int first, int count)
{
	return;
}
static inline void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
{
	return;
}
#endif

#ifdef AGGRESSIVE_CHECK

#define MB_CHECK_ASSERT(assert)						\
do {									\
	if (!(assert)) {						\
		printk(KERN_EMERG					\
			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
			function, file, line, # assert);		\
		BUG();							\
	}								\
} while (0)

static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
				const char *function, int line)
{
	struct super_block *sb = e4b->bd_sb;
	int order = e4b->bd_blkbits + 1;
	int max;
	int max2;
	int i;
	int j;
	int k;
	int count;
	struct ext4_group_info *grp;
	int fragments = 0;
	int fstart;
	struct list_head *cur;
	void *buddy;
	void *buddy2;

	{
		static int mb_check_counter;
		if (mb_check_counter++ % 100 != 0)
			return 0;
	}

	while (order > 1) {
		buddy = mb_find_buddy(e4b, order, &max);
		MB_CHECK_ASSERT(buddy);
		buddy2 = mb_find_buddy(e4b, order - 1, &max2);
		MB_CHECK_ASSERT(buddy2);
		MB_CHECK_ASSERT(buddy != buddy2);
		MB_CHECK_ASSERT(max * 2 == max2);

		count = 0;
		for (i = 0; i < max; i++) {

			if (mb_test_bit(i, buddy)) {
				/* only single bit in buddy2 may be 1 */
				if (!mb_test_bit(i << 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit((i<<1)+1, buddy2));
				} else if (!mb_test_bit((i << 1) + 1, buddy2)) {
					MB_CHECK_ASSERT(
						mb_test_bit(i << 1, buddy2));
				}
				continue;
			}

			/* both bits in buddy2 must be 1 */
			MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
			MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));

			for (j = 0; j < (1 << order); j++) {
				k = (i * (1 << order)) + j;
				MB_CHECK_ASSERT(
					!mb_test_bit(k, e4b->bd_bitmap));
			}
			count++;
		}
		MB_CHECK_ASSERT(e4b->bd_info->bb_counters[order] == count);
		order--;
	}

	fstart = -1;
	buddy = mb_find_buddy(e4b, 0, &max);
	for (i = 0; i < max; i++) {
		if (!mb_test_bit(i, buddy)) {
			MB_CHECK_ASSERT(i >= e4b->bd_info->bb_first_free);
			if (fstart == -1) {
				fragments++;
				fstart = i;
			}
			continue;
		}
		fstart = -1;
		/* check used bits only */
		for (j = 0; j < e4b->bd_blkbits + 1; j++) {
			buddy2 = mb_find_buddy(e4b, j, &max2);
			k = i >> j;
			MB_CHECK_ASSERT(k < max2);
			MB_CHECK_ASSERT(mb_test_bit(k, buddy2));
		}
	}
	MB_CHECK_ASSERT(!EXT4_MB_GRP_NEED_INIT(e4b->bd_info));
	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);

	grp = ext4_get_group_info(sb, e4b->bd_group);
	list_for_each(cur, &grp->bb_prealloc_list) {
		ext4_group_t groupnr;
		struct ext4_prealloc_space *pa;
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
		MB_CHECK_ASSERT(groupnr == e4b->bd_group);
		for (i = 0; i < pa->pa_len; i++)
			MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
	}
	return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b,	\
					__FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif

/*
 * Divide the blocks starting at @first with length @len into chunks of
 * power-of-two sizes.  Clear the bits in the buddy bitmap covered by
 * each chunk, then increment bb_counters[] for the corresponding chunk
 * size.
 */
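/*
 * Example: first = 5, len = 6 (clusters 5..10) splits into [5], [6-7],
 * [8-9] and [10]: each chunk is the largest power of two that both fits
 * in the remaining length and keeps @first aligned to the chunk size.
 */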
static void ext4_mb_mark_free_simple(struct super_block *sb,
				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
					struct ext4_group_info *grp)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t min;
	ext4_grpblk_t max;
	ext4_grpblk_t chunk;
	unsigned short border;

	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));

	border = 2 << sb->s_blocksize_bits;

	while (len > 0) {
		/* find how many blocks can be covered since this position */
		max = ffs(first | border) - 1;

		/* find how many blocks of power 2 we need to mark */
		min = fls(len) - 1;

		if (max < min)
			min = max;
		chunk = 1 << min;

		/* mark multiblock chunks only */
		grp->bb_counters[min]++;
		if (min > 0)
			mb_clear_bit(first >> min,
				     buddy + sbi->s_mb_offsets[min]);

		len -= chunk;
		first += chunk;
	}
}

/*
 * Cache the order of the largest free extent we have available in this
 * block group.
 */
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	int i;
	int bits;

	grp->bb_largest_free_order = -1;

	bits = sb->s_blocksize_bits + 1;
	for (i = bits; i >= 0; i--) {
		if (grp->bb_counters[i] > 0) {
			grp->bb_largest_free_order = i;
			break;
		}
	}
}

static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
	ext4_grpblk_t i = 0;
	ext4_grpblk_t first;
	ext4_grpblk_t len;
	unsigned free = 0;
	unsigned fragments = 0;
	unsigned long long period = get_cycles();

	/* initialize buddy from bitmap which is aggregation
	 * of on-disk bitmap and preallocations */
	i = mb_find_next_zero_bit(bitmap, max, 0);
	grp->bb_first_free = i;
	while (i < max) {
		fragments++;
		first = i;
		i = mb_find_next_bit(bitmap, max, i);
		len = i - first;
		free += len;
		if (len > 1)
			ext4_mb_mark_free_simple(sb, buddy, first, len, grp);
		else
			grp->bb_counters[0]++;
		if (i < max)
			i = mb_find_next_zero_bit(bitmap, max, i);
	}
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_grp_locked_error(sb, group, 0, 0,
				      "block bitmap and bg descriptor "
				      "inconsistent: %u vs %u free clusters",
				      free, grp->bb_free);
		/*
		 * If we intend to continue, we consider the group descriptor
		 * corrupt and update bb_free using the bitmap value
		 */
		grp->bb_free = free;
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
	}
	mb_set_largest_free_order(sb, grp);

	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));

	period = get_cycles() - period;
	spin_lock(&EXT4_SB(sb)->s_bal_lock);
	EXT4_SB(sb)->s_mb_buddies_generated++;
	EXT4_SB(sb)->s_mb_generation_time += period;
	spin_unlock(&EXT4_SB(sb)->s_bal_lock);
}

static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
	int count;
	int order = 1;
	void *buddy;

	while ((buddy = mb_find_buddy(e4b, order++, &count))) {
		ext4_set_bits(buddy, 0, count);
	}
	e4b->bd_info->bb_fragments = 0;
	memset(e4b->bd_info->bb_counters, 0,
	       sizeof(*e4b->bd_info->bb_counters) *
	       (e4b->bd_sb->s_blocksize_bits + 2));

	ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
			       e4b->bd_bitmap, e4b->bd_group);
}

/*
 * The buddy information is attached to the buddy cache inode for
 * convenience.  The information regarding each group is loaded via
 * ext4_mb_load_buddy.  The information involved is the block bitmap plus
 * the buddy bitmap, stored in the inode as:
 *
 * {                        page                        }
 * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
 *
 * one block each for the bitmap and the buddy information.  So for each
 * group we take up 2 blocks.  A page can contain blocks_per_page
 * (PAGE_SIZE / blocksize) blocks, so it holds information for
 * blocks_per_page/2 groups.
 *
 * Locking note:  This routine takes the block group lock of all groups
 * for this page; do not hold this lock when calling this routine!
 */
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
	ext4_group_t ngroups;
	int blocksize;
	int blocks_per_page;
	int groups_per_page;
	int err = 0;
	int i;
	ext4_group_t first_group, group;
	int first_block;
	struct super_block *sb;
	struct buffer_head *bhs;
	struct buffer_head **bh = NULL;
	struct inode *inode;
	char *data;
	char *bitmap;
	struct ext4_group_info *grinfo;

	mb_debug(1, "init page %lu\n", page->index);

	inode = page->mapping->host;
	sb = inode->i_sb;
	ngroups = ext4_get_groups_count(sb);
	blocksize = 1 << inode->i_blkbits;
	blocks_per_page = PAGE_SIZE / blocksize;

	groups_per_page = blocks_per_page >> 1;
	if (groups_per_page == 0)
		groups_per_page = 1;

	/* allocate buffer_heads to read bitmaps */
	if (groups_per_page > 1) {
		i = sizeof(struct buffer_head *) * groups_per_page;
		bh = kzalloc(i, gfp);
		if (bh == NULL) {
			err = -ENOMEM;
			goto out;
		}
	} else
		bh = &bhs;

	first_group = page->index * blocks_per_page / 2;

	/* read all groups the page covers into the cache */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		if (group >= ngroups)
			break;

		grinfo = ext4_get_group_info(sb, group);
		/*
		 * If the page is uptodate then we came here after online
		 * resize which added some new uninitialized group info
		 * structs, so we must skip all initialized uptodate buddies
		 * on the page, which may be currently in use by an
		 * allocating task.
		 */
		if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
			bh[i] = NULL;
			continue;
		}
		bh[i] = ext4_read_block_bitmap_nowait(sb, group);
		if (IS_ERR(bh[i])) {
			err = PTR_ERR(bh[i]);
			bh[i] = NULL;
			goto out;
		}
		mb_debug(1, "read bitmap for group %u\n", group);
	}

	/* wait for I/O completion */
	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
		int err2;

		if (!bh[i])
			continue;
		err2 = ext4_wait_block_bitmap(sb, group, bh[i]);
		if (!err)
			err = err2;
	}

	first_block = page->index * blocks_per_page;
	for (i = 0; i < blocks_per_page; i++) {
		group = (first_block + i) >> 1;
		if (group >= ngroups)
			break;

		if (!bh[group - first_group])
			/* skip initialized uptodate buddy */
			continue;

		if (!buffer_verified(bh[group - first_group]))
			/* Skip faulty bitmaps */
			continue;
		err = 0;

		/*
		 * data carries information regarding this
		 * particular group in the format specified
		 * above
		 */
		data = page_address(page) + (i * blocksize);
		bitmap = bh[group - first_group]->b_data;

		/*
		 * We place the buddy block and bitmap block
		 * close together
		 */
		if ((first_block + i) & 1) {
			/* this is block of buddy */
			BUG_ON(incore == NULL);
			mb_debug(1, "put buddy for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_buddy_bitmap_load(sb, group);
			grinfo = ext4_get_group_info(sb, group);
			grinfo->bb_fragments = 0;
			memset(grinfo->bb_counters, 0,
			       sizeof(*grinfo->bb_counters) *
				(sb->s_blocksize_bits+2));
			/*
			 * incore got set to the group block bitmap below
			 */
			ext4_lock_group(sb, group);
			/* init the buddy */
			memset(data, 0xff, blocksize);
			ext4_mb_generate_buddy(sb, data, incore, group);
			ext4_unlock_group(sb, group);
			incore = NULL;
		} else {
			/* this is block of bitmap */
			BUG_ON(incore != NULL);
			mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
				group, page->index, i * blocksize);
			trace_ext4_mb_bitmap_load(sb, group);

			ext4_lock_group(sb, group);
			memcpy(data, bitmap, blocksize);

			/* mark all preallocated blks used in in-core bitmap */
			ext4_mb_generate_from_pa(sb, data, group);
			ext4_mb_generate_from_freelist(sb, data, group);
			ext4_unlock_group(sb, group);

			/* set incore so that the buddy information can be
			 * generated using this
			 */
			incore = data;
		}
	}
	SetPageUptodate(page);

out:
	if (bh) {
		for (i = 0; i < groups_per_page; i++)
			brelse(bh[i]);
		if (bh != &bhs)
			kfree(bh);
	}
	return err;
}

/*
 * Lock the buddy and bitmap pages.  This makes sure other parallel
 * ext4_mb_init_group() calls on the same buddy page don't race while
 * holding the page lock.  Return the locked buddy and bitmap pages on
 * the e4b struct.  If the buddy and bitmap are on the same page, only
 * the bitmap page is locked and bd_buddy_page is left NULL.
 */
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
	int block, pnum, poff;
	int blocks_per_page;
	struct page *page;

	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	if (blocks_per_page >= 2) {
		/* buddy and bitmap are on the same page */
		return 0;
	}

	block++;
	pnum = block / blocks_per_page;
	page = find_or_create_page(inode->i_mapping, pnum, gfp);
	if (!page)
		return -ENOMEM;
	BUG_ON(page->mapping != inode->i_mapping);
	e4b->bd_buddy_page = page;
	return 0;
}

static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page) {
		unlock_page(e4b->bd_bitmap_page);
		put_page(e4b->bd_bitmap_page);
	}
	if (e4b->bd_buddy_page) {
		unlock_page(e4b->bd_buddy_page);
		put_page(e4b->bd_buddy_page);
	}
}

/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the block group
 * lock when calling this routine!
 */
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{
	struct ext4_group_info *this_grp;
	struct ext4_buddy e4b;
	struct page *page;
	int ret = 0;

	might_sleep();
	mb_debug(1, "init group %u\n", group);
	this_grp = ext4_get_group_info(sb, group);
	/*
	 * This ensures that we don't reinit the buddy cache page which
	 * maps to the group from which we are already allocating.  If we
	 * are looking at the buddy cache we would have taken a reference
	 * using ext4_mb_load_buddy and that would have pinned the buddy
	 * page to the page cache.  The call to ext4_mb_get_buddy_page_lock
	 * will mark the page accessed.
	 */
	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
		/*
		 * somebody initialized the group;
		 * return without doing anything
		 */
		goto err;
	}

	page = e4b.bd_bitmap_page;
	ret = ext4_mb_init_cache(page, NULL, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	if (e4b.bd_buddy_page == NULL) {
		/*
		 * If both the bitmap and buddy are in
		 * the same page we don't need to force
		 * init the buddy
		 */
		ret = 0;
		goto err;
	}
	/* init buddy cache */
	page = e4b.bd_buddy_page;
	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
	if (ret)
		goto err;
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}
err:
	ext4_mb_put_buddy_page_lock(&e4b);
	return ret;
}

/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the block group
 * lock when calling this routine!
 */
static noinline_for_stack int
ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
		       struct ext4_buddy *e4b, gfp_t gfp)
{
	int blocks_per_page;
	int block;
	int pnum;
	int poff;
	struct page *page;
	int ret;
	struct ext4_group_info *grp;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct inode *inode = sbi->s_buddy_cache;

	might_sleep();
	mb_debug(1, "load group %u\n", group);

	blocks_per_page = PAGE_SIZE / sb->s_blocksize;
	grp = ext4_get_group_info(sb, group);

	e4b->bd_blkbits = sb->s_blocksize_bits;
	e4b->bd_info = grp;
	e4b->bd_sb = sb;
	e4b->bd_group = group;
	e4b->bd_buddy_page = NULL;
	e4b->bd_bitmap_page = NULL;

	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		/*
		 * we need full data about the group
		 * to make a good selection
		 */
		ret = ext4_mb_init_group(sb, group, gfp);
		if (ret)
			return ret;
	}

	/*
	 * the buddy cache inode stores the block bitmap
	 * and buddy information in consecutive blocks.
	 * So for each group we need two blocks.
	 */
	block = group * 2;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	/* we could use find_or_create_page(), but it locks the page,
	 * which we'd like to avoid in the fast path ... */
	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			/*
			 * drop the page reference and try
			 * to get the page with lock. If we
			 * are not uptodate that implies
			 * somebody just created the page but
			 * is yet to initialize it. So
			 * wait for it to initialize.
			 */
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, NULL, gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
				mb_cmp_bitmaps(e4b, page_address(page) +
					       (poff * sb->s_blocksize));
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_bitmap_page = page;
	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);

	block++;
	pnum = block / blocks_per_page;
	poff = block % blocks_per_page;

	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
	if (page == NULL || !PageUptodate(page)) {
		if (page)
			put_page(page);
		page = find_or_create_page(inode->i_mapping, pnum, gfp);
		if (page) {
			BUG_ON(page->mapping != inode->i_mapping);
			if (!PageUptodate(page)) {
				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
							 gfp);
				if (ret) {
					unlock_page(page);
					goto err;
				}
			}
			unlock_page(page);
		}
	}
	if (page == NULL) {
		ret = -ENOMEM;
		goto err;
	}
	if (!PageUptodate(page)) {
		ret = -EIO;
		goto err;
	}

	/* Pages marked accessed already */
	e4b->bd_buddy_page = page;
	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);

	BUG_ON(e4b->bd_bitmap_page == NULL);
	BUG_ON(e4b->bd_buddy_page == NULL);

	return 0;

err:
	if (page)
		put_page(page);
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
	e4b->bd_buddy = NULL;
	e4b->bd_bitmap = NULL;
	return ret;
}

static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
			      struct ext4_buddy *e4b)
{
	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
}

static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
	if (e4b->bd_bitmap_page)
		put_page(e4b->bd_bitmap_page);
	if (e4b->bd_buddy_page)
		put_page(e4b->bd_buddy_page);
}

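/*
 * Return the buddy order of the free chunk containing @block by walking
 * up the per-order buddy bitmaps: a cleared bit at order k means the
 * 2^k-sized aligned chunk covering the block is free at that order.
 * Returns 0 if no covering free buddy is found.
 */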
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
	int order = 1;
	int bb_incr = 1 << (e4b->bd_blkbits - 1);
	void *bb;

	BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
	BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));

	bb = e4b->bd_buddy;
	while (order <= e4b->bd_blkbits + 1) {
		block = block >> 1;
		if (!mb_test_bit(block, bb)) {
			/* this block is part of buddy of order 'order' */
			return order;
		}
		bb += bb_incr;
		bb_incr >>= 1;
		order++;
	}
	return 0;
}

static void mb_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0;
			cur += 32;
			continue;
		}
		mb_clear_bit(cur, bm);
		cur++;
	}
}

/* clear bits in given range
 * will return first found zero bit if any, -1 otherwise
 */
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
	__u32 *addr;
	int zero_bit = -1;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: clear whole word at once */
			addr = bm + (cur >> 3);
			if (*addr != (__u32)(-1) && zero_bit == -1)
				zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
			*addr = 0;
			cur += 32;
			continue;
		}
		if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
			zero_bit = cur;
		cur++;
	}

	return zero_bit;
}

void ext4_set_bits(void *bm, int cur, int len)
{
	__u32 *addr;

	len = cur + len;
	while (cur < len) {
		if ((cur & 31) == 0 && (len - cur) >= 32) {
			/* fast path: set whole word at once */
			addr = bm + (cur >> 3);
			*addr = 0xffffffff;
			cur += 32;
			continue;
		}
		mb_set_bit(cur, bm);
		cur++;
	}
}

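/*
 * Fix one unaligned border while freeing a range at a given buddy order.
 * If the neighbouring buddy is still in use (its bit is set), our half
 * becomes a free chunk at this order: clear our bit and return +1.
 * Otherwise the free neighbour is absorbed so the pair can merge one
 * order up: set its bit, widen the range, and return -1.  The caller
 * adds the result to bb_counters[order].
 */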
static inline int mb_buddy_adjust_border(int *bit, void *bitmap, int side)
{
	if (mb_test_bit(*bit + side, bitmap)) {
		mb_clear_bit(*bit, bitmap);
		(*bit) -= side;
		return 1;
	}
	else {
		(*bit) += side;
		mb_set_bit(*bit, bitmap);
		return -1;
	}
}

static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
	int max;
	int order = 1;
	void *buddy = mb_find_buddy(e4b, order, &max);

	while (buddy) {
		void *buddy2;

		/* Bits in range [first; last] are known to be set since
		 * corresponding blocks were allocated. Bits in range
		 * (first; last) will stay set because they form buddies on
		 * the upper layer. We just deal with borders if they don't
		 * align with the upper layer and then go up.
		 * Releasing the entire group is all about clearing a
		 * single bit of higher order and updating statistics:
		 *    - counters
		 *    - largest blocks
		 */
		if (first & 1)
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
		if (!(last & 1))
			e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
		if (first > last)
			break;
		order++;

		if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
			mb_clear_bits(buddy, first, last - first + 1);
			e4b->bd_info->bb_counters[order - 1] += last - first + 1;
			break;
		}
		first >>= 1;
		last >>= 1;
		buddy = buddy2;
	}
}
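
/*
 * Free @count clusters starting at @first within the group loaded in
 * @e4b: clear the order-0 bits, then let mb_buddy_mark_free() merge the
 * freed range upward through the buddy orders.  The left/right neighbour
 * checks below only maintain bb_fragments and the order-0 counters at
 * the borders of the range.
 */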
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			   int first, int count)
{
	int left_is_free = 0;
	int right_is_free = 0;
	int block;
	int last = first + count - 1;
	struct super_block *sb = e4b->bd_sb;

	if (WARN_ON(count == 0))
		return;
	BUG_ON(last >= (sb->s_blocksize << 3));
	assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
	/* Don't bother if the block group is corrupt. */
	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
		return;

	mb_check_buddy(e4b);
	mb_free_blocks_double(inode, e4b, first, count);

	e4b->bd_info->bb_free += count;
	if (first < e4b->bd_info->bb_first_free)
		e4b->bd_info->bb_first_free = first;

	/* access memory sequentially: check left neighbour,
	 * clear range and then check right neighbour
	 */
	if (first != 0)
		left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
	block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
	if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
		right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);

	if (unlikely(block != -1)) {
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		ext4_fsblk_t blocknr;

		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
		blocknr += EXT4_C2B(EXT4_SB(sb), block);
		ext4_grp_locked_error(sb, e4b->bd_group,
				      inode ? inode->i_ino : 0,
				      blocknr,
				      "freeing already freed block "
				      "(bit %u); block bitmap corrupt.",
				      block);
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   e4b->bd_info->bb_free);
		/* Mark the block group as corrupt. */
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
			&e4b->bd_info->bb_state);
		mb_regenerate_buddy(e4b);
		goto done;
	}

	/* let's maintain fragments counter */
	if (left_is_free && right_is_free)
		e4b->bd_info->bb_fragments--;
	else if (!left_is_free && !right_is_free)
		e4b->bd_info->bb_fragments++;

	/* buddy[0] == bd_bitmap is a special case, so handle
	 * it right away and let mb_buddy_mark_free stay free of
	 * zero order checks.
	 * Check if neighbours are to be coalesced,
	 * adjust bitmap bb_counters and borders appropriately.
	 */
	if (first & 1) {
		first += !left_is_free;
		e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
	}
	if (!(last & 1)) {
		last -= !right_is_free;
		e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
	}

	if (first <= last)
		mb_buddy_mark_free(e4b, first >> 1, last >> 1);

done:
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
}
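
/*
 * Starting from the free cluster @block, find the free extent around it
 * and return it in @ex, extending to the right up to @needed clusters
 * using buddy information where possible.  Returns the extent length
 * found (0 if @block is in use).
 */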
static int mb_find_extent(struct ext4_buddy *e4b, int block,
				int needed, struct ext4_free_extent *ex)
{
	int next = block;
	int max, order;
	void *buddy;

	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	BUG_ON(ex == NULL);

	buddy = mb_find_buddy(e4b, 0, &max);
	BUG_ON(buddy == NULL);
	BUG_ON(block >= max);
	if (mb_test_bit(block, buddy)) {
		ex->fe_len = 0;
		ex->fe_start = 0;
		ex->fe_group = 0;
		return 0;
	}

	/* find actual order */
	order = mb_find_order_for_block(e4b, block);
	block = block >> order;

	ex->fe_len = 1 << order;
	ex->fe_start = block << order;
	ex->fe_group = e4b->bd_group;

	/* calc difference from given start */
	next = next - ex->fe_start;
	ex->fe_len -= next;
	ex->fe_start += next;

	while (needed > ex->fe_len &&
	       mb_find_buddy(e4b, order, &max)) {

		if (block + 1 >= max)
			break;

		next = (block + 1) * (1 << order);
		if (mb_test_bit(next, e4b->bd_bitmap))
			break;

		order = mb_find_order_for_block(e4b, next);

		block = next >> order;
		ex->fe_len += 1 << order;
	}

	BUG_ON(ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3)));
	return ex->fe_len;
}

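/*
 * Mark the extent @ex as in use in both the in-core bitmap and the buddy
 * data, splitting larger buddies as needed.  The return value packs
 * history for the allocation context: the low 16 bits hold the length
 * left when the first buddy split happened and the high bits that
 * buddy's order (ext4_mb_use_best_found() stores these as ac_tail and
 * ac_buddy).
 */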
static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
{
	int ord;
	int mlen = 0;
	int max = 0;
	int cur;
	int start = ex->fe_start;
	int len = ex->fe_len;
	unsigned ret = 0;
	int len0 = len;
	void *buddy;

	BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
	BUG_ON(e4b->bd_group != ex->fe_group);
	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
	mb_check_buddy(e4b);
	mb_mark_used_double(e4b, start, len);

	e4b->bd_info->bb_free -= len;
	if (e4b->bd_info->bb_first_free == start)
		e4b->bd_info->bb_first_free += len;

	/* let's maintain fragments counter */
	if (start != 0)
		mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
	if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
		max = !mb_test_bit(start + len, e4b->bd_bitmap);
	if (mlen && max)
		e4b->bd_info->bb_fragments++;
	else if (!mlen && !max)
		e4b->bd_info->bb_fragments--;

	/* let's maintain buddy itself */
	while (len) {
		ord = mb_find_order_for_block(e4b, start);

		if (((start >> ord) << ord) == start && len >= (1 << ord)) {
			/* the whole chunk may be allocated at once! */
			mlen = 1 << ord;
			buddy = mb_find_buddy(e4b, ord, &max);
			BUG_ON((start >> ord) >= max);
			mb_set_bit(start >> ord, buddy);
			e4b->bd_info->bb_counters[ord]--;
			start += mlen;
			len -= mlen;
			BUG_ON(len < 0);
			continue;
		}

		/* store for history */
		if (ret == 0)
			ret = len | (ord << 16);

		/* we have to split large buddy */
		BUG_ON(ord <= 0);
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_set_bit(start >> ord, buddy);
		e4b->bd_info->bb_counters[ord]--;

		ord--;
		cur = (start >> ord) & ~1U;
		buddy = mb_find_buddy(e4b, ord, &max);
		mb_clear_bit(cur, buddy);
		mb_clear_bit(cur + 1, buddy);
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);

	ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
	mb_check_buddy(e4b);

	return ret;
}

/*
 * Must be called under group lock!
 */
static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	int ret;

	BUG_ON(ac->ac_b_ex.fe_group != e4b->bd_group);
	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
	ac->ac_b_ex.fe_logical = ac->ac_g_ex.fe_logical;
	ret = mb_mark_used(e4b, &ac->ac_b_ex);

	/* preallocation can change ac_b_ex, thus we store actually
	 * allocated blocks for history */
	ac->ac_f_ex = ac->ac_b_ex;

	ac->ac_status = AC_STATUS_FOUND;
	ac->ac_tail = ret & 0xffff;
	ac->ac_buddy = ret >> 16;

	/*
	 * take the page reference. We want the page to be pinned
	 * so that we don't get a ext4_mb_init_cache call for this
	 * group until we update the bitmap. That would mean we
	 * double allocate blocks. The reference is dropped
	 * in ext4_mb_release_context
	 */
	ac->ac_bitmap_page = e4b->bd_bitmap_page;
	get_page(ac->ac_bitmap_page);
	ac->ac_buddy_page = e4b->bd_buddy_page;
	get_page(ac->ac_buddy_page);
	/* store last allocated for subsequent stream allocation */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		spin_lock(&sbi->s_md_lock);
		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
		spin_unlock(&sbi->s_md_lock);
	}
}

/*
 * regular allocator, for general purposes allocation
 */
static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b,
					int finish_group)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;
	struct ext4_free_extent ex;
	int max;

	if (ac->ac_status == AC_STATUS_FOUND)
		return;
	/*
	 * We don't want to scan for a whole year
	 */
	if (ac->ac_found > sbi->s_mb_max_to_scan &&
			!(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		ac->ac_status = AC_STATUS_BREAK;
		return;
	}

	/*
	 * Haven't found a good chunk so far, let's continue
	 */
	if (bex->fe_len < gex->fe_len)
		return;

	if ((finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
			&& bex->fe_group == e4b->bd_group) {
		/* recheck chunk's availability - we don't know
		 * when it was found (within this lock-unlock
		 * period or not) */
		max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
		if (max >= gex->fe_len) {
			ext4_mb_use_best_found(ac, e4b);
			return;
		}
	}
}

/*
 * The routine checks whether a found extent is good enough. If it is,
 * the extent is marked used and a flag is set in the context to stop
 * scanning. Otherwise the extent is compared with the previously found
 * extent; if the new one is better, it is stored in the context. The
 * best found extent is used later if mballoc can't find one that is
 * good enough.
 *
 * FIXME: real allocation policy is to be designed yet!
 */
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
					struct ext4_free_extent *ex,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent *bex = &ac->ac_b_ex;
	struct ext4_free_extent *gex = &ac->ac_g_ex;

	BUG_ON(ex->fe_len <= 0);
	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);

	ac->ac_found++;

	/*
	 * The special case - take what you catch first
	 */
	if (unlikely(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * Let's check whether the chunk is good enough
	 */
	if (ex->fe_len == gex->fe_len) {
		*bex = *ex;
		ext4_mb_use_best_found(ac, e4b);
		return;
	}

	/*
	 * If this is the first found extent, just store it in the context
	 */
	if (bex->fe_len == 0) {
		*bex = *ex;
		return;
	}

	/*
	 * If the newly found extent is better, store it in the context
	 */
	if (bex->fe_len < gex->fe_len) {
		/* if the request isn't satisfied, any found extent
		 * larger than the previous best one is better */
		if (ex->fe_len > bex->fe_len)
			*bex = *ex;
	} else if (ex->fe_len > gex->fe_len) {
		/* if the request is satisfied, then we try to find
		 * an extent that still satisfies the request, but
		 * is smaller than the previous one */
		if (ex->fe_len < bex->fe_len)
			*bex = *ex;
	}

	ext4_mb_check_limits(ac, e4b, 0);
}

static noinline_for_stack
int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct ext4_free_extent ex = ac->ac_b_ex;
	ext4_group_t group = ex.fe_group;
	int max;
	int err;

	BUG_ON(ex.fe_len <= 0);
	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);

	if (max > 0) {
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}

	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

static noinline_for_stack
int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
				struct ext4_buddy *e4b)
{
	ext4_group_t group = ac->ac_g_ex.fe_group;
	int max;
	int err;
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_free_extent ex;

	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
		return 0;
	if (grp->bb_free == 0)
		return 0;

	err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
	if (err)
		return err;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
		ext4_mb_unload_buddy(e4b);
		return 0;
	}

	ext4_lock_group(ac->ac_sb, group);
	max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
			     ac->ac_g_ex.fe_len, &ex);
	ex.fe_logical = 0xDEADFA11; /* debug value */

	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
		ext4_fsblk_t start;

		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
			ex.fe_start;
		/* use do_div to get remainder (would be 64-bit modulo) */
		if (do_div(start, sbi->s_stripe) == 0) {
			ac->ac_found++;
			ac->ac_b_ex = ex;
			ext4_mb_use_best_found(ac, e4b);
		}
	} else if (max >= ac->ac_g_ex.fe_len) {
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	} else if (max > 0 && (ac->ac_flags & EXT4_MB_HINT_MERGE)) {
		/* Sometimes, the caller may want to merge even a small
		 * number of blocks to an existing extent */
		BUG_ON(ex.fe_len <= 0);
		BUG_ON(ex.fe_group != ac->ac_g_ex.fe_group);
		BUG_ON(ex.fe_start != ac->ac_g_ex.fe_start);
		ac->ac_found++;
		ac->ac_b_ex = ex;
		ext4_mb_use_best_found(ac, e4b);
	}
	ext4_unlock_group(ac->ac_sb, group);
	ext4_mb_unload_buddy(e4b);

	return 0;
}

/*
 * The routine scans buddy structures (not bitmap!) from the given order
 * up to the max order and tries to find a big enough chunk to satisfy
 * the request
 */
static noinline_for_stack
void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_group_info *grp = e4b->bd_info;
	void *buddy;
	int i;
	int k;
	int max;

	BUG_ON(ac->ac_2order <= 0);
	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
		if (grp->bb_counters[i] == 0)
			continue;

		buddy = mb_find_buddy(e4b, i, &max);
		BUG_ON(buddy == NULL);

		k = mb_find_next_zero_bit(buddy, max, 0);
		BUG_ON(k >= max);

		ac->ac_found++;

		ac->ac_b_ex.fe_len = 1 << i;
		ac->ac_b_ex.fe_start = k << i;
		ac->ac_b_ex.fe_group = e4b->bd_group;

		ext4_mb_use_best_found(ac, e4b);

		BUG_ON(ac->ac_b_ex.fe_len != ac->ac_g_ex.fe_len);

		if (EXT4_SB(sb)->s_mb_stats)
			atomic_inc(&EXT4_SB(sb)->s_bal_2orders);

		break;
	}
}

/*
 * The routine scans the group and measures all found extents.
 * In order to optimize scanning, the caller must pass the number of
 * free blocks in the group, so the routine can know the upper limit.
 */
static noinline_for_stack
void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
					struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	int i;
	int free;

	free = e4b->bd_info->bb_free;
	BUG_ON(free <= 0);

	i = e4b->bd_info->bb_first_free;

	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
		i = mb_find_next_zero_bit(bitmap,
						EXT4_CLUSTERS_PER_GROUP(sb), i);
		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
			/*
			 * If we have a corrupt bitmap, we won't find any
			 * free blocks even though the group info says we
			 * have free blocks
			 */
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But bitmap says 0",
					free);
			break;
		}

		mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
					"%d free clusters as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
			 * The number of free blocks differs. This mostly
			 * indicates that the bitmap is corrupt. So exit
			 * without claiming the space.
			 */
			break;
		}
		ex.fe_logical = 0xDEADC0DE; /* debug value */
		ext4_mb_measure_extent(ac, &ex, e4b);

		i += ex.fe_len;
		free -= ex.fe_len;
	}

	ext4_mb_check_limits(ac, e4b, 1);
}

/*
 * This is a special case for storages like raid5;
 * we try to find stripe-aligned chunks for stripe-size-multiple requests
 */
static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
				 struct ext4_buddy *e4b)
{
	struct super_block *sb = ac->ac_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	void *bitmap = e4b->bd_bitmap;
	struct ext4_free_extent ex;
	ext4_fsblk_t first_group_block;
	ext4_fsblk_t a;
	ext4_grpblk_t i;
	int max;

	BUG_ON(sbi->s_stripe == 0);

	/* find first stripe-aligned block in group */
	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);

	a = first_group_block + sbi->s_stripe - 1;
	do_div(a, sbi->s_stripe);
	i = (a * sbi->s_stripe) - first_group_block;

	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
		if (!mb_test_bit(i, bitmap)) {
			max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
			if (max >= sbi->s_stripe) {
				ac->ac_found++;
				ex.fe_logical = 0xDEADF00D; /* debug value */
				ac->ac_b_ex = ex;
				ext4_mb_use_best_found(ac, e4b);
				break;
			}
		}
		i += sbi->s_stripe;
	}
}

/*
 * This could return a negative error code if something goes wrong
 * during ext4_mb_init_group(). This should not be called with
 * ext4_lock_group() held.
 */
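/*
 * The cr levels checked here relax progressively: cr 0 requires a free
 * buddy chunk of at least the requested order, cr 1 requires the group's
 * average free extent (free/fragments) to cover the request, cr 2 only
 * requires enough free clusters in total, and cr 3 accepts any group.
 */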
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
{
	unsigned free, fragments;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

	BUG_ON(cr < 0 || cr >= 4);

	free = grp->bb_free;
	if (free == 0)
		return 0;
	if (cr <= 2 && free < ac->ac_g_ex.fe_len)
		return 0;

	if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
		return 0;

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
		if (ret)
			return ret;
	}

	fragments = grp->bb_fragments;
	if (fragments == 0)
		return 0;

	switch (cr) {
	case 0:
		BUG_ON(ac->ac_2order == 0);

		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
			return 0;

		if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
		    (free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;

		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		return 1;
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 2:
		if (free >= ac->ac_g_ex.fe_len)
			return 1;
		break;
	case 3:
		return 1;
	default:
		BUG();
	}

	return 0;
}

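/*
 * Main scan loop of the regular allocator: try the goal group first,
 * then walk all groups at each criterion cr = 0..3 until the request is
 * satisfied.  If scanning ends with a best-found extent that was never
 * taken, retry it, and as a last resort restart at cr = 3 with
 * EXT4_MB_HINT_FIRST set to take the first thing available.
 */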
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
	ext4_group_t ngroups, group, i;
	int cr;
	int err = 0, first_err = 0;
	struct ext4_sb_info *sbi;
	struct super_block *sb;
	struct ext4_buddy e4b;

	sb = ac->ac_sb;
	sbi = EXT4_SB(sb);
	ngroups = ext4_get_groups_count(sb);
	/* non-extent files are limited to low blocks/groups */
	if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
		ngroups = sbi->s_blockfile_groups;

	BUG_ON(ac->ac_status == AC_STATUS_FOUND);

	/* first, try the goal */
	err = ext4_mb_find_by_goal(ac, &e4b);
	if (err || ac->ac_status == AC_STATUS_FOUND)
		goto out;

	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
		goto out;

	/*
	 * ac->ac_2order is set only if the fe_len is a power of 2;
	 * if ac_2order is set we also set criteria to 0 so that we
	 * try exact allocation using the buddy.
	 */
	i = fls(ac->ac_g_ex.fe_len);
	ac->ac_2order = 0;
	/*
	 * We search using buddy data only if the order of the
	 * request is greater than or equal to sbi->s_mb_order2_reqs.
	 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
	 */
	if (i >= sbi->s_mb_order2_reqs) {
		/*
		 * This should tell if fe_len is exactly a power of 2
		 */
		if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
			ac->ac_2order = i - 1;
	}

	/* if stream allocation is enabled, use the global goal */
	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
		/* TBD: may be hot point */
		spin_lock(&sbi->s_md_lock);
		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
		spin_unlock(&sbi->s_md_lock);
	}

	/* Let's just scan groups to find more-or-less suitable blocks */
	cr = ac->ac_2order ? 0 : 1;
	/*
	 * cr == 0 try to get exact allocation,
	 * cr == 3 try to get anything
	 */
repeat:
	for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
		ac->ac_criteria = cr;
		/*
		 * searching for the right group starts
		 * from the goal value specified
		 */
		group = ac->ac_g_ex.fe_group;

		for (i = 0; i < ngroups; group++, i++) {
			int ret = 0;
			cond_resched();
			/*
			 * Artificially restricted ngroups for non-extent
			 * files makes group > ngroups possible on first loop.
			 */
			if (group >= ngroups)
				group = 0;

			/* This now checks without needing the buddy page */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				if (!first_err)
					first_err = ret;
				continue;
			}

			err = ext4_mb_load_buddy(sb, group, &e4b);
			if (err)
				goto out;

			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			ret = ext4_mb_good_group(ac, group, cr);
			if (ret <= 0) {
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				if (!first_err)
					first_err = ret;
				continue;
			}

			ac->ac_groups_scanned++;
			if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
				ext4_mb_simple_scan_group(ac, &e4b);
			else if (cr == 1 && sbi->s_stripe &&
					!(ac->ac_g_ex.fe_len % sbi->s_stripe))
				ext4_mb_scan_aligned(ac, &e4b);
			else
				ext4_mb_complex_scan_group(ac, &e4b);

			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);

			if (ac->ac_status != AC_STATUS_CONTINUE)
				break;
		}
	}

	if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
	    !(ac->ac_flags & EXT4_MB_HINT_FIRST)) {
		/*
		 * We've been searching too long. Let's try to allocate
		 * the best chunk we've found so far
		 */
		ext4_mb_try_best_found(ac, &e4b);
		if (ac->ac_status != AC_STATUS_FOUND) {
			/*
			 * Someone more lucky has already allocated it.
			 * The only thing we can do is just take the first
			 * found block(s)
			 */
			ac->ac_b_ex.fe_group = 0;
			ac->ac_b_ex.fe_start = 0;
			ac->ac_b_ex.fe_len = 0;
			ac->ac_status = AC_STATUS_CONTINUE;
			ac->ac_flags |= EXT4_MB_HINT_FIRST;
			cr = 3;
			atomic_inc(&sbi->s_mb_lost_chunks);
			goto repeat;
		}
	}
out:
	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
		err = first_err;
	return err;
}

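/*
 * seq_file hooks for the per-filesystem mb_groups file (exposed via
 * ext4_seq_mb_groups_fops below, registered under /proc/fs/ext4/<disk>/),
 * which dumps per-group free counts, fragment counts and the
 * bb_counters[] table.
 */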
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct super_block *sb = seq->private;
	ext4_group_t group;

	++*pos;
	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
		return NULL;
	group = *pos + 1;
	return (void *) ((unsigned long) group);
}

static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
{
	struct super_block *sb = seq->private;
	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
	int i;
	int err, buddy_loaded = 0;
	struct ext4_buddy e4b;
	struct ext4_group_info *grinfo;
	struct sg {
		struct ext4_group_info info;
		ext4_grpblk_t counters[16];
	} sg;

	group--;
	if (group == 0)
		seq_puts(seq, "#group: free  frags first ["
			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");

	i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
		sizeof(struct ext4_group_info);
	grinfo = ext4_get_group_info(sb, group);
	/* Load the group info in memory only if not already loaded. */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err) {
			seq_printf(seq, "#%-5u: I/O error\n", group);
			return 0;
		}
		buddy_loaded = 1;
	}

	memcpy(&sg, ext4_get_group_info(sb, group), i);

	if (buddy_loaded)
		ext4_mb_unload_buddy(&e4b);

	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
			sg.info.bb_fragments, sg.info.bb_first_free);
	for (i = 0; i <= 13; i++)
		seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
				sg.info.bb_counters[i] : 0);
	seq_printf(seq, " ]\n");

	return 0;
}

static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{
}

static const struct seq_operations ext4_mb_seq_groups_ops = {
	.start  = ext4_mb_seq_groups_start,
	.next   = ext4_mb_seq_groups_next,
	.stop   = ext4_mb_seq_groups_stop,
	.show   = ext4_mb_seq_groups_show,
};

static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
{
	struct super_block *sb = PDE_DATA(inode);
	int rc;

	rc = seq_open(file, &ext4_mb_seq_groups_ops);
	if (rc == 0) {
		struct seq_file *m = file->private_data;
		m->private = sb;
	}
	return rc;
}

const struct file_operations ext4_seq_mb_groups_fops = {
	.open		= ext4_mb_seq_groups_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];

	BUG_ON(!cachep);
	return cachep;
}

/*
 * Allocate the top-level s_group_info array for the specified number of
 * groups
 */
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned size;
	struct ext4_group_info ***new_groupinfo;

	size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
		EXT4_DESC_PER_BLOCK_BITS(sb);
	if (size <= sbi->s_group_info_size)
		return 0;

	size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
	new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
	if (!new_groupinfo) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
		return -ENOMEM;
	}
	if (sbi->s_group_info) {
		memcpy(new_groupinfo, sbi->s_group_info,
		       sbi->s_group_info_size * sizeof(*sbi->s_group_info));
		kvfree(sbi->s_group_info);
	}
	sbi->s_group_info = new_groupinfo;
	sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
	ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
		   sbi->s_group_info_size);
	return 0;
}

/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
			  struct ext4_group_desc *desc)
{
	int i;
	int metalen = 0;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info **meta_group_info;
	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);

	/*
	 * First check if this group is the first of a reserved block.
	 * If it's true, we have to allocate a new table of pointers
	 * to ext4_group_info structures
	 */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		metalen = sizeof(*meta_group_info) <<
			EXT4_DESC_PER_BLOCK_BITS(sb);
		meta_group_info = kmalloc(metalen, GFP_NOFS);
		if (meta_group_info == NULL) {
			ext4_msg(sb, KERN_ERR, "can't allocate mem "
				 "for a buddy group");
			goto exit_meta_group_info;
		}
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
			meta_group_info;
	}

	meta_group_info =
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);

	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
	if (meta_group_info[i] == NULL) {
		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
		goto exit_group_info;
	}
	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
		&(meta_group_info[i]->bb_state));

	/*
	 * initialize bb_free to be able to skip
	 * empty groups without initialization
	 */
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		meta_group_info[i]->bb_free =
			ext4_free_clusters_after_init(sb, group, desc);
	} else {
		meta_group_info[i]->bb_free =
			ext4_free_group_clusters(sb, desc);
	}

	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */

#ifdef DOUBLE_CHECK
	{
		struct buffer_head *bh;
		meta_group_info[i]->bb_bitmap =
			kmalloc(sb->s_blocksize, GFP_NOFS);
		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
		bh = ext4_read_block_bitmap(sb, group);
		BUG_ON(IS_ERR_OR_NULL(bh));
		memcpy(meta_group_info[i]->bb_bitmap, bh->b_data,
			sb->s_blocksize);
		put_bh(bh);
	}
#endif

	return 0;

exit_group_info:
	/* If a meta_group_info table has been allocated, release it now */
	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
	}
exit_meta_group_info:
	return -ENOMEM;
}

static int ext4_mb_init_backend(struct super_block *sb)
{
	ext4_group_t ngroups = ext4_get_groups_count(sb);
	ext4_group_t i;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;
	struct ext4_group_desc *desc;
	struct kmem_cache *cachep;

	err = ext4_mb_alloc_groupinfo(sb, ngroups);
	if (err)
		return err;

	sbi->s_buddy_cache = new_inode(sb);
	if (sbi->s_buddy_cache == NULL) {
		ext4_msg(sb, KERN_ERR, "can't get new inode");
		goto err_freesgi;
	}
	/* To avoid potentially colliding with a valid on-disk inode number,
	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
	 * not in the inode hash, so it should never be found by iget(), but
	 * this should avoid confusion if it ever shows up during debugging. */
	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
	for (i = 0; i < ngroups; i++) {
		desc = ext4_get_group_desc(sb, i, NULL);
		if (desc == NULL) {
			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
			goto err_freebuddy;
		}
		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
			goto err_freebuddy;
	}

	return 0;

err_freebuddy:
	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
	while (i-- > 0)
		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
	i = sbi->s_group_info_size;
	while (i-- > 0)
		kfree(sbi->s_group_info[i]);
	iput(sbi->s_buddy_cache);
err_freesgi:
	kvfree(sbi->s_group_info);
	return -ENOMEM;
}

static void ext4_groupinfo_destroy_slabs(void)
{
	int i;

	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
		if (ext4_groupinfo_caches[i])
			kmem_cache_destroy(ext4_groupinfo_caches[i]);
		ext4_groupinfo_caches[i] = NULL;
	}
}

static int ext4_groupinfo_create_slab(size_t size)
{
	static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
	int slab_size;
	int blocksize_bits = order_base_2(size);
	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
	struct kmem_cache *cachep;

	if (cache_index >= NR_GRPINFO_CACHES)
		return -EINVAL;

	if (unlikely(cache_index < 0))
		cache_index = 0;

	mutex_lock(&ext4_grpinfo_slab_create_mutex);
	if (ext4_groupinfo_caches[cache_index]) {
		mutex_unlock(&ext4_grpinfo_slab_create_mutex);
		return 0;	/* Already created */
	}

	slab_size = offsetof(struct ext4_group_info,
				bb_counters[blocksize_bits + 2]);

	cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
					NULL);

	ext4_groupinfo_caches[cache_index] = cachep;

	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
	if (!cachep) {
		printk(KERN_EMERG
		       "EXT4-fs: no memory for groupinfo slab cache\n");
		return -ENOMEM;
	}

	return 0;
}
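
/*
 * The s_mb_offsets/s_mb_maxs tables built below describe where each
 * buddy order lives (see mb_find_buddy() above).  For a 4k block size:
 * order 0 is the bitmap itself (32768 bits), order 1 starts at byte 0 of
 * the buddy block with 16384 bits, order 2 at byte 2048 with 8192 bits,
 * and so on, halving up to order 13 with 4 bits.
 */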
int ext4_mb_init(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned i, j;
	unsigned offset, offset_incr;
	unsigned max;
	int ret;

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);

	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_offsets == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
	if (sbi->s_mb_maxs == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	ret = ext4_groupinfo_create_slab(sb->s_blocksize);
	if (ret < 0)
		goto out;

	/* order 0 is regular bitmap */
	sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
	sbi->s_mb_offsets[0] = 0;

	i = 1;
	offset = 0;
	offset_incr = 1 << (sb->s_blocksize_bits - 1);
	max = sb->s_blocksize << 2;
	do {
		sbi->s_mb_offsets[i] = offset;
		sbi->s_mb_maxs[i] = max;
		offset += offset_incr;
		offset_incr = offset_incr >> 1;
		max = max >> 1;
		i++;
	} while (i <= sb->s_blocksize_bits + 1);

	spin_lock_init(&sbi->s_md_lock);
	spin_lock_init(&sbi->s_bal_lock);
	sbi->s_mb_free_pending = 0;

	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
	sbi->s_mb_stats = MB_DEFAULT_STATS;
	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
	/*
	 * The default group preallocation is 512 blocks, which for a 4k
	 * block size is 2 megabytes.  On bigalloc file systems this is
	 * scaled down by the cluster size (s_cluster_bits) so the
	 * preallocation stays comparable, but never drops below 32
	 * clusters.
	 */
	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
				       sbi->s_cluster_bits, 32);
	/*
	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
	 * to the lowest multiple of s_stripe which is bigger than
	 * the s_mb_group_prealloc.
	 *
	 * XXX: should we maintain these in sysfs?
	 */
	if (sbi->s_stripe > 1) {
		sbi->s_mb_group_prealloc = roundup(
			sbi->s_mb_group_prealloc, sbi->s_stripe);
	}

	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
	if (sbi->s_locality_groups == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	for_each_possible_cpu(i) {
		struct ext4_locality_group *lg;
		lg = per_cpu_ptr(sbi->s_locality_groups, i);
		mutex_init(&lg->lg_mutex);
		for (j = 0; j < PREALLOC_TB_SIZE; j++)
			INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
		spin_lock_init(&lg->lg_prealloc_lock);
	}

	/* init file for buddy data */
	ret = ext4_mb_init_backend(sb);
	if (ret != 0)
		goto out_free_locality_groups;

	return 0;

out_free_locality_groups:
	free_percpu(sbi->s_locality_groups);
	sbi->s_locality_groups = NULL;
out:
	kfree(sbi->s_mb_offsets);
	sbi->s_mb_offsets = NULL;
	kfree(sbi->s_mb_maxs);
	sbi->s_mb_maxs = NULL;
	return ret;
}

/* needs to be called with the ext4 group lock held */
static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
{
	struct ext4_prealloc_space *pa;
	struct list_head *cur, *tmp;
	int count = 0;

	list_for_each_safe(cur, tmp, &grp->bb_prealloc_list) {
		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
		list_del(&pa->pa_group_list);
		count++;
		kmem_cache_free(ext4_pspace_cachep, pa);
	}
	if (count)
		mb_debug(1, "mballoc: %u PAs left\n", count);
}
2712
2713int ext4_mb_release(struct super_block *sb)
2714{
2715 ext4_group_t ngroups = ext4_get_groups_count(sb);
2716 ext4_group_t i;
2717 int num_meta_group_infos;
2718 struct ext4_group_info *grinfo;
2719 struct ext4_sb_info *sbi = EXT4_SB(sb);
2720 struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2721
2722 if (sbi->s_group_info) {
2723 for (i = 0; i < ngroups; i++) {
2724 grinfo = ext4_get_group_info(sb, i);
2725#ifdef DOUBLE_CHECK
2726 kfree(grinfo->bb_bitmap);
2727#endif
2728 ext4_lock_group(sb, i);
2729 ext4_mb_cleanup_pa(grinfo);
2730 ext4_unlock_group(sb, i);
2731 kmem_cache_free(cachep, grinfo);
2732 }
2733 num_meta_group_infos = (ngroups +
2734 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2735 EXT4_DESC_PER_BLOCK_BITS(sb);
2736 for (i = 0; i < num_meta_group_infos; i++)
2737 kfree(sbi->s_group_info[i]);
2738 kvfree(sbi->s_group_info);
2739 }
2740 kfree(sbi->s_mb_offsets);
2741 kfree(sbi->s_mb_maxs);
2742 iput(sbi->s_buddy_cache);
2743 if (sbi->s_mb_stats) {
2744 ext4_msg(sb, KERN_INFO,
2745 "mballoc: %u blocks %u reqs (%u success)",
2746 atomic_read(&sbi->s_bal_allocated),
2747 atomic_read(&sbi->s_bal_reqs),
2748 atomic_read(&sbi->s_bal_success));
2749 ext4_msg(sb, KERN_INFO,
2750 "mballoc: %u extents scanned, %u goal hits, "
2751 "%u 2^N hits, %u breaks, %u lost",
2752 atomic_read(&sbi->s_bal_ex_scanned),
2753 atomic_read(&sbi->s_bal_goals),
2754 atomic_read(&sbi->s_bal_2orders),
2755 atomic_read(&sbi->s_bal_breaks),
2756 atomic_read(&sbi->s_mb_lost_chunks));
2757 ext4_msg(sb, KERN_INFO,
2758 "mballoc: %lu generated and it took %Lu",
2759 sbi->s_mb_buddies_generated,
2760 sbi->s_mb_generation_time);
2761 ext4_msg(sb, KERN_INFO,
2762 "mballoc: %u preallocated, %u discarded",
2763 atomic_read(&sbi->s_mb_preallocated),
2764 atomic_read(&sbi->s_mb_discarded));
2765 }
2766
2767 free_percpu(sbi->s_locality_groups);
2768
2769 return 0;
2770}
2771
2772static inline int ext4_issue_discard(struct super_block *sb,
2773 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2774{
2775 ext4_fsblk_t discard_block;
2776
2777 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2778 ext4_group_first_block_no(sb, block_group));
2779 count = EXT4_C2B(EXT4_SB(sb), count);
2780 trace_ext4_discard_blocks(sb,
2781 (unsigned long long) discard_block, count);
2782 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
2783}
2784
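/*
 * This function is called by the jbd2 layer once the commit has finished,
 * so we know we can free the blocks that were released with that commit.
 */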
2789static void ext4_free_data_callback(struct super_block *sb,
2790 struct ext4_journal_cb_entry *jce,
2791 int rc)
2792{
2793 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2794 struct ext4_buddy e4b;
2795 struct ext4_group_info *db;
2796 int err, count = 0, count2 = 0;
2797
2798 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2799 entry->efd_count, entry->efd_group, entry);
2800
2801 if (test_opt(sb, DISCARD)) {
2802 err = ext4_issue_discard(sb, entry->efd_group,
2803 entry->efd_start_cluster,
2804 entry->efd_count);
2805 if (err && err != -EOPNOTSUPP)
2806 ext4_msg(sb, KERN_WARNING, "discard request in"
2807 " group:%d block:%d count:%d failed"
2808 " with %d", entry->efd_group,
2809 entry->efd_start_cluster,
2810 entry->efd_count, err);
2811 }
2812
2813 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
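 /* we expect to find an existing buddy because it's pinned in bd_bitmap_page */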
2815 BUG_ON(err != 0);
2816
2817 spin_lock(&EXT4_SB(sb)->s_md_lock);
2818 EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
2819 spin_unlock(&EXT4_SB(sb)->s_md_lock);
2820
2821 db = e4b.bd_info;
2822
2823 count += entry->efd_count;
2824 count2++;
2825 ext4_lock_group(sb, entry->efd_group);
2826
2827 rb_erase(&entry->efd_node, &(db->bb_free_root));
2828 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2829
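 /*
  * Clear the trimmed flag for the group so that the next
  * ext4_trim_fs can trim it.
  * If the volume is mounted with -o discard, online discard
  * is supported and the free blocks will be trimmed online.
  */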
2836 if (!test_opt(sb, DISCARD))
2837 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2838
2839 if (!db->bb_free_root.rb_node) {
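  /* No more items in the per-group rb tree:
   * balance refcounts from ext4_mb_free_metadata()
   */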
2843 put_page(e4b.bd_buddy_page);
2844 put_page(e4b.bd_bitmap_page);
2845 }
2846 ext4_unlock_group(sb, entry->efd_group);
2847 kmem_cache_free(ext4_free_data_cachep, entry);
2848 ext4_mb_unload_buddy(&e4b);
2849
2850 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2851}
2852
2853int __init ext4_init_mballoc(void)
2854{
2855 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
2856 SLAB_RECLAIM_ACCOUNT);
2857 if (ext4_pspace_cachep == NULL)
2858 return -ENOMEM;
2859
2860 ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
2861 SLAB_RECLAIM_ACCOUNT);
2862 if (ext4_ac_cachep == NULL) {
2863 kmem_cache_destroy(ext4_pspace_cachep);
2864 return -ENOMEM;
2865 }
2866
2867 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2868 SLAB_RECLAIM_ACCOUNT);
2869 if (ext4_free_data_cachep == NULL) {
2870 kmem_cache_destroy(ext4_pspace_cachep);
2871 kmem_cache_destroy(ext4_ac_cachep);
2872 return -ENOMEM;
2873 }
2874 return 0;
2875}
2876
2877void ext4_exit_mballoc(void)
2878{
2883 rcu_barrier();
2884 kmem_cache_destroy(ext4_pspace_cachep);
2885 kmem_cache_destroy(ext4_ac_cachep);
2886 kmem_cache_destroy(ext4_free_data_cachep);
2887 ext4_groupinfo_destroy_slabs();
2888}
2889
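/*
 * Check quota and mark the chosen space (ac->ac_b_ex) non-free in bitmaps.
 * Returns 0 on success or an error code.
 */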
2895static noinline_for_stack int
2896ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2897 handle_t *handle, unsigned int reserv_clstrs)
2898{
2899 struct buffer_head *bitmap_bh = NULL;
2900 struct ext4_group_desc *gdp;
2901 struct buffer_head *gdp_bh;
2902 struct ext4_sb_info *sbi;
2903 struct super_block *sb;
2904 ext4_fsblk_t block;
2905 int err, len;
2906
2907 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
2908 BUG_ON(ac->ac_b_ex.fe_len <= 0);
2909
2910 sb = ac->ac_sb;
2911 sbi = EXT4_SB(sb);
2912
2913 bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
2914 if (IS_ERR(bitmap_bh)) {
2915 err = PTR_ERR(bitmap_bh);
2916 bitmap_bh = NULL;
2917 goto out_err;
2918 }
2919
2920 BUFFER_TRACE(bitmap_bh, "getting write access");
2921 err = ext4_journal_get_write_access(handle, bitmap_bh);
2922 if (err)
2923 goto out_err;
2924
2925 err = -EIO;
2926 gdp = ext4_get_group_desc(sb, ac->ac_b_ex.fe_group, &gdp_bh);
2927 if (!gdp)
2928 goto out_err;
2929
2930 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2931 ext4_free_group_clusters(sb, gdp));
2932
2933 BUFFER_TRACE(gdp_bh, "get_write_access");
2934 err = ext4_journal_get_write_access(handle, gdp_bh);
2935 if (err)
2936 goto out_err;
2937
2938 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2939
2940 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2941 if (!ext4_data_block_valid(sbi, block, len)) {
2942 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2943 "fs metadata", block, block+len);
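  /* File system mounted not to panic on error.
   * Fix the bitmap and return EFSCORRUPTED.
   * We leak some of the blocks here.
   */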
2948 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2949 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2950 ac->ac_b_ex.fe_len);
2951 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2952 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2953 if (!err)
2954 err = -EFSCORRUPTED;
2955 goto out_err;
2956 }
2957
2958 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
2959#ifdef AGGRESSIVE_CHECK
2960 {
2961 int i;
2962 for (i = 0; i < ac->ac_b_ex.fe_len; i++) {
2963 BUG_ON(mb_test_bit(ac->ac_b_ex.fe_start + i,
2964 bitmap_bh->b_data));
2965 }
2966 }
2967#endif
2968 ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2969 ac->ac_b_ex.fe_len);
2970 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2971 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2972 ext4_free_group_clusters_set(sb, gdp,
2973 ext4_free_clusters_after_init(sb,
2974 ac->ac_b_ex.fe_group, gdp));
2975 }
2976 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2977 ext4_free_group_clusters_set(sb, gdp, len);
2978 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
2979 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
2980
2981 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2982 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
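 /*
  * Now reduce the dirty cluster count also. Should not go negative.
  */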
2986 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
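  /* release all the reserved blocks if non delalloc */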
2988 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2989 reserv_clstrs);
2990
2991 if (sbi->s_log_groups_per_flex) {
2992 ext4_group_t flex_group = ext4_flex_group(sbi,
2993 ac->ac_b_ex.fe_group);
2994 atomic64_sub(ac->ac_b_ex.fe_len,
2995 &sbi->s_flex_groups[flex_group].free_clusters);
2996 }
2997
2998 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
2999 if (err)
3000 goto out_err;
3001 err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
3002
3003out_err:
3004 brelse(bitmap_bh);
3005 return err;
3006}
3007
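/*
 * Here we normalize the request for a locality group.
 * Group requests are normalized to s_mb_group_prealloc, which goes to
 * s_stripe if we set the same via mount option.
 * s_mb_group_prealloc can be configured via
 * /sys/fs/ext4/<partition>/mb_group_prealloc
 *
 * XXX: should we try to preallocate more than the group has now?
 */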
3017static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3018{
3019 struct super_block *sb = ac->ac_sb;
3020 struct ext4_locality_group *lg = ac->ac_lg;
3021
3022 BUG_ON(lg == NULL);
3023 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3024 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3025 current->pid, ac->ac_g_ex.fe_len);
3026}
3027
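/*
 * Normalization means making the request better in terms of
 * size and alignment.
 */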
3032static noinline_for_stack void
3033ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3034 struct ext4_allocation_request *ar)
3035{
3036 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3037 int bsbits, max;
3038 ext4_lblk_t end;
3039 loff_t size, start_off;
3040 loff_t orig_size __maybe_unused;
3041 ext4_lblk_t start;
3042 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3043 struct ext4_prealloc_space *pa;
3044
3045
3046
3047 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3048 return;
3049
3050
3051 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3052 return;
3053
3054
3055
3056 if (ac->ac_flags & EXT4_MB_HINT_NOPREALLOC)
3057 return;
3058
3059 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) {
3060 ext4_mb_normalize_group_request(ac);
  return;
3062 }
3063
3064 bsbits = ac->ac_sb->s_blocksize_bits;
3065
3066
3067
3068 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3069 size = size << bsbits;
3070 if (size < i_size_read(ac->ac_inode))
3071 size = i_size_read(ac->ac_inode);
3072 orig_size = size;
3073
3074
3075 max = 2 << bsbits;
3076
3077#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3078 (req <= (size) || max <= (chunk_size))
3079
3080
3081
3082 start_off = 0;
3083 if (size <= 16 * 1024) {
3084 size = 16 * 1024;
3085 } else if (size <= 32 * 1024) {
3086 size = 32 * 1024;
3087 } else if (size <= 64 * 1024) {
3088 size = 64 * 1024;
3089 } else if (size <= 128 * 1024) {
3090 size = 128 * 1024;
3091 } else if (size <= 256 * 1024) {
3092 size = 256 * 1024;
3093 } else if (size <= 512 * 1024) {
3094 size = 512 * 1024;
3095 } else if (size <= 1024 * 1024) {
3096 size = 1024 * 1024;
3097 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3098 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3099 (21 - bsbits)) << 21;
3100 size = 2 * 1024 * 1024;
3101 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3102 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3103 (22 - bsbits)) << 22;
3104 size = 4 * 1024 * 1024;
3105 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3106 (8<<20)>>bsbits, max, 8 * 1024)) {
3107 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3108 (23 - bsbits)) << 23;
3109 size = 8 * 1024 * 1024;
3110 } else {
3111 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3112 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3113 ac->ac_o_ex.fe_len) << bsbits;
3114 }
3115 size = size >> bsbits;
3116 start = start_off >> bsbits;
3117
3118
3119 if (ar->pleft && start <= ar->lleft) {
3120 size -= ar->lleft + 1 - start;
3121 start = ar->lleft + 1;
3122 }
3123 if (ar->pright && start + size - 1 >= ar->lright)
3124 size -= start + size - ar->lright;
3125
3126 end = start + size;
3127
3128
3129 rcu_read_lock();
3130 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3131 ext4_lblk_t pa_end;
3132
3133 if (pa->pa_deleted)
3134 continue;
3135 spin_lock(&pa->pa_lock);
3136 if (pa->pa_deleted) {
3137 spin_unlock(&pa->pa_lock);
3138 continue;
3139 }
3140
3141 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3142 pa->pa_len);
3143
3144
3145 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3146 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3147
3148
3149 if (pa->pa_lstart >= end || pa_end <= start) {
3150 spin_unlock(&pa->pa_lock);
3151 continue;
3152 }
3153 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3154
3155
3156 if (pa_end <= ac->ac_o_ex.fe_logical) {
3157 BUG_ON(pa_end < start);
3158 start = pa_end;
3159 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3160 BUG_ON(pa->pa_lstart > end);
3161 end = pa->pa_lstart;
3162 }
3163 spin_unlock(&pa->pa_lock);
3164 }
3165 rcu_read_unlock();
3166 size = end - start;
3167
3168
3169 rcu_read_lock();
3170 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3171 ext4_lblk_t pa_end;
3172
3173 spin_lock(&pa->pa_lock);
3174 if (pa->pa_deleted == 0) {
3175 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3176 pa->pa_len);
3177 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3178 }
3179 spin_unlock(&pa->pa_lock);
3180 }
3181 rcu_read_unlock();
3182
3183 if (start + size <= ac->ac_o_ex.fe_logical &&
3184 start > ac->ac_o_ex.fe_logical) {
3185 ext4_msg(ac->ac_sb, KERN_ERR,
3186 "start %lu, size %lu, fe_logical %lu",
3187 (unsigned long) start, (unsigned long) size,
3188 (unsigned long) ac->ac_o_ex.fe_logical);
3189 BUG();
3190 }
3191 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3192
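 /* now prepare goal request */

 /* XXX: is it better to align blocks WRT to logical
  * placement or satisfy big request as is */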
3197 ac->ac_g_ex.fe_logical = start;
3198 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3199
3200
3201 if (ar->pright && (ar->lright == (start + size))) {
3202
3203 ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
3204 &ac->ac_f_ex.fe_group,
3205 &ac->ac_f_ex.fe_start);
3206 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3207 }
3208 if (ar->pleft && (ar->lleft + 1 == start)) {
3209
3210 ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
3211 &ac->ac_f_ex.fe_group,
3212 &ac->ac_f_ex.fe_start);
3213 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3214 }
3215
3216 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3217 (unsigned) orig_size, (unsigned) start);
3218}
3219
3220static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3221{
3222 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3223
3224 if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
3225 atomic_inc(&sbi->s_bal_reqs);
3226 atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
3227 if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
3228 atomic_inc(&sbi->s_bal_success);
3229 atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
3230 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
3231 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
3232 atomic_inc(&sbi->s_bal_goals);
3233 if (ac->ac_found > sbi->s_mb_max_to_scan)
3234 atomic_inc(&sbi->s_bal_breaks);
3235 }
3236
3237 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3238 trace_ext4_mballoc_alloc(ac);
3239 else
3240 trace_ext4_mballoc_prealloc(ac);
3241}
3242
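/*
 * Called on failure; free up any blocks from the inode PA for this
 * context.  We don't need this for MB_GROUP_PA because we only change
 * pa_free in ext4_mb_release_context(), but on failure we've already
 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
 */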
3249static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3250{
3251 struct ext4_prealloc_space *pa = ac->ac_pa;
3252 struct ext4_buddy e4b;
3253 int err;
3254
3255 if (pa == NULL) {
3256 if (ac->ac_f_ex.fe_len == 0)
3257 return;
3258 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3259 if (err) {
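   /*
    * This should never happen since we pin the
    * pages in the ext4_allocation_context so
    * ext4_mb_load_buddy() should never fail.
    */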
3265 WARN(1, "mb_load_buddy failed (%d)", err);
3266 return;
3267 }
3268 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3269 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3270 ac->ac_f_ex.fe_len);
3271 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3272 ext4_mb_unload_buddy(&e4b);
3273 return;
3274 }
3275 if (pa->pa_type == MB_INODE_PA)
3276 pa->pa_free += ac->ac_b_ex.fe_len;
3277}
3278
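/*
 * use blocks preallocated to inode
 */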
3282static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3283 struct ext4_prealloc_space *pa)
3284{
3285 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3286 ext4_fsblk_t start;
3287 ext4_fsblk_t end;
3288 int len;
3289
3290
3291 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3292 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3293 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3294 len = EXT4_NUM_B2C(sbi, end - start);
3295 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3296 &ac->ac_b_ex.fe_start);
3297 ac->ac_b_ex.fe_len = len;
3298 ac->ac_status = AC_STATUS_FOUND;
3299 ac->ac_pa = pa;
3300
3301 BUG_ON(start < pa->pa_pstart);
3302 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3303 BUG_ON(pa->pa_free < len);
3304 pa->pa_free -= len;
3305
3306 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3307}
3308
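/*
 * use blocks preallocated to locality group
 */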
3312static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3313 struct ext4_prealloc_space *pa)
3314{
3315 unsigned int len = ac->ac_o_ex.fe_len;
3316
3317 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3318 &ac->ac_b_ex.fe_group,
3319 &ac->ac_b_ex.fe_start);
3320 ac->ac_b_ex.fe_len = len;
3321 ac->ac_status = AC_STATUS_FOUND;
3322 ac->ac_pa = pa;
3323
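 /* we don't correct pa_pstart or pa_len here to avoid
  * possible races when the group is being loaded concurrently;
  * instead we correct pa later, after blocks are marked
  * in the on-disk bitmap -- see ext4_mb_release_context().
  * Other CPUs are prevented from allocating from this pa by lg_mutex.
  */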
3330 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3331}
3332
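/*
 * Return the prealloc space that has the minimal distance from the
 * goal block.  @cpa is the currently known prealloc space with the
 * minimal distance from the goal block.
 */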
3339static struct ext4_prealloc_space *
3340ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3341 struct ext4_prealloc_space *pa,
3342 struct ext4_prealloc_space *cpa)
3343{
3344 ext4_fsblk_t cur_distance, new_distance;
3345
3346 if (cpa == NULL) {
3347 atomic_inc(&pa->pa_count);
3348 return pa;
3349 }
3350 cur_distance = abs(goal_block - cpa->pa_pstart);
3351 new_distance = abs(goal_block - pa->pa_pstart);
3352
3353 if (cur_distance <= new_distance)
3354 return cpa;
3355
3356
3357 atomic_dec(&cpa->pa_count);
3358 atomic_inc(&pa->pa_count);
3359 return pa;
3360}
3361
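/*
 * search goal blocks in preallocated space
 */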
3365static noinline_for_stack int
3366ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3367{
3368 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3369 int order, i;
3370 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3371 struct ext4_locality_group *lg;
3372 struct ext4_prealloc_space *pa, *cpa = NULL;
3373 ext4_fsblk_t goal_block;
3374
3375
3376 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
3377 return 0;
3378
3379
3380 rcu_read_lock();
3381 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3382
3383
3384
3385 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3386 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3387 EXT4_C2B(sbi, pa->pa_len)))
3388 continue;
3389
3390
3391 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3392 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3393 EXT4_MAX_BLOCK_FILE_PHYS))
3394 continue;
3395
3396
3397 spin_lock(&pa->pa_lock);
3398 if (pa->pa_deleted == 0 && pa->pa_free) {
3399 atomic_inc(&pa->pa_count);
3400 ext4_mb_use_inode_pa(ac, pa);
3401 spin_unlock(&pa->pa_lock);
3402 ac->ac_criteria = 10;
3403 rcu_read_unlock();
3404 return 1;
3405 }
3406 spin_unlock(&pa->pa_lock);
3407 }
3408 rcu_read_unlock();
3409
3410
3411 if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
3412 return 0;
3413
3414
3415 lg = ac->ac_lg;
3416 if (lg == NULL)
3417 return 0;
3418 order = fls(ac->ac_o_ex.fe_len) - 1;
3419 if (order > PREALLOC_TB_SIZE - 1)
3420
3421 order = PREALLOC_TB_SIZE - 1;
3422
3423 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
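 /*
  * search for the prealloc space that has the
  * minimal distance from the goal block
  */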
3428 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3429 rcu_read_lock();
3430 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3431 pa_inode_list) {
3432 spin_lock(&pa->pa_lock);
3433 if (pa->pa_deleted == 0 &&
3434 pa->pa_free >= ac->ac_o_ex.fe_len) {
3435
3436 cpa = ext4_mb_check_group_pa(goal_block,
3437 pa, cpa);
3438 }
3439 spin_unlock(&pa->pa_lock);
3440 }
3441 rcu_read_unlock();
3442 }
3443 if (cpa) {
3444 ext4_mb_use_group_pa(ac, cpa);
3445 ac->ac_criteria = 20;
3446 return 1;
3447 }
3448 return 0;
3449}
3450
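/*
 * the function goes through all blocks freed in the group
 * but not yet committed and marks them used in the in-core bitmap.
 * buddy must be generated from this bitmap
 * Needs to be called with the ext4 group lock held
 */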
3457static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3458 ext4_group_t group)
3459{
3460 struct rb_node *n;
3461 struct ext4_group_info *grp;
3462 struct ext4_free_data *entry;
3463
3464 grp = ext4_get_group_info(sb, group);
3465 n = rb_first(&(grp->bb_free_root));
3466
3467 while (n) {
3468 entry = rb_entry(n, struct ext4_free_data, efd_node);
3469 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3470 n = rb_next(n);
3471 }
3472 return;
3473}
3474
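/*
 * the function goes through all preallocations in this group and marks
 * them used in the in-core bitmap. buddy must be generated from this bitmap
 * Needs to be called with the ext4 group lock held
 */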
3480static noinline_for_stack
3481void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3482 ext4_group_t group)
3483{
3484 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3485 struct ext4_prealloc_space *pa;
3486 struct list_head *cur;
3487 ext4_group_t groupnr;
3488 ext4_grpblk_t start;
3489 int preallocated = 0;
3490 int len;
3491
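 /* all forms of preallocation discard first load the group,
  * so the only competing code is preallocation use;
  * we don't need any locking here.
  * notice we do NOT ignore preallocations with pa_deleted:
  * we could live with the fact that a PA is still on the
  * list, either in use by a file or being discarded --
  * the caller should choose one out of them
  */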
3500 list_for_each(cur, &grp->bb_prealloc_list) {
3501 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
3502 spin_lock(&pa->pa_lock);
3503 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
3504 &groupnr, &start);
3505 len = pa->pa_len;
3506 spin_unlock(&pa->pa_lock);
3507 if (unlikely(len == 0))
3508 continue;
3509 BUG_ON(groupnr != group);
3510 ext4_set_bits(bitmap, start, len);
3511 preallocated += len;
3512 }
 mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
3514}
3515
3516static void ext4_mb_pa_callback(struct rcu_head *head)
3517{
3518 struct ext4_prealloc_space *pa;
3519 pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
3520
3521 BUG_ON(atomic_read(&pa->pa_count));
3522 BUG_ON(pa->pa_deleted == 0);
3523 kmem_cache_free(ext4_pspace_cachep, pa);
3524}
3525
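/*
 * drops a reference to the preallocated space descriptor
 * if this was the last reference and the space is consumed
 */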
3530static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3531 struct super_block *sb, struct ext4_prealloc_space *pa)
3532{
3533 ext4_group_t grp;
3534 ext4_fsblk_t grp_blk;
3535
3536
3537 spin_lock(&pa->pa_lock);
3538 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
3539 spin_unlock(&pa->pa_lock);
3540 return;
3541 }
3542
3543 if (pa->pa_deleted == 1) {
3544 spin_unlock(&pa->pa_lock);
3545 return;
3546 }
3547
3548 pa->pa_deleted = 1;
3549 spin_unlock(&pa->pa_lock);
3550
3551 grp_blk = pa->pa_pstart;
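 /*
  * If doing group-based preallocation, pa_pstart may be in the
  * next group when the pa is used up
  */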
3556 if (pa->pa_type == MB_GROUP_PA)
3557 grp_blk--;
3558
3559 grp = ext4_get_group_number(sb, grp_blk);
3560
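 /*
  * possible race:
  *
  *  P1 (buddy init)                    P2 (regular allocation)
  *                                     find block B in PA
  *  copy on-disk bitmap to buddy
  *                                     mark B in on-disk bitmap
  *                                     drop PA from group
  *  mark all PA blocks in buddy
  *
  * thus, P1 initializes buddy with B available. to prevent this
  * we make "copy" and "mark all PAs" atomic and serialize "drop PA"
  * against that pair
  */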
3575 ext4_lock_group(sb, grp);
3576 list_del(&pa->pa_group_list);
3577 ext4_unlock_group(sb, grp);
3578
3579 spin_lock(pa->pa_obj_lock);
3580 list_del_rcu(&pa->pa_inode_list);
3581 spin_unlock(pa->pa_obj_lock);
3582
3583 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3584}
3585
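/*
 * creates new preallocated space for a given inode
 */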
3589static noinline_for_stack int
3590ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3591{
3592 struct super_block *sb = ac->ac_sb;
3593 struct ext4_sb_info *sbi = EXT4_SB(sb);
3594 struct ext4_prealloc_space *pa;
3595 struct ext4_group_info *grp;
3596 struct ext4_inode_info *ei;
3597
3598
3599 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3600 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3601 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3602
3603 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3604 if (pa == NULL)
3605 return -ENOMEM;
3606
3607 if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
3608 int winl;
3609 int wins;
3610 int win;
3611 int offs;
3612
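  /* we can't allocate as much as the normalizer wants,
   * so the found space must get a proper lstart
   * to cover the original request */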
3616 BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
3617 BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
3618
3619
3620
3621
3622 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3623
3624
3625 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3626
3627
3628 win = min(winl, wins);
3629
3630 offs = ac->ac_o_ex.fe_logical %
3631 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3632 if (offs && offs < win)
3633 win = offs;
3634
3635 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3636 EXT4_NUM_B2C(sbi, win);
3637 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3638 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3639 }
3640
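 /* preallocation can change ac_b_ex, thus we store the actually
  * allocated blocks for history */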
3643 ac->ac_f_ex = ac->ac_b_ex;
3644
3645 pa->pa_lstart = ac->ac_b_ex.fe_logical;
3646 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3647 pa->pa_len = ac->ac_b_ex.fe_len;
3648 pa->pa_free = pa->pa_len;
3649 atomic_set(&pa->pa_count, 1);
3650 spin_lock_init(&pa->pa_lock);
3651 INIT_LIST_HEAD(&pa->pa_inode_list);
3652 INIT_LIST_HEAD(&pa->pa_group_list);
3653 pa->pa_deleted = 0;
3654 pa->pa_type = MB_INODE_PA;
3655
3656 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3657 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3658 trace_ext4_mb_new_inode_pa(ac, pa);
3659
3660 ext4_mb_use_inode_pa(ac, pa);
3661 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3662
3663 ei = EXT4_I(ac->ac_inode);
3664 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3665
3666 pa->pa_obj_lock = &ei->i_prealloc_lock;
3667 pa->pa_inode = ac->ac_inode;
3668
3669 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3670 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3671 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3672
3673 spin_lock(pa->pa_obj_lock);
3674 list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
3675 spin_unlock(pa->pa_obj_lock);
3676
3677 return 0;
3678}
3679
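/*
 * creates new preallocated space for the locality group the inodes belong to
 */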
3683static noinline_for_stack int
3684ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3685{
3686 struct super_block *sb = ac->ac_sb;
3687 struct ext4_locality_group *lg;
3688 struct ext4_prealloc_space *pa;
3689 struct ext4_group_info *grp;
3690
3691
3692 BUG_ON(ac->ac_o_ex.fe_len >= ac->ac_b_ex.fe_len);
3693 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3694 BUG_ON(!S_ISREG(ac->ac_inode->i_mode));
3695
3696 BUG_ON(ext4_pspace_cachep == NULL);
3697 pa = kmem_cache_alloc(ext4_pspace_cachep, GFP_NOFS);
3698 if (pa == NULL)
3699 return -ENOMEM;
3700
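 /* preallocation can change ac_b_ex, thus we store the actually
  * allocated blocks for history */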
3703 ac->ac_f_ex = ac->ac_b_ex;
3704
3705 pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
3706 pa->pa_lstart = pa->pa_pstart;
3707 pa->pa_len = ac->ac_b_ex.fe_len;
3708 pa->pa_free = pa->pa_len;
3709 atomic_set(&pa->pa_count, 1);
3710 spin_lock_init(&pa->pa_lock);
3711 INIT_LIST_HEAD(&pa->pa_inode_list);
3712 INIT_LIST_HEAD(&pa->pa_group_list);
3713 pa->pa_deleted = 0;
3714 pa->pa_type = MB_GROUP_PA;
3715
3716 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3717 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3718 trace_ext4_mb_new_group_pa(ac, pa);
3719
3720 ext4_mb_use_group_pa(ac, pa);
3721 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
3722
3723 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
3724 lg = ac->ac_lg;
3725 BUG_ON(lg == NULL);
3726
3727 pa->pa_obj_lock = &lg->lg_prealloc_lock;
3728 pa->pa_inode = NULL;
3729
3730 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
3731 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3732 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3733
3734
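 /*
  * We will later add the new pa to the right bucket
  * after updating pa_free in ext4_mb_release_context()
  */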
3738 return 0;
3739}
3740
3741static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3742{
3743 int err;
3744
3745 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
3746 err = ext4_mb_new_group_pa(ac);
3747 else
3748 err = ext4_mb_new_inode_pa(ac);
3749 return err;
3750}
3751
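/*
 * finds all unused blocks in the on-disk bitmap, frees them in
 * the in-core bitmap and buddy.
 * @pa must be unlinked from the inode and group lists, so that
 * nobody else can find/use it.
 * the caller MUST hold the group/inode locks.
 * TODO: optimize the case when there are no in-core structures yet
 */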
3760static noinline_for_stack int
3761ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3762 struct ext4_prealloc_space *pa)
3763{
3764 struct super_block *sb = e4b->bd_sb;
3765 struct ext4_sb_info *sbi = EXT4_SB(sb);
3766 unsigned int end;
3767 unsigned int next;
3768 ext4_group_t group;
3769 ext4_grpblk_t bit;
3770 unsigned long long grp_blk_start;
3771 int err = 0;
3772 int free = 0;
3773
3774 BUG_ON(pa->pa_deleted == 0);
3775 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3776 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3777 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3778 end = bit + pa->pa_len;
3779
3780 while (bit < end) {
3781 bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
3782 if (bit >= end)
3783 break;
3784 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3785 mb_debug(1, " free preallocated %u/%u in group %u\n",
3786 (unsigned) ext4_group_first_block_no(sb, group) + bit,
3787 (unsigned) next - bit, (unsigned) group);
3788 free += next - bit;
3789
3790 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3791 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3792 EXT4_C2B(sbi, bit)),
3793 next - bit);
3794 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3795 bit = next + 1;
3796 }
3797 if (free != pa->pa_free) {
3798 ext4_msg(e4b->bd_sb, KERN_CRIT,
3799 "pa %p: logic %lu, phys. %lu, len %lu",
3800 pa, (unsigned long) pa->pa_lstart,
3801 (unsigned long) pa->pa_pstart,
3802 (unsigned long) pa->pa_len);
3803 ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
3804 free, pa->pa_free);
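  /*
   * pa is already deleted so we use the value obtained
   * from the bitmap and continue.
   */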
3809 }
3810 atomic_add(free, &sbi->s_mb_discarded);
3811
3812 return err;
3813}
3814
3815static noinline_for_stack int
3816ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3817 struct ext4_prealloc_space *pa)
3818{
3819 struct super_block *sb = e4b->bd_sb;
3820 ext4_group_t group;
3821 ext4_grpblk_t bit;
3822
3823 trace_ext4_mb_release_group_pa(sb, pa);
3824 BUG_ON(pa->pa_deleted == 0);
3825 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3826 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3827 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3828 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3829 trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
3830
3831 return 0;
3832}
3833
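/*
 * releases all preallocations in a given group
 *
 * first, we need to decide the discard policy:
 *  - when do we discard
 *	1) ENOSPC
 *  - how many do we discard
 *	1) how many requested
 */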
3843static noinline_for_stack int
3844ext4_mb_discard_group_preallocations(struct super_block *sb,
3845 ext4_group_t group, int needed)
3846{
3847 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3848 struct buffer_head *bitmap_bh = NULL;
3849 struct ext4_prealloc_space *pa, *tmp;
3850 struct list_head list;
3851 struct ext4_buddy e4b;
3852 int err;
3853 int busy = 0;
3854 int free = 0;
3855
3856 mb_debug(1, "discard preallocation for group %u\n", group);
3857
3858 if (list_empty(&grp->bb_prealloc_list))
3859 return 0;
3860
3861 bitmap_bh = ext4_read_block_bitmap(sb, group);
3862 if (IS_ERR(bitmap_bh)) {
3863 err = PTR_ERR(bitmap_bh);
3864 ext4_error(sb, "Error %d reading block bitmap for %u",
3865 err, group);
3866 return 0;
3867 }
3868
3869 err = ext4_mb_load_buddy(sb, group, &e4b);
3870 if (err) {
3871 ext4_error(sb, "Error loading buddy information for %u", group);
3872 put_bh(bitmap_bh);
3873 return 0;
3874 }
3875
3876 if (needed == 0)
3877 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3878
3879 INIT_LIST_HEAD(&list);
3880repeat:
3881 ext4_lock_group(sb, group);
3882 list_for_each_entry_safe(pa, tmp,
3883 &grp->bb_prealloc_list, pa_group_list) {
3884 spin_lock(&pa->pa_lock);
3885 if (atomic_read(&pa->pa_count)) {
3886 spin_unlock(&pa->pa_lock);
3887 busy = 1;
3888 continue;
3889 }
3890 if (pa->pa_deleted) {
3891 spin_unlock(&pa->pa_lock);
3892 continue;
3893 }
3894
3895
3896 pa->pa_deleted = 1;
3897
3898
3899 free += pa->pa_free;
3900
3901 spin_unlock(&pa->pa_lock);
3902
3903 list_del(&pa->pa_group_list);
3904 list_add(&pa->u.pa_tmp_list, &list);
3905 }
3906
3907
3908 if (free < needed && busy) {
3909 busy = 0;
3910 ext4_unlock_group(sb, group);
3911 cond_resched();
3912 goto repeat;
3913 }
3914
3915
3916 if (list_empty(&list)) {
3917 BUG_ON(free != 0);
3918 goto out;
3919 }
3920
3921
3922 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
3923
3924
3925 spin_lock(pa->pa_obj_lock);
3926 list_del_rcu(&pa->pa_inode_list);
3927 spin_unlock(pa->pa_obj_lock);
3928
3929 if (pa->pa_type == MB_GROUP_PA)
3930 ext4_mb_release_group_pa(&e4b, pa);
3931 else
3932 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
3933
3934 list_del(&pa->u.pa_tmp_list);
3935 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
3936 }
3937
3938out:
3939 ext4_unlock_group(sb, group);
3940 ext4_mb_unload_buddy(&e4b);
3941 put_bh(bitmap_bh);
3942 return free;
3943}
3944
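/*
 * releases all non-used preallocated blocks for a given inode
 *
 * Callers are expected to serialize against block allocation (typically
 * via i_data_sem), so a racing allocation cannot pick up a PA while it
 * is being unlinked from the lists here.
 */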
3954void ext4_discard_preallocations(struct inode *inode)
3955{
3956 struct ext4_inode_info *ei = EXT4_I(inode);
3957 struct super_block *sb = inode->i_sb;
3958 struct buffer_head *bitmap_bh = NULL;
3959 struct ext4_prealloc_space *pa, *tmp;
3960 ext4_group_t group = 0;
3961 struct list_head list;
3962 struct ext4_buddy e4b;
3963 int err;
3964
3965 if (!S_ISREG(inode->i_mode)) {
3966
3967 return;
3968 }
3969
3970 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3971 trace_ext4_discard_preallocations(inode);
3972
3973 INIT_LIST_HEAD(&list);
3974
3975repeat:
3976
3977 spin_lock(&ei->i_prealloc_lock);
3978 while (!list_empty(&ei->i_prealloc_list)) {
3979 pa = list_entry(ei->i_prealloc_list.next,
3980 struct ext4_prealloc_space, pa_inode_list);
3981 BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
3982 spin_lock(&pa->pa_lock);
3983 if (atomic_read(&pa->pa_count)) {
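   /* this shouldn't happen often - nobody should
    * use preallocation while we're discarding it */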
3986 spin_unlock(&pa->pa_lock);
3987 spin_unlock(&ei->i_prealloc_lock);
3988 ext4_msg(sb, KERN_ERR,
3989 "uh-oh! used pa while discarding");
3990 WARN_ON(1);
3991 schedule_timeout_uninterruptible(HZ);
3992 goto repeat;
3993
3994 }
3995 if (pa->pa_deleted == 0) {
3996 pa->pa_deleted = 1;
3997 spin_unlock(&pa->pa_lock);
3998 list_del_rcu(&pa->pa_inode_list);
3999 list_add(&pa->u.pa_tmp_list, &list);
4000 continue;
4001 }
4002
4003
4004 spin_unlock(&pa->pa_lock);
4005 spin_unlock(&ei->i_prealloc_lock);
4006
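  /* we have to wait here because pa_deleted
   * doesn't mean pa is already unlinked from
   * the list. as we might be called from
   * ->clear_inode() the inode will get freed
   * and a concurrent thread which is unlinking
   * pa from the inode's list may access already
   * freed memory, bad-bad-bad */

  /* XXX: if this happens too often, we can
   * add a flag to force wait only in case
   * of ->clear_inode(), but not in case of
   * regular truncate */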
4019 schedule_timeout_uninterruptible(HZ);
4020 goto repeat;
4021 }
4022 spin_unlock(&ei->i_prealloc_lock);
4023
4024 list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
4025 BUG_ON(pa->pa_type != MB_INODE_PA);
4026 group = ext4_get_group_number(sb, pa->pa_pstart);
4027
4028 err = ext4_mb_load_buddy(sb, group, &e4b);
4029 if (err) {
4030 ext4_error(sb, "Error loading buddy information for %u",
4031 group);
4032 continue;
4033 }
4034
4035 bitmap_bh = ext4_read_block_bitmap(sb, group);
4036 if (IS_ERR(bitmap_bh)) {
4037 err = PTR_ERR(bitmap_bh);
4038 ext4_error(sb, "Error %d reading block bitmap for %u",
4039 err, group);
4040 ext4_mb_unload_buddy(&e4b);
4041 continue;
4042 }
4043
4044 ext4_lock_group(sb, group);
4045 list_del(&pa->pa_group_list);
4046 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
4047 ext4_unlock_group(sb, group);
4048
4049 ext4_mb_unload_buddy(&e4b);
4050 put_bh(bitmap_bh);
4051
4052 list_del(&pa->u.pa_tmp_list);
4053 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4054 }
4055}
4056
4057#ifdef CONFIG_EXT4_DEBUG
4058static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4059{
4060 struct super_block *sb = ac->ac_sb;
4061 ext4_group_t ngroups, i;
4062
4063 if (!ext4_mballoc_debug ||
4064 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
4065 return;
4066
4067 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
4068 " Allocation context details:");
4069 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
4070 ac->ac_status, ac->ac_flags);
4071 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
4072 "goal %lu/%lu/%lu@%lu, "
4073 "best %lu/%lu/%lu@%lu cr %d",
4074 (unsigned long)ac->ac_o_ex.fe_group,
4075 (unsigned long)ac->ac_o_ex.fe_start,
4076 (unsigned long)ac->ac_o_ex.fe_len,
4077 (unsigned long)ac->ac_o_ex.fe_logical,
4078 (unsigned long)ac->ac_g_ex.fe_group,
4079 (unsigned long)ac->ac_g_ex.fe_start,
4080 (unsigned long)ac->ac_g_ex.fe_len,
4081 (unsigned long)ac->ac_g_ex.fe_logical,
4082 (unsigned long)ac->ac_b_ex.fe_group,
4083 (unsigned long)ac->ac_b_ex.fe_start,
4084 (unsigned long)ac->ac_b_ex.fe_len,
4085 (unsigned long)ac->ac_b_ex.fe_logical,
4086 (int)ac->ac_criteria);
4087 ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found);
4088 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
4089 ngroups = ext4_get_groups_count(sb);
4090 for (i = 0; i < ngroups; i++) {
4091 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4092 struct ext4_prealloc_space *pa;
4093 ext4_grpblk_t start;
4094 struct list_head *cur;
4095 ext4_lock_group(sb, i);
4096 list_for_each(cur, &grp->bb_prealloc_list) {
4097 pa = list_entry(cur, struct ext4_prealloc_space,
4098 pa_group_list);
4099 spin_lock(&pa->pa_lock);
4100 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4101 NULL, &start);
4102 spin_unlock(&pa->pa_lock);
4103 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4104 start, pa->pa_len);
4105 }
4106 ext4_unlock_group(sb, i);
4107
4108 if (grp->bb_free == 0)
4109 continue;
4110 printk(KERN_ERR "%u: %d/%d \n",
4111 i, grp->bb_free, grp->bb_fragments);
4112 }
4113 printk(KERN_ERR "\n");
4114}
4115#else
4116static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4117{
4118 return;
4119}
4120#endif
4121
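/*
 * We use locality group preallocation for small-size files. The size of
 * the file is determined by the current size or the resulting size after
 * allocation, whichever is larger.
 *
 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
 */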
4129static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4130{
4131 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4132 int bsbits = ac->ac_sb->s_blocksize_bits;
4133 loff_t size, isize;
4134
4135 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4136 return;
4137
4138 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
4139 return;
4140
4141 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
4142 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4143 >> bsbits;
4144
4145 if ((size == isize) &&
4146 !ext4_fs_is_busy(sbi) &&
4147 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4148 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4149 return;
4150 }
4151
4152 if (sbi->s_mb_group_prealloc <= 0) {
4153 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4154 return;
4155 }
4156
4157
4158 size = max(size, isize);
4159 if (size > sbi->s_mb_stream_request) {
4160 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4161 return;
4162 }
4163
4164 BUG_ON(ac->ac_lg != NULL);
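 /*
  * locality group prealloc space is per cpu. The reason for having
  * per-cpu locality groups is to reduce the contention between block
  * requests from multiple CPUs.
  */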
4170 ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups);
4171
4172
4173 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
4174
4175
4176 mutex_lock(&ac->ac_lg->lg_mutex);
4177}
4178
4179static noinline_for_stack int
4180ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4181 struct ext4_allocation_request *ar)
4182{
4183 struct super_block *sb = ar->inode->i_sb;
4184 struct ext4_sb_info *sbi = EXT4_SB(sb);
4185 struct ext4_super_block *es = sbi->s_es;
4186 ext4_group_t group;
4187 unsigned int len;
4188 ext4_fsblk_t goal;
4189 ext4_grpblk_t block;
4190
4191
4192 len = ar->len;
4193
4194
4195 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4196 len = EXT4_CLUSTERS_PER_GROUP(sb);
4197
4198
4199 goal = ar->goal;
4200 if (goal < le32_to_cpu(es->s_first_data_block) ||
4201 goal >= ext4_blocks_count(es))
4202 goal = le32_to_cpu(es->s_first_data_block);
4203 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4204
4205
4206 ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
4207 ac->ac_status = AC_STATUS_CONTINUE;
4208 ac->ac_sb = sb;
4209 ac->ac_inode = ar->inode;
4210 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4211 ac->ac_o_ex.fe_group = group;
4212 ac->ac_o_ex.fe_start = block;
4213 ac->ac_o_ex.fe_len = len;
4214 ac->ac_g_ex = ac->ac_o_ex;
4215 ac->ac_flags = ar->flags;
4216
4217
4218
4219 ext4_mb_group_or_file(ac);
4220
4221 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4222 "left: %u/%u, right %u/%u to %swritable\n",
4223 (unsigned) ar->len, (unsigned) ar->logical,
4224 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4225 (unsigned) ar->lleft, (unsigned) ar->pleft,
4226 (unsigned) ar->lright, (unsigned) ar->pright,
4227 atomic_read(&ar->inode->i_writecount) ? "" : "non-");
4228 return 0;
4229
4230}
4231
4232static noinline_for_stack void
4233ext4_mb_discard_lg_preallocations(struct super_block *sb,
4234 struct ext4_locality_group *lg,
4235 int order, int total_entries)
4236{
4237 ext4_group_t group = 0;
4238 struct ext4_buddy e4b;
4239 struct list_head discard_list;
4240 struct ext4_prealloc_space *pa, *tmp;
4241
4242 mb_debug(1, "discard locality group preallocation\n");
4243
4244 INIT_LIST_HEAD(&discard_list);
4245
4246 spin_lock(&lg->lg_prealloc_lock);
4247 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4248 pa_inode_list) {
4249 spin_lock(&pa->pa_lock);
4250 if (atomic_read(&pa->pa_count)) {
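   /*
    * This is the pa that we just used
    * for block allocation. So don't
    * free that.
    */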
4256 spin_unlock(&pa->pa_lock);
4257 continue;
4258 }
4259 if (pa->pa_deleted) {
4260 spin_unlock(&pa->pa_lock);
4261 continue;
4262 }
4263
4264 BUG_ON(pa->pa_type != MB_GROUP_PA);
4265
4266
4267 pa->pa_deleted = 1;
4268 spin_unlock(&pa->pa_lock);
4269
4270 list_del_rcu(&pa->pa_inode_list);
4271 list_add(&pa->u.pa_tmp_list, &discard_list);
4272
4273 total_entries--;
4274 if (total_entries <= 5) {
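   /*
    * we want to keep only 5 entries on the
    * list, so skip freeing the rest
    */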
4281 break;
4282 }
4283 }
4284 spin_unlock(&lg->lg_prealloc_lock);
4285
4286 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4287
4288 group = ext4_get_group_number(sb, pa->pa_pstart);
4289 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4290 ext4_error(sb, "Error loading buddy information for %u",
4291 group);
4292 continue;
4293 }
4294 ext4_lock_group(sb, group);
4295 list_del(&pa->pa_group_list);
4296 ext4_mb_release_group_pa(&e4b, pa);
4297 ext4_unlock_group(sb, group);
4298
4299 ext4_mb_unload_buddy(&e4b);
4300 list_del(&pa->u.pa_tmp_list);
4301 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4302 }
4303}
4304
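/*
 * We have incremented pa_count, so the pa cannot be freed at this
 * point.  We also hold lg_mutex, so no parallel allocation from this
 * lg is possible; that means pa_free cannot change under us.
 *
 * A parallel ext4_mb_discard_lg_preallocations() is still possible,
 * which may move entries from the lg list to the discard list.
 */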
4314static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4315{
4316 int order, added = 0, lg_prealloc_count = 1;
4317 struct super_block *sb = ac->ac_sb;
4318 struct ext4_locality_group *lg = ac->ac_lg;
4319 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4320
4321 order = fls(pa->pa_free) - 1;
4322 if (order > PREALLOC_TB_SIZE - 1)
4323
4324 order = PREALLOC_TB_SIZE - 1;
4325
4326 spin_lock(&lg->lg_prealloc_lock);
4327 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4328 pa_inode_list) {
4329 spin_lock(&tmp_pa->pa_lock);
4330 if (tmp_pa->pa_deleted) {
4331 spin_unlock(&tmp_pa->pa_lock);
4332 continue;
4333 }
4334 if (!added && pa->pa_free < tmp_pa->pa_free) {
4335
4336 list_add_tail_rcu(&pa->pa_inode_list,
4337 &tmp_pa->pa_inode_list);
4338 added = 1;
4339
4340
4341
4342
4343 }
4344 spin_unlock(&tmp_pa->pa_lock);
4345 lg_prealloc_count++;
4346 }
4347 if (!added)
4348 list_add_tail_rcu(&pa->pa_inode_list,
4349 &lg->lg_prealloc_list[order]);
4350 spin_unlock(&lg->lg_prealloc_lock);
4351
4352
4353 if (lg_prealloc_count > 8) {
4354 ext4_mb_discard_lg_preallocations(sb, lg,
4355 order, lg_prealloc_count);
4356 return;
4357 }
 return;
4359}
4360
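/*
 * release all resources used by the allocation
 */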
4364static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4365{
4366 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4367 struct ext4_prealloc_space *pa = ac->ac_pa;
4368 if (pa) {
4369 if (pa->pa_type == MB_GROUP_PA) {
4370
4371 spin_lock(&pa->pa_lock);
4372 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4373 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4374 pa->pa_free -= ac->ac_b_ex.fe_len;
4375 pa->pa_len -= ac->ac_b_ex.fe_len;
4376 spin_unlock(&pa->pa_lock);
4377 }
4378 }
4379 if (pa) {
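  /*
   * We want to add the pa to the right bucket.
   * Remove it from the list and, while adding,
   * make sure the list to which we are adding
   * doesn't grow big.
   */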
4386 if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
4387 spin_lock(pa->pa_obj_lock);
4388 list_del_rcu(&pa->pa_inode_list);
4389 spin_unlock(pa->pa_obj_lock);
4390 ext4_mb_add_n_trim(ac);
4391 }
4392 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4393 }
4394 if (ac->ac_bitmap_page)
4395 put_page(ac->ac_bitmap_page);
4396 if (ac->ac_buddy_page)
4397 put_page(ac->ac_buddy_page);
4398 if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
4399 mutex_unlock(&ac->ac_lg->lg_mutex);
4400 ext4_mb_collect_stats(ac);
4401 return 0;
4402}
4403
4404static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4405{
4406 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4407 int ret;
4408 int freed = 0;
4409
4410 trace_ext4_mb_discard_preallocations(sb, needed);
4411 for (i = 0; i < ngroups && needed > 0; i++) {
4412 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4413 freed += ret;
4414 needed -= ret;
4415 }
4416
4417 return freed;
4418}
4419
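/*
 * Main entry point into mballoc to allocate blocks:
 * it tries to use preallocation first, then falls back
 * to the usual allocation path.
 */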
4425ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4426 struct ext4_allocation_request *ar, int *errp)
4427{
4428 int freed;
4429 struct ext4_allocation_context *ac = NULL;
4430 struct ext4_sb_info *sbi;
4431 struct super_block *sb;
4432 ext4_fsblk_t block = 0;
4433 unsigned int inquota = 0;
4434 unsigned int reserv_clstrs = 0;
4435
4436 might_sleep();
4437 sb = ar->inode->i_sb;
4438 sbi = EXT4_SB(sb);
4439
4440 trace_ext4_request_blocks(ar);
4441
4442
4443 if (IS_NOQUOTA(ar->inode))
4444 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4445
4446 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
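  /* Without delayed allocation we need to verify
   * there are enough free blocks to do the block allocation
   * and verify the allocation doesn't exceed the quota limits.
   */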
4451 while (ar->len &&
4452 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4453
4454
4455 cond_resched();
4456 ar->len = ar->len >> 1;
4457 }
4458 if (!ar->len) {
4459 *errp = -ENOSPC;
4460 return 0;
4461 }
4462 reserv_clstrs = ar->len;
4463 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4464 dquot_alloc_block_nofail(ar->inode,
4465 EXT4_C2B(sbi, ar->len));
4466 } else {
4467 while (ar->len &&
4468 dquot_alloc_block(ar->inode,
4469 EXT4_C2B(sbi, ar->len))) {
4470
4471 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4472 ar->len--;
4473 }
4474 }
4475 inquota = ar->len;
4476 if (ar->len == 0) {
4477 *errp = -EDQUOT;
4478 goto out;
4479 }
4480 }
4481
4482 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4483 if (!ac) {
4484 ar->len = 0;
4485 *errp = -ENOMEM;
4486 goto out;
4487 }
4488
4489 *errp = ext4_mb_initialize_context(ac, ar);
4490 if (*errp) {
4491 ar->len = 0;
4492 goto out;
4493 }
4494
4495 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4496 if (!ext4_mb_use_preallocated(ac)) {
4497 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4498 ext4_mb_normalize_request(ac, ar);
4499repeat:
4500
4501 *errp = ext4_mb_regular_allocator(ac);
4502 if (*errp)
4503 goto discard_and_exit;
4504
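  /* as we've just preallocated more space than the
   * user requested originally, we store the allocated
   * space in a special descriptor */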
4508 if (ac->ac_status == AC_STATUS_FOUND &&
4509 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4510 *errp = ext4_mb_new_preallocation(ac);
4511 if (*errp) {
4512 discard_and_exit:
4513 ext4_discard_allocated_blocks(ac);
4514 goto errout;
4515 }
4516 }
4517 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4518 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4519 if (*errp) {
4520 ext4_discard_allocated_blocks(ac);
4521 goto errout;
4522 } else {
4523 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4524 ar->len = ac->ac_b_ex.fe_len;
4525 }
4526 } else {
4527 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4528 if (freed)
4529 goto repeat;
4530 *errp = -ENOSPC;
4531 }
4532
4533errout:
4534 if (*errp) {
4535 ac->ac_b_ex.fe_len = 0;
4536 ar->len = 0;
4537 ext4_mb_show_ac(ac);
4538 }
4539 ext4_mb_release_context(ac);
4540out:
4541 if (ac)
4542 kmem_cache_free(ext4_ac_cachep, ac);
4543 if (inquota && ar->len < inquota)
4544 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4545 if (!ar->len) {
4546 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
4547
4548 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4549 reserv_clstrs);
4550 }
4551
4552 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
4553
4554 return block;
4555}
4556
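/*
 * We can merge two free data extents only if the physical blocks
 * are contiguous, AND the extents were freed by the same transaction,
 * AND the blocks are associated with the same group.
 */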
4562static int can_merge(struct ext4_free_data *entry1,
4563 struct ext4_free_data *entry2)
4564{
4565 if ((entry1->efd_tid == entry2->efd_tid) &&
4566 (entry1->efd_group == entry2->efd_group) &&
4567 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4568 return 1;
4569 return 0;
4570}
4571
4572static noinline_for_stack int
4573ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4574 struct ext4_free_data *new_entry)
4575{
4576 ext4_group_t group = e4b->bd_group;
4577 ext4_grpblk_t cluster;
4578 ext4_grpblk_t clusters = new_entry->efd_count;
4579 struct ext4_free_data *entry;
4580 struct ext4_group_info *db = e4b->bd_info;
4581 struct super_block *sb = e4b->bd_sb;
4582 struct ext4_sb_info *sbi = EXT4_SB(sb);
4583 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4584 struct rb_node *parent = NULL, *new_node;
4585
4586 BUG_ON(!ext4_handle_valid(handle));
4587 BUG_ON(e4b->bd_bitmap_page == NULL);
4588 BUG_ON(e4b->bd_buddy_page == NULL);
4589
4590 new_node = &new_entry->efd_node;
4591 cluster = new_entry->efd_start_cluster;
4592
4593 if (!*n) {
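  /* first freed extent for this group: we need to
   * protect the buddy cache from being freed,
   * otherwise we'll refresh it from the
   * on-disk bitmap and lose not-yet-available
   * blocks */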
4599 get_page(e4b->bd_buddy_page);
4600 get_page(e4b->bd_bitmap_page);
4601 }
4602 while (*n) {
4603 parent = *n;
4604 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4605 if (cluster < entry->efd_start_cluster)
4606 n = &(*n)->rb_left;
4607 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4608 n = &(*n)->rb_right;
4609 else {
4610 ext4_grp_locked_error(sb, group, 0,
4611 ext4_group_first_block_no(sb, group) +
4612 EXT4_C2B(sbi, cluster),
4613 "Block already on to-be-freed list");
4614 return 0;
4615 }
4616 }
4617
4618 rb_link_node(new_node, parent, n);
4619 rb_insert_color(new_node, &db->bb_free_root);
4620
4621
4622 node = rb_prev(new_node);
4623 if (node) {
4624 entry = rb_entry(node, struct ext4_free_data, efd_node);
4625 if (can_merge(entry, new_entry) &&
4626 ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
4627 new_entry->efd_start_cluster = entry->efd_start_cluster;
4628 new_entry->efd_count += entry->efd_count;
4629 rb_erase(node, &(db->bb_free_root));
4630 kmem_cache_free(ext4_free_data_cachep, entry);
4631 }
4632 }
4633
4634 node = rb_next(new_node);
4635 if (node) {
4636 entry = rb_entry(node, struct ext4_free_data, efd_node);
4637 if (can_merge(new_entry, entry) &&
4638 ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
4639 new_entry->efd_count += entry->efd_count;
4640 rb_erase(node, &(db->bb_free_root));
4641 kmem_cache_free(ext4_free_data_cachep, entry);
4642 }
4643 }
4644
4645 new_entry->efd_jce.jce_func = ext4_free_data_callback;
4646 spin_lock(&sbi->s_md_lock);
4647 _ext4_journal_callback_add(handle, &new_entry->efd_jce);
4648 sbi->s_mb_free_pending += clusters;
4649 spin_unlock(&sbi->s_md_lock);
4650 return 0;
4651}
4652
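/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle:	handle for this transaction
 * @inode:	inode
 * @bh:		optional buffer of the block to be freed
 * @block:	starting physical block to be freed
 * @count:	number of blocks to be freed
 * @flags:	flags used by ext4_free_blocks
 */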
4661void ext4_free_blocks(handle_t *handle, struct inode *inode,
4662 struct buffer_head *bh, ext4_fsblk_t block,
4663 unsigned long count, int flags)
4664{
4665 struct buffer_head *bitmap_bh = NULL;
4666 struct super_block *sb = inode->i_sb;
4667 struct ext4_group_desc *gdp;
4668 unsigned int overflow;
4669 ext4_grpblk_t bit;
4670 struct buffer_head *gd_bh;
4671 ext4_group_t block_group;
4672 struct ext4_sb_info *sbi;
4673 struct ext4_buddy e4b;
4674 unsigned int count_clusters;
4675 int err = 0;
4676 int ret;
4677
4678 might_sleep();
4679 if (bh) {
4680 if (block)
4681 BUG_ON(block != bh->b_blocknr);
4682 else
4683 block = bh->b_blocknr;
4684 }
4685
4686 sbi = EXT4_SB(sb);
4687 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4688 !ext4_data_block_valid(sbi, block, count)) {
4689 ext4_error(sb, "Freeing blocks not in datazone - "
4690 "block = %llu, count = %lu", block, count);
4691 goto error_return;
4692 }
4693
4694 ext4_debug("freeing block %llu\n", block);
4695 trace_ext4_free_blocks(inode, block, count, flags);
4696
4697 if (bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4698 BUG_ON(count > 1);
4699
4700 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4701 inode, bh, block);
4702 }
4703
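 /*
  * If the extent to be freed does not begin on a cluster
  * boundary, we need to deal with partial clusters at the
  * beginning and end of the extent.  Normally we will free
  * blocks at the beginning or the end unless we are explicitly
  * requested to avoid doing so.
  */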
4711 overflow = EXT4_PBLK_COFF(sbi, block);
4712 if (overflow) {
4713 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4714 overflow = sbi->s_cluster_ratio - overflow;
4715 block += overflow;
4716 if (count > overflow)
4717 count -= overflow;
4718 else
4719 return;
4720 } else {
4721 block -= overflow;
4722 count += overflow;
4723 }
4724 }
4725 overflow = EXT4_LBLK_COFF(sbi, count);
4726 if (overflow) {
4727 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4728 if (count > overflow)
4729 count -= overflow;
4730 else
4731 return;
4732 } else
4733 count += sbi->s_cluster_ratio - overflow;
4734 }
4735
4736 if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
4737 int i;
4738 int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
4739
4740 for (i = 0; i < count; i++) {
4741 cond_resched();
4742 if (is_metadata)
4743 bh = sb_find_get_block(inode->i_sb, block + i);
4744 ext4_forget(handle, is_metadata, inode, bh, block + i);
4745 }
4746 }
4747
4748do_more:
4749 overflow = 0;
4750 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
4751
4752 if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
4753 ext4_get_group_info(sb, block_group))))
4754 return;
4755
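 /*
  * Check to see if we are freeing blocks across a group
  * boundary.
  */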
4760 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4761 overflow = EXT4_C2B(sbi, bit) + count -
4762 EXT4_BLOCKS_PER_GROUP(sb);
4763 count -= overflow;
4764 }
4765 count_clusters = EXT4_NUM_B2C(sbi, count);
4766 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4767 if (IS_ERR(bitmap_bh)) {
4768 err = PTR_ERR(bitmap_bh);
4769 bitmap_bh = NULL;
4770 goto error_return;
4771 }
4772 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4773 if (!gdp) {
4774 err = -EIO;
4775 goto error_return;
4776 }
4777
4778 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4779 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4780 in_range(block, ext4_inode_table(sb, gdp),
4781 EXT4_SB(sb)->s_itb_per_group) ||
4782 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4783 EXT4_SB(sb)->s_itb_per_group)) {
4784
4785 ext4_error(sb, "Freeing blocks in system zone - "
4786 "Block = %llu, count = %lu", block, count);
4787
4788 goto error_return;
4789 }
4790
4791 BUFFER_TRACE(bitmap_bh, "getting write access");
4792 err = ext4_journal_get_write_access(handle, bitmap_bh);
4793 if (err)
4794 goto error_return;
4795
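 /*
  * We are about to modify some metadata.  Call the journal APIs
  * to unshare ->b_data if a currently-committing transaction is
  * using it.
  */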
4801 BUFFER_TRACE(gd_bh, "get_write_access");
4802 err = ext4_journal_get_write_access(handle, gd_bh);
4803 if (err)
4804 goto error_return;
4805#ifdef AGGRESSIVE_CHECK
4806 {
4807 int i;
4808 for (i = 0; i < count_clusters; i++)
4809 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4810 }
4811#endif
4812 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4813
4814
4815 err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
4816 GFP_NOFS|__GFP_NOFAIL);
4817 if (err)
4818 goto error_return;
4819
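 /*
  * We need to make sure we don't reuse the freed block until after the
  * transaction is committed. We make an exception if the inode is to be
  * written in writeback mode, since writeback mode has weak data
  * consistency guarantees.
  */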
4826 if (ext4_handle_valid(handle) &&
4827 ((flags & EXT4_FREE_BLOCKS_METADATA) ||
4828 !ext4_should_writeback_data(inode))) {
4829 struct ext4_free_data *new_entry;
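  /*
   * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
   * to fail.
   */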
4834 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4835 GFP_NOFS|__GFP_NOFAIL);
4836 new_entry->efd_start_cluster = bit;
4837 new_entry->efd_group = block_group;
4838 new_entry->efd_count = count_clusters;
4839 new_entry->efd_tid = handle->h_transaction->t_tid;
4840
4841 ext4_lock_group(sb, block_group);
4842 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4843 ext4_mb_free_metadata(handle, &e4b, new_entry);
4844 } else {
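  /* need to update group_info->bb_free and the bitmap
   * with the group lock held; buddy generation looks at
   * them under the group lock, which avoids the race.
   */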
4849 if (test_opt(sb, DISCARD)) {
4850 err = ext4_issue_discard(sb, block_group, bit, count);
4851 if (err && err != -EOPNOTSUPP)
4852 ext4_msg(sb, KERN_WARNING, "discard request in"
4853 " group:%d block:%d count:%lu failed"
4854 " with %d", block_group, bit, count,
4855 err);
4856 } else
4857 EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
4858
4859 ext4_lock_group(sb, block_group);
4860 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4861 mb_free_blocks(inode, &e4b, bit, count_clusters);
4862 }
4863
4864 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4865 ext4_free_group_clusters_set(sb, gdp, ret);
4866 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4867 ext4_group_desc_csum_set(sb, block_group, gdp);
4868 ext4_unlock_group(sb, block_group);
4869
4870 if (sbi->s_log_groups_per_flex) {
4871 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4872 atomic64_add(count_clusters,
4873 &sbi->s_flex_groups[flex_group].free_clusters);
4874 }
4875
4876 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4877 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4878 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4879
4880 ext4_mb_unload_buddy(&e4b);
4881
4882
4883 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4884 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4885
4886
4887 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
4888 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
4889 if (!err)
4890 err = ret;
4891
4892 if (overflow && !err) {
4893 block += count;
4894 count = overflow;
4895 put_bh(bitmap_bh);
4896 goto do_more;
4897 }
4898error_return:
4899 brelse(bitmap_bh);
4900 ext4_std_error(sb, err);
4901 return;
4902}
4903
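/**
 * ext4_group_add_blocks() -- Add given blocks to an existing group
 * @handle:	handle to this transaction
 * @sb:		super block
 * @block:	start physical block to add to the block group
 * @count:	number of blocks to add
 *
 * This marks the blocks as free in the bitmap and buddy.
 */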
int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
			  ext4_fsblk_t block, unsigned long count)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	ext4_grpblk_t bit;
	unsigned int i;
	struct ext4_group_desc *desc;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_buddy e4b;
	int err = 0, ret, blk_free_count;
	ext4_grpblk_t blocks_freed;

	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);

	if (count == 0)
		return 0;

	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);

	/*
	 * Check to see if we are freeing blocks across a group boundary.
	 */
	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
		ext4_warning(sb, "too many blocks added to group %u",
			     block_group);
		err = -EINVAL;
		goto error_return;
	}

	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
	if (IS_ERR(bitmap_bh)) {
		err = PTR_ERR(bitmap_bh);
		bitmap_bh = NULL;
		goto error_return;
	}

	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
	if (!desc) {
		err = -EIO;
		goto error_return;
	}

	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, desc),
		     sbi->s_itb_per_group)) {
		ext4_error(sb, "Adding blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		err = -EINVAL;
		goto error_return;
	}

	BUFFER_TRACE(bitmap_bh, "getting write access");
	err = ext4_journal_get_write_access(handle, bitmap_bh);
	if (err)
		goto error_return;

	/*
	 * We are about to modify some metadata.  Call the journal APIs
	 * to unshare ->b_data if a currently-committing transaction is
	 * using it
	 */
	BUFFER_TRACE(gd_bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, gd_bh);
	if (err)
		goto error_return;

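	/*
	 * Count only the blocks that are actually marked in use; a bit
	 * that is already clear is reported but otherwise tolerated.
	 */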
	for (i = 0, blocks_freed = 0; i < count; i++) {
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
			ext4_error(sb, "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
			blocks_freed++;
		}
	}

	err = ext4_mb_load_buddy(sb, block_group, &e4b);
	if (err)
		goto error_return;

	/*
	 * We need to update group_info->bb_free and the bitmap with the
	 * group lock held; mb_generate_buddy() reads them under the same
	 * lock.
	 */
	ext4_lock_group(sb, block_group);
	mb_clear_bits(bitmap_bh->b_data, bit, count);
	mb_free_blocks(NULL, &e4b, bit, count);
	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
	ext4_free_group_clusters_set(sb, desc, blk_free_count);
	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
	ext4_group_desc_csum_set(sb, block_group, desc);
	ext4_unlock_group(sb, block_group);
	percpu_counter_add(&sbi->s_freeclusters_counter,
			   EXT4_NUM_B2C(sbi, blocks_freed));

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
		atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
			     &sbi->s_flex_groups[flex_group].free_clusters);
	}

	ext4_mb_unload_buddy(&e4b);

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err)
		err = ret;

error_return:
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return err;
}
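
/*
 * ext4_group_add_blocks() is used by the online resize code (see
 * ext4_group_extend() in resize.c) to hand newly grown blocks over
 * to the allocator.
 */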

/**
 * ext4_trim_extent -- function to TRIM one single free extent in the group
 * @sb:		super block for the file system
 * @start:	starting block of the free extent in the alloc. group
 * @count:	number of blocks to TRIM
 * @group:	alloc. group we are working with
 * @e4b:	ext4 buddy for the group
 *
 * Trim "count" blocks starting at "start" in the "group". To assure that no
 * one will allocate those blocks, mark them as used in the buddy bitmap.
 * This must be called under the group lock.
 */
static int ext4_trim_extent(struct super_block *sb, int start, int count,
			    ext4_group_t group, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
	struct ext4_free_extent ex;
	int ret = 0;

	trace_ext4_trim_extent(sb, group, start, count);

	assert_spin_locked(ext4_group_lock_ptr(sb, group));

	ex.fe_start = start;
	ex.fe_group = group;
	ex.fe_len = count;

	/*
	 * Mark the blocks used, so no one can reuse them while they are
	 * being trimmed. The discard itself must run without the group
	 * lock held, so drop the lock around ext4_issue_discard() and
	 * return the extent to the free pool afterwards.
	 */
	mb_mark_used(e4b, &ex);
	ext4_unlock_group(sb, group);
	ret = ext4_issue_discard(sb, group, start, count);
	ext4_lock_group(sb, group);
	mb_free_blocks(NULL, e4b, start, ex.fe_len);
	return ret;
}

/**
 * ext4_trim_all_free -- function to trim all free space in alloc. group
 * @sb:		super block for the file system
 * @group:	group to be trimmed
 * @start:	first group block to examine
 * @max:	last group block to examine
 * @minblocks:	minimum extent block count
 *
 * ext4_trim_all_free walks through the group's block bitmap searching for
 * free extents. When a free extent of at least @minblocks is found, it is
 * marked as used in the group buddy bitmap, a TRIM command is issued on it,
 * and it is freed again in the buddy bitmap. This repeats until the whole
 * group has been scanned.
 */
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
		   ext4_grpblk_t start, ext4_grpblk_t max,
		   ext4_grpblk_t minblocks)
{
	void *bitmap;
	ext4_grpblk_t next, count = 0, free_count = 0;
	struct ext4_buddy e4b;
	int ret = 0;

	trace_ext4_trim_all_free(sb, group, start, max);

	ret = ext4_mb_load_buddy(sb, group, &e4b);
	if (ret) {
		ext4_error(sb, "Error in loading buddy "
			   "information for %u", group);
		return ret;
	}
	bitmap = e4b.bd_bitmap;

	ext4_lock_group(sb, group);
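	/*
	 * Skip the group if it has already been trimmed and no smaller
	 * minimum extent length has been requested since then.
	 */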
	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
		goto out;

	start = (e4b.bd_info->bb_first_free > start) ?
		e4b.bd_info->bb_first_free : start;

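	/*
	 * Scan the bitmap: each run of zero bits [start, next) is a free
	 * extent; trim it if it is at least minblocks long.
	 */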
	while (start <= max) {
		start = mb_find_next_zero_bit(bitmap, max + 1, start);
		if (start > max)
			break;
		next = mb_find_next_bit(bitmap, max + 1, start);

		if ((next - start) >= minblocks) {
			ret = ext4_trim_extent(sb, start,
					       next - start, group, &e4b);
			if (ret && ret != -EOPNOTSUPP)
				break;
			ret = 0;
			count += next - start;
		}
		free_count += next - start;
		start = next + 1;

		if (fatal_signal_pending(current)) {
			count = -ERESTARTSYS;
			break;
		}

		if (need_resched()) {
			ext4_unlock_group(sb, group);
			cond_resched();
			ext4_lock_group(sb, group);
		}

		if ((e4b.bd_info->bb_free - free_count) < minblocks)
			break;
	}

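	/*
	 * If the scan completed without error, report the number of
	 * trimmed clusters and remember that the group has been trimmed.
	 */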
	if (!ret) {
		ret = count;
		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
	}
out:
	ext4_unlock_group(sb, group);
	ext4_mb_unload_buddy(&e4b);

	ext4_debug("trimmed %d blocks in the group %d\n",
		   count, group);

	return ret;
}

/**
 * ext4_trim_fs() -- trim ioctl handler
 * @sb:		superblock for the file system
 * @range:	fstrim_range structure
 *
 * start:	first byte to trim
 * len:		number of bytes to trim from start
 * minlen:	minimum extent length in bytes
 *
 * ext4_trim_fs goes through all allocation groups containing bytes from
 * start to start+len. For each such group, ext4_trim_all_free is invoked
 * to trim all free space.
 */
int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
	struct ext4_group_info *grp;
	ext4_group_t group, first_group, last_group;
	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
	uint64_t start, end, minlen, trimmed = 0;
	ext4_fsblk_t first_data_blk =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
	int ret = 0;

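	/*
	 * The fstrim_range fields are in bytes; convert them to blocks
	 * (and minlen further to clusters) before validating the range.
	 */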
	start = range->start >> sb->s_blocksize_bits;
	end = start + (range->len >> sb->s_blocksize_bits) - 1;
	minlen = EXT4_NUM_B2C(EXT4_SB(sb),
			      range->minlen >> sb->s_blocksize_bits);

	if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
	    start >= max_blks ||
	    range->len < sb->s_blocksize)
		return -EINVAL;
	if (end >= max_blks)
		end = max_blks - 1;
	if (end <= first_data_blk)
		goto out;
	if (start < first_data_blk)
		start = first_data_blk;

	/* Determine first and last group to examine based on start and end */
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
				     &first_group, &first_cluster);
	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
				     &last_group, &last_cluster);

	/* end now represents the last cluster to discard in this group */
	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;

	for (group = first_group; group <= last_group; group++) {
		grp = ext4_get_group_info(sb, group);
		/* We only do this if the grp has never been initialized */
		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
			if (ret)
				break;
		}

		/*
		 * For all groups except the last one, the last cluster is
		 * always EXT4_CLUSTERS_PER_GROUP(sb) - 1, so we only need
		 * to change end for the last group; last_cluster was
		 * already computed by ext4_get_group_no_and_offset().
		 */
		if (group == last_group)
			end = last_cluster;

		if (grp->bb_free >= minlen) {
			cnt = ext4_trim_all_free(sb, group, first_cluster,
						 end, minlen);
			if (cnt < 0) {
				ret = cnt;
				break;
			}
			trimmed += cnt;
		}

		/*
		 * For every group except the first one, the first cluster
		 * to discard is always cluster #0.
		 */
		first_cluster = 0;
	}

	if (!ret)
		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);

out:
	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
	return ret;
}
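
/*
 * ext4_trim_fs() backs the FITRIM ioctl. A minimal userspace sketch
 * (illustrative only, not part of this file):
 *
 *	struct fstrim_range range = {
 *		.start = 0, .len = ULLONG_MAX, .minlen = 0,
 *	};
 *	int fd = open("/mnt", O_RDONLY);
 *	if (fd >= 0 && ioctl(fd, FITRIM, &range) == 0)
 *		printf("%llu bytes trimmed\n",
 *		       (unsigned long long) range.len);
 */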